From 9efd3db92a4f9400fff0e9c1cc5c6baace0338af Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:24:22 +0530 Subject: [PATCH 01/15] patch: apply 000-MSFT-Patch-Change_Windows_datafile_name_to_omit_version_number Windows OS ICU build uses a versionless data filename (icudtl.dat) instead of versioned (icudt78l.dat) so the filename does not churn each upgrade. Guarded by ICU_DATA_DIR_WINDOWS; public/SDK build is unaffected. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/ucmndata.h | 8 ++++++-- icu/icu4c/source/tools/toolutil/pkg_gencmn.cpp | 8 +++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/icu/icu4c/source/common/ucmndata.h b/icu/icu4c/source/common/ucmndata.h index 486b4fd7b5f..36f82f6f9ea 100644 --- a/icu/icu4c/source/common/ucmndata.h +++ b/icu/icu4c/source/common/ucmndata.h @@ -30,8 +30,12 @@ #include "unicode/udata.h" #include "umapfile.h" - -#define COMMON_DATA_NAME U_ICUDATA_NAME +// MSFT-Change: In the Windows OS ICU build, we only have one data package, and we use a versionless name in filename. +#if defined(ICU_DATA_DIR_WINDOWS) +# define COMMON_DATA_NAME "icudtl" +#else +# define COMMON_DATA_NAME U_ICUDATA_NAME +#endif typedef struct { uint16_t headerSize; diff --git a/icu/icu4c/source/tools/toolutil/pkg_gencmn.cpp b/icu/icu4c/source/tools/toolutil/pkg_gencmn.cpp index c1a46e9aed1..054208170af 100644 --- a/icu/icu4c/source/tools/toolutil/pkg_gencmn.cpp +++ b/icu/icu4c/source/tools/toolutil/pkg_gencmn.cpp @@ -22,7 +22,13 @@ #define STRING_STORE_SIZE 200000 -#define COMMON_DATA_NAME U_ICUDATA_NAME +// MSFT-Change: In the Windows OS ICU build, we only have one data package, and we use a versionless name in filename. 
+#if defined(ICU_DATA_DIR_WINDOWS) +# define COMMON_DATA_NAME "icudtl" +#else +# define COMMON_DATA_NAME U_ICUDATA_NAME +#endif + #define DATA_TYPE "dat" /* ICU package data file format (.dat files) ------------------------------- *** From 806ffb6177806d4d11abcd8756fa651a9357b856 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:26:36 +0530 Subject: [PATCH 02/15] patch: apply 001-MSFT-Patch-ICU_Header_changes_for_Windows Add IGNORE_WINDOWS_HEADERS_START/END markers around regions of putil.h and unistr.h that should be stripped from the Windows OS SDK header. putil.h: data-directory + timezone-files-directory + filesystem-separator constants are not user-mutable in Windows OS ICU. unistr.h: UStringCaseMapper internal callback typedef is meaningless to SDK consumers that don't expose C++ UnicodeString. No C/C++ semantics change; markers are pure comments interpreted by the Windows SDK header-stripping tool only. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/unicode/putil.h | 6 +++++- icu/icu4c/source/common/unicode/unistr.h | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/icu/icu4c/source/common/unicode/putil.h b/icu/icu4c/source/common/unicode/putil.h index 500c21252fc..c0b34556ebc 100644 --- a/icu/icu4c/source/common/unicode/putil.h +++ b/icu/icu4c/source/common/unicode/putil.h @@ -42,6 +42,9 @@ * functions may have to be re-implemented. */ +//IGNORE_WINDOWS_HEADERS_START +// MSFT-Change: The Windows OS version of ICU uses a single fixed data file. + /** * Return the ICU data directory. * The data directory is where common format ICU data files (.dat files) @@ -112,7 +115,7 @@ U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status); U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status); #endif /* U_HIDE_INTERNAL_API */ - +// MSFT-TODO: Should these be considered for Windows? 
/** * @{ * Filesystem file and path separator characters. @@ -134,6 +137,7 @@ U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode * # define U_FILE_ALT_SEP_STRING "/" # define U_PATH_SEP_STRING ":" #endif +//IGNORE_WINDOWS_HEADERS_END /** @} */ diff --git a/icu/icu4c/source/common/unicode/unistr.h b/icu/icu4c/source/common/unicode/unistr.h index 161b84527a6..145bb8ea3ef 100644 --- a/icu/icu4c/source/common/unicode/unistr.h +++ b/icu/icu4c/source/common/unicode/unistr.h @@ -60,6 +60,9 @@ class Edits; U_NAMESPACE_END +//IGNORE_WINDOWS_HEADERS_START +// MSFT-Change: Hiding the @internal API below, since we don't expose the C++ UnicodeString. + // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. /** * Internal string case mapping function type. @@ -77,6 +80,8 @@ UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::Edits *edits, UErrorCode &errorCode); +//IGNORE_WINDOWS_HEADERS_END + U_NAMESPACE_BEGIN class Locale; // unicode/locid.h From acee30d036cd4e76028aeb78b72fa774c9896d0d Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:42:41 +0530 Subject: [PATCH 03/15] patch: apply 004-MSFT-Patch-ICU_Header_Additional_changes_for_Windows Add IGNORE_WINDOWS_HEADERS_START/END markers around regions of 4 public ICU headers so the Windows SDK header-stripping tool omits them. Reworked from ICU 72 form (8/12 hunks landed at new offsets, 2 hand-ported, 2 dropped): - uchar.h: wraps U_UNICODE_VERSION macro (runtime-variable; SDK consumers should use u_getUnicodeVersion() API instead). - uconfig.h: wraps uconfig_local.h include hook and UCONFIG_USE_WINDOWS_LCID_MAPPING_API switch (compile-time settings irrelevant to SDK). - utypes.h: wraps ICUDATA naming scheme constants (Windows OS uses a fixed single-data-file layout). - uversion.h: wraps U_NAMESPACE_BEGIN/END and C++ namespace plumbing (Windows OS SDK exposes flat C APIs only). 
Dropped umachine.h hunks: U_OVERRIDE and U_FINAL macros no longer exist in ICU 78 (upstream removed them in favor of using the C++11 keywords directly). The patch's intent for that file is resolved by upstream. No C/C++ semantics change; markers are comments consumed only by the Windows SDK header-stripping tool. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/unicode/uchar.h | 6 ++++++ icu/icu4c/source/common/unicode/uconfig.h | 11 +++++++++++ icu/icu4c/source/common/unicode/utypes.h | 7 +++++++ icu/icu4c/source/common/unicode/uversion.h | 6 ++++++ 4 files changed, 30 insertions(+) diff --git a/icu/icu4c/source/common/unicode/uchar.h b/icu/icu4c/source/common/unicode/uchar.h index d33b8cf7f3c..047d61c8122 100644 --- a/icu/icu4c/source/common/unicode/uchar.h +++ b/icu/icu4c/source/common/unicode/uchar.h @@ -49,6 +49,10 @@ typedef struct USet USet; U_CDECL_BEGIN +//IGNORE_WINDOWS_HEADERS_START +// MSFT-Change: The value of these macros can change at runtime, so the API u_getUnicodeVersion +// should be used instead of any version macro. + /*==========================================================================*/ /* Unicode version number */ /*==========================================================================*/ @@ -63,6 +67,8 @@ U_CDECL_BEGIN */ #define U_UNICODE_VERSION "17.0" +//IGNORE_WINDOWS_HEADERS_END + /** * \file * \brief C API: Unicode Properties diff --git a/icu/icu4c/source/common/unicode/uconfig.h b/icu/icu4c/source/common/unicode/uconfig.h index c0488d502bf..3c118288665 100644 --- a/icu/icu4c/source/common/unicode/uconfig.h +++ b/icu/icu4c/source/common/unicode/uconfig.h @@ -45,6 +45,10 @@ * @stable ICU 2.4 */ +//IGNORE_WINDOWS_HEADERS_START +// MSFT-Change: Since these are compile time settings, it doesn't make sense to +// load a user config header in the Windows OS SDK version. 
+ /** * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h" * prior to determining default settings for uconfig variables. @@ -55,6 +59,8 @@ #include "uconfig_local.h" #endif +//IGNORE_WINDOWS_HEADERS_END + /** * \def U_DEBUG * Determines whether to include debugging code. @@ -379,6 +385,9 @@ # define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL #endif +//IGNORE_WINDOWS_HEADERS_START +// MSFT-Change: We always use the OS LCID mapping API for the Windows OS build of ICU. + /** * \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API * On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines @@ -391,6 +400,8 @@ # define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1 #endif +//IGNORE_WINDOWS_HEADERS_END + /* i18n library switches ---------------------------------------------------- */ /** diff --git a/icu/icu4c/source/common/unicode/utypes.h b/icu/icu4c/source/common/unicode/utypes.h index f53b7536f20..0f6bc70fd54 100644 --- a/icu/icu4c/source/common/unicode/utypes.h +++ b/icu/icu4c/source/common/unicode/utypes.h @@ -104,6 +104,11 @@ /** @} */ +//IGNORE_WINDOWS_HEADERS_START +// MSFT-Change: For the Windows OS version of ICU, it doesn't make sense to expose these +// constants which are only for loading the main ICU data file. We also don't +// support using a data DLL either, so omit them from the Windows SDK header. + /*===========================================================================*/ /* ICUDATA naming scheme */ /*===========================================================================*/ @@ -191,6 +196,8 @@ #endif #endif /* U_HIDE_INTERNAL_API */ +//IGNORE_WINDOWS_HEADERS_END + /** * \def NULL * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C. 
diff --git a/icu/icu4c/source/common/unicode/uversion.h b/icu/icu4c/source/common/unicode/uversion.h index 450794ac1ba..dc5ed687cf5 100644 --- a/icu/icu4c/source/common/unicode/uversion.h +++ b/icu/icu4c/source/common/unicode/uversion.h @@ -58,6 +58,10 @@ */ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; +//IGNORE_WINDOWS_HEADERS_START +// MSFT-Change: Since the Windows OS ICU headers are for C APIs only, we don't +// need or want any C++ namespace support. + /*===========================================================================*/ /* C++ namespace if supported. Versioned unless versioning is disabled. */ /*===========================================================================*/ @@ -184,6 +188,8 @@ namespace U_HEADER_ONLY_NAMESPACE {} #endif /* __cplusplus */ +//IGNORE_WINDOWS_HEADERS_END + /*===========================================================================*/ /* General version helper functions. Definitions in putil.c */ /*===========================================================================*/ From 0b6d31fb914b0998715d8b1cd3bb4ad1c9f52968 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:45:28 +0530 Subject: [PATCH 04/15] patch: apply 006-MSFT-Patch-ICU_Make_u_cleanup_NOOP_for_OS_ICU Make u_cleanup() a no-op for the Windows OS ICU build to prevent race-condition crashes when multiple threads (Windows.Globalization, default OS sort, app code) are concurrently using ICU. - Real implementation renamed to uprv_u_cleanup() (private; combined DLL can still call it; not exported from DEF). - New public u_cleanup() under ICU_DATA_DIR_WINDOWS returns no-op; otherwise delegates to uprv_u_cleanup() so public/Nuget consumers retain the original behavior. Reworked: ICU 78 modernized the function signature from (void) to () and NULL to nullptr; the rework matches the new style. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/ucln_cmn.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/icu/icu4c/source/common/ucln_cmn.cpp b/icu/icu4c/source/common/ucln_cmn.cpp index c63bd221929..6d7d2536381 100644 --- a/icu/icu4c/source/common/ucln_cmn.cpp +++ b/icu/icu4c/source/common/ucln_cmn.cpp @@ -36,8 +36,9 @@ static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON]; The cleanup order is important in this function. Please be sure that you have read ucln.h ************************************************/ +// MSFT-Change: Make u_cleanup a no-op for the Windows OS ICU version. U_CAPI void U_EXPORT2 -u_cleanup() +uprv_u_cleanup() { UTRACE_ENTRY_OC(UTRACE_U_CLEANUP); icu::umtx_lock(nullptr); /* Force a memory barrier, so that we are sure to see */ @@ -52,6 +53,31 @@ u_cleanup() /*#endif*/ } +U_CAPI void U_EXPORT2 +u_cleanup() +{ +// When ICU is built as an OS component for Windows, we make the public function u_cleanup +// effectively a no-op because of the following: +// - It is not thread-safe and *forcefully* unloads the ICU data file. +// - The ICU library can simultaneously be used by other threads when this happens, +// either by Windows.Globalization, or by sorting functions when ICU sorting is default. +// - This means an App can call the function at any time, which will cause random crashes. :( +// +// We don't completely remove the functionality though, as we still want/need to be able to +// unload resources when the ICU DLL is unloaded. Instead we make it a private (uprv) function +// so that the combined DLL can still call it, and we don't export it in the DEF file. +// +// Note: We don't unconditionally do this though, as we don't want to alter the behavior of the +// public function when ICU is used/consumed in a Nuget package (for example). +// +#if defined(ICU_DATA_DIR_WINDOWS) + ((void)0); // no-op. 
+ return; +#else + uprv_u_cleanup(); +#endif +} + U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType) { if (gLibCleanupFunctions[libType]) From c7c3e66345128e487e4ea002bcbc5e9edaff8bd7 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:48:24 +0530 Subject: [PATCH 05/15] patch: apply 007-MSFT-Patch-cldr_to_icu_build_tool_Windows_changes (hunk 1 only) icu4c/source/data/build.xml: Change CLDR_TMP_DIR from cldr-aux to cldr-staging (MS-ICU pipeline uses cldr-staging as its CLDR temp directory rather than vanilla CLDR's cldr-aux default). Dropped hunk 2 (build-icu-data.xml): target file removed in ICU 73+; the cldr-to-icu data build toolchain is now driven by config.xml + Maven + Cldr2Icu.java rather than that Ant build script. The hunk's intent (forceDelete=true; mvn->mvn.cmd for Windows) does not have a direct landing zone in the new toolchain. If Maven-on-Windows breaks during Step 6 data build, fix at the new invocation point then. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/data/build.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icu/icu4c/source/data/build.xml b/icu/icu4c/source/data/build.xml index a7efc0675b1..3e3c0425703 100644 --- a/icu/icu4c/source/data/build.xml +++ b/icu/icu4c/source/data/build.xml @@ -43,7 +43,8 @@ - + + From 38dd5f74107c9c9751e619d00112552f73c1f468 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:49:42 +0530 Subject: [PATCH 06/15] patch: apply 008-MSFT-Patch-ICU_Dont_use_extended_ICU_data_in_Windows Under ICU_DATA_DIR_WINDOWS, make extendICUData() return early (false). Windows OS ICU has only one data file (versionless icudtl.dat from patch 000) and never has extended data; running the normal extension path would try to load icudt78l.dat on top of the already-loaded common data, creating redundant work or load conflicts. 
Reworked: ICU 78 modernized FALSE -> false in this file; the new guard matches that style. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/udata.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/icu/icu4c/source/common/udata.cpp b/icu/icu4c/source/common/udata.cpp index b9b737f177e..38a501719e3 100644 --- a/icu/icu4c/source/common/udata.cpp +++ b/icu/icu4c/source/common/udata.cpp @@ -809,6 +809,16 @@ openCommonData(const char *path, /* Path from OpenChoice? */ *----------------------------------------------------------------------*/ static UBool extendICUData(UErrorCode *pErr) { +// MSFT-Change: For the Windows OS build of ICU, we only have one data file +// and we don't use the extended data at all. We make this function a no-op +// in order to save a few cycles for perf, but more importantly so that +// we don't try to load a versioned data file (ex: icudt78l.dat) after +// already loading the non-versioned common data file. +#if defined(ICU_DATA_DIR_WINDOWS) + (void)pErr; // suppress unused variable. + return false; +#endif + UDataMemory *pData; UDataMemory copyPData; UBool didUpdate = false; From 2dfc0780e2e66b7da6ed969b4f5b7035b0aac098 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:51:27 +0530 Subject: [PATCH 07/15] patch: apply 010-MSFT-PATCH-ICU_Modify_make_dist_for_Linux Two Linux make-dist adjustments: - DISTY_FILES strips DISTY_DOC_ZIP. MS-ICU does not build Doxygen docs in its release pipeline; the dist target would otherwise fail looking for a non-existent docs.zip. - git archive path adapted for MS-ICU GitHub layout: cd ../.. (two levels up instead of one) and HEAD:icu/icu4c/ (extra icu/ prefix) because microsoft/icu has its icu4c tree at icu/icu4c/ rather than vanilla ICU's top-level icu4c/. Reworked: ICU 78 fixed an upstream typo (we watn -> we want) and added testdata/ copy logic to dist.mk (lines 72-73); both are preserved. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/config/dist.mk | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/icu/icu4c/source/config/dist.mk b/icu/icu4c/source/config/dist.mk index a8bda27d8cd..319470143c6 100644 --- a/icu/icu4c/source/config/dist.mk +++ b/icu/icu4c/source/config/dist.mk @@ -37,7 +37,9 @@ DISTY_DATA_ZIP=$(DISTY_FILE_DIR)/$(DISTY_PREFIX)-data.zip DISTY_DAT:=$(firstword $(wildcard data/out/tmp/icudt$(SO_TARGET_VERSION_MAJOR)*.dat)) DISTY_FILES_SRC=$(DISTY_FILE_TGZ) $(DISTY_FILE_ZIP) -DISTY_FILES=$(DISTY_FILES_SRC) $(DISTY_DOC_ZIP) +# MSFT-Change: We only want the tgz for now, as we don't currently build the docs. +#DISTY_FILES=$(DISTY_FILES_SRC) $(DISTY_DOC_ZIP) +DISTY_FILES=$(DISTY_FILES_SRC) # colon-equals because we want to run this once! EXCLUDES_FILE:=$(shell mktemp) @@ -66,7 +68,8 @@ $(DISTY_FILE_TGZ) $(DISTY_FILE_ZIP) $(DISTY_DATA_ZIP): $(DISTY_DAT) $(DISTY_TMP @echo Export icu4c@$(GITVER) to "$(DISTY_TMP)/icu" -$(RMV) $(DISTY_FILE) $(DISTY_TMP) $(MKINSTALLDIRS) $(DISTY_TMP) - ( cd $(ICU4CTOP)/.. && git archive --format=tar --prefix=icu/ HEAD:icu4c/ ) | ( cd "$(DISTY_TMP)" && tar xf - ) + # MSFT-Change: Adjust the path for the GitHub repo location. + ( cd $(ICU4CTOP)/../.. && git archive --format=tar --prefix=icu/ HEAD:icu/icu4c/ ) | ( cd "$(DISTY_TMP)" && tar xf - ) # special handling for LICENSE file. The symlinks will be included as files by tar and zip. cp -fv $(ICU4CTOP)/LICENSE "$(DISTY_TMP)/LICENSE" # Copy top-level testdata directory so it's a sibling of the source/ directory From 1d57b24e8dd5f7c0544582f94a99d86e38b808f2 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 10:51:59 +0530 Subject: [PATCH 08/15] patch: apply 011-MSFT-Patch_change-tests-data-to-not-include-blocked-regions Remove test entries for blocked region codes (DG, EA, EH, IC, etc.) from ICU's region tests. 
MS-ICU strips these codes from data via GeoPol policy; the tests would otherwise fail looking up regions that no longer exist in MS-ICU's region data. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/test/cintltst/uregiontest.c | 4 ---- icu/icu4c/source/test/intltest/regiontst.cpp | 4 ---- 2 files changed, 8 deletions(-) diff --git a/icu/icu4c/source/test/cintltst/uregiontest.c b/icu/icu4c/source/test/cintltst/uregiontest.c index 0dd896817c0..decac11892a 100644 --- a/icu/icu4c/source/test/cintltst/uregiontest.c +++ b/icu/icu4c/source/test/cintltst/uregiontest.c @@ -152,17 +152,14 @@ static KnownRegion knownRegions[] = { { "CZ" , 203, "151", URGN_TERRITORY, "150" }, { "DD" , 276, "155", URGN_TERRITORY, "150" }, { "DE" , 276, "155", URGN_TERRITORY, "150" }, - { "DG" , -1 , "QO" , URGN_TERRITORY, "009" }, { "DJ" , 262, "014", URGN_TERRITORY, "002" }, { "DK" , 208, "154", URGN_TERRITORY, "150" }, { "DM" , 212, "029", URGN_TERRITORY, "019" }, { "DO" , 214, "029", URGN_TERRITORY, "019" }, { "DZ" , 12, "015", URGN_TERRITORY, "002" }, - { "EA" , -1, "015", URGN_TERRITORY, "002" }, { "EC" , 218, "005", URGN_TERRITORY, "019" }, { "EE" , 233, "154", URGN_TERRITORY, "150" }, { "EG" , 818, "015", URGN_TERRITORY, "002" }, - { "EH" , 732, "015", URGN_TERRITORY, "002" }, { "ER" , 232, "014", URGN_TERRITORY, "002" }, { "ES" , 724, "039", URGN_TERRITORY, "150" }, { "ET" , 231, "014", URGN_TERRITORY, "002" }, @@ -199,7 +196,6 @@ static KnownRegion knownRegions[] = { { "HR" , 191, "039", URGN_TERRITORY, "150" }, { "HT" , 332, "029", URGN_TERRITORY, "019" }, { "HU" , 348, "151", URGN_TERRITORY, "150" }, - { "IC" , -1, "015", URGN_TERRITORY, "002" }, { "ID" , 360, "035", URGN_TERRITORY, "142" }, { "IE" , 372, "154", URGN_TERRITORY, "150" }, { "IL" , 376, "145", URGN_TERRITORY, "142" }, diff --git a/icu/icu4c/source/test/intltest/regiontst.cpp b/icu/icu4c/source/test/intltest/regiontst.cpp index cdbeac793a7..f2b06fd3bb8 100644 --- 
a/icu/icu4c/source/test/intltest/regiontst.cpp +++ b/icu/icu4c/source/test/intltest/regiontst.cpp @@ -124,17 +124,14 @@ static KnownRegion knownRegions[] = { { "CZ" , 203, "151", URGN_TERRITORY, "150" }, { "DD" , 276, "155", URGN_TERRITORY, "150" }, { "DE" , 276, "155", URGN_TERRITORY, "150" }, - { "DG" , -1 , "QO" , URGN_TERRITORY, "009" }, { "DJ" , 262, "014", URGN_TERRITORY, "002" }, { "DK" , 208, "154", URGN_TERRITORY, "150" }, { "DM" , 212, "029", URGN_TERRITORY, "019" }, { "DO" , 214, "029", URGN_TERRITORY, "019" }, { "DZ" , 12, "015", URGN_TERRITORY, "002" }, - { "EA" , -1, "015", URGN_TERRITORY, "002" }, { "EC" , 218, "005", URGN_TERRITORY, "019" }, { "EE" , 233, "154", URGN_TERRITORY, "150" }, { "EG" , 818, "015", URGN_TERRITORY, "002" }, - { "EH" , 732, "015", URGN_TERRITORY, "002" }, { "ER" , 232, "014", URGN_TERRITORY, "002" }, { "ES" , 724, "039", URGN_TERRITORY, "150" }, { "ET" , 231, "014", URGN_TERRITORY, "002" }, @@ -171,7 +168,6 @@ static KnownRegion knownRegions[] = { { "HR" , 191, "039", URGN_TERRITORY, "150" }, { "HT" , 332, "029", URGN_TERRITORY, "019" }, { "HU" , 348, "151", URGN_TERRITORY, "150" }, - { "IC" , -1, "015", URGN_TERRITORY, "002" }, { "ID" , 360, "035", URGN_TERRITORY, "142" }, { "IE" , 372, "154", URGN_TERRITORY, "150" }, { "IL" , 376, "145", URGN_TERRITORY, "142" }, From 3e56c84d4278ad79e5b70b3fc8d6d4a425469a19 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 11:06:37 +0530 Subject: [PATCH 09/15] patch: apply 012-MSFT-Patch-StaticLink_VCRuntime_VCStartup_STL_but_DynamicLink_UCRT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Statically link VCRuntime + VCStartup + STL into icuuc.dll / icuin.dll, but keep UCRT (ucrtbase.dll) dynamic. This eliminates the VC Redist dependency for consumers — Windows 10+ ships UCRT, but VCRuntime and STL would otherwise require manual VC Redist install. 
Mechanism (applied to both common.vcxproj and i18n.vcxproj, Debug + Release blocks): - RuntimeLibrary: MultiThreadedDebugDLL/MultiThreadedDLL -> MultiThreaded Debug/MultiThreaded (compiler switches to static C++ runtime). - IgnoreSpecificDefaultLibraries=libucrtd.lib;libucrt.lib (linker drops the static UCRT pulled in by /MT[d]). - /DEFAULTLIB:ucrt[d].lib via AdditionalOptions (force the dynamic UCRT). Reworked: ICU 78 already uses $(IcuMajorVersion) macro for DLL names (unchanged by this patch). Verified no arch-specific overrides exist — only generic Debug/Release ItemDefinitionGroups — so the fix applies uniformly to x86, x64, and ARM64. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/common.vcxproj | 12 ++++++++++-- icu/icu4c/source/i18n/i18n.vcxproj | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/icu/icu4c/source/common/common.vcxproj b/icu/icu4c/source/common/common.vcxproj index 81f7d0c20f9..b0cb5c17824 100644 --- a/icu/icu4c/source/common/common.vcxproj +++ b/icu/icu4c/source/common/common.vcxproj @@ -53,24 +53,32 @@ RBBI_DEBUG;%(PreprocessorDefinitions) - MultiThreadedDebugDLL + + MultiThreadedDebug ..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll .\..\..\$(IcuLibOutputDir)\icuucd.pdb ..\..\$(IcuLibOutputDir)\icuucd.lib + + libucrtd.lib;libucrt.lib + /DEFAULTLIB:ucrtd.lib %(AdditionalOptions) - MultiThreadedDLL + + MultiThreaded true ..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll .\..\..\$(IcuLibOutputDir)\icuuc.pdb ..\..\$(IcuLibOutputDir)\icuuc.lib + + libucrtd.lib;libucrt.lib + /DEFAULTLIB:ucrt.lib %(AdditionalOptions) diff --git a/icu/icu4c/source/i18n/i18n.vcxproj b/icu/icu4c/source/i18n/i18n.vcxproj index 9803e1d1d5b..b7c1cf4fa3e 100644 --- a/icu/icu4c/source/i18n/i18n.vcxproj +++ b/icu/icu4c/source/i18n/i18n.vcxproj @@ -54,19 +54,24 @@ - MultiThreadedDebugDLL + + MultiThreadedDebug icuucd.lib;%(AdditionalDependencies) 
..\..\$(IcuBinOutputDir)\icuin$(IcuMajorVersion)d.dll .\..\..\$(IcuLibOutputDir)\icuind.pdb ..\..\$(IcuLibOutputDir)\icuind.lib + + libucrtd.lib;libucrt.lib + /DEFAULTLIB:ucrtd.lib %(AdditionalOptions) - MultiThreadedDLL + + MultiThreaded true @@ -74,6 +79,9 @@ ..\..\$(IcuBinOutputDir)\icuin$(IcuMajorVersion).dll .\..\..\$(IcuLibOutputDir)\icuin.pdb ..\..\$(IcuLibOutputDir)\icuin.lib + + libucrtd.lib;libucrt.lib + /DEFAULTLIB:ucrt.lib %(AdditionalOptions) From 01f38e6a52d0330dfcd587034ba162322ddfb014 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 11:09:41 +0530 Subject: [PATCH 10/15] patch: apply 013-MSFT-Patch-ICU_Add_version_number_to_PDB_names Add ICU major version number to PDB (debug symbol) filenames so they match the DLL filenames. Prevents PDB filename collision when two ICU versions are deployed side-by-side and lets debuggers correctly correlate symbols across versions. Files updated: common.vcxproj, i18n.vcxproj, stubdata.vcxproj. PDBs become icuuc78.pdb / icuuc78d.pdb, icuin78.pdb / icuin78d.pdb, icudt78.pdb (matching the existing icuuc78.dll / icuuc78d.dll / icuin78.dll / icuin78d.dll / icudt78.dll naming). Reworked: Used $(IcuMajorVersion) MSBuild macro rather than hardcoding 78. This is the same pattern ICU 78's tags already use in these vcxproj files, so future ICU upgrades won't need to touch these strings. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/common.vcxproj | 9 ++++++--- icu/icu4c/source/i18n/i18n.vcxproj | 9 ++++++--- icu/icu4c/source/stubdata/stubdata.vcxproj | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/icu/icu4c/source/common/common.vcxproj b/icu/icu4c/source/common/common.vcxproj index b0cb5c17824..f0f13aa1e2e 100644 --- a/icu/icu4c/source/common/common.vcxproj +++ b/icu/icu4c/source/common/common.vcxproj @@ -41,7 +41,8 @@ $(OutDir)/icuuc.pch $(OutDir)/ $(OutDir)/ - $(OutDir)/icuuc.pdb + + $(OutDir)/icuuc$(IcuMajorVersion).pdb @@ -58,7 +59,8 @@ ..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll - .\..\..\$(IcuLibOutputDir)\icuucd.pdb + + .\..\..\$(IcuLibOutputDir)\icuuc$(IcuMajorVersion)d.pdb ..\..\$(IcuLibOutputDir)\icuucd.lib libucrtd.lib;libucrt.lib @@ -74,7 +76,8 @@ ..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll - .\..\..\$(IcuLibOutputDir)\icuuc.pdb + + .\..\..\$(IcuLibOutputDir)\icuuc$(IcuMajorVersion).pdb ..\..\$(IcuLibOutputDir)\icuuc.lib libucrtd.lib;libucrt.lib diff --git a/icu/icu4c/source/i18n/i18n.vcxproj b/icu/icu4c/source/i18n/i18n.vcxproj index b7c1cf4fa3e..2a674773021 100644 --- a/icu/icu4c/source/i18n/i18n.vcxproj +++ b/icu/icu4c/source/i18n/i18n.vcxproj @@ -42,7 +42,8 @@ $(OutDir)/icuin.pch $(OutDir)/ $(OutDir)/ - $(OutDir)/icuin.pdb + + $(OutDir)/icuin$(IcuMajorVersion).pdb ../common;%(AdditionalIncludeDirectories) @@ -60,7 +61,8 @@ icuucd.lib;%(AdditionalDependencies) ..\..\$(IcuBinOutputDir)\icuin$(IcuMajorVersion)d.dll - .\..\..\$(IcuLibOutputDir)\icuind.pdb + + .\..\..\$(IcuLibOutputDir)\icuin$(IcuMajorVersion)d.pdb ..\..\$(IcuLibOutputDir)\icuind.lib libucrtd.lib;libucrt.lib @@ -77,7 +79,8 @@ icuuc.lib;%(AdditionalDependencies) ..\..\$(IcuBinOutputDir)\icuin$(IcuMajorVersion).dll - .\..\..\$(IcuLibOutputDir)\icuin.pdb + + .\..\..\$(IcuLibOutputDir)\icuin$(IcuMajorVersion).pdb ..\..\$(IcuLibOutputDir)\icuin.lib libucrtd.lib;libucrt.lib diff 
--git a/icu/icu4c/source/stubdata/stubdata.vcxproj b/icu/icu4c/source/stubdata/stubdata.vcxproj index 0b2c61cb06b..9c147a57a14 100644 --- a/icu/icu4c/source/stubdata/stubdata.vcxproj +++ b/icu/icu4c/source/stubdata/stubdata.vcxproj @@ -42,7 +42,8 @@ $(OutDir)/icudt.pch $(OutDir)/ $(OutDir)/ - $(OutDir)/icudt.pdb + + $(OutDir)/icudt$(IcuMajorVersion).pdb STUBDATA_BUILD;%(PreprocessorDefinitions) @@ -56,7 +57,8 @@ true ..\..\$(IcuBinOutputDir)\icudt$(IcuMajorVersion).dll - .\..\..\$(IcuLibOutputDir)\icudt.pdb + + .\..\..\$(IcuLibOutputDir)\icudt$(IcuMajorVersion).pdb ..\..\$(IcuLibOutputDir)\icudt.lib From 76b930c2c7a9aadbd28308ca0bfffc7da75e6710 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 11:56:45 +0530 Subject: [PATCH 11/15] patch: apply 018-MSFT-Patch_ICU_toolutil_increase_string_store_for_extra_locales Bump STRING_STORE_SIZE from 100000 to 120000 in package.h. The package tool uses this as a static buffer for item names when building the .dat data file; CLDR-MS adds extra locales that overflow the vanilla 100K buffer. Applied verbatim from the patch (120000). If CLDR 48 + MS-CLDR overlay overflows this in the Step 6 data build, bump further (see followup todo). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/tools/toolutil/package.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu/icu4c/source/tools/toolutil/package.h b/icu/icu4c/source/tools/toolutil/package.h index ea60c13a74a..6c411ae5fd0 100644 --- a/icu/icu4c/source/tools/toolutil/package.h +++ b/icu/icu4c/source/tools/toolutil/package.h @@ -27,7 +27,7 @@ // .dat package file representation ---------------------------------------- *** -#define STRING_STORE_SIZE 100000 +#define STRING_STORE_SIZE 120000 #define MAX_PKG_NAME_LENGTH 64 typedef void CheckDependency(void *context, const char *itemName, const char *targetName); From 6d82403cf1bb5c0a2057cbbd96068385da38a7f0 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 12:40:16 +0530 Subject: [PATCH 12/15] patch: apply 020-MSFT-Patch_ICU_Add_uprefs_library_to_obtain_default_locale_as_full_BCP47_tag Add MS-only uprefs library so uprv_getDefaultLocaleID() on Windows returns a full BCP47 tag (e.g. en-US-u-ca-gregory-hc-h12-fw-mon-ms-metric) that encodes the user's calendar, currency, hour cycle, first day of week, sort method, and measurement system from Windows Globalization APIs. Vanilla ICU only returns language+region. 
Files: - new uprefs.cpp/h (common library, gated on UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY and U_PLATFORM_USES_ONLY_WIN32_API) - new uprefstest.cpp/h - putil.cpp wires uprefs_getBCP47Tag() into uprv_getDefaultLocaleID(); unifies buffer sizing (POSIX_LOCALE_CAPACITY -> length * 2) - uconfig.h defines UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY = 1 - sources.txt lists uprefs.cpp - common.vcxproj - common_uwp.vcxproj reference uprefs.cpp/h - test/intltest/ Makefile.in + intltest.vcxproj wire uprefstest.{cpp,h}; itutil.cpp registers UPrefsTest class Reworked from ICU 72 form: - 8 hunks applied cleanly via git apply (with offsets only) - 5 build-system list-insertion hunks reworked manually due to context drift (ICU 78 added fixedstring.cpp, new test files between the patch's anchor lines) - New file uprefstest.cpp uses backup version from ICU 72.1.0.4 (commit 860c2ea8151 by Rahul Pandey, "Add missing parameters to MockGetLocaleInfoEx" Nov 2022) which contains style/whitespace cleanup not present in the original 2021 patch file. Patch file in icu-patches/patches/ remains stale and will be regenerated at end of upgrade. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/common/common.vcxproj | 2 + icu/icu4c/source/common/common_uwp.vcxproj | 2 + icu/icu4c/source/common/putil.cpp | 35 +- icu/icu4c/source/common/sources.txt | 1 + icu/icu4c/source/common/unicode/uconfig.h | 16 + icu/icu4c/source/common/uprefs.cpp | 553 ++++++++++++++++++ icu/icu4c/source/common/uprefs.h | 29 + icu/icu4c/source/test/intltest/Makefile.in | 2 +- .../source/test/intltest/intltest.vcxproj | 2 + icu/icu4c/source/test/intltest/itutil.cpp | 6 + icu/icu4c/source/test/intltest/uprefstest.cpp | 438 ++++++++++++++ icu/icu4c/source/test/intltest/uprefstest.h | 50 ++ 12 files changed, 1132 insertions(+), 4 deletions(-) create mode 100644 icu/icu4c/source/common/uprefs.cpp create mode 100644 icu/icu4c/source/common/uprefs.h create mode 100644 icu/icu4c/source/test/intltest/uprefstest.cpp create mode 100644 icu/icu4c/source/test/intltest/uprefstest.h diff --git a/icu/icu4c/source/common/common.vcxproj b/icu/icu4c/source/common/common.vcxproj index f0f13aa1e2e..aca55a102f9 100644 --- a/icu/icu4c/source/common/common.vcxproj +++ b/icu/icu4c/source/common/common.vcxproj @@ -286,6 +286,7 @@ + @@ -405,6 +406,7 @@ + diff --git a/icu/icu4c/source/common/common_uwp.vcxproj b/icu/icu4c/source/common/common_uwp.vcxproj index 01906f3caf9..113c2418902 100644 --- a/icu/icu4c/source/common/common_uwp.vcxproj +++ b/icu/icu4c/source/common/common_uwp.vcxproj @@ -409,6 +409,7 @@ + @@ -529,6 +530,7 @@ + diff --git a/icu/icu4c/source/common/putil.cpp b/icu/icu4c/source/common/putil.cpp index ea15fdff0b0..307b07d087d 100644 --- a/icu/icu4c/source/common/putil.cpp +++ b/icu/icu4c/source/common/putil.cpp @@ -66,6 +66,7 @@ #include "locmap.h" #include "ucln_cmn.h" #include "charstr.h" +#include "uprefs.h" /* Include standard headers. */ #include @@ -1794,10 +1795,37 @@ The leftmost codepage (.xxx) wins. 
return posixID; #elif U_PLATFORM_USES_ONLY_WIN32_API -#define POSIX_LOCALE_CAPACITY 64 UErrorCode status = U_ZERO_ERROR; char *correctedPOSIXLocale = nullptr; +#if UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY + + int32_t neededBufferSize = uprefs_getBCP47Tag(nullptr, 0, &status); + MaybeStackArray windowsLocale(neededBufferSize, status); + int32_t length = uprefs_getBCP47Tag(windowsLocale.getAlias(), neededBufferSize, &status); + + if (length > 0) // If length is 0, then the call to uprefs_getBCP47Tag failed. + { + // Now normalize the resulting name + correctedPOSIXLocale = static_cast(uprv_malloc(length * 2)); + /* TODO: Should we just exit on memory allocation failure? */ + if (correctedPOSIXLocale) + { + int32_t posixLen = uloc_canonicalize(windowsLocale.getAlias(), correctedPOSIXLocale, length * 2, &status); + if (U_SUCCESS(status)) + { + *(correctedPOSIXLocale + posixLen) = 0; + gCorrectedPOSIXLocale = correctedPOSIXLocale; + gCorrectedPOSIXLocaleHeapAllocated = true; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + } + else + { + uprv_free(correctedPOSIXLocale); + } + } + } +#else // If we have already figured this out just use the cached value if (gCorrectedPOSIXLocale != nullptr) { return gCorrectedPOSIXLocale; @@ -1839,11 +1867,11 @@ The leftmost codepage (.xxx) wins. } // Now normalize the resulting name - correctedPOSIXLocale = static_cast(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); + correctedPOSIXLocale = static_cast(uprv_malloc(length * 2)); /* TODO: Should we just exit on memory allocation failure? */ if (correctedPOSIXLocale) { - int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); + int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, length * 2, &status); if (U_SUCCESS(status)) { *(correctedPOSIXLocale + posixLen) = 0; @@ -1857,6 +1885,7 @@ The leftmost codepage (.xxx) wins. 
} } } +#endif // If unable to find a locale we can agree upon, use en-US by default if (gCorrectedPOSIXLocale == nullptr) { diff --git a/icu/icu4c/source/common/sources.txt b/icu/icu4c/source/common/sources.txt index 5b1c5e262ea..cfcfd95df56 100644 --- a/icu/icu4c/source/common/sources.txt +++ b/icu/icu4c/source/common/sources.txt @@ -163,6 +163,7 @@ unistr_titlecase_brkiter.cpp unorm.cpp unormcmp.cpp uobject.cpp +uprefs.cpp uprops.cpp ures_cnv.cpp uresbund.cpp diff --git a/icu/icu4c/source/common/unicode/uconfig.h b/icu/icu4c/source/common/unicode/uconfig.h index 3c118288665..f339dec982f 100644 --- a/icu/icu4c/source/common/unicode/uconfig.h +++ b/icu/icu4c/source/common/unicode/uconfig.h @@ -400,6 +400,22 @@ # define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1 #endif +/** + * \def UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY + * On Windows platforms (i.e., U_PLATFORM_HAS_WIN32_API is true), this switch enables ICU to + * detect additional user preferences by setting BCP47 Unicode extensions within the default locale. + * This includes information such as calendar, currency, hour cycle, among others. + * + * If this switch is off (or set to 0) then the default behavior of only detecting the language + * and country/region occurs. + * + * For example, the default locale may be detected as "es-MX-u-hc-h23", instead of "es-MX", + * if the user has selected a 24 hour clock option. +*/ +#ifndef UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY +# define UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY 1 +#endif + //IGNORE_WINDOWS_HEADERS_END /* i18n library switches ---------------------------------------------------- */ diff --git a/icu/icu4c/source/common/uprefs.cpp b/icu/icu4c/source/common/uprefs.cpp new file mode 100644 index 00000000000..d9304a11de0 --- /dev/null +++ b/icu/icu4c/source/common/uprefs.cpp @@ -0,0 +1,553 @@ +// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "uprefs.h" +#if U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY +#include "unicode/ustring.h" +#include "cmemory.h" +#include "charstr.h" +#include "cstring.h" +#include "cwchar.h" +#include + +U_NAMESPACE_USE + +// Older versions of the Windows SDK don’t have definitions for calendar types that were added later on. +// (For example, the Windows 7 SDK doesn’t have CAL_PERSIAN). +// So we’ll need to provide our own definitions for some of them. +// Note that on older versions of the OS these values won't ever be returned by the platform APIs, so providing our own definitions is fine. +#ifndef CAL_PERSIAN +#define CAL_PERSIAN 22 // Persian (Solar Hijri) calendar +#endif + +#define RETURN_FAILURE_STRING_WITH_STATUS_IF(condition, error, status) \ + if (condition) \ + { \ + *status = error; \ + return CharString(); \ + } + +#define RETURN_FAILURE_WITH_STATUS_IF(condition, error, status) \ + if (condition) \ + { \ + *status = error; \ + return 0; \ + } + +#define RETURN_VALUE_IF(condition, value) \ + if (condition) \ + { \ + return value; \ + } + +#define RETURN_WITH_ALLOCATION_ERROR_IF_FAILED(status) \ + if (U_FAILURE(*status)) \ + { \ + *status = U_MEMORY_ALLOCATION_ERROR; \ + return CharString(); \ + } \ +// ------------------------------------------------------- +// ----------------- MAPPING FUNCTIONS-------------------- +// ------------------------------------------------------- + +// Maps from a NLS Calendar ID (CALID) to a BCP47 Unicode Extension calendar identifier. +// +// We map the NLS CALID from GetLocaleInfoEx to the calendar identifier +// used in BCP47 tag with Unicode Extensions. +// +// This does not return a full nor valid BCP47Tag, it only returns the option that the BCP47 tag +// would return after the "ca-" part +// +// For example: +// CAL_GREGORIAN would return "gregory". +// CAL_HIJRI would return "islamic". 
+// +// These could be used in a BCP47 tag like this: "en-US-u-ca-gregory". +// Note that there are some NLS calendars that are not supported with the BCP47 U extensions, +// and vice-versa. +// +// NLS CALID reference:https://docs.microsoft.com/en-us/windows/win32/intl/calendar-identifiers +CharString getCalendarBCP47FromNLSType(int32_t calendar, UErrorCode* status) +{ + switch(calendar) + { + case CAL_GREGORIAN: + case CAL_GREGORIAN_US: + case CAL_GREGORIAN_ME_FRENCH: + case CAL_GREGORIAN_ARABIC: + case CAL_GREGORIAN_XLIT_ENGLISH: + case CAL_GREGORIAN_XLIT_FRENCH: + return CharString("gregory", *status); + + case CAL_JAPAN: + return CharString("japanese", *status); + + case CAL_TAIWAN: + return CharString("roc", *status); + + case CAL_KOREA: + return CharString("dangi", *status); + + case CAL_HIJRI: + return CharString("islamic", *status); + + case CAL_THAI: + return CharString("buddhist", *status); + + case CAL_HEBREW: + return CharString("hebrew", *status); + + case CAL_PERSIAN: + return CharString("persian", *status); + + case CAL_UMALQURA: + return CharString("islamic-umalqura", *status); + + default: + return CharString(); + } +} + +// Maps from a NLS Alternate sorting system to a BCP47 U extension sorting system. +// +// We map the alternate sorting method from GetLocaleInfoEx to the sorting method +// used in BCP47 tag with Unicode Extensions. +// +// This does not return a full nor valid BCP47Tag, it only returns the option that the BCP47 tag +// would return after the "co-" part +// +// For example: +// "phoneb" (parsed from "de-DE_phoneb") would return "phonebk". +// "radstr" (parsed from "ja-JP_radstr") would return "unihan". +// +// These could be used in a BCP47 tag like this: "de-DE-u-co-phonebk". +// Note that there are some NLS Alternate sort methods that are not supported with the BCP47 U extensions, +// and vice-versa. 
+CharString getSortingSystemBCP47FromNLSType(const wchar_t* sortingSystem, UErrorCode* status) +{ + if (wcscmp(sortingSystem, L"phoneb") == 0) // Phonebook style ordering (such as in German) + { + return CharString("phonebk", *status); + } + else if (wcscmp(sortingSystem, L"tradnl") == 0) // Traditional style ordering (such as in Spanish) + { + return CharString("trad", *status); + } + else if (wcscmp(sortingSystem, L"stroke") == 0) // Pinyin ordering for Latin, stroke order for CJK characters (used in Chinese) + { + return CharString("stroke", *status); + } + else if (wcscmp(sortingSystem, L"radstr") == 0) // Pinyin ordering for Latin, Unihan radical-stroke ordering for CJK characters (used in Chinese) + { + return CharString("unihan", *status); + } + else if (wcscmp(sortingSystem, L"pronun") == 0) // Phonetic ordering (sorting based on pronunciation) + { + return CharString("phonetic", *status); + } + else + { + return CharString(); + } +} + +// Maps from a NLS first day of week value to a BCP47 U extension first day of week. +// +// NLS defines: +// 0 -> Monday, 1 -> Tuesday, ... 5 -> Saturday, 6 -> Sunday +// +// We map the first day of week from GetLocaleInfoEx to the first day of week +// used in BCP47 tag with Unicode Extensions. +// +// This does not return a full nor valid BCP47Tag, it only returns the option that the BCP47 tag +// would return after the "fw-" part +// +// For example: +// 1 (Tuesday) would return "tue". +// 6 (Sunday) would return "sun". +// +// These could be used in a BCP47 tag like this: "en-US-u-fw-sun". 
+CharString getFirstDayBCP47FromNLSType(int32_t firstday, UErrorCode* status) +{ + switch(firstday) + { + case 0: + return CharString("mon", *status); + + case 1: + return CharString("tue", *status); + + case 2: + return CharString("wed", *status); + + case 3: + return CharString("thu", *status); + + case 4: + return CharString("fri", *status); + + case 5: + return CharString("sat", *status); + + case 6: + return CharString("sun", *status); + + default: + return CharString(); + } +} + +// Maps from a NLS Measurement system to a BCP47 U extension measurement system. +// +// NLS defines: +// 0 -> Metric system, 1 -> U.S. System +// +// This does not return a full nor valid BCP47Tag, it only returns the option that the BCP47 tag +// would return after the "ms-" part +// +// For example: +// 0 (Metric) would return "metric". +// 1 (U.S. System) would return "ussystem". +// +// These could be used in a BCP47 tag like this: "en-US-u-ms-metric". +CharString getMeasureSystemBCP47FromNLSType(int32_t measureSystem, UErrorCode *status) +{ + switch(measureSystem) + { + case 0: + return CharString("metric", *status); + case 1: + return CharString("ussystem", *status); + default: + return CharString(); + } +} + +// ------------------------------------------------------- +// --------------- END OF MAPPING FUNCTIONS -------------- +// ------------------------------------------------------- + +// ------------------------------------------------------- +// ------------------ HELPER FUNCTIONS ------------------ +// ------------------------------------------------------- + +// Return the CLDR "h12" or "h23" format for the 12 or 24 hour clock. +// NLS only gives us a "time format" of a form similar to "h:mm:ss tt" +// The NLS "h" is 12 hour, and "H" is 24 hour, so we'll scan for the +// first h or H. +// Note that the NLS string could have sections escaped with single +// quotes, so be sure to skip those parts.
Eg: "'Hours:' h:mm:ss" +// would skip the "H" in 'Hours' and use the h in the actual pattern. +CharString get12_or_24hourFormat(wchar_t* hourFormat, UErrorCode* status) +{ + bool isInEscapedString = false; + const int32_t hourLength = static_cast(uprv_wcslen(hourFormat)); + for (int32_t i = 0; i < hourLength; i++) + { + // Toggle escaped flag if in ' quoted portion + if (hourFormat[i] == L'\'') + { + isInEscapedString = !isInEscapedString; + } + + if (!isInEscapedString) + { + // Check for both so we can escape early + if (hourFormat[i] == L'H') + { + return CharString("h23", *status); + } + + if (hourFormat[i] == L'h') + { + return CharString("h12", *status); + } + } + } + // default to a 24 hour clock as that's more common worldwide + return CharString("h23", *status); +} + +UErrorCode getUErrorCodeFromLastError() +{ + DWORD error = GetLastError(); + switch(error) + { + case ERROR_INSUFFICIENT_BUFFER: + return U_BUFFER_OVERFLOW_ERROR; + + case ERROR_INVALID_FLAGS: + case ERROR_INVALID_PARAMETER: + return U_ILLEGAL_ARGUMENT_ERROR; + + case ERROR_OUTOFMEMORY: + return U_MEMORY_ALLOCATION_ERROR; + + default: + return U_INTERNAL_PROGRAM_ERROR; + } +} + +int32_t GetLocaleInfoExWrapper(LPCWSTR lpLocaleName, LCTYPE LCType, LPWSTR lpLCData, int cchData, UErrorCode* status) +{ + RETURN_VALUE_IF(U_FAILURE(*status), 0); + +#ifndef UPREFS_TEST + *status = U_ZERO_ERROR; + int32_t result = GetLocaleInfoEx(lpLocaleName, LCType, lpLCData, cchData); + + if (result == 0) + { + *status = getUErrorCodeFromLastError(); + } + return result; +#else + #include "uprefstest.h" + UPrefsTest prefTests; + return prefTests.MockGetLocaleInfoEx(lpLocaleName, LCType, lpLCData, cchData, status); +#endif +} + +// Copies a string to a buffer if its size allows it and returns the size. +// The returned needed buffer size includes the terminating \0 null character. +// If the buffer's size is set to 0, the needed buffer size is returned before copying the string. 
+int32_t checkBufferCapacityAndCopy(const char* uprefsString, char* uprefsBuffer, int32_t bufferSize, UErrorCode* status) +{ + int32_t neededBufferSize = static_cast(uprv_strlen(uprefsString) + 1); + + RETURN_VALUE_IF(bufferSize == 0, neededBufferSize); + RETURN_FAILURE_WITH_STATUS_IF(neededBufferSize > bufferSize, U_BUFFER_OVERFLOW_ERROR, status); + + uprv_strcpy(uprefsBuffer, uprefsString); + + return neededBufferSize; +} + +CharString getLocaleBCP47Tag_impl(UErrorCode* status, bool getSorting) +{ + // First part of a bcp47 tag looks like an NLS user locale, so we get the NLS user locale. + int32_t neededBufferSize = GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, nullptr, 0, status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + MaybeStackArray NLSLocale(neededBufferSize, *status); + RETURN_WITH_ALLOCATION_ERROR_IF_FAILED(status); + + int32_t result = GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, NLSLocale.getAlias(), neededBufferSize, status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + if (getSorting) //We determine if we want the locale (for example, en-US) or the sorting method (for example, phonebk) + { + // We use LOCALE_SNAME to get the sorting method (if any). So we need to keep + // only the sorting bit after the _, removing the locale name. + // Example: from "de-DE_phoneb" we only want "phoneb" + const wchar_t * startPosition = wcschr(NLSLocale.getAlias(), L'_'); + + // Note: not finding a "_" is not an error, it means the user has not selected an alternate sorting method, which is fine. + if (startPosition != nullptr) + { + CharString sortingSystem = getSortingSystemBCP47FromNLSType(startPosition + 1, status); + + if (sortingSystem.length() == 0) + { + *status = U_UNSUPPORTED_ERROR; + return CharString(); + } + return sortingSystem; + } + } + else + { + // The NLS locale may include a non-default sort, such as de-DE_phoneb. We only want the locale name before the _. 
+ wchar_t * position = wcschr(NLSLocale.getAlias(), L'_'); + if (position != nullptr) + { + *position = L'\0'; + } + + CharString languageTag; + int32_t resultCapacity = 0; + languageTag.getAppendBuffer(neededBufferSize, neededBufferSize, resultCapacity, *status); + RETURN_WITH_ALLOCATION_ERROR_IF_FAILED(status); + + int32_t unitsWritten = 0; + u_strToUTF8(languageTag.data(), neededBufferSize, &unitsWritten, reinterpret_cast(NLSLocale.getAlias()), neededBufferSize, status); + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + return languageTag; + } + + return CharString(); +} + +CharString getCalendarSystem_impl(UErrorCode* status) +{ + int32_t NLSCalendar = 0; + + GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_ICALENDARTYPE | LOCALE_RETURN_NUMBER, reinterpret_cast(&NLSCalendar), sizeof(NLSCalendar) / sizeof(wchar_t), status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + CharString calendar(getCalendarBCP47FromNLSType(NLSCalendar, status), *status); + RETURN_FAILURE_STRING_WITH_STATUS_IF(calendar.length() == 0, U_UNSUPPORTED_ERROR, status); + + return calendar; +} + +CharString getCurrencyCode_impl(UErrorCode* status) +{ + int32_t neededBufferSize = GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_SINTLSYMBOL, nullptr, 0, status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + MaybeStackArray NLScurrencyData(neededBufferSize, *status); + RETURN_WITH_ALLOCATION_ERROR_IF_FAILED(status); + + int32_t result = GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_SINTLSYMBOL, NLScurrencyData.getAlias(), neededBufferSize, status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + MaybeStackArray currency(neededBufferSize, *status); + RETURN_WITH_ALLOCATION_ERROR_IF_FAILED(status); + + int32_t unitsWritten = 0; + u_strToUTF8(currency.getAlias(), neededBufferSize, &unitsWritten, reinterpret_cast(NLScurrencyData.getAlias()), neededBufferSize, status); + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + if 
(unitsWritten == 0) + { + *status = U_INTERNAL_PROGRAM_ERROR; + return CharString(); + } + + // Since we retreived the currency code in caps, we need to make it lowercase for it to be in CLDR BCP47 U extensions format. + T_CString_toLowerCase(currency.getAlias()); + + return CharString(currency.getAlias(), neededBufferSize, *status); +} + +CharString getFirstDayOfWeek_impl(UErrorCode* status) +{ + int32_t NLSfirstDay = 0; + GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_IFIRSTDAYOFWEEK | LOCALE_RETURN_NUMBER, reinterpret_cast(&NLSfirstDay), sizeof(NLSfirstDay) / sizeof(wchar_t), status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + CharString firstDay = getFirstDayBCP47FromNLSType(NLSfirstDay, status); + RETURN_FAILURE_STRING_WITH_STATUS_IF(firstDay.length() == 0, U_UNSUPPORTED_ERROR, status); + + return firstDay; +} + +CharString getHourCycle_impl(UErrorCode* status) +{ + int32_t neededBufferSize = GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_STIMEFORMAT, nullptr, 0, status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + MaybeStackArray NLShourCycle(neededBufferSize, *status); + RETURN_WITH_ALLOCATION_ERROR_IF_FAILED(status); + + int32_t result = GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_STIMEFORMAT, NLShourCycle.getAlias(), neededBufferSize, status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + CharString hourCycle = get12_or_24hourFormat(NLShourCycle.getAlias(), status); + if (hourCycle.length() == 0) + { + *status = U_INTERNAL_PROGRAM_ERROR; + return CharString(); + } + return hourCycle; +} + +CharString getMeasureSystem_impl(UErrorCode* status) +{ + int32_t NLSmeasureSystem = 0; + GetLocaleInfoExWrapper(LOCALE_NAME_USER_DEFAULT, LOCALE_IMEASURE | LOCALE_RETURN_NUMBER, reinterpret_cast(&NLSmeasureSystem), sizeof(NLSmeasureSystem) / sizeof(wchar_t), status); + + RETURN_VALUE_IF(U_FAILURE(*status), CharString()); + + CharString measureSystem = 
getMeasureSystemBCP47FromNLSType(NLSmeasureSystem, status); + RETURN_FAILURE_STRING_WITH_STATUS_IF(measureSystem.length() == 0, U_UNSUPPORTED_ERROR, status); + + return measureSystem; +} + +void appendIfDataNotEmpty(CharString& dest, const char* firstData, const char* secondData, bool& warningGenerated, UErrorCode* status) +{ + if (*status == U_UNSUPPORTED_ERROR) + { + warningGenerated = true; + *status = U_ZERO_ERROR; + } + + if (uprv_strlen(secondData) != 0) + { + dest.append(firstData, *status); + dest.append(secondData, *status); + } +} +// ------------------------------------------------------- +// --------------- END OF HELPER FUNCTIONS --------------- +// ------------------------------------------------------- + + +// ------------------------------------------------------- +// ---------------------- APIs --------------------------- +// ------------------------------------------------------- + +// Gets the valid and canonical BCP47 tag with the user settings for Language, Calendar, Sorting, Currency, +// First day of week, Hour cycle, and Measurement system. +// Calls all of the other APIs +// Returns the needed buffer size for the BCP47 Tag. 
+int32_t uprefs_getBCP47Tag(char* uprefsBuffer, int32_t bufferSize, UErrorCode* status) +{ + RETURN_FAILURE_WITH_STATUS_IF(uprefsBuffer == nullptr && bufferSize != 0, U_ILLEGAL_ARGUMENT_ERROR, status); + + *status = U_ZERO_ERROR; + CharString BCP47Tag; + bool warningGenerated = false; + + CharString languageTag = getLocaleBCP47Tag_impl(status, false); + RETURN_VALUE_IF(U_FAILURE(*status), 0); + BCP47Tag.append(languageTag.data(), *status); + BCP47Tag.append("-u", *status); + + CharString calendar = getCalendarSystem_impl(status); + RETURN_VALUE_IF(U_FAILURE(*status) && *status != U_UNSUPPORTED_ERROR, 0); + appendIfDataNotEmpty(BCP47Tag, "-ca-", calendar.data(), warningGenerated, status); + + CharString sortingSystem = getLocaleBCP47Tag_impl(status, true); + RETURN_VALUE_IF(U_FAILURE(*status) && *status != U_UNSUPPORTED_ERROR, 0); + appendIfDataNotEmpty(BCP47Tag, "-co-", sortingSystem.data(), warningGenerated, status); + + CharString currency = getCurrencyCode_impl(status); + RETURN_VALUE_IF(U_FAILURE(*status) && *status != U_UNSUPPORTED_ERROR, 0); + appendIfDataNotEmpty(BCP47Tag, "-cu-", currency.data(), warningGenerated, status); + + CharString firstDay = getFirstDayOfWeek_impl(status); + RETURN_VALUE_IF(U_FAILURE(*status) && *status != U_UNSUPPORTED_ERROR, 0); + appendIfDataNotEmpty(BCP47Tag, "-fw-", firstDay.data(), warningGenerated, status); + + CharString hourCycle = getHourCycle_impl(status); + RETURN_VALUE_IF(U_FAILURE(*status) && *status != U_UNSUPPORTED_ERROR, 0); + appendIfDataNotEmpty(BCP47Tag, "-hc-", hourCycle.data(), warningGenerated, status); + + CharString measureSystem = getMeasureSystem_impl(status); + RETURN_VALUE_IF(U_FAILURE(*status) && *status != U_UNSUPPORTED_ERROR, 0); + appendIfDataNotEmpty(BCP47Tag, "-ms-", measureSystem.data(), warningGenerated, status); + + if (warningGenerated) + { + *status = U_USING_FALLBACK_WARNING; + } + + return checkBufferCapacityAndCopy(BCP47Tag.data(), uprefsBuffer, bufferSize, status); +} + +// 
------------------------------------------------------- +// ---------------------- END OF APIs -------------------- +// ------------------------------------------------------- + +#endif // U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY \ No newline at end of file diff --git a/icu/icu4c/source/common/uprefs.h b/icu/icu4c/source/common/uprefs.h new file mode 100644 index 00000000000..08ecd86189b --- /dev/null +++ b/icu/icu4c/source/common/uprefs.h @@ -0,0 +1,29 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef UPREFS_H +#define UPREFS_H + +#include "unicode/platform.h" +#include "unicode/utypes.h" +#if U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY + +/** +* Gets the valid and canonical BCP47 tag with the user settings for Language, Calendar, Sorting, Currency, +* First day of week, Hour cycle, and Measurement system when available. +* +* @param uprefsBuffer Pointer to a buffer in which this function retrieves the BCP47 tag. +* This pointer is not used if bufferSize is set to 0. +* @param bufferSize Size, in characters, of the data buffer indicated by uprefsBuffer. Alternatively, the application +* can set this parameter to 0. In this case, the function does not use the uprefsBuffer parameter +* and returns the required buffer size, including the terminating null character. +* @param status Pointer to a UErrorCode. The resulting value will be U_ZERO_ERROR if the call was successful or will +* contain an error or warning code. If the status is U_USING_FALLBACK_WARNING, it means at least one of the +* settings was not successfully mapped between NLS and CLDR, so it will not be shown on the BCP47 tag. +* @return The needed buffer size, including the terminating \0 null character, if the call was successful; it should be ignored +* if status indicates a failure.
+*/ +int32_t uprefs_getBCP47Tag(char* uprefsBuffer, int32_t bufferSize, UErrorCode* status); + +#endif //U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY +#endif //UPREFS_H \ No newline at end of file diff --git a/icu/icu4c/source/test/intltest/Makefile.in b/icu/icu4c/source/test/intltest/Makefile.in index 2e448319243..eab33ebb0b6 100644 --- a/icu/icu4c/source/test/intltest/Makefile.in +++ b/icu/icu4c/source/test/intltest/Makefile.in @@ -78,7 +78,7 @@ units_data_test.o units_router_test.o units_test.o displayoptions_test.o \ numbertest_simple.o \ cplusplus_header_api_build_test.o uchar_type_build_test.o \ ucolheaderonlytest.o usetheaderonlytest.o utfiteratortest.o utfstringtest.o \ -intltesttest.o +intltesttest.o uprefstest.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu/icu4c/source/test/intltest/intltest.vcxproj b/icu/icu4c/source/test/intltest/intltest.vcxproj index ce69ec0de41..0ee503fcbe4 100644 --- a/icu/icu4c/source/test/intltest/intltest.vcxproj +++ b/icu/icu4c/source/test/intltest/intltest.vcxproj @@ -247,6 +247,7 @@ + @@ -378,6 +379,7 @@ + diff --git a/icu/icu4c/source/test/intltest/itutil.cpp b/icu/icu4c/source/test/intltest/itutil.cpp index 20c16389c0a..768507c632a 100644 --- a/icu/icu4c/source/test/intltest/itutil.cpp +++ b/icu/icu4c/source/test/intltest/itutil.cpp @@ -33,6 +33,9 @@ #include "uvectest.h" #include "aliastst.h" #include "usettest.h" +#if U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY + #include "uprefstest.h" +#endif extern IntlTest *createBytesTrieTest(); #if !UCONFIG_NO_COLLATION @@ -76,6 +79,9 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* & TESTCASE_AUTO_CLASS(LocaleAliasTest); TESTCASE_AUTO_CLASS(UnicodeSetTest); TESTCASE_AUTO_CLASS(ErrorCodeTest); +#if U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY + TESTCASE_AUTO_CLASS(UPrefsTest); +#endif TESTCASE_AUTO_CREATE_CLASS(LocalPointerTest); 
TESTCASE_AUTO_CREATE_CLASS(BytesTrieTest); TESTCASE_AUTO_CREATE_CLASS(UCharsTrieTest); diff --git a/icu/icu4c/source/test/intltest/uprefstest.cpp b/icu/icu4c/source/test/intltest/uprefstest.cpp new file mode 100644 index 00000000000..69c5265eed1 --- /dev/null +++ b/icu/icu4c/source/test/intltest/uprefstest.cpp @@ -0,0 +1,438 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +#include "uprefstest.h" +#if U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY + +#define ARRAY_SIZE 512 + +std::wstring UPrefsTest::language = L""; +std::wstring UPrefsTest::currency = L""; +std::wstring UPrefsTest::hourCycle = L""; +int32_t UPrefsTest::firstday = 0; +int32_t UPrefsTest::measureSystem = 0; +CALID UPrefsTest::calendar = 0; + +void UPrefsTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) +{ + if (exec) logln("TestSuite UPrefsTest: "); + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(TestGetDefaultLocaleAsBCP47Tag); + TESTCASE_AUTO(TestBCP47TagWithSorting); + TESTCASE_AUTO(TestBCP47TagChineseSimplified); + TESTCASE_AUTO(TestBCP47TagChineseSortingStroke); + TESTCASE_AUTO(TestBCP47TagJapanCalendar); + TESTCASE_AUTO(TestUseNeededBuffer); + TESTCASE_AUTO(TestGetNeededBuffer); + TESTCASE_AUTO(TestGetUnsupportedSorting); + TESTCASE_AUTO(Get24HourCycleMixed); + TESTCASE_AUTO(Get12HourCycleMixed); + TESTCASE_AUTO(Get12HourCycleMixed2); + TESTCASE_AUTO(Get12HourCycle); + TESTCASE_AUTO(Get12HourCycle2); + TESTCASE_AUTO_END; +} + +int32_t UPrefsTest::MockGetLocaleInfoEx(LPCWSTR lpLocaleName, LCTYPE LCType, LPWSTR lpLCData, + int cchData, UErrorCode *status) +{ + switch (LCType) + { + case LOCALE_SNAME: + if (cchData == 0) + { + *status = U_ZERO_ERROR; + return language.length() + 1; + } + + if (language.length() + 1 > cchData) + { + *status = U_BUFFER_OVERFLOW_ERROR; + return 0; + } + wcsncpy(lpLCData, language.c_str(), cchData); + *status = U_ZERO_ERROR; + return 
language.length(); + + case LOCALE_ICALENDARTYPE | LOCALE_RETURN_NUMBER: + if (cchData == 0) + { + *status = U_ZERO_ERROR; + return 2; + } + if (cchData < 2) + { + *status = U_BUFFER_OVERFLOW_ERROR; + return 0; + } + *(reinterpret_cast(lpLCData)) = calendar; + *status = U_ZERO_ERROR; + return 2; + + case LOCALE_SINTLSYMBOL: + if (cchData == 0) + { + *status = U_ZERO_ERROR; + return currency.length() + 1; + } + if (currency.length() + 1 > cchData) + { + *status = U_BUFFER_OVERFLOW_ERROR; + return 0; + } + wcsncpy(lpLCData, currency.c_str(), cchData); + *status = U_ZERO_ERROR; + return currency.length(); + + case LOCALE_IFIRSTDAYOFWEEK | LOCALE_RETURN_NUMBER: + if (cchData == 0) + { + *status = U_ZERO_ERROR; + return 2; + } + if (cchData < 2) + { + *status = U_BUFFER_OVERFLOW_ERROR; + return 0; + } + + *(reinterpret_cast(lpLCData)) = firstday; + *status = U_ZERO_ERROR; + return 2; + + case LOCALE_STIMEFORMAT: + if (cchData == 0) + { + *status = U_ZERO_ERROR; + return hourCycle.length() + 1; + } + + if (hourCycle.length() + 1 > cchData) + { + *status = U_BUFFER_OVERFLOW_ERROR; + return 0; + } + wcsncpy(lpLCData, hourCycle.c_str(), cchData); + *status = U_ZERO_ERROR; + return 0; + + case LOCALE_IMEASURE | LOCALE_RETURN_NUMBER: + if (cchData == 0) + { + *status = U_ZERO_ERROR; + return 2; + } + + if (cchData < 2) + { + *status = U_BUFFER_OVERFLOW_ERROR; + return 0; + } + *(reinterpret_cast(lpLCData)) = measureSystem; + *status = U_ZERO_ERROR; + return 2; + + default: + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } +} + +// The code above is independent of the library itself, but for the code below this point, +// we need to include the library to be able to use the definitions of the API uprefs_getBCP47Tag +#include "uprefs.cpp" + +void UPrefsTest::TestGetDefaultLocaleAsBCP47Tag() +{ /* Baseline: each mocked preference is expected to surface as a -u- keyword. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"en-US"; + currency = L"USD"; + hourCycle = L"HH:mm:ss"; + firstday = 0; /* expected to map to fw-mon */ + measureSystem = 1; /* expected to map to ms-ussystem */ + calendar = CAL_GREGORIAN; +
UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "en-US-u-ca-gregory-cu-usd-fw-mon-hc-h23-ms-ussystem"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); /* expected result is strlen(expectedValue) + 1 */ + if (resultLength != 52) + { + errln("Expected length to be 52, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::TestBCP47TagWithSorting() +{ /* Sort suffix "_phoneb" is expected to become co-phonebk. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"de-DE_phoneb"; + currency = L"EUR"; + hourCycle = L"HH:mm:ss"; + firstday = 0; + measureSystem = 1; + calendar = CAL_GREGORIAN; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "de-DE-u-ca-gregory-co-phonebk-cu-eur-fw-mon-hc-h23-ms-ussystem"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 63) + { + errln("Expected length to be 63, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::TestBCP47TagChineseSimplified() +{ /* Script subtag (Hans) is expected to be kept; "hh" pattern -> hc-h12. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"zh-Hans-HK"; + currency = L"EUR"; + hourCycle = L"hh:mm:ss"; + firstday = 2; + measureSystem = 1; + calendar = CAL_GREGORIAN; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "zh-Hans-HK-u-ca-gregory-cu-eur-fw-wed-hc-h12-ms-ussystem"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 57) + { + errln("Expected length to be 57, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::TestBCP47TagChineseSortingStroke() +{ /* Sort suffix "_stroke" is expected to become co-stroke. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"zh-SG_stroke"; + currency = L"EUR"; + hourCycle = L"hh:mm:ss"; + firstday = 2; + measureSystem = 0; + calendar = 
CAL_GREGORIAN; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "zh-SG-u-ca-gregory-co-stroke-cu-eur-fw-wed-hc-h12-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 60) + { + errln("Expected length to be 60, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::TestBCP47TagJapanCalendar() +{ /* CAL_JAPAN is expected to map to ca-japanese. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"ja-JP"; + currency = L"MXN"; + hourCycle = L"hh:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_JAPAN; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "ja-JP-u-ca-japanese-cu-mxn-fw-tue-hc-h12-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::TestUseNeededBuffer() +{ /* Fills a buffer sized by the (nullptr, 0) size query. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"ja-JP"; + currency = L"MXN"; + hourCycle = L"hh:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "ja-JP-u-ca-buddhist-cu-mxn-fw-tue-hc-h12-ms-metric"; + + int32_t neededBufferSize = uprefs_getBCP47Tag(nullptr, 0, &status); + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, neededBufferSize, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::TestGetNeededBuffer() +{ /* Checks the value reported by the (nullptr, 0) size query itself. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"zh-SG_stroke"; + currency = L"MXN"; + hourCycle = L"hh:mm:ss"; + firstday = 
1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "zh-SG-u-ca-buddhist-co-stroke-cu-mxn-fw-tue-hc-h12-ms-metric"; + + int32_t neededBufferSize = uprefs_getBCP47Tag(nullptr, 0, &status); + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, neededBufferSize, &status); /* same call order as before: size query, then fill */ + if ( neededBufferSize != 61) + { + errln("Expected buffer size to be 61, but got: %d\n", neededBufferSize); + } + if (resultLength != 61) + { + errln("Expected length to be 61, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::TestGetUnsupportedSorting() +{ /* Sort suffix "_technl" is expected to produce no co- keyword. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"hu-HU_technl"; + currency = L"MXN"; + hourCycle = L"hh:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "hu-HU-u-ca-buddhist-cu-mxn-fw-tue-hc-h12-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if ( uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::Get24HourCycleMixed() +{ /* Mixed pattern starting with 'H' is expected to give hc-h23. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"ja-JP"; + currency = L"MXN"; + hourCycle = L"HHhh:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "ja-JP-u-ca-buddhist-cu-mxn-fw-tue-hc-h23-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if (uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void 
UPrefsTest::Get12HourCycleMixed() +{ /* Mixed pattern starting with 'h' is expected to give hc-h12. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"ja-JP"; + currency = L"MXN"; + hourCycle = L"hHhH:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "ja-JP-u-ca-buddhist-cu-mxn-fw-tue-hc-h12-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if (uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + + +void UPrefsTest::Get12HourCycleMixed2() +{ /* Quoted literals in the pattern are not expected to change the h12 result. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"ja-JP"; + currency = L"MXN"; + hourCycle = L"hH''h'H'H:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "ja-JP-u-ca-buddhist-cu-mxn-fw-tue-hc-h12-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if (uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::Get12HourCycle() +{ /* A quoted 'H' between h fields is expected to still give hc-h12. */ + char languageBuffer[ARRAY_SIZE] = {0}; + language = L"ja-JP"; + currency = L"MXN"; + hourCycle = L"h'H'h:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "ja-JP-u-ca-buddhist-cu-mxn-fw-tue-hc-h12-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if (uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} + +void UPrefsTest::Get12HourCycle2() +{ /* A leading quoted 'H' is expected to be ignored: hc-h12. */ + char 
languageBuffer[ARRAY_SIZE] = {0}; + language = L"ja-JP"; + currency = L"MXN"; + hourCycle = L"'H'h'H'h:mm:ss"; + firstday = 1; + measureSystem = 0; + calendar = CAL_THAI; + UErrorCode status = U_ZERO_ERROR; + const char* expectedValue = "ja-JP-u-ca-buddhist-cu-mxn-fw-tue-hc-h12-ms-metric"; + int32_t resultLength = uprefs_getBCP47Tag(languageBuffer, ARRAY_SIZE, &status); + if (resultLength != 51) + { + errln("Expected length to be 51, but got: %d\n", resultLength); + } + if (uprv_strcmp(expectedValue, languageBuffer) != 0) + { + errln("Expected BCP47Tag to be %s, but got: %s\n", expectedValue, languageBuffer); + } +} +#endif //U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY \ No newline at end of file diff --git a/icu/icu4c/source/test/intltest/uprefstest.h b/icu/icu4c/source/test/intltest/uprefstest.h new file mode 100644 index 00000000000..2f3e6515b06 --- /dev/null +++ b/icu/icu4c/source/test/intltest/uprefstest.h @@ -0,0 +1,50 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +#ifndef UPREFSTEST_H +#define UPREFSTEST_H + +#include "unicode/platform.h" +#if U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY +// We define UPREFS_TEST to use the mock version of GetLocaleInfoEx(), which +// allows us to simulate its behaviour and determine if the results given by the +// API align with what we expect to receive +#define UPREFS_TEST 1 + + +#include "windows.h" +#include "intltest.h" +#include "uprefs.h" + +class UPrefsTest: public IntlTest { +private: + static std::wstring language; + static std::wstring currency; /* returned by the mock for LOCALE_SINTLSYMBOL */ + static std::wstring hourCycle; /* returned by the mock for LOCALE_STIMEFORMAT */ + static int32_t firstday; /* returned by the mock for LOCALE_IFIRSTDAYOFWEEK */ + static int32_t measureSystem; /* returned by the mock for LOCALE_IMEASURE */ + static CALID calendar; /* returned by the mock for LOCALE_ICALENDARTYPE */ + +public: + UPrefsTest(){}; + virtual ~UPrefsTest(){}; + + virtual void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL) override; + int32_t MockGetLocaleInfoEx(LPCWSTR lpLocaleName, LCTYPE LCType, LPWSTR lpLCData, int 
cchData, UErrorCode* status); + void TestGetDefaultLocaleAsBCP47Tag(); + void TestBCP47TagWithSorting(); + void TestBCP47TagChineseSimplified(); + void TestBCP47TagChineseSortingStroke(); + void TestBCP47TagJapanCalendar(); + void TestUseNeededBuffer(); + void TestGetNeededBuffer(); + void TestGetUnsupportedSorting(); + void Get24HourCycleMixed(); + void Get12HourCycleMixed(); + void Get12HourCycleMixed2(); + void Get12HourCycle(); + void Get12HourCycle2(); +}; + + +#endif //U_PLATFORM_USES_ONLY_WIN32_API && UCONFIG_USE_WINDOWS_PREFERENCES_LIBRARY +#endif //UPREFSTEST_H \ No newline at end of file From f1176071440886a5d21e61ea75ee5f2ed527c7e9 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 13:52:59 +0530 Subject: [PATCH 13/15] chore: bump STRING_STORE_SIZE 120000 -> 200000 for CLDR 48 Patch 018 originally bumped this from 100000 -> 120000 for the CLDR-MS extra-locales overflow at the package-tool stage of data build. CLDR 48 has substantially more locales than CLDR 44 (which 120000 was sized for); prior session evidence (now-deleted branch) suggests 120000 may still overflow at Step 6 data build. Pre-emptively bump to 200000 to avoid a Step 6 rerun on overflow. If the actual measurement at Step 6 shows 120000 was enough, we can revisit post-shipping. Bumping high now is harmless (static array sized at compile time in tool; trivial RAM increase only while makedata runs). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/tools/toolutil/package.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu/icu4c/source/tools/toolutil/package.h b/icu/icu4c/source/tools/toolutil/package.h index 6c411ae5fd0..3a1d3cd23d9 100644 --- a/icu/icu4c/source/tools/toolutil/package.h +++ b/icu/icu4c/source/tools/toolutil/package.h @@ -27,7 +27,7 @@ // .dat package file representation ---------------------------------------- *** -#define STRING_STORE_SIZE 120000 +#define STRING_STORE_SIZE 200000 /* MSFT-Change: raised from 120000 as headroom for CLDR 48 data builds. */ #define MAX_PKG_NAME_LENGTH 64 typedef void CheckDependency(void *context, const char *itemName, const char *targetName); From 9ff77624732f382c8e421ee7ead50308295a07f8 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Thu, 14 May 2026 14:32:44 +0530 Subject: [PATCH 14/15] patch: add and apply 021-MSFT-Patch_ICU_Cldr2Icu_remove_icu4j_directory_validation The new ICU 78 Cldr2Icu Maven/Java toolchain replaces the deleted build-icu-data.xml Ant entry point. Its CLI-options constructor runs validateEnvironment() and System.exit(1)s unless ICU_DIR contains an icu4j/ subdirectory. The microsoft/icu fork is icu4c-only -- there is no icu4j source tree -- so the Step 6 data-build pipeline cannot run without bypassing this check. The runtime Java dependency on icu4j (used by TransformsMapper for Transliterator) is satisfied via the Maven artifact com.ibm.icu:icu4j in ~/.m2, which is unaffected by source-tree absence. This is an MS-only divergence. Both the patch file and the applied source change land here as a single commit; the patch file documents the divergence in icu-patches/patches/ for future upgrades. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...cu_remove_icu4j_directory_validation.patch | 20 +++++++++++++++++++ .../tool/cldrtoicu/Cldr2IcuCliOptions.java | 4 +++- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 icu-patches/patches/021-MSFT-Patch_ICU_Cldr2Icu_remove_icu4j_directory_validation.patch diff --git a/icu-patches/patches/021-MSFT-Patch_ICU_Cldr2Icu_remove_icu4j_directory_validation.patch b/icu-patches/patches/021-MSFT-Patch_ICU_Cldr2Icu_remove_icu4j_directory_validation.patch new file mode 100644 index 00000000000..51be7eba985 --- /dev/null +++ b/icu-patches/patches/021-MSFT-Patch_ICU_Cldr2Icu_remove_icu4j_directory_validation.patch @@ -0,0 +1,20 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Arvind Oruganti +Date: Thu, 14 May 2026 14:30:00 +0530 +Subject: MSFT-PATCH: Remove icu4j directory validation from Cldr2Icu (icu4c-only fork) + +diff --git a/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java b/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java +index d9b46014438..9ff85b38d86 100644 +--- a/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java ++++ b/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java +@@ -380,7 +380,9 @@ private void validateEnvironment() { + + if (!new File(icuDir).isDirectory() + || ! new File(icuDir, "icu4c").isDirectory() +- || ! new File(icuDir, "icu4j").isDirectory() ++ // MSFT-Change: microsoft/icu fork is icu4c-only; no icu4j source tree. ++ // The Maven dependency on icu4j (used by TransformsMapper) is still ++ // resolved from ~/.m2 at runtime, so source-tree absence is harmless. + || ! new File(icuDir, "tools/cldr/cldr-to-icu").isDirectory() + || ! 
new File(icuDir, "tools/cldr/cldr-to-icu/pom.xml").isFile()) { + System.err.println("The `" + icuDir + "` directory does not look like a valid icu root."); diff --git a/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java b/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java index d9b46014438..9ff85b38d86 100644 --- a/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java +++ b/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java @@ -380,7 +380,9 @@ private void validateEnvironment() { if (!new File(icuDir).isDirectory() || ! new File(icuDir, "icu4c").isDirectory() - || ! new File(icuDir, "icu4j").isDirectory() + // MSFT-Change: microsoft/icu fork is icu4c-only; no icu4j source tree. + // The Maven dependency on icu4j (used by TransformsMapper) is still + // resolved from ~/.m2 at runtime, so source-tree absence is harmless. || ! new File(icuDir, "tools/cldr/cldr-to-icu").isDirectory() || ! new File(icuDir, "tools/cldr/cldr-to-icu/pom.xml").isFile()) { System.err.println("The `" + icuDir + "` directory does not look like a valid icu root."); From f64bdac3e3bb54d032876cef376755bb558b50d4 Mon Sep 17 00:00:00 2001 From: Arvind Oruganti Date: Tue, 19 May 2026 13:59:57 +0530 Subject: [PATCH 15/15] MSFT-PATCH: Override ar-SA Saudi Riyal symbol Use the Unicode U+20C1 SAUDI RIYAL SIGN for the ar-SA SAR currency symbol and add a C API regression test for the locale-specific override. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- icu/icu4c/source/data/curr/ar_SA.txt | 10 ++++++---- icu/icu4c/source/test/cintltst/currtest.c | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/icu/icu4c/source/data/curr/ar_SA.txt b/icu/icu4c/source/data/curr/ar_SA.txt index 32526c8458f..2d969daeb3b 100644 --- a/icu/icu4c/source/data/curr/ar_SA.txt +++ b/icu/icu4c/source/data/curr/ar_SA.txt @@ -1,9 +1,11 @@ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // Generated using tools/cldr/cldr-to-icu/ -/** - * generated alias target - */ ar_SA{ - ___{""} + Currencies{ + SAR{ + "⃁", + "ريال سعودي", + } + } } diff --git a/icu/icu4c/source/test/cintltst/currtest.c b/icu/icu4c/source/test/cintltst/currtest.c index d8355133a18..51dff0eb0b3 100644 --- a/icu/icu4c/source/test/cintltst/currtest.c +++ b/icu/icu4c/source/test/cintltst/currtest.c @@ -298,6 +298,26 @@ static void TestNumericCode(void) { } } +static void TestSaudiRiyalSymbol(void) { /* Regression test: ar_SA must report U+20C1 as the SAR currency symbol. */ + UErrorCode status = U_ZERO_ERROR; + UChar currency[4]; /* "SAR" plus its terminating NUL */ + UBool isChoiceFormat = false; + int32_t len = 0; + static const UChar expectedSymbol[] = {0x20C1, 0}; /* U+20C1 SAUDI RIYAL SIGN */ + const UChar* symbol; + + u_charsToUChars("SAR", currency, UPRV_LENGTHOF(currency)); + symbol = ucurr_getName(currency, "ar_SA", UCURR_SYMBOL_NAME, &isChoiceFormat, &len, &status); + if (U_FAILURE(status)) { /* nothing to compare after a failed lookup */ + log_data_err("Error: ucurr_getName returned %s (Are you missing data?)\n", u_errorName(status)); + return; + } + if (isChoiceFormat || len != 1 || symbol == NULL || u_strncmp(symbol, expectedSymbol, len) != 0) { /* expect exactly one code unit equal to U+20C1, not a choice pattern */ + log_err("Error: SAR symbol for ar_SA should be U+20C1. Got length=%d first=U+%04X choice=%s\n", + len, (len > 0 && symbol != NULL) ? symbol[0] : 0, isChoiceFormat ? 
"true" : "false"); + } +} + void addCurrencyTest(TestNode** root); #define TESTCASE(x) addTest(root, &x, "tsformat/currtest/" #x) @@ -310,6 +330,7 @@ void addCurrencyTest(TestNode** root) TESTCASE(TestFractionDigitOverride); TESTCASE(TestPrefixSuffix); TESTCASE(TestNumericCode); + TESTCASE(TestSaudiRiyalSymbol); } #endif /* #if !UCONFIG_NO_FORMATTING */