Skip to content

Commit 2e56a2a

Browse files
authored
Improve handling of no-inheritance-marker in timezone data (#1194)
* Add icu4c-tools * Improve handling of no-inheritance-marker in timezone data Fixes #1192 (but uncovers another bug)
1 parent 2b93a4a commit 2e56a2a

File tree

6 files changed

+161
-2
lines changed

6 files changed

+161
-2
lines changed

babel/dates.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,9 @@ def get_timezone_name(
649649
info = locale.time_zones.get(zone, {})
650650
# Try explicitly translated zone names first
651651
if width in info and zone_variant in info[width]:
652-
return info[width][zone_variant]
652+
value = info[width][zone_variant]
653+
if value != NO_INHERITANCE_MARKER:
654+
return value
653655

654656
metazone = get_global('meta_zones').get(zone)
655657
if metazone:
@@ -660,7 +662,7 @@ def get_timezone_name(
660662
# If the short form is marked no-inheritance,
661663
# try to fall back to the long name instead.
662664
name = metazone_info.get('long', {}).get(zone_variant)
663-
if name:
665+
if name and name != NO_INHERITANCE_MARKER:
664666
return name
665667

666668
# If we have a concrete datetime, we assume that the result can't be

misc/icu4c-tools/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bin/

misc/icu4c-tools/Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
bin/icu4c_date_format: icu4c_date_format.cpp
2+
mkdir -p bin
3+
$(CXX) -Wall -std=c++17 -o $@ $^ $(shell pkg-config --cflags --libs icu-uc icu-i18n)

misc/icu4c-tools/README.md

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# icu4c-tools
2+
3+
Some haphazard tools for cross-checking results between ICU4C and Babel.
4+
These are not meant to be production-ready; for example, they are not guaranteed to be free of memory leaks.
5+
6+
## icu4c_date_format
7+
8+
### Compiling
9+
10+
This worked on my macOS machine; on a Linux machine, you shouldn't need the `PKG_CONFIG_PATH` environment variable.
11+
12+
```
13+
env PKG_CONFIG_PATH="/opt/homebrew/opt/icu4c@76/lib/pkgconfig" make bin/icu4c_date_format
14+
```
15+
16+
### Running
17+
18+
E.g.
19+
20+
```
21+
env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm zz" bin/icu4c_date_format
22+
```
+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <unicode/smpdtfmt.h>
#include <unicode/timezone.h>
5+
6+
// Splits `s` on every occurrence of `delimiter` and returns the pieces in order.
//
// Follows std::getline semantics: an empty input yields no tokens, empty
// pieces between two adjacent delimiters are kept, and a trailing delimiter
// does not produce a trailing empty token.
static std::vector<std::string> split(const std::string &s, char delimiter) {
  std::vector<std::string> tokens;
  std::string token;
  std::istringstream tokenStream(s);
  while (std::getline(tokenStream, token, delimiter)) {
    // getline() clears `token` before every read, so moving out of it is safe
    // and avoids copying each piece.
    tokens.push_back(std::move(token));
  }
  return tokens;
}
15+
16+
// Parses a UTC timestamp written as "yyyy-MM-ddTHH:mm:ssZ"
// (e.g. "2025-03-04T13:53:00Z") into an ICU UDate.
//
// The 'T' and the trailing 'Z' are matched as literals; the formatter's zone
// is pinned to GMT so the fields are read as UTC.
// Prints a diagnostic to stderr and exits with status 1 when the formatter
// cannot be created or the string does not parse.
static UDate parse_time_str(const char *time_str) {
  UErrorCode status = U_ZERO_ERROR;
  // 'HH' is the 24-hour field (0-23). The 12-hour 'hh' field (1-12) misreads
  // 24-hour input such as "12:30:00".
  const icu::UnicodeString fauxISO8601("yyyy-MM-dd'T'HH:mm:ss'Z'");
  // Stack-allocated so the formatter is released when the function returns.
  icu::SimpleDateFormat fmt(fauxISO8601, status);
  if (U_FAILURE(status)) {
    std::cerr << "Failed to create time parser: " << u_errorName(status)
              << std::endl;
    exit(1);
  }
  fmt.setTimeZone(*icu::TimeZone::getGMT());
  const UDate date = fmt.parse(icu::UnicodeString(time_str), status);
  if (U_FAILURE(status)) {
    std::cerr << "Failed to parse time string: " << time_str << std::endl;
    exit(1);
  }
  return date;
}
28+
29+
static std::vector<icu::Locale> parse_locales(const char *locales_str) {
30+
auto locales = std::vector<icu::Locale>{};
31+
for (auto token : split(locales_str, ',')) {
32+
auto loc = icu::Locale(token.c_str());
33+
if (loc.isBogus()) {
34+
std::cerr << "Invalid locale: " << token << std::endl;
35+
exit(1);
36+
}
37+
locales.push_back(loc);
38+
}
39+
return locales;
40+
}
41+
42+
static std::vector<icu::TimeZone *> parse_timezones(const char *timezones_str) {
43+
auto timezones = std::vector<icu::TimeZone *>{};
44+
for (auto token : split(timezones_str, ',')) {
45+
auto tz = icu::TimeZone::createTimeZone(token.c_str());
46+
if (tz == nullptr) {
47+
std::cerr << "Invalid timezone: " << token << std::endl;
48+
exit(1);
49+
}
50+
timezones.push_back(tz);
51+
}
52+
return timezones;
53+
}
54+
55+
int main() {
56+
UErrorCode status = U_ZERO_ERROR;
57+
const char *timezones_str = getenv("TEST_TIMEZONES");
58+
const char *locales_str = getenv("TEST_LOCALES");
59+
const char *time_str = getenv("TEST_TIME");
60+
const char *time_format_str = getenv("TEST_TIME_FORMAT");
61+
62+
if (!timezones_str || !locales_str) {
63+
std::cerr << "Please set TEST_TIMEZONES, TEST_LOCALES environment variables"
64+
<< std::endl;
65+
return 1;
66+
}
67+
68+
if (time_str == nullptr) {
69+
time_str = "2025-03-04T13:53:00Z";
70+
std::cerr << "Defaulting TEST_TIME to " << time_str << std::endl;
71+
}
72+
73+
if (time_format_str == nullptr) {
74+
time_format_str = "z:zz:zzz:zzzz";
75+
std::cerr << "Defaulting TEST_TIME_FORMAT to " << time_format_str
76+
<< std::endl;
77+
}
78+
79+
auto date = parse_time_str(time_str);
80+
auto timezones = parse_timezones(timezones_str);
81+
auto locales = parse_locales(locales_str);
82+
83+
for (auto tz : timezones) {
84+
icu::UnicodeString tzid;
85+
tz->getID(tzid);
86+
std::string tzid_str;
87+
tzid.toUTF8String(tzid_str);
88+
for (auto loc : locales) {
89+
auto fmt = new icu::SimpleDateFormat(time_format_str, loc, status);
90+
fmt->setTimeZone(*tz);
91+
icu::UnicodeString name;
92+
fmt->format(date, name);
93+
std::string result;
94+
name.toUTF8String(result);
95+
std::cout << tzid_str << "\t" << loc.getName() << "\t" << result
96+
<< std::endl;
97+
delete fmt;
98+
}
99+
}
100+
return 0;
101+
}

tests/test_dates.py

+30
Original file line numberDiff line numberDiff line change
@@ -1187,3 +1187,33 @@ def test_issue_1089():
11871187
def test_issue_1162(locale, format, negative, expected):
11881188
delta = timedelta(seconds=10800) * (-1 if negative else +1)
11891189
assert dates.format_timedelta(delta, add_direction=True, format=format, locale=locale) == expected
1190+
1191+
1192+
def test_issue_1192():
    # The exact value returned here is not strictly specified ("get_timezone_name"
    # is not an operation specified as such). Issue #1192 concerned this invocation
    # returning the invalid "no inheritance marker" value; _that_ should never be
    # returned here.
    # IOW, if the "Hawaii-Aleutian Time" below changes with e.g. CLDR updates, that's fine.
    assert dates.get_timezone_name('Pacific/Honolulu', 'short', locale='en_GB') == "Hawaii-Aleutian Time"
1198+
1199+
1200+
@pytest.mark.xfail(reason="z/zz fallback formatting is wrong when timezone name data is absent (see #1192)")
def test_issue_1192_fmt(timezone_getter):
    """
    There is an issue in how we format the fallback for z/zz in the absence of data
    (esp. with the no inheritance marker present).
    This test is marked xfail until that's fixed.
    """
    # Reference output from ICU4C (misc/icu4c-tools):
    # env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm z" bin/icu4c_date_format
    # Defaulting TEST_TIME to 2025-03-04T13:53:00Z
    # Pacific/Honolulu en_US 2025-03-04 3:53 HST
    # Pacific/Honolulu en_GB 2025-03-04 3:53 GMT-10
    # env TEST_TIMEZONES=Pacific/Honolulu TEST_LOCALES=en_US,en_GB TEST_TIME_FORMAT="YYYY-MM-dd H:mm zz" bin/icu4c_date_format
    # Pacific/Honolulu en_US 2025-03-04 3:53 HST
    # Pacific/Honolulu en_GB 2025-03-04 3:53 GMT-10
    tz = timezone_getter("Pacific/Honolulu")
    dt = _localize(tz, datetime(2025, 3, 4, 13, 53, tzinfo=UTC))
    assert dates.format_datetime(dt, "YYYY-MM-dd H:mm z", locale="en_US") == "2025-03-04 3:53 HST"
    assert dates.format_datetime(dt, "YYYY-MM-dd H:mm z", locale="en_GB") == "2025-03-04 3:53 GMT-10"
    assert dates.format_datetime(dt, "YYYY-MM-dd H:mm zz", locale="en_US") == "2025-03-04 3:53 HST"
    assert dates.format_datetime(dt, "YYYY-MM-dd H:mm zz", locale="en_GB") == "2025-03-04 3:53 GMT-10"

0 commit comments

Comments
 (0)