Merge pull request #826 from akx/cldr-40

CLDR 40
python-babel · Apr 8, 2022 · 1b21b57 · 1b21b57
2 parents 6be6b1f + d2b953b
commit 1b21b57
Show file tree

Hide file tree

Showing 7 changed files with 70 additions and 35 deletions.
diff --git a/babel/dates.py b/babel/dates.py
@@ -322,7 +322,7 @@ def get_day_names(width='wide', context='format', locale=LC_TIME):
     >>> get_day_names('short', locale='en_US')[1]
     u'Tu'
     >>> get_day_names('abbreviated', locale='es')[1]
-    u'mar.'
+    u'mar'
     >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
     u'D'
 
@@ -339,7 +339,7 @@ def get_month_names(width='wide', context='format', locale=LC_TIME):
     >>> get_month_names('wide', locale='en_US')[1]
     u'January'
     >>> get_month_names('abbreviated', locale='es')[1]
-    u'ene.'
+    u'ene'
     >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
     u'J'
 

diff --git a/babel/plural.py b/babel/plural.py
@@ -19,7 +19,7 @@
 def extract_operands(source):
     """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.
 
-    The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:
+    The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:
 
     ====== ===============================================================
     Symbol Value
@@ -30,14 +30,16 @@ def extract_operands(source):
     w      number of visible fraction digits in n, without trailing zeros.
     f      visible fractional digits in n, with trailing zeros.
     t      visible fractional digits in n, without trailing zeros.
+    c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
+    e      currently a synonym for ‘c’; however, it may be redefined in the future.
     ====== ===============================================================
 
-    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands
+    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands
 
     :param source: A real number
     :type source: int|float|decimal.Decimal
-    :return: A n-i-v-w-f-t tuple
-    :rtype: tuple[decimal.Decimal, int, int, int, int, int]
+    :return: A n-i-v-w-f-t-c-e tuple
+    :rtype: tuple[decimal.Decimal, int, int, int, int, int, int, int]
     """
     n = abs(source)
     i = int(n)
@@ -69,7 +71,8 @@ def extract_operands(source):
         t = int(no_trailing or 0)
     else:
         v = w = f = t = 0
-    return n, i, v, w, f, t
+    c = e = 0  # TODO: c and e are not supported
+    return n, i, v, w, f, t, c, e
 
 
 class PluralRule(object):
@@ -216,7 +219,7 @@ def to_python(rule):
     to_python_func = _PythonCompiler().compile
     result = [
         'def evaluate(n):',
-        ' n, i, v, w, f, t = extract_operands(n)',
+        ' n, i, v, w, f, t, c, e = extract_operands(n)',
     ]
     for tag, ast in PluralRule.parse(rule).abstract:
         # the str() call is to coerce the tag to the native string.  It's
@@ -317,12 +320,20 @@ def cldr_modulo(a, b):
 class RuleError(Exception):
     """Raised if a rule is malformed."""
 
-_VARS = 'nivwft'
+_VARS = {
+    'n',  # absolute value of the source number.
+    'i',  # integer digits of n.
+    'v',  # number of visible fraction digits in n, with trailing zeros.*
+    'w',  # number of visible fraction digits in n, without trailing zeros.*
+    'f',  # visible fraction digits in n, with trailing zeros.*
+    't',  # visible fraction digits in n, without trailing zeros.*
+    'c',  # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
+    'e',  # currently a synonym for ‘c’; however, it may be redefined in the future.
+}
 
 _RULES = [
     (None, re.compile(r'\s+', re.UNICODE)),
-    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
-                        .format(_VARS))),
+    ('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')),
     ('value', re.compile(r'\d+')),
     ('symbol', re.compile(r'%|,|!=|=')),
     ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE))  # U+2026: ELLIPSIS
@@ -525,6 +536,8 @@ def compile(self, arg):
     compile_w = lambda x: 'w'
     compile_f = lambda x: 'f'
     compile_t = lambda x: 't'
+    compile_c = lambda x: 'c'
+    compile_e = lambda x: 'e'
     compile_value = lambda x, v: str(v)
     compile_and = _binary_compiler('(%s && %s)')
     compile_or = _binary_compiler('(%s || %s)')

diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py
@@ -13,9 +13,10 @@
     from urllib import urlretrieve
 
 
-URL = 'http://unicode.org/Public/cldr/37/core.zip'
-FILENAME = 'cldr-core-37.zip'
-FILESUM = 'ba93f5ba256a61a6f8253397c6c4b1a9b9e77531f013cc7ffa7977b5f7e4da57'
+URL = 'http://unicode.org/Public/cldr/40/cldr-common-40.0.zip'
+FILENAME = 'cldr-common-40.0.zip'
+# Via https://unicode.org/Public/cldr/40/hashes/SHASUM512.txt
+FILESUM = 'b45ea381002210cf5963a2ba52fa45ee4e9b1e80ae1180bcecf61f431d64e4e0faba700b3d56a96a33355deab3abdb8bcbae9222b60a8ca85536476718175645'
 BLKSIZE = 131072
 
 
@@ -53,7 +54,7 @@ def is_good_file(filename):
     if not os.path.isfile(filename):
         log('Local copy \'%s\' not found', filename)
         return False
-    h = hashlib.sha256()
+    h = hashlib.sha512()
     with open(filename, 'rb') as f:
         while 1:
             blk = f.read(BLKSIZE)
@@ -78,7 +79,7 @@ def main():
     show_progress = (False if os.environ.get("BABEL_CLDR_NO_DOWNLOAD_PROGRESS") else sys.stdout.isatty())
 
     while not is_good_file(zip_path):
-        log('Downloading \'%s\'', FILENAME)
+        log("Downloading '%s' from %s", FILENAME, URL)
         if os.path.isfile(zip_path):
             os.remove(zip_path)
         urlretrieve(URL, zip_path, (reporthook if show_progress else None))

diff --git a/tests/test_dates.py b/tests/test_dates.py
@@ -548,14 +548,14 @@ def test_get_period_names():
 def test_get_day_names():
     assert dates.get_day_names('wide', locale='en_US')[1] == u'Tuesday'
     assert dates.get_day_names('short', locale='en_US')[1] == u'Tu'
-    assert dates.get_day_names('abbreviated', locale='es')[1] == u'mar.'
+    assert dates.get_day_names('abbreviated', locale='es')[1] == u'mar'
     de = dates.get_day_names('narrow', context='stand-alone', locale='de_DE')
     assert de[1] == u'D'
 
 
 def test_get_month_names():
     assert dates.get_month_names('wide', locale='en_US')[1] == u'January'
-    assert dates.get_month_names('abbreviated', locale='es')[1] == u'ene.'
+    assert dates.get_month_names('abbreviated', locale='es')[1] == u'ene'
     de = dates.get_month_names('narrow', context='stand-alone', locale='de_DE')
     assert de[1] == u'J'
 
@@ -834,7 +834,7 @@ def test_lithuanian_long_format():
 
 def test_zh_TW_format():
     # Refs GitHub issue #378
-    assert dates.format_time(datetime(2016, 4, 8, 12, 34, 56), locale='zh_TW') == u'\u4e0b\u534812:34:56'
+    assert dates.format_time(datetime(2016, 4, 8, 12, 34, 56), locale='zh_TW') == u'B12:34:56'
 
 
 def test_format_current_moment():

diff --git a/tests/test_numbers.py b/tests/test_numbers.py
@@ -170,13 +170,13 @@ def test_group_separator(self):
 
         self.assertEqual(u'29,567.12', numbers.format_decimal(29567.12,
                                                             locale='en_US', group_separator=True))
-        self.assertEqual(u'29\u202f567,12', numbers.format_decimal(29567.12,
+        self.assertEqual(u'29\xa0567,12', numbers.format_decimal(29567.12,
                                                             locale='fr_CA', group_separator=True))
         self.assertEqual(u'29.567,12', numbers.format_decimal(29567.12,
                                                             locale='pt_BR', group_separator=True))
         self.assertEqual(u'$1,099.98', numbers.format_currency(1099.98, 'USD',
                                                               locale='en_US', group_separator=True))
-        self.assertEqual(u'101\u202f299,98\xa0\u20ac', numbers.format_currency(101299.98, 'EUR',
+        self.assertEqual(u'101\xa0299,98\xa0\u20ac', numbers.format_currency(101299.98, 'EUR',
                                                                     locale='fr_CA', group_separator=True))
         self.assertEqual(u'101,299.98 euros', numbers.format_currency(101299.98, 'EUR',
                                                                     locale='en_US', group_separator=True,

diff --git a/tests/test_plural.py b/tests/test_plural.py
@@ -255,13 +255,15 @@ def test_or_and(self):
 
 @pytest.mark.parametrize('source,n,i,v,w,f,t', EXTRACT_OPERANDS_TESTS)
 def test_extract_operands(source, n, i, v, w, f, t):
-    e_n, e_i, e_v, e_w, e_f, e_t = plural.extract_operands(source)
+    e_n, e_i, e_v, e_w, e_f, e_t, e_c, e_e = plural.extract_operands(source)
     assert abs(e_n - decimal.Decimal(n)) <= EPSILON  # float-decimal conversion inaccuracy
     assert e_i == i
     assert e_v == v
     assert e_w == w
     assert e_f == f
     assert e_t == t
+    assert not e_c  # Not supported at present
+    assert not e_e  # Not supported at present
 
 
 @pytest.mark.parametrize('locale', ('ru', 'pl'))

diff --git a/tests/test_smoke.py b/tests/test_smoke.py
@@ -6,32 +6,51 @@
 we ship.
 """
 import decimal
-from datetime import datetime
-
+import datetime
 import pytest
-from babel import Locale
-from babel import dates
-from babel import numbers
+
+from babel import Locale, units, dates, numbers
+
+NUMBERS = (
+    decimal.Decimal("-33.76"),  # Negative Decimal
+    decimal.Decimal("13.37"),  # Positive Decimal
+    1.2 - 1.0,  # Inaccurate float
+    10,  # Plain old integer
+    0,  # Zero
+)
 
 
 @pytest.mark.all_locales
 def test_smoke_dates(locale):
     locale = Locale.parse(locale)
-    instant = datetime.now()
+    instant = datetime.datetime.now()
     for width in ("full", "long", "medium", "short"):
         assert dates.format_date(instant, format=width, locale=locale)
         assert dates.format_datetime(instant, format=width, locale=locale)
         assert dates.format_time(instant, format=width, locale=locale)
+    # Interval test
+    past = instant - datetime.timedelta(hours=23)
+    assert dates.format_interval(past, instant, locale=locale)
+    # Duration test - at the time of writing, all locales seem to have `short` width,
+    # so let's test that.
+    duration = instant - instant.replace(hour=0, minute=0, second=0)
+    for granularity in ('second', 'minute', 'hour', 'day'):
+        assert dates.format_timedelta(duration, granularity=granularity, format="short", locale=locale)
 
 
 @pytest.mark.all_locales
 def test_smoke_numbers(locale):
     locale = Locale.parse(locale)
-    for number in (
-        decimal.Decimal("-33.76"),  # Negative Decimal
-        decimal.Decimal("13.37"),  # Positive Decimal
-        1.2 - 1.0,  # Inaccurate float
-        10,  # Plain old integer
-        0,  # Zero
-    ):
+    for number in NUMBERS:
         assert numbers.format_decimal(number, locale=locale)
+        assert numbers.format_currency(number, "EUR", locale=locale)
+        assert numbers.format_scientific(number, locale=locale)
+        assert numbers.format_percent(number / 100, locale=locale)
+
+
+@pytest.mark.all_locales
+def test_smoke_units(locale):
+    locale = Locale.parse(locale)
+    for unit in ('length-meter', 'mass-kilogram', 'energy-calorie', 'volume-liter'):
+        for number in NUMBERS:
+            assert units.format_unit(number, measurement_unit=unit, locale=locale)