diff options
author | shadchin <shadchin@yandex-team.com> | 2024-12-23 19:39:02 +0300 |
---|---|---|
committer | shadchin <shadchin@yandex-team.com> | 2024-12-23 19:54:20 +0300 |
commit | 65a5bf9d37a3b29eb394f560b9a09318196c40e8 (patch) | |
tree | e5cd68fb0682b2388e52d9806bb87adc348e21a8 /contrib/tools/python3/Lib/_strptime.py | |
parent | a1dd87a52878ab3e46e5fd2dba5ecbba6113d7e0 (diff) | |
download | ydb-65a5bf9d37a3b29eb394f560b9a09318196c40e8.tar.gz |
Update Python 3 to 3.12.8
commit_hash:c20045b8a987d8720e1f3328270357491d5530f3
Diffstat (limited to 'contrib/tools/python3/Lib/_strptime.py')
-rw-r--r-- | contrib/tools/python3/Lib/_strptime.py | 183 |
1 files changed, 142 insertions, 41 deletions
diff --git a/contrib/tools/python3/Lib/_strptime.py b/contrib/tools/python3/Lib/_strptime.py index 798cf9f9d3..dfd2bc5d8b 100644 --- a/contrib/tools/python3/Lib/_strptime.py +++ b/contrib/tools/python3/Lib/_strptime.py @@ -14,6 +14,7 @@ import time import locale import calendar from re import compile as re_compile +from re import sub as re_sub from re import IGNORECASE from re import escape as re_escape from datetime import (date as datetime_date, @@ -27,6 +28,18 @@ def _getlang(): # Figure out what the current language is set to. return locale.getlocale(locale.LC_TIME) +def _findall(haystack, needle): + # Find all positions of needle in haystack. + if not needle: + return + i = 0 + while True: + i = haystack.find(needle, i) + if i < 0: + break + yield i + i += len(needle) + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -101,7 +114,8 @@ class LocaleTime(object): am_pm = [] for hour in (1, 22): time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) + # br_FR has AM/PM info (' ',' '). + am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm def __calc_date_time(self): @@ -113,42 +127,130 @@ class LocaleTime(object): # values within the format string is very important; it eliminates # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. - ('2', '%w'), ('10', '%I')] - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): - current_format = date_time[offset] - for old, new in replacement_pairs: + time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) + replacement_pairs = [ + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. + ('2', '%w'), ('10', '%I'), + # Non-ASCII digits + ('\u0661\u0669\u0669\u0669', '%Y'), + ('\u0669\u0669', '%Oy'), + ('\u0662\u0662', '%OH'), + ('\u0664\u0664', '%OM'), + ('\u0665\u0665', '%OS'), + ('\u0661\u0667', '%Od'), + ('\u0660\u0663', '%Om'), + ('\u0663', '%Om'), + ('\u0662', '%Ow'), + ('\u0661\u0660', '%OI'), + ] + date_time = [] + for directive in ('%c', '%x', '%X'): + current_format = time.strftime(directive, time_tuple).lower() + current_format = current_format.replace('%', '%%') + # The month and the day of the week formats are treated specially + # because of a possible ambiguity in some locales where the full + # and abbreviated names are equal or names of different types + # are equal. See doc of __find_month_format for more details. + lst, fmt = self.__find_weekday_format(directive) + if lst: + current_format = current_format.replace(lst[2], fmt, 1) + lst, fmt = self.__find_month_format(directive) + if lst: + current_format = current_format.replace(lst[3], fmt, 1) + if self.am_pm[1]: # Must deal with possible lack of locale info # manifesting itself as the empty string (e.g., Swedish's # lack of AM/PM info) or a platform returning a tuple of empty # strings (e.g., MacOS 9 having timezone as ('','')). - if old: - current_format = current_format.replace(old, new) + current_format = current_format.replace(self.am_pm[1], '%p') + for tz_values in self.timezone: + for tz in tz_values: + if tz: + current_format = current_format.replace(tz, "%Z") + # Transform all non-ASCII digits to digits in range U+0660 to U+0669. + current_format = re_sub(r'\d(?<![0-9])', + lambda m: chr(0x0660 + int(m[0])), + current_format) + for old, new in replacement_pairs: + current_format = current_format.replace(old, new) # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since # 2005-01-03 occurs before the first Monday of the year. Otherwise # %U is used. - time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0)) - if '00' in time.strftime(directive, time_tuple): + if '00' in time.strftime(directive, time_tuple2): U_W = '%W' else: U_W = '%U' - date_time[offset] = current_format.replace('11', U_W) + current_format = current_format.replace('11', U_W) + date_time.append(current_format) self.LC_date_time = date_time[0] self.LC_date = date_time[1] self.LC_time = date_time[2] + def __find_month_format(self, directive): + """Find the month format appropriate for the current locale. + + In some locales (for example French and Hebrew), the default month + used in __calc_date_time has the same name in full and abbreviated + form. Also, the month name can by accident match other part of the + representation: the day of the week name (for example in Morisyen) + or the month number (for example in Japanese). Thus, cycle months + of the year and find all positions that match the month name for + each month, If no common positions are found, the representation + does not use the month name. + """ + full_indices = abbr_indices = None + for m in range(1, 13): + time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_month[m])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices + indices = set(_findall(datetime, self.a_month[m])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None, None + if full_indices: + return self.f_month, '%B' + if abbr_indices: + return self.a_month, '%b' + return None, None + + def __find_weekday_format(self, directive): + """Find the day of the week format appropriate for the current locale. + + Similar to __find_month_format(). + """ + full_indices = abbr_indices = None + for wd in range(7): + time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, wd, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_weekday[wd])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices + if self.f_weekday[wd] != self.a_weekday[wd]: + indices = set(_findall(datetime, self.a_weekday[wd])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None, None + if full_indices: + return self.f_weekday, '%A' + if abbr_indices: + return self.a_weekday, '%a' + return None, None + def __calc_timezone(self): # Set self.timezone by using time.tzname. # Do not worry about possibility of time.tzname[0] == time.tzname[1] @@ -181,12 +283,12 @@ class TimeRE(dict): else: self.locale_time = LocaleTime() base = super() - base.__init__({ + mapping = { # The " [1-9]" part of the regex is to make %c from ANSI C work 'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P<f>[0-9]{1,6})", 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)", - 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])", + 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P<G>\d\d\d\d)", 'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])", @@ -210,11 +312,15 @@ class TimeRE(dict): 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), 'Z'), - '%': '%'}) - base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + '%': '%'} + for d in 'dmyHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P<w>\d)' + mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) def __seqToRE(self, to_convert, directive): """Convert a list to a regex string for matching a directive. @@ -242,21 +348,16 @@ class TimeRE(dict): regex syntax are escaped. """ - processed_format = '' # The sub() call escapes all characters that might be misconstrued # as regex syntax. Cannot use re.escape since we have to deal with # format directives (%m, etc.). - regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile(r'\s+') - format = whitespace_replacement.sub(r'\\s+', format) - while '%' in format: - directive_index = format.index('%')+1 - processed_format = "%s%s%s" % (processed_format, - format[:directive_index-1], - self[format[directive_index]]) - format = format[directive_index+1:] - return "%s%s" % (processed_format, format) + format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format) + format = re_sub(r'\s+', r'\\s+', format) + format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR + def repl(m): + return self[m[1]] + format = re_sub(r'%(O?.)', repl, format) + return format def compile(self, format): """Return a compiled re object for the format string.""" |