about summary refs log tree commit diff stats
path: root/contrib/tools/python3/Lib/_strptime.py
diff options
context:
space:
mode:
author shadchin <shadchin@yandex-team.com> 2024-12-23 19:39:02 +0300
committer shadchin <shadchin@yandex-team.com> 2024-12-23 19:54:20 +0300
commit 65a5bf9d37a3b29eb394f560b9a09318196c40e8 (patch)
tree e5cd68fb0682b2388e52d9806bb87adc348e21a8 /contrib/tools/python3/Lib/_strptime.py
parent a1dd87a52878ab3e46e5fd2dba5ecbba6113d7e0 (diff)
download ydb-65a5bf9d37a3b29eb394f560b9a09318196c40e8.tar.gz
Update Python 3 to 3.12.8
commit_hash:c20045b8a987d8720e1f3328270357491d5530f3
Diffstat (limited to 'contrib/tools/python3/Lib/_strptime.py')
-rw-r--r-- contrib/tools/python3/Lib/_strptime.py 183
1 files changed, 142 insertions, 41 deletions
diff --git a/contrib/tools/python3/Lib/_strptime.py b/contrib/tools/python3/Lib/_strptime.py
index 798cf9f9d3..dfd2bc5d8b 100644
--- a/contrib/tools/python3/Lib/_strptime.py
+++ b/contrib/tools/python3/Lib/_strptime.py
@@ -14,6 +14,7 @@ import time
import locale
import calendar
from re import compile as re_compile
+from re import sub as re_sub
from re import IGNORECASE
from re import escape as re_escape
from datetime import (date as datetime_date,
@@ -27,6 +28,18 @@ def _getlang():
# Figure out what the current language is set to.
return locale.getlocale(locale.LC_TIME)
+def _findall(haystack, needle):
+ # Find all positions of needle in haystack.
+ if not needle:
+ return
+ i = 0
+ while True:
+ i = haystack.find(needle, i)
+ if i < 0:
+ break
+ yield i
+ i += len(needle)
+
class LocaleTime(object):
"""Stores and handles locale-specific information related to time.
@@ -101,7 +114,8 @@ class LocaleTime(object):
am_pm = []
for hour in (1, 22):
time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
- am_pm.append(time.strftime("%p", time_tuple).lower())
+ # br_FR has AM/PM info (' ',' ').
+ am_pm.append(time.strftime("%p", time_tuple).lower().strip())
self.am_pm = am_pm
def __calc_date_time(self):
@@ -113,42 +127,130 @@ class LocaleTime(object):
# values within the format string is very important; it eliminates
# possible ambiguity for what something represents.
time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
- date_time = [None, None, None]
- date_time[0] = time.strftime("%c", time_tuple).lower()
- date_time[1] = time.strftime("%x", time_tuple).lower()
- date_time[2] = time.strftime("%X", time_tuple).lower()
- replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'),
- (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
- (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
- ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
- ('44', '%M'), ('55', '%S'), ('76', '%j'),
- ('17', '%d'), ('03', '%m'), ('3', '%m'),
- # '3' needed for when no leading zero.
- ('2', '%w'), ('10', '%I')]
- replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone
- for tz in tz_values])
- for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')):
- current_format = date_time[offset]
- for old, new in replacement_pairs:
+ time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0))
+ replacement_pairs = [
+ ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
+ ('44', '%M'), ('55', '%S'), ('76', '%j'),
+ ('17', '%d'), ('03', '%m'), ('3', '%m'),
+ # '3' needed for when no leading zero.
+ ('2', '%w'), ('10', '%I'),
+ # Non-ASCII digits
+ ('\u0661\u0669\u0669\u0669', '%Y'),
+ ('\u0669\u0669', '%Oy'),
+ ('\u0662\u0662', '%OH'),
+ ('\u0664\u0664', '%OM'),
+ ('\u0665\u0665', '%OS'),
+ ('\u0661\u0667', '%Od'),
+ ('\u0660\u0663', '%Om'),
+ ('\u0663', '%Om'),
+ ('\u0662', '%Ow'),
+ ('\u0661\u0660', '%OI'),
+ ]
+ date_time = []
+ for directive in ('%c', '%x', '%X'):
+ current_format = time.strftime(directive, time_tuple).lower()
+ current_format = current_format.replace('%', '%%')
+ # The month and the day of the week formats are treated specially
+ # because of a possible ambiguity in some locales where the full
+ # and abbreviated names are equal or names of different types
+ # are equal. See doc of __find_month_format for more details.
+ lst, fmt = self.__find_weekday_format(directive)
+ if lst:
+ current_format = current_format.replace(lst[2], fmt, 1)
+ lst, fmt = self.__find_month_format(directive)
+ if lst:
+ current_format = current_format.replace(lst[3], fmt, 1)
+ if self.am_pm[1]:
# Must deal with possible lack of locale info
# manifesting itself as the empty string (e.g., Swedish's
# lack of AM/PM info) or a platform returning a tuple of empty
# strings (e.g., MacOS 9 having timezone as ('','')).
- if old:
- current_format = current_format.replace(old, new)
+ current_format = current_format.replace(self.am_pm[1], '%p')
+ for tz_values in self.timezone:
+ for tz in tz_values:
+ if tz:
+ current_format = current_format.replace(tz, "%Z")
+ # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
+ current_format = re_sub(r'\d(?<![0-9])',
+ lambda m: chr(0x0660 + int(m[0])),
+ current_format)
+ for old, new in replacement_pairs:
+ current_format = current_format.replace(old, new)
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
# 2005-01-03 occurs before the first Monday of the year. Otherwise
# %U is used.
- time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0))
- if '00' in time.strftime(directive, time_tuple):
+ if '00' in time.strftime(directive, time_tuple2):
U_W = '%W'
else:
U_W = '%U'
- date_time[offset] = current_format.replace('11', U_W)
+ current_format = current_format.replace('11', U_W)
+ date_time.append(current_format)
self.LC_date_time = date_time[0]
self.LC_date = date_time[1]
self.LC_time = date_time[2]
+ def __find_month_format(self, directive):
+ """Find the month format appropriate for the current locale.
+
+ In some locales (for example French and Hebrew), the default month
+ used in __calc_date_time has the same name in full and abbreviated
+ form. Also, the month name can by accident match another part of the
+ representation: the day of the week name (for example in Morisyen)
+ or the month number (for example in Japanese). Thus, cycle months
+ of the year and find all positions that match the month name for
+ each month. If no common positions are found, the representation
+ does not use the month name.
+ """
+ full_indices = abbr_indices = None
+ for m in range(1, 13):
+ time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0))
+ datetime = time.strftime(directive, time_tuple).lower()
+ indices = set(_findall(datetime, self.f_month[m]))
+ if full_indices is None:
+ full_indices = indices
+ else:
+ full_indices &= indices
+ indices = set(_findall(datetime, self.a_month[m]))
+ if abbr_indices is None:
+ abbr_indices = indices
+ else:
+ abbr_indices &= indices
+ if not full_indices and not abbr_indices:
+ return None, None
+ if full_indices:
+ return self.f_month, '%B'
+ if abbr_indices:
+ return self.a_month, '%b'
+ return None, None
+
+ def __find_weekday_format(self, directive):
+ """Find the day of the week format appropriate for the current locale.
+
+ Similar to __find_month_format().
+ """
+ full_indices = abbr_indices = None
+ for wd in range(7):
+ time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, wd, 76, 0))
+ datetime = time.strftime(directive, time_tuple).lower()
+ indices = set(_findall(datetime, self.f_weekday[wd]))
+ if full_indices is None:
+ full_indices = indices
+ else:
+ full_indices &= indices
+ if self.f_weekday[wd] != self.a_weekday[wd]:
+ indices = set(_findall(datetime, self.a_weekday[wd]))
+ if abbr_indices is None:
+ abbr_indices = indices
+ else:
+ abbr_indices &= indices
+ if not full_indices and not abbr_indices:
+ return None, None
+ if full_indices:
+ return self.f_weekday, '%A'
+ if abbr_indices:
+ return self.a_weekday, '%a'
+ return None, None
+
def __calc_timezone(self):
# Set self.timezone by using time.tzname.
# Do not worry about possibility of time.tzname[0] == time.tzname[1]
@@ -181,12 +283,12 @@ class TimeRE(dict):
else:
self.locale_time = LocaleTime()
base = super()
- base.__init__({
+ mapping = {
# The " [1-9]" part of the regex is to make %c from ANSI C work
'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
'f': r"(?P<f>[0-9]{1,6})",
'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
- 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
+ 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
'G': r"(?P<G>\d\d\d\d)",
'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
@@ -210,11 +312,15 @@ class TimeRE(dict):
'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
for tz in tz_names),
'Z'),
- '%': '%'})
- base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
- base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
- base.__setitem__('x', self.pattern(self.locale_time.LC_date))
+ '%': '%'}
+ for d in 'dmyHIMS':
+ mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
+ mapping['Ow'] = r'(?P<w>\d)'
+ mapping['W'] = mapping['U'].replace('U', 'W')
+ base.__init__(mapping)
base.__setitem__('X', self.pattern(self.locale_time.LC_time))
+ base.__setitem__('x', self.pattern(self.locale_time.LC_date))
+ base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
def __seqToRE(self, to_convert, directive):
"""Convert a list to a regex string for matching a directive.
@@ -242,21 +348,16 @@ class TimeRE(dict):
regex syntax are escaped.
"""
- processed_format = ''
# The sub() call escapes all characters that might be misconstrued
# as regex syntax. Cannot use re.escape since we have to deal with
# format directives (%m, etc.).
- regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
- format = regex_chars.sub(r"\\\1", format)
- whitespace_replacement = re_compile(r'\s+')
- format = whitespace_replacement.sub(r'\\s+', format)
- while '%' in format:
- directive_index = format.index('%')+1
- processed_format = "%s%s%s" % (processed_format,
- format[:directive_index-1],
- self[format[directive_index]])
- format = format[directive_index+1:]
- return "%s%s" % (processed_format, format)
+ format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format)
+ format = re_sub(r'\s+', r'\\s+', format)
+ format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR
+ def repl(m):
+ return self[m[1]]
+ format = re_sub(r'%(O?.)', repl, format)
+ return format
def compile(self, format):
"""Return a compiled re object for the format string."""