Library import 16 (#2433)

Co-authored-by: robot-piglet <robot-piglet@yandex-team.com> Co-authored-by: deshevoy <deshevoy@yandex-team.com> Co-authored-by: robot-contrib <robot-contrib@yandex-team.com> Co-authored-by: thegeorg <thegeorg@yandex-team.com> Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com> Co-authored-by: svidyuk <svidyuk@yandex-team.com> Co-authored-by: shadchin <shadchin@yandex-team.com> Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com> Co-authored-by: innokentii <innokentii@yandex-team.com> Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com> Co-authored-by: snermolaev <snermolaev@yandex-team.com> Co-authored-by: dimdim11 <dimdim11@yandex-team.com> Co-authored-by: kickbutt <kickbutt@yandex-team.com> Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com> Co-authored-by: korsunandrei <korsunandrei@yandex-team.com> Co-authored-by: petrk <petrk@yandex-team.com> Co-authored-by: miroslav2 <miroslav2@yandex-team.com> Co-authored-by: serjflint <serjflint@yandex-team.com> Co-authored-by: akhropov <akhropov@yandex-team.com> Co-authored-by: prettyboy <prettyboy@yandex-team.com> Co-authored-by: ilikepugs <ilikepugs@yandex-team.com> Co-authored-by: hiddenpath <hiddenpath@yandex-team.com> Co-authored-by: mikhnenko <mikhnenko@yandex-team.com> Co-authored-by: spreis <spreis@yandex-team.com> Co-authored-by: andreyshspb <andreyshspb@yandex-team.com> Co-authored-by: dimaandreev <dimaandreev@yandex-team.com> Co-authored-by: rashid <rashid@yandex-team.com> Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com> Co-authored-by: r-vetrov <r-vetrov@yandex-team.com> Co-authored-by: ypodlesov <ypodlesov@yandex-team.com> Co-authored-by: zaverden <zaverden@yandex-team.com> Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com> Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com> Co-authored-by: v-korovin <v-korovin@yandex-team.com> Co-authored-by: arikon <arikon@yandex-team.com> Co-authored-by: khoden <khoden@yandex-team.com> 
Co-authored-by: psydmm <psydmm@yandex-team.com> Co-authored-by: robot-javacom <robot-javacom@yandex-team.com> Co-authored-by: dtorilov <dtorilov@yandex-team.com> Co-authored-by: sennikovmv <sennikovmv@yandex-team.com> Co-authored-by: hcpp <hcpp@ydb.tech>
author: AlexSm <alex@ydb.tech> 2024-03-05 10:40:59 +0100
committer: GitHub <noreply@github.com> 2024-03-05 12:40:59 +0300
commit: 1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree: 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/email/_parseaddr.py
parent: ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download: ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz
1 files changed, 557 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/email/_parseaddr.py b/contrib/tools/python3/Lib/email/_parseaddr.py
new file mode 100644
index 0000000000..febe411355
--- /dev/null
+++ b/contrib/tools/python3/Lib/email/_parseaddr.py
@@ -0,0 +1,557 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Email address parsing code.
+
+Lifted directly from rfc822.py.  This should eventually be rewritten.
+"""
+
+__all__ = [
+    'mktime_tz',
+    'parsedate',
+    'parsedate_tz',
+    'quote',
+    ]
+
+import time, calendar
+
+# String constants; SPACE and EMPTYSTRING serve as joiners in the address
+# parser below.
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
+
+# Parse a date field
+
+# Lower-case month names.  Indexes 0-11 hold the three-letter abbreviations
+# and indexes 12-23 the full names, which is why _parsedate_tz() subtracts 12
+# from any index-derived month number greater than 12.
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+               'aug', 'sep', 'oct', 'nov', 'dec',
+               'january', 'february', 'march', 'april', 'may', 'june', 'july',
+               'august', 'september', 'october', 'november', 'december']
+
+# Lower-case day-of-week abbreviations; _parsedate_tz() compares the
+# lower-cased first token of a date against this list.
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+# The timezone table does not include the military time zones defined
+# in RFC822, other than Z.  According to RFC1123, the description in
+# RFC822 gets the signs wrong, so we can't rely on any such time
+# zones.  RFC1123 recommends that numeric timezone indicators be used
+# instead of timezone names.
+
+# Values are signed offsets written as decimal HHMM (for example -500 means
+# five hours behind UTC); _parsedate_tz() converts them to seconds.  Keys are
+# upper case because the parsed zone name is upper-cased before lookup.
+_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
+              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
+              'EST': -500, 'EDT': -400,  # Eastern
+              'CST': -600, 'CDT': -500,  # Central
+              'MST': -700, 'MDT': -600,  # Mountain
+              'PST': -800, 'PDT': -700   # Pacific
+              }
+
+
+def parsedate_tz(data):
+    """Convert a date string to a 10-item time tuple.
+
+    Returns None when _parsedate_tz() cannot parse `data'.  Otherwise the
+    list produced by _parsedate_tz() is returned as a tuple, with a None
+    timezone offset (item 9) replaced by 0.
+
+    Of the RFC 822 military timezones only 'Z' is recognized; see the
+    comment above the _timezones table.
+    """
+    res = _parsedate_tz(data)
+    if not res:
+        # Bare return: the caller receives None for unparsable input.
+        return
+    if res[9] is None:
+        # The public API reports an unknown / "-0000" zone as offset 0.
+        res[9] = 0
+    return tuple(res)
+
+def _parsedate_tz(data):
+    """Convert date to extended time tuple.
+
+    The last (additional) element is the time zone offset in seconds, except if
+    the timezone was specified as -0000.  In that case the last element is
+    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
+    the source timezone, as opposed to a +0000 timestamp that indicates the
+    source timezone really was UTC.
+
+    The last element is also None when the zone field is missing or is
+    neither a key of _timezones nor parsable as an integer.
+
+    Returns None if `data' is empty or cannot be parsed; otherwise returns a
+    list [year, month, day, hour, minute, second, 0, 1, -1, tzoffset].
+
+    """
+    if not data:
+        return None
+    data = data.split()
+    if not data:  # This happens for whitespace-only input.
+        return None
+    # The FWS after the comma after the day-of-week is optional, so search and
+    # adjust for this.
+    if data[0].endswith(',') or data[0].lower() in _daynames:
+        # There's a dayname here. Skip it
+        del data[0]
+    else:
+        # Day name glued to the day number (e.g. "Mon,5"): keep only what
+        # follows the last comma.
+        i = data[0].rfind(',')
+        if i >= 0:
+            data[0] = data[0][i+1:]
+    if len(data) == 3: # RFC 850 date, deprecated
+        # The date is a single "dd-mon-yy" token; split it into three fields.
+        stuff = data[0].split('-')
+        if len(stuff) == 3:
+            data = stuff + data[1:]
+    if len(data) == 4:
+        # Only four fields: the zone may be glued to the time, so split the
+        # last field at the first '+' or, failing that, the first '-'.
+        s = data[3]
+        i = s.find('+')
+        if i == -1:
+            i = s.find('-')
+        if i > 0:
+            data[3:] = [s[:i], s[i:]]
+        else:
+            data.append('') # Dummy tz
+    if len(data) < 5:
+        return None
+    # Anything after the fifth field is ignored.
+    data = data[:5]
+    [dd, mm, yy, tm, tz] = data
+    if not (dd and mm and yy):
+        return None
+    mm = mm.lower()
+    if mm not in _monthnames:
+        # Accept "month day" order by swapping the first two fields.
+        dd, mm = mm, dd.lower()
+        if mm not in _monthnames:
+            return None
+    # Map the list index to a month number; full names sit at indexes 12-23.
+    mm = _monthnames.index(mm) + 1
+    if mm > 12:
+        mm -= 12
+    if dd[-1] == ',':
+        dd = dd[:-1]
+    # A ':' in the year slot means the time and year fields are transposed.
+    i = yy.find(':')
+    if i > 0:
+        yy, tm = tm, yy
+    if yy[-1] == ',':
+        yy = yy[:-1]
+        if not yy:
+            return None
+    # A year slot that does not start with a digit is treated as the zone,
+    # and the zone slot as the year.
+    if not yy[0].isdigit():
+        yy, tz = tz, yy
+    if tm[-1] == ',':
+        tm = tm[:-1]
+    tm = tm.split(':')
+    if len(tm) == 2:
+        [thh, tmm] = tm
+        tss = '0'
+    elif len(tm) == 3:
+        [thh, tmm, tss] = tm
+    elif len(tm) == 1 and '.' in tm[0]:
+        # Some non-compliant MUAs use '.' to separate time elements.
+        tm = tm[0].split('.')
+        if len(tm) == 2:
+            [thh, tmm] = tm
+            # An int here (a str in the ':' branch above); int() below
+            # accepts either.
+            tss = 0
+        elif len(tm) == 3:
+            [thh, tmm, tss] = tm
+        else:
+            return None
+    else:
+        return None
+    try:
+        yy = int(yy)
+        dd = int(dd)
+        thh = int(thh)
+        tmm = int(tmm)
+        tss = int(tss)
+    except ValueError:
+        return None
+    # Check for a yy specified in two-digit format, then convert it to the
+    # appropriate four-digit format, according to the POSIX standard. RFC 822
+    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
+    # mandates a 4-digit yy. For more information, see the documentation for
+    # the time module.
+    if yy < 100:
+        # The year is between 1969 and 1999 (inclusive).
+        if yy > 68:
+            yy += 1900
+        # The year is between 2000 and 2068 (inclusive).
+        else:
+            yy += 2000
+    tzoffset = None
+    tz = tz.upper()
+    if tz in _timezones:
+        tzoffset = _timezones[tz]
+    else:
+        try:
+            tzoffset = int(tz)
+        except ValueError:
+            # Unknown or empty zone: tzoffset stays None.
+            pass
+        # A numeric zero written with a leading '-' (e.g. "-0000") means
+        # "zone unknown", reported as None rather than 0.
+        if tzoffset==0 and tz.startswith('-'):
+            tzoffset = None
+    # Convert a timezone offset into seconds ; -0500 -> -18000
+    # (None and 0 are both falsy and are left untouched.)
+    if tzoffset:
+        if tzoffset < 0:
+            tzsign = -1
+            tzoffset = -tzoffset
+        else:
+            tzsign = 1
+        # tzoffset is decimal HHMM: hundreds are hours, remainder is minutes.
+        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
+    # Daylight Saving Time flag is set to -1, since DST is unknown.
+    # The weekday (0) and day-of-year (1) items are fixed placeholders; they
+    # are not computed from the date.
+    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
+
+
+def parsedate(data):
+    """Convert a time string to a time tuple.
+
+    Returns the first nine items of the parsedate_tz() result (the timezone
+    offset is dropped), or whatever parsedate_tz() returned when that is not
+    a tuple (None for unparsable input).
+    """
+    t = parsedate_tz(data)
+    if isinstance(t, tuple):
+        return t[:9]
+    else:
+        return t
+
+
+def mktime_tz(data):
+    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.
+
+    `data[9]' is the timezone offset in seconds, or None if unknown.  Tuples
+    produced by parsedate_tz() never carry None there (it substitutes 0), so
+    the first branch is only reached for tuples built elsewhere.
+    """
+    if data[9] is None:
+        # No zone info, so localtime is better assumption than GMT
+        # The slice is concatenated with a tuple, so `data' must itself be a
+        # tuple here; the trailing -1 is the "DST unknown" flag.
+        return time.mktime(data[:8] + (-1,))
+    else:
+        # timegm() interprets the fields as UTC; subtracting the offset
+        # shifts the result from the sender's wall-clock time to UTC.
+        t = calendar.timegm(data)
+        return t - data[9]
+
+
+def quote(str):
+    """Prepare string to be used in a quoted string.
+
+    Turns backslash and double quote characters into quoted pairs.  These
+    are the only characters that need to be quoted inside a quoted string.
+    Does not add the surrounding double quotes.
+
+    Note: the parameter name shadows the builtin `str' inside this function.
+    """
+    # Backslashes are doubled first so the backslashes introduced when
+    # escaping double quotes are not themselves doubled.
+    return str.replace('\\', '\\\\').replace('"', '\\"')
+
+
+class AddrlistClass:
+    """Address parser class by Ben Escoto.
+
+    To understand what this class does, it helps to have a copy of RFC 2822 in
+    front of you.
+
+    The parser is a cursor over `self.field': every get*() method reads from
+    `self.pos' and advances it past what it consumed.  Comments encountered
+    along the way are collected in `self.commentlist'.
+
+    Note: this class interface is deprecated and may be removed in the future.
+    Use email.utils.AddressList instead.
+    """
+
+    def __init__(self, field):
+        """Initialize a new instance.
+
+        `field' is an unparsed address header field, containing
+        one or more addresses.
+        """
+        # Characters that terminate an atom because they have syntactic
+        # meaning of their own.
+        self.specials = '()<>@,:;.\"[]'
+        # Current read position within self.field.
+        self.pos = 0
+        self.LWS = ' \t'
+        self.CR = '\r\n'
+        self.FWS = self.LWS + self.CR
+        self.atomends = self.specials + self.LWS + self.CR
+        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
+        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
+        # syntax, so allow dots in phrases.
+        self.phraseends = self.atomends.replace('.', '')
+        self.field = field
+        # Comment texts gathered while parsing the current address.
+        self.commentlist = []
+
+    def gotonext(self):
+        """Skip white space and extract comments.
+
+        Comments are appended to self.commentlist.  Returns the skipped
+        spaces and tabs joined into one string; CR and LF are skipped but not
+        included in the return value.
+        """
+        wslist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS + '\n\r':
+                if self.field[self.pos] not in '\n\r':
+                    wslist.append(self.field[self.pos])
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                # getcomment() advances self.pos past the comment.
+                self.commentlist.append(self.getcomment())
+            else:
+                break
+        return EMPTYSTRING.join(wslist)
+
+    def getaddrlist(self):
+        """Parse all addresses.
+
+        Returns a list containing all of the addresses.  Each entry is a
+        2-tuple as built by getaddress(); when getaddress() yields nothing,
+        the placeholder ('', '') is appended instead.
+        """
+        result = []
+        while self.pos < len(self.field):
+            ad = self.getaddress()
+            if ad:
+                result += ad
+            else:
+                result.append(('', ''))
+        return result
+
+    def getaddress(self):
+        """Parse the next address.
+
+        Returns a list of 2-tuples (a group can contribute several
+        entries), or an empty list if nothing could be parsed.  A trailing
+        ',' separator, if present, is consumed.
+        """
+        self.commentlist = []
+        self.gotonext()
+
+        oldpos = self.pos
+        # NOTE(review): this binds the same list object rather than copying
+        # it, so comments appended by getphraselist() below are still present
+        # after the "restore" in the addr-spec branch -- confirm whether a
+        # copy was intended.
+        oldcl = self.commentlist
+        plist = self.getphraselist()
+
+        self.gotonext()
+        returnlist = []
+
+        if self.pos >= len(self.field):
+            # Bad email address technically, no domain.
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+
+        elif self.field[self.pos] in '.@':
+            # email address is just an addrspec
+            # this isn't very efficient since we start over
+            self.pos = oldpos
+            self.commentlist = oldcl
+            addrspec = self.getaddrspec()
+            returnlist = [(SPACE.join(self.commentlist), addrspec)]
+
+        elif self.field[self.pos] == ':':
+            # address is a group
+            returnlist = []
+
+            fieldlen = len(self.field)
+            self.pos += 1
+            # Collect member addresses recursively until the closing ';'
+            # or the end of the field.
+            while self.pos < len(self.field):
+                self.gotonext()
+                if self.pos < fieldlen and self.field[self.pos] == ';':
+                    self.pos += 1
+                    break
+                returnlist = returnlist + self.getaddress()
+
+        elif self.field[self.pos] == '<':
+            # Address is a phrase then a route addr
+            routeaddr = self.getrouteaddr()
+
+            if self.commentlist:
+                # Append the collected comments, parenthesized, to the
+                # display phrase.
+                returnlist = [(SPACE.join(plist) + ' (' +
+                               ' '.join(self.commentlist) + ')', routeaddr)]
+            else:
+                returnlist = [(SPACE.join(plist), routeaddr)]
+
+        else:
+            if plist:
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
+            elif self.field[self.pos] in self.specials:
+                # Step over a stray special character so the caller's loop
+                # keeps making progress.
+                self.pos += 1
+
+        self.gotonext()
+        if self.pos < len(self.field) and self.field[self.pos] == ',':
+            self.pos += 1
+        return returnlist
+
+    def getrouteaddr(self):
+        """Parse a route address (Return-path value).
+
+        This method just skips all the route stuff and returns the addrspec.
+        Returns None if the current character is not '<', and '' if no
+        addr-spec is found before '>' or the end of the field.
+        """
+        if self.field[self.pos] != '<':
+            return
+
+        # True right after an '@' that introduces a route domain to skip.
+        expectroute = False
+        self.pos += 1
+        self.gotonext()
+        adlist = ''
+        while self.pos < len(self.field):
+            if expectroute:
+                # Route domain: parsed only to advance past it.
+                self.getdomain()
+                expectroute = False
+            elif self.field[self.pos] == '>':
+                self.pos += 1
+                break
+            elif self.field[self.pos] == '@':
+                self.pos += 1
+                expectroute = True
+            elif self.field[self.pos] == ':':
+                self.pos += 1
+            else:
+                adlist = self.getaddrspec()
+                # Step over the character following the addr-spec
+                # (presumably the closing '>').
+                self.pos += 1
+                break
+            self.gotonext()
+
+        return adlist
+
+    def getaddrspec(self):
+        """Parse an RFC 2822 addr-spec.
+
+        Returns the local part alone when no '@' follows it, the full
+        "local@domain" string otherwise, or '' when the domain is empty or
+        rejected by getdomain().
+        """
+        aslist = []
+
+        self.gotonext()
+        while self.pos < len(self.field):
+            # Whitespace skipped after this token is kept unless the token
+            # is a '.'.
+            preserve_ws = True
+            if self.field[self.pos] == '.':
+                # Drop whitespace that was recorded just before the dot.
+                if aslist and not aslist[-1].strip():
+                    aslist.pop()
+                aslist.append('.')
+                self.pos += 1
+                preserve_ws = False
+            elif self.field[self.pos] == '"':
+                # Re-quote the unescaped content returned by getquote().
+                aslist.append('"%s"' % quote(self.getquote()))
+            elif self.field[self.pos] in self.atomends:
+                # End of the local part: discard trailing whitespace.
+                if aslist and not aslist[-1].strip():
+                    aslist.pop()
+                break
+            else:
+                aslist.append(self.getatom())
+            ws = self.gotonext()
+            if preserve_ws and ws:
+                aslist.append(ws)
+
+        if self.pos >= len(self.field) or self.field[self.pos] != '@':
+            return EMPTYSTRING.join(aslist)
+
+        aslist.append('@')
+        self.pos += 1
+        self.gotonext()
+        domain = self.getdomain()
+        if not domain:
+            # Invalid domain, return an empty address instead of returning a
+            # local part to denote failed parsing.
+            return EMPTYSTRING
+        return EMPTYSTRING.join(aslist) + domain
+
+    def getdomain(self):
+        """Get the complete domain name from an address.
+
+        Spaces and tabs are skipped, comments are moved to
+        self.commentlist, and '' is returned if a second '@' is met.
+        """
+        sdlist = []
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.LWS:
+                self.pos += 1
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] == '[':
+                sdlist.append(self.getdomainliteral())
+            elif self.field[self.pos] == '.':
+                self.pos += 1
+                sdlist.append('.')
+            elif self.field[self.pos] == '@':
+                # bpo-34155: Don't parse domains with two `@` like
+                # `a@malicious.org@important.com`.
+                return EMPTYSTRING
+            elif self.field[self.pos] in self.atomends:
+                break
+            else:
+                sdlist.append(self.getatom())
+        return EMPTYSTRING.join(sdlist)
+
+    def getdelimited(self, beginchar, endchars, allowcomments=True):
+        """Parse a header fragment delimited by special characters.
+
+        `beginchar' is the start character for the fragment.
+        If self is not looking at an instance of `beginchar' then
+        getdelimited returns the empty string.
+
+        `endchars' is a sequence of allowable end-delimiting characters.
+        Parsing stops when one of these is encountered.
+
+        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
+        within the parsed fragment.
+
+        The returned text excludes the delimiters, and backslash escapes are
+        resolved: the backslash is dropped and the next character is kept.
+        """
+        if self.field[self.pos] != beginchar:
+            return ''
+
+        slist = ['']
+        # Local flag: True when the previous character was a backslash.
+        # It shadows the module-level quote() function inside this method.
+        quote = False
+        self.pos += 1
+        while self.pos < len(self.field):
+            if quote:
+                # Escaped character: taken literally, even an end delimiter.
+                slist.append(self.field[self.pos])
+                quote = False
+            elif self.field[self.pos] in endchars:
+                self.pos += 1
+                break
+            elif allowcomments and self.field[self.pos] == '(':
+                slist.append(self.getcomment())
+                continue        # have already advanced pos from getcomment
+            elif self.field[self.pos] == '\\':
+                quote = True
+            else:
+                slist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(slist)
+
+    def getquote(self):
+        """Get a quote-delimited fragment from self's field."""
+        # A carriage return also terminates the fragment.
+        return self.getdelimited('"', '"\r', False)
+
+    def getcomment(self):
+        """Get a parenthesis-delimited fragment from self's field."""
+        # Nested comments are allowed (allowcomments=True).
+        return self.getdelimited('(', ')\r', True)
+
+    def getdomainliteral(self):
+        """Parse an RFC 2822 domain-literal."""
+        # The brackets stripped by getdelimited() are added back.
+        return '[%s]' % self.getdelimited('[', ']\r', False)
+
+    def getatom(self, atomends=None):
+        """Parse an RFC 2822 atom.
+
+        Optional atomends specifies a different set of end token delimiters
+        (the default is to use self.atomends).  This is used e.g. in
+        getphraselist() since phrase endings must not include the `.' (which
+        is legal in phrases)."""
+        atomlist = ['']
+        if atomends is None:
+            atomends = self.atomends
+
+        # Consume characters up to, but not including, the first delimiter.
+        while self.pos < len(self.field):
+            if self.field[self.pos] in atomends:
+                break
+            else:
+                atomlist.append(self.field[self.pos])
+            self.pos += 1
+
+        return EMPTYSTRING.join(atomlist)
+
+    def getphraselist(self):
+        """Parse a sequence of RFC 2822 phrases.
+
+        A phrase is a sequence of words, which are in turn either RFC 2822
+        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
+        runs of continuous whitespace into one space.
+
+        Returns the list of words; comments met on the way are appended to
+        self.commentlist rather than to the result.
+        """
+        plist = []
+
+        while self.pos < len(self.field):
+            if self.field[self.pos] in self.FWS:
+                self.pos += 1
+            elif self.field[self.pos] == '"':
+                plist.append(self.getquote())
+            elif self.field[self.pos] == '(':
+                self.commentlist.append(self.getcomment())
+            elif self.field[self.pos] in self.phraseends:
+                break
+            else:
+                # Use phraseends so that dots stay inside the word.
+                plist.append(self.getatom(self.phraseends))
+
+        return plist
+
+class AddressList(AddrlistClass):
+    """An AddressList encapsulates a list of parsed RFC 2822 addresses.
+
+    The parsed entries are kept in `self.addresslist'.  The arithmetic
+    operators implement set-like union and difference on that list.
+    """
+    def __init__(self, field):
+        AddrlistClass.__init__(self, field)
+        # A falsy field (None or '') yields an empty list without parsing.
+        if field:
+            self.addresslist = self.getaddrlist()
+        else:
+            self.addresslist = []
+
+    def __len__(self):
+        return len(self.addresslist)
+
+    def __add__(self, other):
+        # Set union
+        # NOTE(review): membership is tested against self.addresslist only,
+        # so an entry repeated within `other' and absent from self is
+        # appended each time it occurs -- confirm this is intended.
+        newaddr = AddressList(None)
+        newaddr.addresslist = self.addresslist[:]
+        for x in other.addresslist:
+            if not x in self.addresslist:
+                newaddr.addresslist.append(x)
+        return newaddr
+
+    def __iadd__(self, other):
+        # Set union, in-place
+        for x in other.addresslist:
+            if not x in self.addresslist:
+                self.addresslist.append(x)
+        return self
+
+    def __sub__(self, other):
+        # Set difference
+        newaddr = AddressList(None)
+        for x in self.addresslist:
+            if not x in other.addresslist:
+                newaddr.addresslist.append(x)
+        return newaddr
+
+    def __isub__(self, other):
+        # Set difference, in-place
+        # list.remove() drops only the first matching entry per item of
+        # `other'.
+        for x in other.addresslist:
+            if x in self.addresslist:
+                self.addresslist.remove(x)
+        return self
+
+    def __getitem__(self, index):
+        # Make indexing, slices, and 'in' work
+        return self.addresslist[index]
author	AlexSm <alex@ydb.tech>	2024-03-05 10:40:59 +0100
committer	GitHub <noreply@github.com>	2024-03-05 12:40:59 +0300
commit	1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree	07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/email/_parseaddr.py
parent	ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download	ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz