add kikimr_configure

author: shmel1k <shmel1k@ydb.tech> 2023-11-26 18:16:14 +0300
committer: shmel1k <shmel1k@ydb.tech> 2023-11-26 18:43:30 +0300
commit: b8cf9e88f4c5c64d9406af533d8948deb050d695 (patch)
tree: 218eb61fb3c3b96ec08b4d8cdfef383104a87d63 /contrib/python/Twisted/py2/twisted/web/sux.py
parent: 523f645a83a0ec97a0332dbc3863bb354c92a328 (diff)
download: ydb-b8cf9e88f4c5c64d9406af533d8948deb050d695.tar.gz
1 files changed, 637 insertions, 0 deletions
diff --git a/contrib/python/Twisted/py2/twisted/web/sux.py b/contrib/python/Twisted/py2/twisted/web/sux.py
new file mode 100644
index 0000000000..6d248d3aa1
--- /dev/null
+++ b/contrib/python/Twisted/py2/twisted/web/sux.py
@@ -0,0 +1,637 @@
+# -*- test-case-name: twisted.web.test.test_xml -*-
+#
+# Copyright (c) Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+
+"""
+*S*mall, *U*ncomplicated *X*ML.
+
+This is a very simple implementation of XML/HTML as a network
+protocol.  It is not at all clever.  Its main features are that it
+does not:
+
+  - support namespaces
+  - mung mnemonic entity references
+  - validate
+  - perform *any* external actions (such as fetching URLs or writing files)
+    under *any* circumstances
+  - (it does, however, have lots and lots of horrible hacks for supporting
+    broken HTML; they are optional and not on by default).
+"""
+
+from __future__ import print_function
+
+from twisted.internet.protocol import Protocol
+from twisted.python.compat import unicode
+from twisted.python.reflect import prefixedMethodNames
+
+
+
+# Elements of the three-tuples in the state table: every state maps to a
+# (begin handler, do handler, end handler) tuple and these are the indices
+# into it.
+BEGIN_HANDLER = 0
+DO_HANDLER = 1
+END_HANDLER = 2
+
+# Punctuation accepted, besides alphanumerics, in tag and attribute names.
+identChars = '.-_:'
+# Extra punctuation tolerated in attribute names and unquoted attribute
+# values when the parser runs with beExtremelyLenient set.
+lenientIdentChars = identChars + ';+#/%~'
+
+def nop(*args, **kw):
+    """
+    Accept any arguments, do nothing, and return C{None}.
+    """
+    return None
+
+
+def unionlist(*args):
+    """
+    Return a list holding every distinct item of the given iterables once.
+
+    Items keep the order in which they are first seen.  A real list is
+    returned (rather than C{dict.keys()}, which is only a view on Python 3
+    and has no defined order on Python 2) so that callers can index, sort
+    or concatenate the result.
+
+    @param args: any number of iterables of hashable items.
+    @return: a new C{list} of the distinct items.
+    """
+    seen = {}
+    result = []
+    for items in args:
+        for item in items:
+            if item not in seen:
+                seen[item] = 1
+                result.append(item)
+    return result
+
+
+def zipfndict(*args, **kw):
+    """
+    Merge several name -> callable dictionaries into one dictionary that maps
+    every name to a tuple holding one entry per input dictionary.
+
+    Where an input dictionary lacks a name, the C{default} keyword argument
+    (L{nop} unless given) takes its place in the tuple.
+    """
+    default = kw.get('default', nop)
+    allKeys = unionlist(*[fndict.keys() for fndict in args])
+    return dict([(key, tuple([fndict.get(key, default) for fndict in args]))
+                 for key in allKeys])
+
+
+def prefixedMethodClassDict(clazz, prefix):
+    """
+    Map each name reported by L{prefixedMethodNames} for C{clazz} and
+    C{prefix} to the attribute called C{prefix + name} on C{clazz}.
+    """
+    found = {}
+    for name in prefixedMethodNames(clazz, prefix):
+        found[name] = getattr(clazz, prefix + name)
+    return found
+
+
+def prefixedMethodObjDict(obj, prefix):
+    """
+    Map each name reported by L{prefixedMethodNames} for the class of C{obj}
+    and C{prefix} to the attribute called C{prefix + name} on C{obj} itself.
+    """
+    found = {}
+    for name in prefixedMethodNames(obj.__class__, prefix):
+        found[name] = getattr(obj, prefix + name)
+    return found
+
+
+class ParseError(Exception):
+    """
+    Raised when the parser meets input it cannot accept.
+
+    @ivar filename: name of the document being parsed.
+    @ivar line: line number at which the problem was found.
+    @ivar col: column number at which the problem was found.
+    @ivar message: description of the problem.
+    """
+
+    def __init__(self, filename, line, col, message):
+        # Hand the arguments to Exception as well so that C{args} is
+        # populated: without this repr() shows nothing useful and the
+        # exception cannot be rebuilt by pickle/copy, which call the class
+        # with C{args}.
+        Exception.__init__(self, filename, line, col, message)
+        self.filename = filename
+        self.line = line
+        self.col = col
+        self.message = message
+
+    def __str__(self):
+        return "%s:%s:%s: %s" % (self.filename, self.line, self.col,
+                                 self.message)
+
+class XMLParser(Protocol):
+    """
+    A small, character-at-a-time XML/HTML parser written as a state machine.
+
+    Each state C{X} is implemented by up to three methods, C{begin_X(byte)},
+    C{do_X(byte)} and C{end_X()}.  A C{do_} method returns the name of the
+    state to switch to, or C{None} to stay in the current one.  Parsed items
+    are reported through the C{got*} methods.
+    """
+
+    # Name of the current state; unset until the first dataReceived call.
+    state = None
+    # Encodings applied to incoming data by _decode; connectionMade replaces
+    # this with a fresh list.
+    encodings = None
+    # Name reported in ParseError messages.
+    filename = "<xml />"
+    # When true, many malformed-HTML constructs are tolerated instead of
+    # raising ParseError.
+    beExtremelyLenient = 0
+    # Byte prefix (the UTF-16 byte order mark seen by dataReceived) that
+    # _decode puts back in front of every chunk.
+    _prepend = None
+
+    # _leadingBodyData will sometimes be set before switching to the
+    # 'bodydata' state, when we "accidentally" read a byte of bodydata
+    # in a different state.
+    _leadingBodyData = None
+
+    def connectionMade(self):
+        """
+        Reset the position counters and the encoding list for a new document.
+        """
+        self.encodings = []
+        self.lineno, self.colno = 1, 0
+
+    def saveMark(self):
+        """
+        Get the line number and column of the last character parsed.
+        """
+        # dataReceived swaps this method for a closure over its local
+        # counters while it runs and restores it afterwards.
+        position = (self.lineno, self.colno)
+        return position
+
+    def _parseError(self, message):
+        """
+        Raise a L{ParseError} built from the file name, the current mark and
+        C{message}.
+        """
+        errorArgs = (self.filename,) + self.saveMark() + (message,)
+        raise ParseError(*errorArgs)
+
+    def _buildStateTable(self):
+        """
+        Return a dictionary mapping each state name to its
+        C{(begin, do, end)} tuple of handlers bound to this parser.
+
+        A state lacking one of the three handlers gets L{nop} in that slot
+        (the default used by L{zipfndict}).
+        """
+        # Discovering the handler names is the slow, reflective part, so it
+        # is cached once per class.  The previous cache never hit: it was
+        # stored through the name-mangled attribute _XMLParser__stateTable
+        # but looked up with the unmangled string '__stateTable'.  Only the
+        # names are cached, never bound methods, because those belong to one
+        # instance; and the class's own __dict__ is consulted so a subclass
+        # does not pick up the names cached for its parent.  Handlers added
+        # to a class after its first parse are therefore not noticed.
+        cls = self.__class__
+        cached = cls.__dict__.get('_stateHandlerNames')
+        if cached is None:
+            cached = tuple([(prefix, tuple(prefixedMethodNames(cls, prefix)))
+                            for prefix in ('begin_', 'do_', 'end_')])
+            cls._stateHandlerNames = cached
+        return zipfndict(
+            *[dict([(name, getattr(self, prefix + name)) for name in names])
+              for prefix, names in cached])
+
+    def _decode(self, data):
+        """
+        Decode a received chunk with every encoding in C{self.encodings}, in
+        order, after putting any saved prefix back in front of it.
+        """
+        for wideEncoding in ('UTF-16', 'UCS-2'):
+            if wideEncoding in self.encodings:
+                assert not len(data) & 1, 'UTF-16 must come in pairs for now'
+                break
+        if self._prepend:
+            data = self._prepend + data
+        decoded = data
+        for encoding in self.encodings:
+            decoded = unicode(decoded, encoding)
+        return decoded
+
+    def maybeBodyData(self):
+        """
+        Pick the state that follows a tag: 'waitforendscript' after an
+        opening C{<script>} tag without a C{src} attribute, otherwise
+        'bodydata'.
+        """
+        # Everything between <script> and </script> is treated as text so
+        # that things like <script>if (foo < bar)</script> work.  A
+        # <script src="foo"> keeps the regular, lenient behaviour because it
+        # may not have a matching </script>.
+        inlineScript = (not self.endtag
+                        and self.tagName == 'script'
+                        and 'src' not in self.tagAttributes)
+        if not inlineScript:
+            return 'bodydata'
+        # The body buffer is reset here rather than in a
+        # begin_waitforendscript handler, because that state can be entered
+        # several times and the buffer must only be reset the first time.
+        self.begin_bodydata(None)
+        return 'waitforendscript'
+
+
+
+    def dataReceived(self, data):
+        """
+        Decode a chunk of input and feed it, one character at a time, through
+        the state machine.
+
+        On the first call (while C{self.state} is still unset) a UTF-16 byte
+        order mark, if present, is stripped and remembered, and the machine
+        is put in the 'begin' state.
+
+        @param data: the bytes received.
+        """
+        stateTable = self._buildStateTable()
+        if not self.state:
+            # all UTF-16 starts with this string
+            if data.startswith((b'\xff\xfe', b'\xfe\xff')):
+                # keep the byte order mark so _decode can put it back in
+                # front of this and every later chunk
+                self._prepend = data[0:2]
+                self.encodings.append('UTF-16')
+                data = data[2:]
+            self.state = 'begin'
+        if self.encodings:
+            data = self._decode(data)
+        else:
+            # no encoding recorded: the chunk is decoded as UTF-8 on its own
+            data = data.decode("utf-8")
+        # bring state, lineno, colno into local scope
+        lineno, colno = self.lineno, self.colno
+        curState = self.state
+        # replace saveMark with a nested scope function
+        _saveMark = self.saveMark
+        def saveMark():
+            return (lineno, colno)
+        self.saveMark = saveMark
+        # fetch functions from the stateTable
+        beginFn, doFn, endFn = stateTable[curState]
+        try:
+            for byte in data:
+                # do newline stuff
+                if byte == u'\n':
+                    lineno += 1
+                    colno = 0
+                else:
+                    colno += 1
+                newState = doFn(byte)
+                if newState is not None and newState != curState:
+                    # this is the endFn from the previous state
+                    endFn()
+                    curState = newState
+                    beginFn, doFn, endFn = stateTable[curState]
+                    # the character that caused the switch goes to the new
+                    # state's begin handler, not to its do handler
+                    beginFn(byte)
+        finally:
+            # restore the real saveMark and publish the position even when a
+            # handler raised
+            self.saveMark = _saveMark
+            self.lineno, self.colno = lineno, colno
+        # state doesn't make sense if there's an exception..
+        self.state = curState
+
+
+    def connectionLost(self, reason):
+        """
+        Run the end handler of whichever state the parser stopped in.
+        """
+        handlers = self._buildStateTable()[self.state]
+        finish = handlers[END_HANDLER]
+        finish()
+
+
+    # state methods
+
+    def do_begin(self, byte):
+        """
+        Skip leading whitespace and expect the document to open with '<'.
+
+        In lenient mode any other first character is kept as the start of
+        body text; otherwise it is a parse error.
+        """
+        if byte.isspace():
+            return None
+        if byte == '<':
+            return 'tagstart'
+        if self.beExtremelyLenient:
+            self._leadingBodyData = byte
+            return 'bodydata'
+        self._parseError("First char of document [%r] wasn't <" % (byte,))
+        return 'tagstart'
+
+    def begin_comment(self, byte):
+        """
+        Start collecting a comment with an empty buffer.
+        """
+        self.commentbuf = ''
+
+    def do_comment(self, byte):
+        """
+        Collect comment text; once it ends with '-->' report everything
+        before that terminator through C{gotComment} and return to body text.
+        """
+        self.commentbuf += byte
+        if not self.commentbuf.endswith('-->'):
+            return None
+        self.gotComment(self.commentbuf[:-3])
+        return 'bodydata'
+
+    def begin_tagstart(self, byte):
+        """
+        Reset the per-tag bookkeeping before a new tag is read.
+        """
+        # flags first: closing tag seen / tag is self-terminating
+        self.endtag = 0
+        self.termtag = 0
+        # then the collected attributes and the tag name
+        self.tagAttributes = {}
+        self.tagName = ''
+
+    def do_tagstart(self, byte):
+        """
+        Handle one character directly after '<', while the tag name is being
+        read, and return the next state (or C{None} to stay here).
+        """
+        if byte.isalnum() or byte in identChars:
+            self.tagName += byte
+            # "<!--" opens a comment
+            if self.tagName == '!--':
+                return 'comment'
+        elif byte.isspace():
+            if self.tagName:
+                if self.endtag:
+                    # properly strict thing to do here is probably to only
+                    # accept whitespace
+                    return 'waitforgt'
+                return 'attrs'
+            else:
+                self._parseError("Whitespace before tag-name")
+        elif byte == '>':
+            if self.endtag:
+                self.gotTagEnd(self.tagName)
+                return 'bodydata'
+            else:
+                self.gotTagStart(self.tagName, {})
+                # only lenient mode looks for the special <script> handling
+                return (not self.beExtremelyLenient) and 'bodydata' or self.maybeBodyData()
+        elif byte == '/':
+            if self.tagName:
+                # slash after the name: self-terminating tag such as <foo/>
+                return 'afterslash'
+            else:
+                # slash before the name: closing tag such as </foo>
+                self.endtag = 1
+        elif byte in '!?':
+            if self.tagName:
+                if not self.beExtremelyLenient:
+                    self._parseError("Invalid character in tag-name")
+            else:
+                # leading '!' or '?' becomes part of the tag name
+                self.tagName += byte
+                self.termtag = 1
+        elif byte == '[':
+            # "<![" may only start a CDATA section
+            if self.tagName == '!':
+                return 'expectcdata'
+            else:
+                self._parseError("Invalid '[' in tag-name")
+        else:
+            if self.beExtremelyLenient:
+                # treat the '<' as literal text instead of the start of a tag
+                self.bodydata = '<'
+                return 'unentity'
+            self._parseError('Invalid tag character: %r'% byte)
+
+    def begin_unentity(self, byte):
+        """
+        Append the character that triggered this state to the body text.
+        """
+        self.bodydata += byte
+
+    def do_unentity(self, byte):
+        """
+        Append one more character to the body text and go back to 'bodydata'.
+        """
+        self.bodydata += byte
+        return 'bodydata'
+
+    def end_unentity(self):
+        """
+        Report the accumulated body text through C{gotText}.
+        """
+        self.gotText(self.bodydata)
+
+    def begin_expectcdata(self, byte):
+        """
+        Start the CDATA-header buffer with the character that triggered this
+        state.
+        """
+        self.cdatabuf = byte
+
+    def do_expectcdata(self, byte):
+        """
+        Check, character by character, that the input spells out '[CDATA['
+        and switch to 'cdata' once it is complete.
+        """
+        self.cdatabuf += byte
+        cdb = self.cdatabuf
+        cd = '[CDATA['
+        if len(cd) > len(cdb):
+            # header still incomplete: fine as long as it is a prefix
+            if cd.startswith(cdb):
+                return
+            elif self.beExtremelyLenient:
+                ## WHAT THE CRAP!?  MSWord9 generates HTML that includes these
+                ## bizarre <![if !foo]> <![endif]> chunks, so I've gotta ignore
+                ## 'em as best I can.  this should really be a separate parse
+                ## state but I don't even have any idea what these _are_.
+                return 'waitforgt'
+            else:
+                self._parseError("Mal-formed CDATA header")
+        if cd == cdb:
+            # header complete: reuse the buffer for the section's content
+            self.cdatabuf = ''
+            return 'cdata'
+        self._parseError("Mal-formed CDATA header")
+
+    def do_cdata(self, byte):
+        """
+        Collect CDATA content until the ']]>' terminator, which is dropped
+        from the buffer before returning to 'bodydata'.
+        """
+        collected = self.cdatabuf + byte
+        if collected.endswith("]]>"):
+            self.cdatabuf = collected[:-3]
+            return 'bodydata'
+        self.cdatabuf = collected
+
+    def end_cdata(self):
+        """
+        Report the collected CDATA through C{gotCData} and clear the buffer.
+        """
+        self.gotCData(self.cdatabuf)
+        self.cdatabuf = ''
+
+    def do_attrs(self, byte):
+        """
+        Handle one character between a tag's name and its closing '>', where
+        attributes may start.
+        """
+        if byte.isalnum() or byte in identChars:
+            # XXX FIXME really handle !DOCTYPE at some point
+            if self.tagName == '!DOCTYPE':
+                return 'doctype'
+            # other '!' and '?' tags: skip everything up to the closing '>'
+            if self.tagName[0] in '!?':
+                return 'waitforgt'
+            return 'attrname'
+        elif byte.isspace():
+            return
+        elif byte == '>':
+            self.gotTagStart(self.tagName, self.tagAttributes)
+            # only lenient mode looks for the special <script> handling
+            return (not self.beExtremelyLenient) and 'bodydata' or self.maybeBodyData()
+        elif byte == '/':
+            return 'afterslash'
+        elif self.beExtremelyLenient:
+            # discard and move on?  Only case I've seen of this so far was:
+            # <foo bar="baz"">
+            return
+        self._parseError("Unexpected character: %r" % byte)
+
+    def begin_doctype(self, byte):
+        """
+        Start the doctype buffer with the character that triggered this state.
+        """
+        self.doctype = byte
+
+    def do_doctype(self, byte):
+        """
+        Collect doctype text until the closing '>', then return to 'bodydata'.
+        """
+        if byte != '>':
+            self.doctype += byte
+            return None
+        return 'bodydata'
+
+    def end_doctype(self):
+        """
+        Report the collected doctype through C{gotDoctype} and drop the buffer.
+        """
+        self.gotDoctype(self.doctype)
+        self.doctype = None
+
+    def do_waitforgt(self, byte):
+        """
+        Ignore everything up to the closing '>' of the current tag.
+
+        After a closing tag, or outside lenient mode, parsing resumes in
+        'bodydata'; otherwise C{maybeBodyData} chooses the next state.
+        """
+        if byte != '>':
+            return None
+        if not self.endtag and self.beExtremelyLenient:
+            return self.maybeBodyData()
+        return 'bodydata'
+
+    def begin_attrname(self, byte):
+        """
+        Start a new attribute name with the character that triggered this
+        state and clear the "slash seen" flag used by C{do_attrname}.
+        """
+        self.attrname = byte
+        self._attrname_termtag = 0
+
+    def do_attrname(self, byte):
+        """
+        Handle one character while an attribute name is being read.
+
+        Outside lenient mode only name characters, '=' and whitespace are
+        accepted; anything else is a parse error.
+        """
+        if byte.isalnum() or byte in identChars:
+            self.attrname += byte
+            return
+        elif byte == '=':
+            return 'beforeattrval'
+        elif byte.isspace():
+            return 'beforeeq'
+        elif self.beExtremelyLenient:
+            # a quote straight after the name: go on to read a quoted value
+            if byte in '"\'':
+                return 'attrval'
+            if byte in lenientIdentChars or byte.isalnum():
+                self.attrname += byte
+                return
+            if byte == '/':
+                # remember the slash so that a following '>' also ends the tag
+                self._attrname_termtag = 1
+                return
+            if byte == '>':
+                # attribute without a value: record it as the string 'True'
+                self.attrval = 'True'
+                self.tagAttributes[self.attrname] = self.attrval
+                self.gotTagStart(self.tagName, self.tagAttributes)
+                if self._attrname_termtag:
+                    self.gotTagEnd(self.tagName)
+                    return 'bodydata'
+                return self.maybeBodyData()
+            # something is really broken. let's leave this attribute where it
+            # is and move on to the next thing
+            return
+        self._parseError("Invalid attribute name: %r %r" % (self.attrname, byte))
+
+    def do_beforeattrval(self, byte):
+        """
+        Handle one character after an attribute's '=', before its value.
+
+        A quote starts a quoted value and whitespace is skipped.  Lenient
+        mode additionally accepts an unquoted value, a bare '>' and a stray
+        backslash; everything else is a parse error.
+        """
+        if byte in '"\'':
+            return 'attrval'
+        elif byte.isspace():
+            return
+        elif self.beExtremelyLenient:
+            # unquoted value
+            if byte in lenientIdentChars or byte.isalnum():
+                return 'messyattr'
+            if byte == '>':
+                # '=' followed directly by '>': record the value as 'True'
+                self.attrval = 'True'
+                self.tagAttributes[self.attrname] = self.attrval
+                self.gotTagStart(self.tagName, self.tagAttributes)
+                return self.maybeBodyData()
+            if byte == '\\':
+                # I saw this in actual HTML once:
+                # <font size=\"3\"><sup>SM</sup></font>
+                return
+        self._parseError("Invalid initial attribute value: %r; Attribute values must be quoted." % byte)
+
+    # Class-level defaults for the attribute name and value currently being
+    # read; the attribute states overwrite them on the instance.
+    attrname = ''
+    attrval = ''
+
+    def begin_beforeeq(self,byte):
+        """
+        Clear the "slash seen" flag used by C{do_beforeeq}.
+        """
+        self._beforeeq_termtag = 0
+
+    def do_beforeeq(self, byte):
+        """
+        Handle one character after an attribute name and whitespace, where
+        an '=' is expected.
+
+        Lenient mode also accepts an attribute without a value, which is
+        recorded with the string 'True'; otherwise anything but '=' or
+        whitespace is a parse error.
+        """
+        if byte == '=':
+            return 'beforeattrval'
+        elif byte.isspace():
+            return
+        elif self.beExtremelyLenient:
+            if byte.isalnum() or byte in identChars:
+                # the next attribute name starts: the previous one had no value
+                self.attrval = 'True'
+                self.tagAttributes[self.attrname] = self.attrval
+                return 'attrname'
+            elif byte == '>':
+                self.attrval = 'True'
+                self.tagAttributes[self.attrname] = self.attrval
+                self.gotTagStart(self.tagName, self.tagAttributes)
+                if self._beforeeq_termtag:
+                    self.gotTagEnd(self.tagName)
+                    return 'bodydata'
+                return self.maybeBodyData()
+            elif byte == '/':
+                # remember the slash so that a following '>' also ends the tag
+                self._beforeeq_termtag = 1
+                return
+        self._parseError("Invalid attribute")
+
+    def begin_attrval(self, byte):
+        """
+        Remember which quote character opened the value and start with an
+        empty value.
+        """
+        self.quotetype = byte
+        self.attrval = ''
+
+    def do_attrval(self, byte):
+        """
+        Collect a quoted attribute value until the matching quote character,
+        then go back to 'attrs'.
+        """
+        if byte != self.quotetype:
+            self.attrval += byte
+            return None
+        return 'attrs'
+
+    def end_attrval(self):
+        """
+        Store the finished attribute in C{tagAttributes} and reset the name
+        and value buffers.
+        """
+        name, value = self.attrname, self.attrval
+        self.tagAttributes[name] = value
+        self.attrname = ''
+        self.attrval = ''
+
+    def begin_messyattr(self, byte):
+        """
+        Start an unquoted attribute value with the character that triggered
+        this state.
+        """
+        self.attrval = byte
+
+    def do_messyattr(self, byte):
+        """
+        Collect an unquoted attribute value, which ends at whitespace or at
+        the tag's closing '>'.
+        """
+        if byte.isspace():
+            return 'attrs'
+        elif byte == '>':
+            endTag = 0
+            # a trailing '/' belongs to a self-terminating tag, not the value
+            if self.attrval.endswith('/'):
+                endTag = 1
+                self.attrval = self.attrval[:-1]
+            self.tagAttributes[self.attrname] = self.attrval
+            self.gotTagStart(self.tagName, self.tagAttributes)
+            if endTag:
+                self.gotTagEnd(self.tagName)
+                return 'bodydata'
+            return self.maybeBodyData()
+        else:
+            self.attrval += byte
+
+    def end_messyattr(self):
+        """
+        Store the unquoted attribute value, unless it is empty.
+        """
+        if self.attrval:
+            self.tagAttributes[self.attrname] = self.attrval
+
+    def begin_afterslash(self, byte):
+        """
+        Clear the flag that records whether the closing '>' has been handled.
+        """
+        self._after_slash_closed = 0
+
+    def do_afterslash(self, byte):
+        """
+        Expect the '>' that closes a self-terminating tag and report the tag
+        as both started and ended.
+
+        In lenient mode other characters before the '>' are ignored;
+        otherwise they are a parse error.
+        """
+        # this state is only after a self-terminating slash, e.g. <foo/>
+        if self._after_slash_closed:
+            self._parseError("Mal-formed")#XXX When does this happen??
+        if byte != '>':
+            if self.beExtremelyLenient:
+                return
+            else:
+                self._parseError("No data allowed after '/'")
+        self._after_slash_closed = 1
+        self.gotTagStart(self.tagName, self.tagAttributes)
+        self.gotTagEnd(self.tagName)
+        # don't need maybeBodyData here because there better not be
+        # any javascript code after a <script/>... we'll see :(
+        return 'bodydata'
+
+    def begin_bodydata(self, byte):
+        """
+        Start a new run of body text, seeded with any character saved in
+        C{_leadingBodyData} (which is then removed from the instance).
+        """
+        leading = self._leadingBodyData
+        if not leading:
+            self.bodydata = ''
+            return
+        self.bodydata = leading
+        del self._leadingBodyData
+
+    def do_bodydata(self, byte):
+        """
+        Collect body text; '<' switches to 'tagstart' and '&' to 'entityref'.
+        """
+        nextState = {'<': 'tagstart', '&': 'entityref'}.get(byte)
+        if nextState is None:
+            self.bodydata += byte
+        return nextState
+
+    def end_bodydata(self):
+        """
+        Report the collected body text through C{gotText} and clear the
+        buffer.
+        """
+        self.gotText(self.bodydata)
+        self.bodydata = ''
+
+    def do_waitforendscript(self, byte):
+        """
+        Collect script text; a '<' might start the closing tag, so it
+        switches to 'waitscriptendtag'.
+        """
+        if byte != '<':
+            self.bodydata += byte
+            return None
+        return 'waitscriptendtag'
+
+    def begin_waitscriptendtag(self, byte):
+        """
+        Reset the buffers used while checking whether a '<' inside a script
+        starts the closing tag.
+        """
+        # raw characters read since the '<', kept so they can be put back
+        # into the body text if this is not a closing script tag
+        self.temptagdata = ''
+        self.tagName = ''
+        self.endtag = 0
+
+    def do_waitscriptendtag(self, byte):
+        """
+        Decide whether the text after a '<' inside a script is the closing
+        script tag.
+
+        If it is, the script text and the tag end are reported and the state
+        becomes 'waitforgt'; if it is not, everything read so far is put back
+        into the body text and the state returns to 'waitforendscript'.
+        """
+        # 1 enforce / as first byte read
+        # 2 enforce following bytes to be subset of "script" until
+        #   tagName == "script"
+        #   2a when that happens, gotText(self.bodydata) and gotTagEnd(self.tagName)
+        # 3 spaces can happen anywhere, they're ignored
+        #   e.g. < / script >
+        # 4 anything else causes all data I've read to be moved to the
+        #   bodydata, and switch back to waitforendscript state
+
+        # If it turns out this _isn't_ a </script>, we need to
+        # remember all the data we've been through so we can append it
+        # to bodydata
+        self.temptagdata += byte
+
+        # 1
+        if byte == '/':
+            self.endtag = True
+        elif not self.endtag:
+            # no '/' seen yet: this was not a closing tag after all
+            self.bodydata += "<" + self.temptagdata
+            return 'waitforendscript'
+        # 2
+        elif byte.isalnum() or byte in identChars:
+            self.tagName += byte
+            if not 'script'.startswith(self.tagName):
+                self.bodydata += "<" + self.temptagdata
+                return 'waitforendscript'
+            elif self.tagName == 'script':
+                self.gotText(self.bodydata)
+                self.gotTagEnd(self.tagName)
+                return 'waitforgt'
+        # 3
+        elif byte.isspace():
+            return 'waitscriptendtag'
+        # 4
+        else:
+            self.bodydata += "<" + self.temptagdata
+            return 'waitforendscript'
+
+
+    def begin_entityref(self, byte):
+        """
+        Start reading an entity reference with empty buffers.
+        """
+        self.erefbuf = ''
+        self.erefextra = '' # extra bit for lenient mode
+
+    def do_entityref(self, byte):
+        """
+        Collect the name of an entity reference up to its closing ';'.
+
+        Whitespace or '<' inside the reference is a parse error unless the
+        parser is lenient, in which case the '&' is reported as the entity
+        'amp'.
+        """
+        if byte.isspace() or byte == "<":
+            if self.beExtremelyLenient:
+                # '&foo' probably was '&amp;foo'
+                if self.erefbuf and self.erefbuf != "amp":
+                    # keep the name read so far as text following the '&'
+                    self.erefextra = self.erefbuf
+                self.erefbuf = "amp"
+                if byte == "<":
+                    return "tagstart"
+                else:
+                    # begin_spacebodydata seeds the body text from erefextra
+                    self.erefextra += byte
+                    return 'spacebodydata'
+            self._parseError("Bad entity reference")
+        elif byte != ';':
+            self.erefbuf += byte
+        else:
+            return 'bodydata'
+
+    def end_entityref(self):
+        """
+        Report the collected entity name through C{gotEntityReference}.
+        """
+        self.gotEntityReference(self.erefbuf)
+
+    # Hacky support for whitespace after '&' in an entity reference.  This
+    # state is only entered from do_entityref when beExtremelyLenient is set.
+    def begin_spacebodydata(self, byte):
+        """
+        Start the body text with the characters saved in C{erefextra}.
+        """
+        self.bodydata = self.erefextra
+        self.erefextra = None
+    # apart from how it starts, this state behaves exactly like 'bodydata'
+    do_spacebodydata = do_bodydata
+    end_spacebodydata = end_bodydata
+
+    # Sorta SAX-ish API
+
+    def gotTagStart(self, name, attributes):
+        '''Encountered an opening tag.
+
+        Default behaviour is to print.
+
+        @param name: the tag name.
+        @param attributes: dictionary mapping attribute names to values.
+        '''
+        print('begin', name, attributes)
+
+    def gotText(self, data):
+        '''Encountered text.
+
+        Default behaviour is to print.
+
+        @param data: the text that was collected.
+        '''
+        print('text:', repr(data))
+
+    def gotEntityReference(self, entityRef):
+        '''Encountered mnemonic entity reference.
+
+        Default behaviour is to print.
+
+        @param entityRef: the entity name, without the surrounding '&' and ';'.
+        '''
+        print('entityRef: &%s;' % entityRef)
+
+    def gotComment(self, comment):
+        """
+        Encountered a comment.
+
+        The default implementation ignores it.
+        """
+
+    def gotCData(self, cdata):
+        '''Encountered CDATA.
+
+        Default behaviour is to call the gotText method.
+
+        @param cdata: the content of the CDATA section.
+        '''
+        self.gotText(cdata)
+
+    def gotDoctype(self, doctype):
+        """Encountered DOCTYPE
+
+        This is really grotty: it basically just gives you everything between
+        '<!DOCTYPE' and '>' as an argument.
+
+        Default behaviour is to print.
+        """
+        print('!DOCTYPE', repr(doctype))
+
+    def gotTagEnd(self, name):
+        '''Encountered closing tag.
+
+        Default behaviour is to print.
+
+        @param name: the tag name.
+        '''
+        print('end', name)
author	shmel1k <shmel1k@ydb.tech>	2023-11-26 18:16:14 +0300
committer	shmel1k <shmel1k@ydb.tech>	2023-11-26 18:43:30 +0300
commit	b8cf9e88f4c5c64d9406af533d8948deb050d695 (patch)
tree	218eb61fb3c3b96ec08b4d8cdfef383104a87d63 /contrib/python/Twisted/py2/twisted/web/sux.py
parent	523f645a83a0ec97a0332dbc3863bb354c92a328 (diff)
download	ydb-b8cf9e88f4c5c64d9406af533d8948deb050d695.tar.gz