add kikimr_configure

author: shmel1k <shmel1k@ydb.tech> 2023-11-26 18:16:14 +0300
committer: shmel1k <shmel1k@ydb.tech> 2023-11-26 18:43:30 +0300
commit: b8cf9e88f4c5c64d9406af533d8948deb050d695 (patch)
tree: 218eb61fb3c3b96ec08b4d8cdfef383104a87d63 /contrib/python/Twisted/py3/twisted/web/sux.py
parent: 523f645a83a0ec97a0332dbc3863bb354c92a328 (diff)
download: ydb-b8cf9e88f4c5c64d9406af533d8948deb050d695.tar.gz
1 files changed, 644 insertions, 0 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/sux.py b/contrib/python/Twisted/py3/twisted/web/sux.py
new file mode 100644
index 0000000000..69ad4dff95
--- /dev/null
+++ b/contrib/python/Twisted/py3/twisted/web/sux.py
@@ -0,0 +1,644 @@
+# -*- test-case-name: twisted.web.test.test_xml -*-
+#
+# Copyright (c) Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+
+"""
+*S*mall, *U*ncomplicated *X*ML.
+
+This is a very simple implementation of XML/HTML as a network
+protocol.  It is not at all clever.  Its main features are that it
+does not:
+
+  - support namespaces
+  - mung mnemonic entity references
+  - validate
+  - perform *any* external actions (such as fetching URLs or writing files)
+    under *any* circumstances
+
+It does, however, have lots and lots of horrible hacks for supporting broken
+HTML (as an option, they're not on by default).
+"""
+
+
+from twisted.internet.protocol import Protocol
+from twisted.python.reflect import prefixedMethodNames
+
+# Elements of the three-tuples in the state table.
+BEGIN_HANDLER = 0
+DO_HANDLER = 1
+END_HANDLER = 2
+
+identChars = ".-_:"
+lenientIdentChars = identChars + ";+#/%~"
+
+
+def nop(*args, **kw):
+    """
+    Accept any positional and keyword arguments, do nothing, return L{None}.
+
+    Used as the placeholder handler for states that lack a begin, do or end
+    method.
+    """
+    return None
+
+
+def unionlist(*args):
+    """
+    Combine several iterables into a view of their distinct items.
+
+    @param args: any number of iterables of hashable items.
+    @return: a dictionary keys view containing every distinct item once, in
+        order of first appearance.
+    """
+    seen = {}
+    for items in args:
+        for item in items:
+            seen[item] = 1
+    return seen.keys()
+
+
+def zipfndict(*args, **kw):
+    """
+    Zip several dictionaries together key by key.
+
+    @param args: the dictionaries to combine.
+    @param kw: only C{default} is consulted; it is the value used for a
+        dictionary that lacks a given key (L{nop} when not supplied).
+    @return: a dictionary mapping every key found in any input to a tuple
+        holding, for each input in order, that input's value for the key or
+        the default.
+    """
+    fallback = kw.get("default", nop)
+    allKeys = unionlist(*(fndict.keys() for fndict in args))
+    return {
+        key: tuple(fndict.get(key, fallback) for fndict in args)
+        for key in allKeys
+    }
+
+
+def prefixedMethodClassDict(clazz, prefix):
+    """
+    Map un-prefixed method names to the matching attributes of a class.
+
+    @param clazz: the class to inspect.
+    @param prefix: the method-name prefix to look for.
+    @return: a dictionary whose keys are the names reported by
+        L{prefixedMethodNames} and whose values are the corresponding
+        C{prefix + name} attributes fetched from C{clazz}.
+    """
+    methods = {}
+    for name in prefixedMethodNames(clazz, prefix):
+        methods[name] = getattr(clazz, prefix + name)
+    return methods
+
+
+def prefixedMethodObjDict(obj, prefix):
+    """
+    Map un-prefixed method names to the matching attributes of an object.
+
+    @param obj: the instance to inspect; names are discovered on its class.
+    @param prefix: the method-name prefix to look for.
+    @return: a dictionary whose keys are the names reported by
+        L{prefixedMethodNames} for C{obj.__class__} and whose values are the
+        corresponding C{prefix + name} attributes fetched from C{obj}.
+    """
+    methods = {}
+    for name in prefixedMethodNames(obj.__class__, prefix):
+        methods[name] = getattr(obj, prefix + name)
+    return methods
+
+
+class ParseError(Exception):
+    """
+    Raised when the parser meets input it cannot accept.
+
+    @ivar filename: name of the document being parsed.
+    @ivar line: line number at which the problem was detected.
+    @ivar col: column number at which the problem was detected.
+    @ivar message: human readable description of the problem.
+    """
+
+    def __init__(self, filename, line, col, message):
+        # Hand the values to Exception as well so that ``args`` is populated;
+        # without this the exception cannot be re-created when unpickled.
+        super().__init__(filename, line, col, message)
+        self.filename = filename
+        self.line = line
+        self.col = col
+        self.message = message
+
+    def __str__(self) -> str:
+        return f"{self.filename}:{self.line}:{self.col}: {self.message}"
+
+
+class XMLParser(Protocol):
+    """
+    A character-at-a-time XML/HTML tokenizer implemented as a state machine.
+
+    C{dataReceived} decodes each chunk and passes every character to the
+    C{do_<state>} method of the current state.  When that method returns the
+    name of a different state, C{end_<old state>} is called with no arguments
+    and then C{begin_<new state>} is called with the character that caused
+    the transition.  States lacking one of those handlers get L{nop}.
+
+    Parse results are reported through the C{got*} methods at the end of the
+    class; most of the default implementations just print.
+
+    @ivar state: name of the current state, or L{None} until the first call
+        to C{dataReceived}.
+    @ivar encodings: codec names applied, in order, by C{_decode}; reset to
+        an empty list by C{connectionMade}.
+    @ivar filename: name reported in L{ParseError} messages.
+    @ivar beExtremelyLenient: when true, many malformed constructs are
+        tolerated instead of raising L{ParseError}.
+    """
+
+    state = None
+    encodings = None
+    filename = "<xml />"
+    beExtremelyLenient = 0
+    _prepend = None
+
+    # _leadingBodyData will sometimes be set before switching to the
+    # 'bodydata' state, when we "accidentally" read a byte of bodydata
+    # in a different state.
+    _leadingBodyData = None
+
+    # Per-instance cache filled in by _buildStateTable.
+    _stateTable = None
+
+    def connectionMade(self):
+        """Reset the position counters and the list of encodings."""
+        self.lineno = 1
+        self.colno = 0
+        self.encodings = []
+
+    def saveMark(self):
+        """Get the line number and column of the last character parsed"""
+        # This gets replaced during dataReceived, restored afterwards
+        return (self.lineno, self.colno)
+
+    def _parseError(self, message):
+        """
+        Raise a L{ParseError} for the current file name and position.
+
+        @param message: description of the problem.
+        @raise ParseError: always.
+        """
+        raise ParseError(self.filename, *self.saveMark(), message)
+
+    def _buildStateTable(self):
+        """
+        Return a dictionary of begin, do, end state function tuples.
+
+        The table holds bound methods, so it is cached on the instance.  The
+        previous class-level cache never hit: the assignment was name-mangled
+        to C{_XMLParser__stateTable} while the lookup used the literal string
+        C{"__stateTable"}, so the table was rebuilt on every call and the
+        class kept a reference to the last instance's methods.
+        """
+        stateTable = self._stateTable
+        if stateTable is None:
+            stateTable = self._stateTable = zipfndict(
+                *(
+                    prefixedMethodObjDict(self, prefix)
+                    for prefix in ("begin_", "do_", "end_")
+                )
+            )
+        return stateTable
+
+    def _decode(self, data):
+        """
+        Decode C{data} by applying every codec in C{self.encodings} in order.
+
+        C{self._prepend} (the byte order mark saved by C{dataReceived}) is put
+        back in front of the data first so that the codec sees it.
+
+        @param data: the raw bytes of one chunk.
+        @return: the decoded text.
+        """
+        if "UTF-16" in self.encodings or "UCS-2" in self.encodings:
+            assert not len(data) & 1, "UTF-16 must come in pairs for now"
+        if self._prepend:
+            data = self._prepend + data
+        for encoding in self.encodings:
+            data = str(data, encoding)
+        return data
+
+    def maybeBodyData(self):
+        """
+        Choose the state that follows a tag in lenient mode.
+
+        @return: C{"waitforendscript"} after an opening C{script} tag that
+            has no C{src} attribute, otherwise C{"bodydata"}.
+        """
+        if self.endtag:
+            return "bodydata"
+
+        # Get ready for fun! We're going to allow
+        # <script>if (foo < bar)</script> to work!
+        # We do this by making everything between <script> and
+        # </script> a Text
+        # BUT <script src="foo"> will be special-cased to do regular,
+        # lenient behavior, because those may not have </script>
+        # -radix
+
+        if self.tagName == "script" and "src" not in self.tagAttributes:
+            # we do this ourselves rather than having begin_waitforendscript
+            # because that can get called multiple times and we don't want
+            # bodydata to get reset other than the first time.
+            self.begin_bodydata(None)
+            return "waitforendscript"
+        return "bodydata"
+
+    def dataReceived(self, data):
+        """
+        Decode one chunk of bytes and run it through the state machine.
+
+        On the first chunk a UTF-16 byte order mark, if present, is stripped
+        and remembered and C{"UTF-16"} is added to C{self.encodings}.  When no
+        encoding has been recorded the chunk is decoded as UTF-8 on its own,
+        so a multi-byte character split across two calls raises
+        C{UnicodeDecodeError}.
+
+        @param data: the bytes received.
+        @raise ParseError: if a state handler rejects the input.  In that
+            case C{self.state} is left as it was before the call.
+        """
+        stateTable = self._buildStateTable()
+        if not self.state:
+            # all UTF-16 starts with this string
+            if data.startswith((b"\xff\xfe", b"\xfe\xff")):
+                self._prepend = data[0:2]
+                self.encodings.append("UTF-16")
+                data = data[2:]
+            self.state = "begin"
+        if self.encodings:
+            data = self._decode(data)
+        else:
+            data = data.decode("utf-8")
+        # bring state, lineno, colno into local scope
+        lineno, colno = self.lineno, self.colno
+        curState = self.state
+        # replace saveMark with a nested scope function
+        _saveMark = self.saveMark
+
+        def saveMark():
+            return (lineno, colno)
+
+        self.saveMark = saveMark
+        # fetch functions from the stateTable
+        beginFn, doFn, endFn = stateTable[curState]
+        try:
+            for byte in data:
+                # do newline stuff
+                if byte == "\n":
+                    lineno += 1
+                    colno = 0
+                else:
+                    colno += 1
+                newState = doFn(byte)
+                if newState is not None and newState != curState:
+                    # this is the endFn from the previous state
+                    endFn()
+                    curState = newState
+                    beginFn, doFn, endFn = stateTable[curState]
+                    beginFn(byte)
+        finally:
+            self.saveMark = _saveMark
+            self.lineno, self.colno = lineno, colno
+        # state doesn't make sense if there's an exception..
+        self.state = curState
+
+    def connectionLost(self, reason):
+        """
+        End the last state we were in.
+
+        Nothing happens if no data was ever received, because no state has
+        been entered yet.
+        """
+        if not self.state:
+            return
+        stateTable = self._buildStateTable()
+        stateTable[self.state][END_HANDLER]()
+
+    # state methods
+
+    def do_begin(self, byte):
+        """Skip leading whitespace and expect the document to open a tag."""
+        if byte.isspace():
+            return
+        if byte != "<":
+            if self.beExtremelyLenient:
+                self._leadingBodyData = byte
+                return "bodydata"
+            self._parseError(f"First char of document [{byte!r}] wasn't <")
+        return "tagstart"
+
+    def begin_comment(self, byte):
+        """Start collecting the text of a comment."""
+        self.commentbuf = ""
+
+    def do_comment(self, byte):
+        """Collect comment text until the closing C{-->} is seen."""
+        self.commentbuf += byte
+        if self.commentbuf.endswith("-->"):
+            self.gotComment(self.commentbuf[:-3])
+            return "bodydata"
+
+    def begin_tagstart(self, byte):
+        """Reset the per-tag bookkeeping."""
+        self.tagName = ""  # name of the tag
+        self.tagAttributes = {}  # attributes of the tag
+        self.termtag = 0  # is the tag self-terminating
+        self.endtag = 0
+
+    def do_tagstart(self, byte):
+        """Collect the tag name and decide what kind of tag this is."""
+        if byte.isalnum() or byte in identChars:
+            self.tagName += byte
+            if self.tagName == "!--":
+                return "comment"
+        elif byte.isspace():
+            if self.tagName:
+                if self.endtag:
+                    # properly strict thing to do here is probably to only
+                    # accept whitespace
+                    return "waitforgt"
+                return "attrs"
+            else:
+                self._parseError("Whitespace before tag-name")
+        elif byte == ">":
+            if self.endtag:
+                self.gotTagEnd(self.tagName)
+                return "bodydata"
+            else:
+                self.gotTagStart(self.tagName, {})
+                return self.maybeBodyData() if self.beExtremelyLenient else "bodydata"
+        elif byte == "/":
+            if self.tagName:
+                return "afterslash"
+            else:
+                self.endtag = 1
+        elif byte in "!?":
+            if self.tagName:
+                if not self.beExtremelyLenient:
+                    self._parseError("Invalid character in tag-name")
+            else:
+                self.tagName += byte
+                self.termtag = 1
+        elif byte == "[":
+            if self.tagName == "!":
+                return "expectcdata"
+            else:
+                self._parseError("Invalid '[' in tag-name")
+        else:
+            if self.beExtremelyLenient:
+                self.bodydata = "<"
+                return "unentity"
+            self._parseError("Invalid tag character: %r" % byte)
+
+    def begin_unentity(self, byte):
+        """Keep the character that turned out not to start a tag."""
+        self.bodydata += byte
+
+    def do_unentity(self, byte):
+        """Take one more character as text, then go back to body data."""
+        self.bodydata += byte
+        return "bodydata"
+
+    def end_unentity(self):
+        """Report the text collected while recovering from a stray C{<}."""
+        self.gotText(self.bodydata)
+
+    def begin_expectcdata(self, byte):
+        """Start matching the C{[CDATA[} header."""
+        self.cdatabuf = byte
+
+    def do_expectcdata(self, byte):
+        """Match the C{[CDATA[} header one character at a time."""
+        self.cdatabuf += byte
+        cdb = self.cdatabuf
+        cd = "[CDATA["
+        if len(cd) > len(cdb):
+            if cd.startswith(cdb):
+                return
+            elif self.beExtremelyLenient:
+                ## WHAT THE CRAP!?  MSWord9 generates HTML that includes these
+                ## bizarre <![if !foo]> <![endif]> chunks, so I've gotta ignore
+                ## 'em as best I can.  this should really be a separate parse
+                ## state but I don't even have any idea what these _are_.
+                return "waitforgt"
+            else:
+                self._parseError("Mal-formed CDATA header")
+        if cd == cdb:
+            self.cdatabuf = ""
+            return "cdata"
+        self._parseError("Mal-formed CDATA header")
+
+    def do_cdata(self, byte):
+        """Collect CDATA content until the closing C{]]>} is seen."""
+        self.cdatabuf += byte
+        if self.cdatabuf.endswith("]]>"):
+            self.cdatabuf = self.cdatabuf[:-3]
+            return "bodydata"
+
+    def end_cdata(self):
+        """Report the collected CDATA section."""
+        self.gotCData(self.cdatabuf)
+        self.cdatabuf = ""
+
+    def do_attrs(self, byte):
+        """Between attributes: find the next attribute or the end of the tag."""
+        if byte.isalnum() or byte in identChars:
+            # XXX FIXME really handle !DOCTYPE at some point
+            if self.tagName == "!DOCTYPE":
+                return "doctype"
+            if self.tagName[0] in "!?":
+                return "waitforgt"
+            return "attrname"
+        elif byte.isspace():
+            return
+        elif byte == ">":
+            self.gotTagStart(self.tagName, self.tagAttributes)
+            return self.maybeBodyData() if self.beExtremelyLenient else "bodydata"
+        elif byte == "/":
+            return "afterslash"
+        elif self.beExtremelyLenient:
+            # discard and move on?  Only case I've seen of this so far was:
+            # <foo bar="baz"">
+            return
+        self._parseError("Unexpected character: %r" % byte)
+
+    def begin_doctype(self, byte):
+        """Start collecting the DOCTYPE text."""
+        self.doctype = byte
+
+    def do_doctype(self, byte):
+        """Collect DOCTYPE text up to the closing C{>}."""
+        if byte == ">":
+            return "bodydata"
+        self.doctype += byte
+
+    def end_doctype(self):
+        """Report the collected DOCTYPE text."""
+        self.gotDoctype(self.doctype)
+        self.doctype = None
+
+    def do_waitforgt(self, byte):
+        """Ignore everything up to the next C{>}."""
+        if byte == ">":
+            if self.endtag or not self.beExtremelyLenient:
+                return "bodydata"
+            return self.maybeBodyData()
+
+    def begin_attrname(self, byte):
+        """Start collecting an attribute name."""
+        self.attrname = byte
+        self._attrname_termtag = 0
+
+    def do_attrname(self, byte):
+        """Collect the attribute name and find what follows it."""
+        if byte.isalnum() or byte in identChars:
+            self.attrname += byte
+            return
+        elif byte == "=":
+            return "beforeattrval"
+        elif byte.isspace():
+            return "beforeeq"
+        elif self.beExtremelyLenient:
+            if byte in "\"'":
+                return "attrval"
+            if byte in lenientIdentChars or byte.isalnum():
+                self.attrname += byte
+                return
+            if byte == "/":
+                self._attrname_termtag = 1
+                return
+            if byte == ">":
+                self.attrval = "True"
+                self.tagAttributes[self.attrname] = self.attrval
+                self.gotTagStart(self.tagName, self.tagAttributes)
+                if self._attrname_termtag:
+                    self.gotTagEnd(self.tagName)
+                    return "bodydata"
+                return self.maybeBodyData()
+            # something is really broken. let's leave this attribute where it
+            # is and move on to the next thing
+            return
+        self._parseError(f"Invalid attribute name: {self.attrname!r} {byte!r}")
+
+    def do_beforeattrval(self, byte):
+        """After C{=}: expect the opening quote of the attribute value."""
+        if byte in "\"'":
+            return "attrval"
+        elif byte.isspace():
+            return
+        elif self.beExtremelyLenient:
+            if byte in lenientIdentChars or byte.isalnum():
+                return "messyattr"
+            if byte == ">":
+                self.attrval = "True"
+                self.tagAttributes[self.attrname] = self.attrval
+                self.gotTagStart(self.tagName, self.tagAttributes)
+                return self.maybeBodyData()
+            if byte == "\\":
+                # I saw this in actual HTML once:
+                # <font size=\"3\"><sup>SM</sup></font>
+                return
+        self._parseError(
+            "Invalid initial attribute value: %r; Attribute values must be quoted."
+            % byte
+        )
+
+    attrname = ""
+    attrval = ""
+
+    def begin_beforeeq(self, byte):
+        """Whitespace followed an attribute name; no C{/} seen yet."""
+        self._beforeeq_termtag = 0
+
+    def do_beforeeq(self, byte):
+        """After an attribute name and whitespace: expect C{=}."""
+        if byte == "=":
+            return "beforeattrval"
+        elif byte.isspace():
+            return
+        elif self.beExtremelyLenient:
+            if byte.isalnum() or byte in identChars:
+                self.attrval = "True"
+                self.tagAttributes[self.attrname] = self.attrval
+                return "attrname"
+            elif byte == ">":
+                self.attrval = "True"
+                self.tagAttributes[self.attrname] = self.attrval
+                self.gotTagStart(self.tagName, self.tagAttributes)
+                if self._beforeeq_termtag:
+                    self.gotTagEnd(self.tagName)
+                    return "bodydata"
+                return self.maybeBodyData()
+            elif byte == "/":
+                self._beforeeq_termtag = 1
+                return
+        self._parseError("Invalid attribute")
+
+    def begin_attrval(self, byte):
+        """Remember which quote opened the value and start collecting it."""
+        self.quotetype = byte
+        self.attrval = ""
+
+    def do_attrval(self, byte):
+        """Collect the value until the matching quote is seen."""
+        if byte == self.quotetype:
+            return "attrs"
+        self.attrval += byte
+
+    def end_attrval(self):
+        """Store the finished attribute and clear the name/value buffers."""
+        self.tagAttributes[self.attrname] = self.attrval
+        self.attrname = self.attrval = ""
+
+    def begin_messyattr(self, byte):
+        """Start collecting an unquoted attribute value (lenient mode)."""
+        self.attrval = byte
+
+    def do_messyattr(self, byte):
+        """Collect an unquoted value up to whitespace or C{>}."""
+        if byte.isspace():
+            return "attrs"
+        elif byte == ">":
+            endTag = 0
+            if self.attrval.endswith("/"):
+                endTag = 1
+                self.attrval = self.attrval[:-1]
+            self.tagAttributes[self.attrname] = self.attrval
+            self.gotTagStart(self.tagName, self.tagAttributes)
+            if endTag:
+                self.gotTagEnd(self.tagName)
+                return "bodydata"
+            return self.maybeBodyData()
+        else:
+            self.attrval += byte
+
+    def end_messyattr(self):
+        """Store the unquoted value if anything was collected."""
+        if self.attrval:
+            self.tagAttributes[self.attrname] = self.attrval
+
+    def begin_afterslash(self, byte):
+        """A C{/} was seen inside a tag; the tag is not closed yet."""
+        self._after_slash_closed = 0
+
+    def do_afterslash(self, byte):
+        """Expect the C{>} that completes a self-terminating tag."""
+        # this state is only after a self-terminating slash, e.g. <foo/>
+        if self._after_slash_closed:
+            self._parseError("Mal-formed")  # XXX When does this happen??
+        if byte != ">":
+            if self.beExtremelyLenient:
+                return
+            else:
+                self._parseError("No data allowed after '/'")
+        self._after_slash_closed = 1
+        self.gotTagStart(self.tagName, self.tagAttributes)
+        self.gotTagEnd(self.tagName)
+        # don't need maybeBodyData here because there better not be
+        # any javascript code after a <script/>... we'll see :(
+        return "bodydata"
+
+    def begin_bodydata(self, byte):
+        """Start a text run, seeded with any character saved earlier."""
+        if self._leadingBodyData:
+            self.bodydata = self._leadingBodyData
+            # drop the instance value so the class-level None shows again
+            del self._leadingBodyData
+        else:
+            self.bodydata = ""
+
+    def do_bodydata(self, byte):
+        """Collect text until a tag or an entity reference begins."""
+        if byte == "<":
+            return "tagstart"
+        if byte == "&":
+            return "entityref"
+        self.bodydata += byte
+
+    def end_bodydata(self):
+        """Report the collected text."""
+        self.gotText(self.bodydata)
+        self.bodydata = ""
+
+    def do_waitforendscript(self, byte):
+        """Inside a script element: treat everything up to C{<} as text."""
+        if byte == "<":
+            return "waitscriptendtag"
+        self.bodydata += byte
+
+    def begin_waitscriptendtag(self, byte):
+        """A C{<} was seen inside a script; start checking for C{</script}."""
+        self.temptagdata = ""
+        self.tagName = ""
+        self.endtag = 0
+
+    def do_waitscriptendtag(self, byte):
+        """Decide whether the text after C{<} is the closing script tag."""
+        # 1 enforce / as first byte read
+        # 2 enforce following bytes to be subset of "script" until
+        #   tagName == "script"
+        #   2a when that happens, gotText(self.bodydata) and gotTagEnd(self.tagName)
+        # 3 spaces can happen anywhere, they're ignored
+        #   e.g. < / script >
+        # 4 anything else causes all data I've read to be moved to the
+        #   bodydata, and switch back to waitforendscript state
+
+        # If it turns out this _isn't_ a </script>, we need to
+        # remember all the data we've been through so we can append it
+        # to bodydata
+        self.temptagdata += byte
+
+        # 1
+        if byte == "/":
+            self.endtag = True
+        elif not self.endtag:
+            self.bodydata += "<" + self.temptagdata
+            return "waitforendscript"
+        # 2
+        elif byte.isalnum() or byte in identChars:
+            self.tagName += byte
+            if not "script".startswith(self.tagName):
+                self.bodydata += "<" + self.temptagdata
+                return "waitforendscript"
+            elif self.tagName == "script":
+                self.gotText(self.bodydata)
+                self.gotTagEnd(self.tagName)
+                return "waitforgt"
+        # 3
+        elif byte.isspace():
+            return "waitscriptendtag"
+        # 4
+        else:
+            self.bodydata += "<" + self.temptagdata
+            return "waitforendscript"
+
+    def begin_entityref(self, byte):
+        """Start collecting the name of an entity reference."""
+        self.erefbuf = ""
+        self.erefextra = ""  # extra bit for lenient mode
+
+    def do_entityref(self, byte):
+        """Collect the entity name up to the terminating C{;}."""
+        if byte.isspace() or byte == "<":
+            if self.beExtremelyLenient:
+                # '&foo' probably was '&amp;foo'
+                if self.erefbuf and self.erefbuf != "amp":
+                    self.erefextra = self.erefbuf
+                self.erefbuf = "amp"
+                if byte == "<":
+                    return "tagstart"
+                else:
+                    self.erefextra += byte
+                    return "spacebodydata"
+            self._parseError("Bad entity reference")
+        elif byte != ";":
+            self.erefbuf += byte
+        else:
+            return "bodydata"
+
+    def end_entityref(self):
+        """Report the collected entity reference."""
+        self.gotEntityReference(self.erefbuf)
+
+    # hacky support for space after & in entityref in beExtremelyLenient
+    # state should only happen in that case
+    def begin_spacebodydata(self, byte):
+        """Resume body text with what followed a bare C{&}."""
+        self.bodydata = self.erefextra
+        self.erefextra = None
+
+    do_spacebodydata = do_bodydata
+    end_spacebodydata = end_bodydata
+
+    # Sorta SAX-ish API
+
+    def gotTagStart(self, name, attributes):
+        """Encountered an opening tag.
+
+        Default behaviour is to print."""
+        print("begin", name, attributes)
+
+    def gotText(self, data):
+        """Encountered text
+
+        Default behaviour is to print."""
+        print("text:", repr(data))
+
+    def gotEntityReference(self, entityRef):
+        """Encountered mnemonic entity reference
+
+        Default behaviour is to print."""
+        print("entityRef: &%s;" % entityRef)
+
+    def gotComment(self, comment):
+        """Encountered comment.
+
+        Default behaviour is to ignore."""
+        pass
+
+    def gotCData(self, cdata):
+        """Encountered CDATA
+
+        Default behaviour is to call the gotText method"""
+        self.gotText(cdata)
+
+    def gotDoctype(self, doctype):
+        """Encountered DOCTYPE
+
+        This is really grotty: it basically just gives you everything between
+        '<!DOCTYPE' and '>' as an argument.
+        """
+        print("!DOCTYPE", repr(doctype))
+
+    def gotTagEnd(self, name):
+        """Encountered closing tag
+
+        Default behaviour is to print."""
+        print("end", name)
author	shmel1k <shmel1k@ydb.tech>	2023-11-26 18:16:14 +0300
committer	shmel1k <shmel1k@ydb.tech>	2023-11-26 18:43:30 +0300
commit	b8cf9e88f4c5c64d9406af533d8948deb050d695 (patch)
tree	218eb61fb3c3b96ec08b4d8cdfef383104a87d63 /contrib/python/Twisted/py3/twisted/web/sux.py
parent	523f645a83a0ec97a0332dbc3863bb354c92a328 (diff)
download	ydb-b8cf9e88f4c5c64d9406af533d8948deb050d695.tar.gz