Library import 16 (#2433)

Co-authored-by: robot-piglet <robot-piglet@yandex-team.com> Co-authored-by: deshevoy <deshevoy@yandex-team.com> Co-authored-by: robot-contrib <robot-contrib@yandex-team.com> Co-authored-by: thegeorg <thegeorg@yandex-team.com> Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com> Co-authored-by: svidyuk <svidyuk@yandex-team.com> Co-authored-by: shadchin <shadchin@yandex-team.com> Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com> Co-authored-by: innokentii <innokentii@yandex-team.com> Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com> Co-authored-by: snermolaev <snermolaev@yandex-team.com> Co-authored-by: dimdim11 <dimdim11@yandex-team.com> Co-authored-by: kickbutt <kickbutt@yandex-team.com> Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com> Co-authored-by: korsunandrei <korsunandrei@yandex-team.com> Co-authored-by: petrk <petrk@yandex-team.com> Co-authored-by: miroslav2 <miroslav2@yandex-team.com> Co-authored-by: serjflint <serjflint@yandex-team.com> Co-authored-by: akhropov <akhropov@yandex-team.com> Co-authored-by: prettyboy <prettyboy@yandex-team.com> Co-authored-by: ilikepugs <ilikepugs@yandex-team.com> Co-authored-by: hiddenpath <hiddenpath@yandex-team.com> Co-authored-by: mikhnenko <mikhnenko@yandex-team.com> Co-authored-by: spreis <spreis@yandex-team.com> Co-authored-by: andreyshspb <andreyshspb@yandex-team.com> Co-authored-by: dimaandreev <dimaandreev@yandex-team.com> Co-authored-by: rashid <rashid@yandex-team.com> Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com> Co-authored-by: r-vetrov <r-vetrov@yandex-team.com> Co-authored-by: ypodlesov <ypodlesov@yandex-team.com> Co-authored-by: zaverden <zaverden@yandex-team.com> Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com> Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com> Co-authored-by: v-korovin <v-korovin@yandex-team.com> Co-authored-by: arikon <arikon@yandex-team.com> Co-authored-by: khoden <khoden@yandex-team.com> Co-authored-by: psydmm <psydmm@yandex-team.com> Co-authored-by: robot-javacom <robot-javacom@yandex-team.com> Co-authored-by: dtorilov <dtorilov@yandex-team.com> Co-authored-by: sennikovmv <sennikovmv@yandex-team.com> Co-authored-by: hcpp <hcpp@ydb.tech>
author: AlexSm <alex@ydb.tech> 2024-03-05 10:40:59 +0100
committer: GitHub <noreply@github.com> 2024-03-05 12:40:59 +0300
commit: 1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree: 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/encodings/utf_8_sig.py
parent: ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download: ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz
1 files changed, 130 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/encodings/utf_8_sig.py b/contrib/tools/python3/Lib/encodings/utf_8_sig.py
new file mode 100644
index 0000000000..1bb479203f
--- /dev/null
+++ b/contrib/tools/python3/Lib/encodings/utf_8_sig.py
@@ -0,0 +1,130 @@
+""" Python 'utf-8-sig' Codec
+This work similar to UTF-8 with the following changes:
+
+* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
+  first three bytes.
+
+* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
+  bytes will be skipped.
+"""
+import codecs
+
+### Codec APIs
+
+def encode(input, errors='strict'):
+    return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0],
+            len(input))
+
+def decode(input, errors='strict'):
+    prefix = 0
+    if input[:3] == codecs.BOM_UTF8:
+        input = input[3:]
+        prefix = 3
+    (output, consumed) = codecs.utf_8_decode(input, errors, True)
+    return (output, consumed+prefix)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def __init__(self, errors='strict'):
+        codecs.IncrementalEncoder.__init__(self, errors)
+        self.first = 1
+
+    def encode(self, input, final=False):
+        if self.first:
+            self.first = 0
+            return codecs.BOM_UTF8 + \
+                   codecs.utf_8_encode(input, self.errors)[0]
+        else:
+            return codecs.utf_8_encode(input, self.errors)[0]
+
+    def reset(self):
+        codecs.IncrementalEncoder.reset(self)
+        self.first = 1
+
+    def getstate(self):
+        return self.first
+
+    def setstate(self, state):
+        self.first = state
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    def __init__(self, errors='strict'):
+        codecs.BufferedIncrementalDecoder.__init__(self, errors)
+        self.first = 1
+
+    def _buffer_decode(self, input, errors, final):
+        if self.first:
+            if len(input) < 3:
+                if codecs.BOM_UTF8.startswith(input):
+                    # not enough data to decide if this really is a BOM
+                    # => try again on the next call
+                    return ("", 0)
+                else:
+                    self.first = 0
+            else:
+                self.first = 0
+                if input[:3] == codecs.BOM_UTF8:
+                    (output, consumed) = \
+                       codecs.utf_8_decode(input[3:], errors, final)
+                    return (output, consumed+3)
+        return codecs.utf_8_decode(input, errors, final)
+
+    def reset(self):
+        codecs.BufferedIncrementalDecoder.reset(self)
+        self.first = 1
+
+    def getstate(self):
+        state = codecs.BufferedIncrementalDecoder.getstate(self)
+        # state[1] must be 0 here, as it isn't passed along to the caller
+        return (state[0], self.first)
+
+    def setstate(self, state):
+        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
+        codecs.BufferedIncrementalDecoder.setstate(self, state)
+        self.first = state[1]
+
+class StreamWriter(codecs.StreamWriter):
+    def reset(self):
+        codecs.StreamWriter.reset(self)
+        try:
+            del self.encode
+        except AttributeError:
+            pass
+
+    def encode(self, input, errors='strict'):
+        self.encode = codecs.utf_8_encode
+        return encode(input, errors)
+
+class StreamReader(codecs.StreamReader):
+    def reset(self):
+        codecs.StreamReader.reset(self)
+        try:
+            del self.decode
+        except AttributeError:
+            pass
+
+    def decode(self, input, errors='strict'):
+        if len(input) < 3:
+            if codecs.BOM_UTF8.startswith(input):
+                # not enough data to decide if this is a BOM
+                # => try again on the next call
+                return ("", 0)
+        elif input[:3] == codecs.BOM_UTF8:
+            self.decode = codecs.utf_8_decode
+            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
+            return (output, consumed+3)
+        # (else) no BOM present
+        self.decode = codecs.utf_8_decode
+        return codecs.utf_8_decode(input, errors)
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='utf-8-sig',
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
author	AlexSm <alex@ydb.tech>	2024-03-05 10:40:59 +0100
committer	GitHub <noreply@github.com>	2024-03-05 12:40:59 +0300
commit	1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree	07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/encodings/utf_8_sig.py
parent	ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download	ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz