aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/Lib/encodings/utf_8_sig.py
diff options
context:
space:
mode:
authorAlexSm <alex@ydb.tech>2024-03-05 10:40:59 +0100
committerGitHub <noreply@github.com>2024-03-05 12:40:59 +0300
commit1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/encodings/utf_8_sig.py
parentffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
downloadydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz
Library import 16 (#2433)
Co-authored-by: robot-piglet <robot-piglet@yandex-team.com> Co-authored-by: deshevoy <deshevoy@yandex-team.com> Co-authored-by: robot-contrib <robot-contrib@yandex-team.com> Co-authored-by: thegeorg <thegeorg@yandex-team.com> Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com> Co-authored-by: svidyuk <svidyuk@yandex-team.com> Co-authored-by: shadchin <shadchin@yandex-team.com> Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com> Co-authored-by: innokentii <innokentii@yandex-team.com> Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com> Co-authored-by: snermolaev <snermolaev@yandex-team.com> Co-authored-by: dimdim11 <dimdim11@yandex-team.com> Co-authored-by: kickbutt <kickbutt@yandex-team.com> Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com> Co-authored-by: korsunandrei <korsunandrei@yandex-team.com> Co-authored-by: petrk <petrk@yandex-team.com> Co-authored-by: miroslav2 <miroslav2@yandex-team.com> Co-authored-by: serjflint <serjflint@yandex-team.com> Co-authored-by: akhropov <akhropov@yandex-team.com> Co-authored-by: prettyboy <prettyboy@yandex-team.com> Co-authored-by: ilikepugs <ilikepugs@yandex-team.com> Co-authored-by: hiddenpath <hiddenpath@yandex-team.com> Co-authored-by: mikhnenko <mikhnenko@yandex-team.com> Co-authored-by: spreis <spreis@yandex-team.com> Co-authored-by: andreyshspb <andreyshspb@yandex-team.com> Co-authored-by: dimaandreev <dimaandreev@yandex-team.com> Co-authored-by: rashid <rashid@yandex-team.com> Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com> Co-authored-by: r-vetrov <r-vetrov@yandex-team.com> Co-authored-by: ypodlesov <ypodlesov@yandex-team.com> Co-authored-by: zaverden <zaverden@yandex-team.com> Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com> Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com> Co-authored-by: v-korovin <v-korovin@yandex-team.com> Co-authored-by: arikon <arikon@yandex-team.com> Co-authored-by: khoden <khoden@yandex-team.com> 
Co-authored-by: psydmm <psydmm@yandex-team.com> Co-authored-by: robot-javacom <robot-javacom@yandex-team.com> Co-authored-by: dtorilov <dtorilov@yandex-team.com> Co-authored-by: sennikovmv <sennikovmv@yandex-team.com> Co-authored-by: hcpp <hcpp@ydb.tech>
Diffstat (limited to 'contrib/tools/python3/Lib/encodings/utf_8_sig.py')
-rw-r--r--contrib/tools/python3/Lib/encodings/utf_8_sig.py130
1 files changed, 130 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/encodings/utf_8_sig.py b/contrib/tools/python3/Lib/encodings/utf_8_sig.py
new file mode 100644
index 0000000000..1bb479203f
--- /dev/null
+++ b/contrib/tools/python3/Lib/encodings/utf_8_sig.py
@@ -0,0 +1,130 @@
+""" Python 'utf-8-sig' Codec
+This works similarly to UTF-8, with the following changes:
+
+* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
+ first three bytes.
+
+* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
+ bytes will be skipped.
+"""
+import codecs
+
+### Codec APIs
+
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 with a UTF-8 BOM prepended.

    Returns a ``(data, length)`` tuple: *data* is ``codecs.BOM_UTF8``
    followed by the UTF-8 encoding of *input*, and *length* is
    ``len(input)`` (the BOM is not counted as consumed input).
    """
    payload = codecs.utf_8_encode(input, errors)[0]
    return (codecs.BOM_UTF8 + payload, len(input))
+
def decode(input, errors='strict'):
    """Decode UTF-8 bytes, skipping one leading UTF-8 BOM if present.

    Returns a ``(text, consumed)`` tuple.  When *input* starts with
    ``codecs.BOM_UTF8`` the three BOM bytes are dropped from the text but
    still counted in *consumed*.  The data is decoded with ``final=True``,
    so a truncated trailing sequence is handled according to *errors*.
    """
    skipped = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        skipped = 3
    text, used = codecs.utf_8_decode(input, errors, True)
    return (text, used + skipped)
+
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental 'utf-8-sig' encoder.

    The first call to ``encode()`` after construction or ``reset()``
    prepends ``codecs.BOM_UTF8``; later calls emit plain UTF-8.

    ``self.first`` is 1 while the BOM is still pending and 0 once it has
    been written; it is also the value exposed by ``getstate()``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        self.first = 1

    def encode(self, input, final=False):
        """Return the UTF-8 bytes for *input*, BOM-prefixed on first use.

        *final* is accepted for interface compatibility and is not used.
        """
        if not self.first:
            return codecs.utf_8_encode(input, self.errors)[0]
        # Clear the flag before encoding, so it stays cleared even if
        # the encode call below raises.
        self.first = 0
        return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]

    def reset(self):
        """Reset the encoder so the next ``encode()`` writes a BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return the BOM-pending flag (1 = pending, 0 = already written)."""
        return self.first

    def setstate(self, state):
        """Restore the BOM-pending flag from a ``getstate()`` value."""
        self.first = state
+
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental 'utf-8-sig' decoder that skips one leading UTF-8 BOM.

    ``self.first`` is 1 until the start of the stream has been examined
    for a BOM and 0 afterwards; ``reset()`` sets it back to 1.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.first = 1

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, dropping a BOM found at the start of the stream.

        Returns ``(text, consumed)``.  While fewer than three bytes are
        available and they could still be the beginning of a BOM, nothing
        is consumed so the decision is retried on the next call.

        When *final* is true there is no next call, so a non-empty partial
        BOM is handed to the UTF-8 decoder instead of being silently
        dropped.  This matches the module-level ``decode()``, which reports
        such truncated input according to *errors*.
        """
        if self.first:
            maybe_bom = len(input) < 3 and codecs.BOM_UTF8.startswith(input)
            # Keep waiting unless this is the last chunk and it actually
            # holds bytes; empty input always leaves the state untouched.
            if maybe_bom and not (final and input):
                # not enough data to decide if this really is a BOM
                # => try again on the next call
                return ("", 0)
            self.first = 0
            if input[:3] == codecs.BOM_UTF8:
                (output, consumed) = \
                    codecs.utf_8_decode(input[3:], errors, final)
                # The three BOM bytes count as consumed input.
                return (output, consumed + 3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Reset the decoder so a BOM is looked for again."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return ``(buffered_bytes, first)`` describing the decoder state."""
        state = codecs.BufferedIncrementalDecoder.getstate(self)
        # state[1] must be 0 here, as it isn't passed along to the caller
        return (state[0], self.first)

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        self.first = state[1]
+
class StreamWriter(codecs.StreamWriter):
    """Stream writer that emits a UTF-8 BOM before the first encoded data."""

    def reset(self):
        """Reset the writer so the next ``encode()`` writes a BOM again."""
        codecs.StreamWriter.reset(self)
        # Remove the per-instance override installed by encode(), if any,
        # so the class-level encode() below becomes visible again.
        try:
            del self.encode
        except AttributeError:
            pass

    def encode(self, input, errors='strict'):
        """Encode *input* with a BOM; later calls use plain UTF-8.

        After this first call the instance attribute ``encode`` is rebound
        to ``codecs.utf_8_encode``, so the BOM is written only once.
        """
        self.encode = codecs.utf_8_encode
        return encode(input, errors)
+
class StreamReader(codecs.StreamReader):
    """Stream reader that skips one UTF-8 BOM at the start of the data."""

    def reset(self):
        """Reset the reader so a leading BOM is looked for again."""
        codecs.StreamReader.reset(self)
        # Remove the per-instance override installed by decode(), if any,
        # so the BOM-aware class-level decode() becomes visible again.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*, dropping a BOM if it begins the data.

        Returns ``(text, consumed)``.  Once the BOM question is settled the
        instance attribute ``decode`` is rebound to ``codecs.utf_8_decode``
        so later calls skip this check.
        """
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return ("", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:], errors)
            # The three BOM bytes count as consumed input.
            return (output, consumed + 3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)
+
+### encodings module API
+
def getregentry():
    """Return the ``codecs.CodecInfo`` entry describing the 'utf-8-sig' codec.

    The entry bundles this module's stateless ``encode``/``decode``
    functions with its incremental and stream classes.
    """
    return codecs.CodecInfo(
        name='utf-8-sig',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )