Library import 16 (#2433)

Co-authored-by: robot-piglet <robot-piglet@yandex-team.com> Co-authored-by: deshevoy <deshevoy@yandex-team.com> Co-authored-by: robot-contrib <robot-contrib@yandex-team.com> Co-authored-by: thegeorg <thegeorg@yandex-team.com> Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com> Co-authored-by: svidyuk <svidyuk@yandex-team.com> Co-authored-by: shadchin <shadchin@yandex-team.com> Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com> Co-authored-by: innokentii <innokentii@yandex-team.com> Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com> Co-authored-by: snermolaev <snermolaev@yandex-team.com> Co-authored-by: dimdim11 <dimdim11@yandex-team.com> Co-authored-by: kickbutt <kickbutt@yandex-team.com> Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com> Co-authored-by: korsunandrei <korsunandrei@yandex-team.com> Co-authored-by: petrk <petrk@yandex-team.com> Co-authored-by: miroslav2 <miroslav2@yandex-team.com> Co-authored-by: serjflint <serjflint@yandex-team.com> Co-authored-by: akhropov <akhropov@yandex-team.com> Co-authored-by: prettyboy <prettyboy@yandex-team.com> Co-authored-by: ilikepugs <ilikepugs@yandex-team.com> Co-authored-by: hiddenpath <hiddenpath@yandex-team.com> Co-authored-by: mikhnenko <mikhnenko@yandex-team.com> Co-authored-by: spreis <spreis@yandex-team.com> Co-authored-by: andreyshspb <andreyshspb@yandex-team.com> Co-authored-by: dimaandreev <dimaandreev@yandex-team.com> Co-authored-by: rashid <rashid@yandex-team.com> Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com> Co-authored-by: r-vetrov <r-vetrov@yandex-team.com> Co-authored-by: ypodlesov <ypodlesov@yandex-team.com> Co-authored-by: zaverden <zaverden@yandex-team.com> Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com> Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com> Co-authored-by: v-korovin <v-korovin@yandex-team.com> Co-authored-by: arikon <arikon@yandex-team.com> Co-authored-by: khoden <khoden@yandex-team.com> 
Co-authored-by: psydmm <psydmm@yandex-team.com> Co-authored-by: robot-javacom <robot-javacom@yandex-team.com> Co-authored-by: dtorilov <dtorilov@yandex-team.com> Co-authored-by: sennikovmv <sennikovmv@yandex-team.com> Co-authored-by: hcpp <hcpp@ydb.tech>
author: AlexSm <alex@ydb.tech> 2024-03-05 10:40:59 +0100
committer: GitHub <noreply@github.com> 2024-03-05 12:40:59 +0300
commit: 1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree: 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/dbm/dumb.py
parent: ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download: ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz
1 files changed, 317 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/dbm/dumb.py b/contrib/tools/python3/Lib/dbm/dumb.py
new file mode 100644
index 0000000000..754624ccc8
--- /dev/null
+++ b/contrib/tools/python3/Lib/dbm/dumb.py
@@ -0,0 +1,317 @@
+"""A dumb and slow but simple dbm clone.
+
+For database spam, spam.dir contains the index (a text file),
+spam.bak *may* contain a backup of the index (also a text file),
+while spam.dat contains the data (a binary file).
+
+XXX TO DO:
+
+- seems to contain a bug when updating...
+
+- reclaim free space (currently, space once occupied by deleted or expanded
+items is never reused)
+
+- support concurrent access (currently, if two processes take turns making
+updates, they can mess up the index)
+
+- support efficient access to large databases (currently, the whole index
+is read when the database is opened, and some updates rewrite the whole index)
+
+- support opening for read-only (flag = 'm'); flag 'r' already makes item
+assignment and deletion raise error
+
+"""
+
+import ast as _ast
+import io as _io
+import os as _os
+import collections.abc
+
+# Public names exported by a star-import of this module.
+__all__ = ["error", "open"]
+
+# Values in the data file start at byte offsets that are multiples of
+# this size (see _Database._addval and _Database.__setitem__).
+_BLOCKSIZE = 512
+
+# Exception raised by _Database for database-level failures, such as
+# using a closed database or writing to one opened read-only.
+error = OSError
+
+class _Database(collections.abc.MutableMapping):
+    """Dictionary-like database stored in three files sharing a base name.
+
+    ``<base>.dir`` is the text index, ``<base>.dat`` holds the raw values
+    and ``<base>.bak`` is the previous index, kept by _commit().  Keys and
+    values passed as str are encoded as UTF-8 before being stored, and
+    values are always returned as bytes.
+    """
+
+    # The on-disk directory and data files can remain in mutually
+    # inconsistent states for an arbitrarily long time (see comments
+    # at the end of __setitem__).  This is only repaired when _commit()
+    # gets called.  One place _commit() gets called is from __del__(),
+    # and if that occurs at program shutdown time, module globals may
+    # already have gotten rebound to None.  Since it's crucial that
+    # _commit() finish successfully, we can't ignore shutdown races
+    # here, and _commit() must not reference any globals.
+    _os = _os       # for _commit()
+    _io = _io       # for _commit()
+
+    # Class-level defaults.  __del__() is close(), which calls _commit(),
+    # and _commit() reads both attributes.  If __init__() fails before
+    # assigning them (for example fsencode() rejecting the file name),
+    # the finalizer would otherwise die with AttributeError.
+    _index = None
+    _modified = False
+
+    def __init__(self, filebasename, mode, flag='c'):
+        """Open (and, depending on flag, create) the database files.
+
+        filebasename -- path without suffix; '.dir', '.dat' and '.bak'
+                        are appended to its os.fsencode()d form.
+        mode         -- permission bits passed to os.chmod() for the data
+                        file when it is created and for the directory
+                        file whenever it is written.
+        flag         -- 'r' makes item assignment and deletion raise
+                        error; 'c' and 'n' allow missing files to be
+                        created; 'n' also removes existing files first.
+
+        Raises OSError when a needed file is missing and flag is neither
+        'c' nor 'n'.
+        """
+        filebasename = self._os.fsencode(filebasename)
+        self._mode = mode
+        self._readonly = (flag == 'r')
+
+        # The directory file is a text file.  Each line looks like
+        #    "%r, (%d, %d)\n" % (key, pos, siz)
+        # where key is the string key, pos is the offset into the dat
+        # file of the associated value's first byte, and siz is the number
+        # of bytes in the associated value.
+        self._dirfile = filebasename + b'.dir'
+
+        # The data file is a binary file pointed into by the directory
+        # file, and holds the values associated with keys.  Each value
+        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
+        # binary 8-bit string value.
+        self._datfile = filebasename + b'.dat'
+        self._bakfile = filebasename + b'.bak'
+
+        # The index is an in-memory dict, mirroring the directory file.
+        self._index = None  # maps keys to (pos, siz) pairs
+
+        # Handle the creation
+        self._create(flag)
+        self._update(flag)
+
+    def _create(self, flag):
+        """Make sure the data file exists, creating it if flag allows.
+
+        With flag 'n' any existing data, backup and directory files are
+        removed first.  If the data file cannot be opened it is created
+        empty (and chmod'ed) for flags 'c' and 'n'; for any other flag
+        the OSError propagates.
+        """
+        if flag == 'n':
+            for filename in (self._datfile, self._bakfile, self._dirfile):
+                try:
+                    _os.remove(filename)
+                except OSError:
+                    # Best effort: the file may simply not exist.
+                    pass
+        # Mod by Jack: create data file if needed
+        try:
+            f = _io.open(self._datfile, 'r', encoding="Latin-1")
+        except OSError:
+            if flag not in ('c', 'n'):
+                raise
+            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
+                self._chmod(self._datfile)
+        else:
+            f.close()
+
+    def _update(self, flag):
+        """Read the directory file into the in-memory index dict.
+
+        A missing directory file is tolerated for flags 'c' and 'n'; the
+        database is then marked modified so that _commit() writes one.
+        For any other flag the OSError propagates.
+        """
+        self._modified = False
+        self._index = {}
+        try:
+            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
+        except OSError:
+            if flag not in ('c', 'n'):
+                raise
+            self._modified = True
+        else:
+            with f:
+                for line in f:
+                    line = line.rstrip()
+                    # literal_eval only accepts Python literals, so the
+                    # directory file cannot run code.
+                    key, pos_and_siz_pair = _ast.literal_eval(line)
+                    key = key.encode('Latin-1')
+                    self._index[key] = pos_and_siz_pair
+
+    def _commit(self):
+        """Write the index dict to the directory file.
+
+        The original directory file (if any) is renamed with a .bak
+        extension first.  If a .bak file currently exists, it's deleted.
+        Does nothing when the database is closed or unmodified.
+        """
+        # CAUTION:  It's vital that _commit() succeed, and _commit() can
+        # be called from __del__().  Therefore we must never reference a
+        # global in this routine.
+        if self._index is None or not self._modified:
+            return  # nothing to do
+
+        try:
+            self._os.unlink(self._bakfile)
+        except OSError:
+            pass
+
+        try:
+            self._os.rename(self._dirfile, self._bakfile)
+        except OSError:
+            pass
+
+        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
+            self._chmod(self._dirfile)
+            for key, pos_and_siz_pair in self._index.items():
+                # Use Latin-1 since it has no qualms with any value in any
+                # position; UTF-8, though, does care sometimes.
+                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
+                f.write(entry)
+
+    sync = _commit
+
+    def _verify_open(self):
+        """Raise error if the database has been closed."""
+        if self._index is None:
+            raise error('DBM object has already been closed')
+
+    def __getitem__(self, key):
+        """Return the bytes stored under key (str keys are UTF-8 encoded).
+
+        Raises KeyError for an unknown key and error when closed.
+        """
+        if isinstance(key, str):
+            key = key.encode('utf-8')
+        self._verify_open()
+        pos, siz = self._index[key]     # may raise KeyError
+        with _io.open(self._datfile, 'rb') as f:
+            f.seek(pos)
+            dat = f.read(siz)
+        return dat
+
+    def _addval(self, val):
+        """Append val to the data file at a _BLOCKSIZE-aligned offset.
+
+        The data file is first padded with NUL bytes (if needed) to get
+        to an aligned offset.  Return the pair
+            (starting offset of val, len(val))
+        """
+        with _io.open(self._datfile, 'rb+') as f:
+            f.seek(0, 2)    # whence=2: seek to the end of the file
+            pos = int(f.tell())
+            # Round the end-of-file offset up to the next block boundary.
+            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
+            f.write(b'\0'*(npos-pos))
+            pos = npos
+            f.write(val)
+        return (pos, len(val))
+
+    def _setval(self, pos, val):
+        """Write val to the data file, starting at offset pos.
+
+        The caller is responsible for ensuring that there's enough room
+        starting at pos to hold val, without overwriting some other
+        value.  Return the pair (pos, len(val)).
+        """
+        with _io.open(self._datfile, 'rb+') as f:
+            f.seek(pos)
+            f.write(val)
+        return (pos, len(val))
+
+    def _addkey(self, key, pos_and_siz_pair):
+        """Record a new key in the index and append it to the directory file.
+
+        key is a new key whose associated value starts in the data file
+        at the offset, and has the length, given by pos_and_siz_pair.
+        """
+        self._index[key] = pos_and_siz_pair
+        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
+            self._chmod(self._dirfile)
+            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))
+
+    def __setitem__(self, key, val):
+        """Store val under key; str arguments are encoded as UTF-8.
+
+        Raises error when the database is read-only or closed, and
+        TypeError when key or val is not str, bytes or bytearray.
+        """
+        if self._readonly:
+            raise error('The database is opened for reading only')
+        if isinstance(key, str):
+            key = key.encode('utf-8')
+        elif not isinstance(key, (bytes, bytearray)):
+            raise TypeError("keys must be bytes or strings")
+        if isinstance(val, str):
+            val = val.encode('utf-8')
+        elif not isinstance(val, (bytes, bytearray)):
+            raise TypeError("values must be bytes or strings")
+        self._verify_open()
+        self._modified = True
+        if key not in self._index:
+            self._addkey(key, self._addval(val))
+        else:
+            # See whether the new value is small enough to fit in the
+            # (padded) space currently occupied by the old value.
+            pos, siz = self._index[key]
+            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
+            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
+            if newblocks <= oldblocks:
+                self._index[key] = self._setval(pos, val)
+            else:
+                # The new value doesn't fit in the (padded) space used
+                # by the old value.  The blocks used by the old value are
+                # forever lost.
+                self._index[key] = self._addval(val)
+
+            # Note that _index may be out of synch with the directory
+            # file now:  _setval() and _addval() don't update the directory
+            # file.  This also means that the on-disk directory and data
+            # files are in a mutually inconsistent state, and they'll
+            # remain that way until _commit() is called.  Note that this
+            # is a disaster (for the database) if the program crashes
+            # (so that _commit() never gets called).
+
+    def __delitem__(self, key):
+        """Remove key from the index and rewrite the directory file.
+
+        Raises error when the database is read-only or closed, and
+        KeyError for an unknown key.
+        """
+        if self._readonly:
+            raise error('The database is opened for reading only')
+        if isinstance(key, str):
+            key = key.encode('utf-8')
+        self._verify_open()
+        self._modified = True
+        # The blocks used by the associated value are lost.
+        del self._index[key]
+        # XXX It's unclear why we do a _commit() here (the code always
+        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
+        # XXX keep the directory file in synch.  Why should we?  Or
+        # XXX why shouldn't __setitem__?
+        self._commit()
+
+    def keys(self):
+        """Return a list of all keys; raise error when closed."""
+        try:
+            return list(self._index)
+        except TypeError:
+            # self._index is None once the database has been closed.
+            raise error('DBM object has already been closed') from None
+
+    def items(self):
+        """Return a list of (key, value) pairs; raise error when closed."""
+        self._verify_open()
+        return [(key, self[key]) for key in self._index]
+
+    def __contains__(self, key):
+        """Return whether key is in the index (str keys are UTF-8 encoded).
+
+        Raises error when the database is closed.
+        """
+        if isinstance(key, str):
+            key = key.encode('utf-8')
+        try:
+            return key in self._index
+        except TypeError:
+            # Either the database is closed (index is None) or the key
+            # itself is unhashable; only the former is reported as error.
+            if self._index is None:
+                raise error('DBM object has already been closed') from None
+            else:
+                raise
+
+    def iterkeys(self):
+        """Return an iterator over the keys; raise error when closed."""
+        try:
+            return iter(self._index)
+        except TypeError:
+            raise error('DBM object has already been closed') from None
+    __iter__ = iterkeys
+
+    def __len__(self):
+        """Return the number of keys; raise error when closed."""
+        try:
+            return len(self._index)
+        except TypeError:
+            raise error('DBM object has already been closed') from None
+
+    def close(self):
+        """Commit pending index changes and mark the database closed.
+
+        Calling close() again is harmless: _commit() returns at once when
+        the index is None.
+        """
+        try:
+            self._commit()
+        finally:
+            self._index = self._datfile = self._dirfile = self._bakfile = None
+
+    __del__ = close
+
+    def _chmod(self, file):
+        """Apply the mode given at construction time to file."""
+        self._os.chmod(file, self._mode)
+
+    def __enter__(self):
+        """Return the database itself for use in a with statement."""
+        return self
+
+    def __exit__(self, *args):
+        """Close the database when the with block is left."""
+        self.close()
+
+
+def open(file, flag='c', mode=0o666):
+    """Open the dumb database *file* and return a _Database object.
+
+    *file* is the base name of the database; the ".dir", ".dat" and
+    ".bak" suffixes are appended to it.
+
+    *flag* must be one of:
+
+    'r' -- open an existing database; item assignment and deletion
+           raise error
+    'w' -- open an existing database for reading and writing
+    'c' -- like 'w', but create the database files if they are missing
+           (the default)
+    'n' -- remove any existing database files and start empty
+
+    Any other value raises ValueError.
+
+    *mode* is the UNIX permission mode given to the database files.  It
+    defaults to octal 0o666, and the bits set in the prevailing umask
+    are cleared from it first.
+
+    """
+    # os.umask() can only report the current mask by installing a new
+    # one, so the old value is put straight back.
+    try:
+        current_umask = _os.umask(0)
+        _os.umask(current_umask)
+    except AttributeError:
+        # No umask support here: use mode unchanged.
+        pass
+    else:
+        # Turn off any bits that are set in the umask
+        mode &= ~current_umask
+
+    valid_flags = ('r', 'w', 'c', 'n')
+    if flag in valid_flags:
+        return _Database(file, mode, flag=flag)
+    raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")
author	AlexSm <alex@ydb.tech>	2024-03-05 10:40:59 +0100
committer	GitHub <noreply@github.com>	2024-03-05 12:40:59 +0300
commit	1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree	07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/dbm/dumb.py
parent	ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download	ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz