diff options
author | AlexSm <alex@ydb.tech> | 2024-03-05 10:40:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-05 12:40:59 +0300 |
commit | 1ac13c847b5358faba44dbb638a828e24369467b (patch) | |
tree | 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/dbm | |
parent | ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff) | |
download | ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz |
Library import 16 (#2433)
Co-authored-by: robot-piglet <robot-piglet@yandex-team.com>
Co-authored-by: deshevoy <deshevoy@yandex-team.com>
Co-authored-by: robot-contrib <robot-contrib@yandex-team.com>
Co-authored-by: thegeorg <thegeorg@yandex-team.com>
Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com>
Co-authored-by: svidyuk <svidyuk@yandex-team.com>
Co-authored-by: shadchin <shadchin@yandex-team.com>
Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com>
Co-authored-by: innokentii <innokentii@yandex-team.com>
Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com>
Co-authored-by: snermolaev <snermolaev@yandex-team.com>
Co-authored-by: dimdim11 <dimdim11@yandex-team.com>
Co-authored-by: kickbutt <kickbutt@yandex-team.com>
Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com>
Co-authored-by: korsunandrei <korsunandrei@yandex-team.com>
Co-authored-by: petrk <petrk@yandex-team.com>
Co-authored-by: miroslav2 <miroslav2@yandex-team.com>
Co-authored-by: serjflint <serjflint@yandex-team.com>
Co-authored-by: akhropov <akhropov@yandex-team.com>
Co-authored-by: prettyboy <prettyboy@yandex-team.com>
Co-authored-by: ilikepugs <ilikepugs@yandex-team.com>
Co-authored-by: hiddenpath <hiddenpath@yandex-team.com>
Co-authored-by: mikhnenko <mikhnenko@yandex-team.com>
Co-authored-by: spreis <spreis@yandex-team.com>
Co-authored-by: andreyshspb <andreyshspb@yandex-team.com>
Co-authored-by: dimaandreev <dimaandreev@yandex-team.com>
Co-authored-by: rashid <rashid@yandex-team.com>
Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com>
Co-authored-by: r-vetrov <r-vetrov@yandex-team.com>
Co-authored-by: ypodlesov <ypodlesov@yandex-team.com>
Co-authored-by: zaverden <zaverden@yandex-team.com>
Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com>
Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com>
Co-authored-by: v-korovin <v-korovin@yandex-team.com>
Co-authored-by: arikon <arikon@yandex-team.com>
Co-authored-by: khoden <khoden@yandex-team.com>
Co-authored-by: psydmm <psydmm@yandex-team.com>
Co-authored-by: robot-javacom <robot-javacom@yandex-team.com>
Co-authored-by: dtorilov <dtorilov@yandex-team.com>
Co-authored-by: sennikovmv <sennikovmv@yandex-team.com>
Co-authored-by: hcpp <hcpp@ydb.tech>
Diffstat (limited to 'contrib/tools/python3/Lib/dbm')
-rw-r--r-- | contrib/tools/python3/Lib/dbm/__init__.py | 190 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/dbm/dumb.py | 317 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/dbm/gnu.py | 3 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/dbm/ndbm.py | 3 |
4 files changed, 513 insertions, 0 deletions
# diff --git a/contrib/tools/python3/Lib/dbm/__init__.py b/contrib/tools/python3/Lib/dbm/__init__.py
# new file mode 100644
# index 0000000000..8055d3769f
# --- /dev/null
# +++ b/contrib/tools/python3/Lib/dbm/__init__.py
# @@ -0,0 +1,190 @@
"""Generic interface to all dbm clones.

Use

        import dbm
        d = dbm.open(file, 'w', 0o666)

The returned object is a dbm.gnu, dbm.ndbm or dbm.dumb object, dependent on the
type of database being opened (determined by the whichdb function) in the case
of an existing dbm. If the dbm does not exist and the create or new flag ('c'
or 'n') was specified, the dbm type will be determined by the availability of
the modules (tested in the above order).

It has the following interface (key and data are strings):

        d[key] = data   # store data at key (may override data at
                        # existing key)
        data = d[key]   # retrieve data at key (raise KeyError if no
                        # such key)
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = key in d # true if the key exists
        list = d.keys() # return a list of all existing keys (slow!)

Future versions may change the order in which implementations are
tested for existence, and add interfaces to other dbm-like
implementations.
"""

__all__ = ['open', 'whichdb', 'error']

import io
import os
import struct
import sys


class error(Exception):
    """Exception raised by this module for dbm-specific failures."""
    pass

# Candidate implementations, in order of preference.
_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
# First importable module from _names; filled in lazily by open().
_defaultmod = None
# Maps module name -> imported module for every importable entry of _names.
_modules = {}

# The public ``error`` is a tuple so that ``except dbm.error`` also catches
# OSError; error[0] is the class defined above and is what open() raises.
error = (error, OSError)

try:
    from dbm import ndbm
except ImportError:
    ndbm = None


def open(file, flag='r', mode=0o666):
    """Open or create database at path given by *file*.

    Optional argument *flag* can be 'r' (default) for read-only access, 'w'
    for read-write access of an existing database, 'c' for read-write access
    to a new or existing database, and 'n' for read-write access to a new
    database.

    Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
    only if it doesn't exist; and 'n' always creates a new database.

    Raises ImportError if none of the dbm implementations can be imported,
    and error[0] if the database is missing (without 'c'/'n'), its type
    cannot be determined, or the module for its type is unavailable.
    """
    global _defaultmod
    if _defaultmod is None:
        # First call: import every available implementation once and
        # remember the first one as the default for new databases.
        for name in _names:
            try:
                mod = __import__(name, fromlist=['open'])
            except ImportError:
                continue
            if not _defaultmod:
                _defaultmod = mod
            _modules[name] = mod
        if not _defaultmod:
            raise ImportError("no dbm clone found; tried %s" % _names)

    # guess the type of an existing database, if not creating a new one
    result = whichdb(file) if 'n' not in flag else None
    if result is None:
        # db doesn't exist or 'n' flag was specified to create a new db
        if 'c' in flag or 'n' in flag:
            # file doesn't exist and the new flag was used so use default type
            mod = _defaultmod
        else:
            raise error[0]("db file doesn't exist; "
                           "use 'c' or 'n' flag to create a new db")
    elif result == "":
        # db type cannot be determined
        raise error[0]("db type could not be determined")
    elif result not in _modules:
        raise error[0]("db type is {0}, but the module is not "
                       "available".format(result))
    else:
        mod = _modules[result]
    return mod.open(file, flag, mode)


def whichdb(filename):
    """Guess which db package to use to open a db file.

    Return values:

    - None if the database file can't be read;
    - empty string if the file can be read but can't be recognized
    - the name of the dbm submodule (e.g. "ndbm" or "gnu") if recognized.

    Importing the given module may still fail, and opening the
    database using that module may still fail.
    """

    # Check for ndbm first -- this has a .pag and a .dir file
    filename = os.fsencode(filename)
    try:
        # Only readability matters here; the files are closed immediately.
        with io.open(filename + b".pag", "rb"):
            pass
        with io.open(filename + b".dir", "rb"):
            pass
        return "dbm.ndbm"
    except OSError:
        # some dbm emulations based on Berkeley DB generate a .db file
        # some do not, but they should be caught by the bsd checks
        try:
            with io.open(filename + b".db", "rb"):
                pass
            # guarantee we can actually open the file using dbm
            # kind of overkill, but since we are dealing with emulations
            # it seems like a prudent step
            if ndbm is not None:
                d = ndbm.open(filename)
                d.close()
                return "dbm.ndbm"
        except OSError:
            pass

    # Check for dumbdbm next -- this has a .dir and a .dat file
    try:
        # First check for presence of files
        os.stat(filename + b".dat")
        size = os.stat(filename + b".dir").st_size
        # dumbdbm files with no keys are empty
        if size == 0:
            return "dbm.dumb"
        with io.open(filename + b".dir", "rb") as f:
            # A dumb index line starts with the repr() of the key,
            # i.e. with a quote character.
            if f.read(1) in (b"'", b'"'):
                return "dbm.dumb"
    except OSError:
        pass

    # See if the file exists, return None if not
    try:
        f = io.open(filename, "rb")
    except OSError:
        return None

    with f:
        # Read the start of the file -- the magic number
        s16 = f.read(16)
    s = s16[0:4]

    # Return "" if not at least 4 bytes
    if len(s) != 4:
        return ""

    # Convert to 4-byte int in native byte order -- return "" if impossible
    try:
        (magic,) = struct.unpack("=l", s)
    except struct.error:
        return ""

    # Check for GNU dbm
    if magic in (0x13579ace, 0x13579acd, 0x13579acf):
        return "dbm.gnu"

    # Unknown.  (A second magic number used to be unpacked from the last
    # four bytes of the header here, but nothing ever inspected it.)
    return ""


if __name__ == "__main__":
    for filename in sys.argv[1:]:
        print(whichdb(filename) or "UNKNOWN", filename)

# diff --git
# a/contrib/tools/python3/Lib/dbm/dumb.py b/contrib/tools/python3/Lib/dbm/dumb.py
# new file mode 100644
# index 0000000000..754624ccc8
# --- /dev/null
# +++ b/contrib/tools/python3/Lib/dbm/dumb.py
# @@ -0,0 +1,317 @@
"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

"""

import ast as _ast
import io as _io
import os as _os
import collections.abc

__all__ = ["error", "open"]

_BLOCKSIZE = 512

error = OSError

class _Database(collections.abc.MutableMapping):
    """Mapping of bytes keys to bytes values stored in .dir/.dat files.

    str keys and values are accepted and encoded as UTF-8.  The index is
    held in memory and written back to the .dir file by _commit().
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    # Class-level defaults so that close()/__del__() are harmless on an
    # instance whose __init__() raised before these were assigned.
    _index = None
    _modified = False

    def __init__(self, filebasename, mode, flag='c'):
        """Open (and per *flag*, create) the database named *filebasename*.

        *mode* is the permission mode applied to files this object creates;
        *flag* is one of 'r', 'w', 'c' or 'n'.
        """
        filebasename = self._os.fsencode(filebasename)
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + b'.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + b'.dat'
        self._bakfile = filebasename + b'.bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    def _create(self, flag):
        """Make sure the data file exists, honouring *flag*.

        'n' first removes any existing .dat/.bak/.dir files.  A missing
        data file is created for 'c' and 'n'; for other flags the OSError
        from the failed open propagates.
        """
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        self._modified = False
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            # No directory file yet: force _commit() to write one.
            self._modified = True
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
                f.write(entry)

    sync = _commit

    def _verify_open(self):
        """Raise error if the database has been closed."""
        if self._index is None:
            raise error('DBM object has already been closed')

    def __getitem__(self, key):
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        if self._readonly:
            raise error('The database is opened for reading only')
        if isinstance(key, str):
            key = key.encode('utf-8')
        elif not isinstance(key, (bytes, bytearray)):
            raise TypeError("keys must be bytes or strings")
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        if self._readonly:
            raise error('The database is opened for reading only')
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        # The blocks used by the associated value are lost.
        del self._index[key]            # may raise KeyError
        # Only mark the database dirty once the delete has succeeded, so
        # a failed delete does not force a pointless index rewrite later.
        self._modified = True
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        try:
            return list(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def items(self):
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        if isinstance(key, str):
            key = key.encode('utf-8')
        try:
            return key in self._index
        except TypeError:
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                raise

    def iterkeys(self):
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        """Commit pending index changes and mark the object closed."""
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()


def open(file, flag='c', mode=0o666):
    """Open the database file, filename, and return corresponding object.

    The flag argument controls how the database is opened:

    'r' -- open an existing database for reading only
    'w' -- open an existing database for reading and writing
    'c' -- open for reading and writing, creating the database if it
           does not exist (the default)
    'n' -- always create a new, empty database, open for reading and
           writing

    'r' and 'w' raise OSError if the database does not exist.  Any other
    flag value raises ValueError.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0o666 (and
    will be modified by the prevailing umask).

    """

    # Modify mode depending on the umask
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)
    if flag not in ('r', 'w', 'c', 'n'):
        raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")
    return _Database(file, mode, flag=flag)

# diff --git a/contrib/tools/python3/Lib/dbm/gnu.py b/contrib/tools/python3/Lib/dbm/gnu.py
# new file mode 100644
# index 0000000000..b07a1defff
# --- /dev/null
# +++ b/contrib/tools/python3/Lib/dbm/gnu.py
# @@ -0,0 +1,3 @@
# +"""Provide the _gdbm module as a dbm submodule."""
# +
# +from _gdbm import *
# diff --git a/contrib/tools/python3/Lib/dbm/ndbm.py b/contrib/tools/python3/Lib/dbm/ndbm.py
# new file mode 100644
# index 0000000000..23056a29ef
# --- /dev/null
# +++ b/contrib/tools/python3/Lib/dbm/ndbm.py
# @@ -0,0 +1,3 @@
# +"""Provide the _dbm module as a dbm submodule."""
# +
# +from _dbm import *