diff options
author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
---|---|---|
committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/tools/python3/src/Lib/hashlib.py | |
parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz |
add ydb deps
Diffstat (limited to 'contrib/tools/python3/src/Lib/hashlib.py')
-rw-r--r-- | contrib/tools/python3/src/Lib/hashlib.py | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/contrib/tools/python3/src/Lib/hashlib.py b/contrib/tools/python3/src/Lib/hashlib.py new file mode 100644 index 0000000000..b546a3fd79 --- /dev/null +++ b/contrib/tools/python3/src/Lib/hashlib.py @@ -0,0 +1,315 @@ +#. Copyright (C) 2005-2010 Gregory P. Smith (greg@krypto.org) +# Licensed to PSF under a Contributor Agreement. +# + +__doc__ = """hashlib module - A common interface to many hash functions. + +new(name, data=b'', **kwargs) - returns a new hash object implementing the + given hash function; initializing the hash + using the given binary data. + +Named constructor functions are also available, these are faster +than using new(name): + +md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(), +sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256. + +More algorithms may be available on your platform but the above are guaranteed +to exist. See the algorithms_guaranteed and algorithms_available attributes +to find out what algorithm names can be passed to new(). + +NOTE: If you want the adler32 or crc32 hash functions they are available in +the zlib module. + +Choose your hash function wisely. Some have known collision weaknesses. +sha384 and sha512 will be slow on 32 bit platforms. + +Hash objects have these methods: + - update(data): Update the hash object with the bytes in data. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments. + - digest(): Return the digest of the bytes passed to the update() method + so far as a bytes object. + - hexdigest(): Like digest() except the digest is returned as a string + of double length, containing only hexadecimal digits. + - copy(): Return a copy (clone) of the hash object. This can be used to + efficiently compute the digests of datas that share a common + initial substring. + +For example, to obtain the digest of the byte string 'Nobody inspects the +spammish repetition': + + >>> import hashlib + >>> m = hashlib.md5() + >>> m.update(b"Nobody inspects") + >>> m.update(b" the spammish repetition") + >>> m.digest() + b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' + +More condensed: + + >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest() + 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' + +""" + +# This tuple and __get_builtin_constructor() must be modified if a new +# always available algorithm is added. +__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', + 'blake2b', 'blake2s', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256') + + +algorithms_guaranteed = set(__always_supported) +algorithms_available = set(__always_supported) + +__all__ = __always_supported + ('new', 'algorithms_guaranteed', + 'algorithms_available', 'pbkdf2_hmac', 'file_digest') + + +__builtin_constructor_cache = {} + +# Prefer our blake2 implementation +# OpenSSL 1.1.0 comes with a limited implementation of blake2b/s. The OpenSSL +# implementations neither support keyed blake2 (blake2 MAC) nor advanced +# features like salt, personalization, or tree hashing. OpenSSL hash-only +# variants are available as 'blake2b512' and 'blake2s256', though. +__block_openssl_constructor = { + 'blake2b', 'blake2s', +} + +def __get_builtin_constructor(name): + cache = __builtin_constructor_cache + constructor = cache.get(name) + if constructor is not None: + return constructor + try: + if name in {'SHA1', 'sha1'}: + import _sha1 + cache['SHA1'] = cache['sha1'] = _sha1.sha1 + elif name in {'MD5', 'md5'}: + import _md5 + cache['MD5'] = cache['md5'] = _md5.md5 + elif name in {'SHA256', 'sha256', 'SHA224', 'sha224'}: + import _sha256 + cache['SHA224'] = cache['sha224'] = _sha256.sha224 + cache['SHA256'] = cache['sha256'] = _sha256.sha256 + elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}: + import _sha512 + cache['SHA384'] = cache['sha384'] = _sha512.sha384 + cache['SHA512'] = cache['sha512'] = _sha512.sha512 + elif name in {'blake2b', 'blake2s'}: + import _blake2 + cache['blake2b'] = _blake2.blake2b + cache['blake2s'] = _blake2.blake2s + elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}: + import _sha3 + cache['sha3_224'] = _sha3.sha3_224 + cache['sha3_256'] = _sha3.sha3_256 + cache['sha3_384'] = _sha3.sha3_384 + cache['sha3_512'] = _sha3.sha3_512 + elif name in {'shake_128', 'shake_256'}: + import _sha3 + cache['shake_128'] = _sha3.shake_128 + cache['shake_256'] = _sha3.shake_256 + except ImportError: + pass # no extension module, this hash is unsupported. + + constructor = cache.get(name) + if constructor is not None: + return constructor + + raise ValueError('unsupported hash type ' + name) + + +def __get_openssl_constructor(name): + if name in __block_openssl_constructor: + # Prefer our builtin blake2 implementation. + return __get_builtin_constructor(name) + try: + # MD5, SHA1, and SHA2 are in all supported OpenSSL versions + # SHA3/shake are available in OpenSSL 1.1.1+ + f = getattr(_hashlib, 'openssl_' + name) + # Allow the C module to raise ValueError. The function will be + # defined but the hash not actually available. Don't fall back to + # builtin if the current security policy blocks a digest, bpo#40695. + f(usedforsecurity=False) + # Use the C function directly (very fast) + return f + except (AttributeError, ValueError): + return __get_builtin_constructor(name) + + +def __py_new(name, data=b'', **kwargs): + """new(name, data=b'', **kwargs) - Return a new hashing object using the + named algorithm; optionally initialized with data (which must be + a bytes-like object). + """ + return __get_builtin_constructor(name)(data, **kwargs) + + +def __hash_new(name, data=b'', **kwargs): + """new(name, data=b'') - Return a new hashing object using the named algorithm; + optionally initialized with data (which must be a bytes-like object). + """ + if name in __block_openssl_constructor: + # Prefer our builtin blake2 implementation. + return __get_builtin_constructor(name)(data, **kwargs) + try: + return _hashlib.new(name, data, **kwargs) + except ValueError: + # If the _hashlib module (OpenSSL) doesn't support the named + # hash, try using our builtin implementations. + # This allows for SHA224/256 and SHA384/512 support even though + # the OpenSSL library prior to 0.9.8 doesn't provide them. + return __get_builtin_constructor(name)(data) + + +try: + import _hashlib + new = __hash_new + __get_hash = __get_openssl_constructor + algorithms_available = algorithms_available.union( + _hashlib.openssl_md_meth_names) +except ImportError: + _hashlib = None + new = __py_new + __get_hash = __get_builtin_constructor + +try: + # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA + from _hashlib import pbkdf2_hmac +except ImportError: + from warnings import warn as _warn + _trans_5C = bytes((x ^ 0x5C) for x in range(256)) + _trans_36 = bytes((x ^ 0x36) for x in range(256)) + + def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None): + """Password based key derivation function 2 (PKCS #5 v2.0) + + This Python implementations based on the hmac module about as fast + as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster + for long passwords. + """ + _warn( + "Python implementation of pbkdf2_hmac() is deprecated.", + category=DeprecationWarning, + stacklevel=2 + ) + if not isinstance(hash_name, str): + raise TypeError(hash_name) + + if not isinstance(password, (bytes, bytearray)): + password = bytes(memoryview(password)) + if not isinstance(salt, (bytes, bytearray)): + salt = bytes(memoryview(salt)) + + # Fast inline HMAC implementation + inner = new(hash_name) + outer = new(hash_name) + blocksize = getattr(inner, 'block_size', 64) + if len(password) > blocksize: + password = new(hash_name, password).digest() + password = password + b'\x00' * (blocksize - len(password)) + inner.update(password.translate(_trans_36)) + outer.update(password.translate(_trans_5C)) + + def prf(msg, inner=inner, outer=outer): + # PBKDF2_HMAC uses the password as key. We can re-use the same + # digest objects and just update copies to skip initialization. + icpy = inner.copy() + ocpy = outer.copy() + icpy.update(msg) + ocpy.update(icpy.digest()) + return ocpy.digest() + + if iterations < 1: + raise ValueError(iterations) + if dklen is None: + dklen = outer.digest_size + if dklen < 1: + raise ValueError(dklen) + + dkey = b'' + loop = 1 + from_bytes = int.from_bytes + while len(dkey) < dklen: + prev = prf(salt + loop.to_bytes(4)) + # endianness doesn't matter here as long to / from use the same + rkey = from_bytes(prev) + for i in range(iterations - 1): + prev = prf(prev) + # rkey = rkey ^ prev + rkey ^= from_bytes(prev) + loop += 1 + dkey += rkey.to_bytes(inner.digest_size) + + return dkey[:dklen] + +try: + # OpenSSL's scrypt requires OpenSSL 1.1+ + from _hashlib import scrypt +except ImportError: + pass + + +def file_digest(fileobj, digest, /, *, _bufsize=2**18): + """Hash the contents of a file-like object. Returns a digest object. + + *fileobj* must be a file-like object opened for reading in binary mode. + It accepts file objects from open(), io.BytesIO(), and SocketIO objects. + The function may bypass Python's I/O and use the file descriptor *fileno* + directly. + + *digest* must either be a hash algorithm name as a *str*, a hash + constructor, or a callable that returns a hash object. + """ + # On Linux we could use AF_ALG sockets and sendfile() to archive zero-copy + # hashing with hardware acceleration. + if isinstance(digest, str): + digestobj = new(digest) + else: + digestobj = digest() + + if hasattr(fileobj, "getbuffer"): + # io.BytesIO object, use zero-copy buffer + digestobj.update(fileobj.getbuffer()) + return digestobj + + # Only binary files implement readinto(). + if not ( + hasattr(fileobj, "readinto") + and hasattr(fileobj, "readable") + and fileobj.readable() + ): + raise ValueError( + f"'{fileobj!r}' is not a file-like object in binary reading mode." + ) + + # binary file, socket.SocketIO object + # Note: socket I/O uses different syscalls than file I/O. + buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. + view = memoryview(buf) + while True: + size = fileobj.readinto(buf) + if size == 0: + break # EOF + digestobj.update(view[:size]) + + return digestobj + + +for __func_name in __always_supported: + # try them all, some may not work due to the OpenSSL + # version not supporting that algorithm. + try: + globals()[__func_name] = __get_hash(__func_name) + except ValueError: + import logging + logging.exception('code for hash %s was not found.', __func_name) + + +# Cleanup locals() +del __always_supported, __func_name, __get_hash +del __py_new, __hash_new, __get_openssl_constructor |