diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 01:45:21 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 02:42:50 +0300 |
commit | 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch) | |
tree | 9f88a486917d371d099cd712efd91b4c122d209d /contrib/python/python-magic/py3 | |
parent | 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff) | |
download | ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/python/python-magic/py3')
-rw-r--r-- | contrib/python/python-magic/py3/.dist-info/METADATA | 171 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/.dist-info/top_level.txt | 1 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/LICENSE | 58 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/README.md | 144 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/magic/__init__.py | 469 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/magic/compat.py | 287 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/magic/loader.py | 50 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/magic/py.typed | 0 | ||||
-rw-r--r-- | contrib/python/python-magic/py3/ya.make | 31 |
9 files changed, 1211 insertions, 0 deletions
diff --git a/contrib/python/python-magic/py3/.dist-info/METADATA b/contrib/python/python-magic/py3/.dist-info/METADATA new file mode 100644 index 0000000000..65b11c5555 --- /dev/null +++ b/contrib/python/python-magic/py3/.dist-info/METADATA @@ -0,0 +1,171 @@ +Metadata-Version: 2.1 +Name: python-magic +Version: 0.4.27 +Summary: File type identification using libmagic +Home-page: http://github.com/ahupp/python-magic +Author: Adam Hupp +Author-email: adam@hupp.org +License: MIT +Keywords: mime magic file +Platform: UNKNOWN +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: Implementation :: CPython +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* +Description-Content-Type: text/markdown +License-File: LICENSE + +# python-magic +[![PyPI version](https://badge.fury.io/py/python-magic.svg)](https://badge.fury.io/py/python-magic) +[![Build Status](https://travis-ci.org/ahupp/python-magic.svg?branch=master)](https://travis-ci.org/ahupp/python-magic) [![Join the chat at https://gitter.im/ahupp/python-magic](https://badges.gitter.im/ahupp/python-magic.svg)](https://gitter.im/ahupp/python-magic?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) + +python-magic is a Python interface to the libmagic file type +identification library. libmagic identifies file types by checking +their headers according to a predefined list of file types. This +functionality is exposed to the command line by the Unix command +`file`. + +## Usage + +```python +>>> import magic +>>> magic.from_file("testdata/test.pdf") +'PDF document, version 1.2' +# recommend using at least the first 2048 bytes, as less can produce incorrect identification +>>> magic.from_buffer(open("testdata/test.pdf", "rb").read(2048)) +'PDF document, version 1.2' +>>> magic.from_file("testdata/test.pdf", mime=True) +'application/pdf' +``` + +There is also a `Magic` class that provides more direct control, +including overriding the magic database file and turning on character +encoding detection. This is not recommended for general use. In +particular, it's not safe for sharing across multiple threads and +will fail throw if this is attempted. + +```python +>>> f = magic.Magic(uncompress=True) +>>> f.from_file('testdata/test.gz') +'ASCII text (gzip compressed data, was "test", last modified: Sat Jun 28 +21:32:52 2008, from Unix)' +``` + +You can also combine the flag options: + +```python +>>> f = magic.Magic(mime=True, uncompress=True) +>>> f.from_file('testdata/test.gz') +'text/plain' +``` + +## Installation + +The current stable version of python-magic is available on PyPI and +can be installed by running `pip install python-magic`. + +Other sources: + +- PyPI: http://pypi.python.org/pypi/python-magic/ +- GitHub: https://github.com/ahupp/python-magic + +This module is a simple wrapper around the libmagic C library, and +that must be installed as well: + +### Debian/Ubuntu + +``` +sudo apt-get install libmagic1 +``` + +### Windows + +You'll need DLLs for libmagic. @julian-r maintains a pypi package with the DLLs, you can fetch it with: + +``` +pip install python-magic-bin +``` + +### OSX + +- When using Homebrew: `brew install libmagic` +- When using macports: `port install file` + +### Troubleshooting + +- 'MagicException: could not find any magic files!': some + installations of libmagic do not correctly point to their magic + database file. Try specifying the path to the file explicitly in the + constructor: `magic.Magic(magic_file="path_to_magic_file")`. + +- 'WindowsError: [Error 193] %1 is not a valid Win32 application': + Attempting to run the 32-bit libmagic DLL in a 64-bit build of + python will fail with this error. Here are 64-bit builds of libmagic for windows: https://github.com/pidydx/libmagicwin64. + Newer version can be found here: https://github.com/nscaife/file-windows. + +- 'WindowsError: exception: access violation writing 0x00000000 ' This may indicate you are mixing + Windows Python and Cygwin Python. Make sure your libmagic and python builds are consistent. + + +## Bug Reports + +python-magic is a thin layer over the libmagic C library. +Historically, most bugs that have been reported against python-magic +are actually bugs in libmagic; libmagic bugs can be reported on their +tracker here: https://bugs.astron.com/my_view_page.php. If you're not +sure where the bug lies feel free to file an issue on GitHub and I can +triage it. + +## Running the tests + +To run the tests across a variety of linux distributions (depends on Docker): + +``` +./test_docker.sh +``` + +To run tests locally across all available python versions: + +``` +./test/run.py +``` + +To run against a specific python version: + +``` +LC_ALL=en_US.UTF-8 python3 test/test.py +``` + +## libmagic python API compatibility + +The python bindings shipped with libmagic use a module name that conflicts with this package. To work around this, python-magic includes a compatibility layer for the libmagic API. See [COMPAT.md](COMPAT.md) for a guide to libmagic / python-magic compatibility. + +## Versioning + +Minor version bumps should be backwards compatible. Major bumps are not. + +## Author + +Written by Adam Hupp in 2001 for a project that never got off the +ground. It originally used SWIG for the C library bindings, but +switched to ctypes once that was part of the python standard library. + +You can contact me via my [website](http://hupp.org/adam) or +[GitHub](http://github.com/ahupp). + +## License + +python-magic is distributed under the MIT license. See the included +LICENSE file for details. + +I am providing code in the repository to you under an open source license. Because this is my personal repository, the license you receive to my code is from me and not my employer (Facebook). + + diff --git a/contrib/python/python-magic/py3/.dist-info/top_level.txt b/contrib/python/python-magic/py3/.dist-info/top_level.txt new file mode 100644 index 0000000000..71c947b02f --- /dev/null +++ b/contrib/python/python-magic/py3/.dist-info/top_level.txt @@ -0,0 +1 @@ +magic diff --git a/contrib/python/python-magic/py3/LICENSE b/contrib/python/python-magic/py3/LICENSE new file mode 100644 index 0000000000..b8ca4b96dd --- /dev/null +++ b/contrib/python/python-magic/py3/LICENSE @@ -0,0 +1,58 @@ +The MIT License (MIT) + +Copyright (c) 2001-2014 Adam Hupp + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +==== + +Portions of this package (magic/compat.py and test/libmagic_test.py) +are distributed under the following copyright notice: + + +$File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $ +Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995. +Software written by Ian F. Darwin and others; +maintained 1994- Christos Zoulas. + +This software is not subject to any export provision of the United States +Department of Commerce, and may be exported to any country or planet. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice immediately at the beginning of the file, without modification, + this list of conditions, and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/contrib/python/python-magic/py3/README.md b/contrib/python/python-magic/py3/README.md new file mode 100644 index 0000000000..9eb70e8a30 --- /dev/null +++ b/contrib/python/python-magic/py3/README.md @@ -0,0 +1,144 @@ +# python-magic +[![PyPI version](https://badge.fury.io/py/python-magic.svg)](https://badge.fury.io/py/python-magic) +[![Build Status](https://travis-ci.org/ahupp/python-magic.svg?branch=master)](https://travis-ci.org/ahupp/python-magic) [![Join the chat at https://gitter.im/ahupp/python-magic](https://badges.gitter.im/ahupp/python-magic.svg)](https://gitter.im/ahupp/python-magic?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) + +python-magic is a Python interface to the libmagic file type +identification library. libmagic identifies file types by checking +their headers according to a predefined list of file types. This +functionality is exposed to the command line by the Unix command +`file`. + +## Usage + +```python +>>> import magic +>>> magic.from_file("testdata/test.pdf") +'PDF document, version 1.2' +# recommend using at least the first 2048 bytes, as less can produce incorrect identification +>>> magic.from_buffer(open("testdata/test.pdf", "rb").read(2048)) +'PDF document, version 1.2' +>>> magic.from_file("testdata/test.pdf", mime=True) +'application/pdf' +``` + +There is also a `Magic` class that provides more direct control, +including overriding the magic database file and turning on character +encoding detection. This is not recommended for general use. In +particular, it's not safe for sharing across multiple threads and +will fail throw if this is attempted. + +```python +>>> f = magic.Magic(uncompress=True) +>>> f.from_file('testdata/test.gz') +'ASCII text (gzip compressed data, was "test", last modified: Sat Jun 28 +21:32:52 2008, from Unix)' +``` + +You can also combine the flag options: + +```python +>>> f = magic.Magic(mime=True, uncompress=True) +>>> f.from_file('testdata/test.gz') +'text/plain' +``` + +## Installation + +The current stable version of python-magic is available on PyPI and +can be installed by running `pip install python-magic`. + +Other sources: + +- PyPI: http://pypi.python.org/pypi/python-magic/ +- GitHub: https://github.com/ahupp/python-magic + +This module is a simple wrapper around the libmagic C library, and +that must be installed as well: + +### Debian/Ubuntu + +``` +sudo apt-get install libmagic1 +``` + +### Windows + +You'll need DLLs for libmagic. @julian-r maintains a pypi package with the DLLs, you can fetch it with: + +``` +pip install python-magic-bin +``` + +### OSX + +- When using Homebrew: `brew install libmagic` +- When using macports: `port install file` + +### Troubleshooting + +- 'MagicException: could not find any magic files!': some + installations of libmagic do not correctly point to their magic + database file. Try specifying the path to the file explicitly in the + constructor: `magic.Magic(magic_file="path_to_magic_file")`. + +- 'WindowsError: [Error 193] %1 is not a valid Win32 application': + Attempting to run the 32-bit libmagic DLL in a 64-bit build of + python will fail with this error. Here are 64-bit builds of libmagic for windows: https://github.com/pidydx/libmagicwin64. + Newer version can be found here: https://github.com/nscaife/file-windows. + +- 'WindowsError: exception: access violation writing 0x00000000 ' This may indicate you are mixing + Windows Python and Cygwin Python. Make sure your libmagic and python builds are consistent. + + +## Bug Reports + +python-magic is a thin layer over the libmagic C library. +Historically, most bugs that have been reported against python-magic +are actually bugs in libmagic; libmagic bugs can be reported on their +tracker here: https://bugs.astron.com/my_view_page.php. If you're not +sure where the bug lies feel free to file an issue on GitHub and I can +triage it. + +## Running the tests + +To run the tests across a variety of linux distributions (depends on Docker): + +``` +./test_docker.sh +``` + +To run tests locally across all available python versions: + +``` +./test/run.py +``` + +To run against a specific python version: + +``` +LC_ALL=en_US.UTF-8 python3 test/test.py +``` + +## libmagic python API compatibility + +The python bindings shipped with libmagic use a module name that conflicts with this package. To work around this, python-magic includes a compatibility layer for the libmagic API. See [COMPAT.md](COMPAT.md) for a guide to libmagic / python-magic compatibility. + +## Versioning + +Minor version bumps should be backwards compatible. Major bumps are not. + +## Author + +Written by Adam Hupp in 2001 for a project that never got off the +ground. It originally used SWIG for the C library bindings, but +switched to ctypes once that was part of the python standard library. + +You can contact me via my [website](http://hupp.org/adam) or +[GitHub](http://github.com/ahupp). + +## License + +python-magic is distributed under the MIT license. See the included +LICENSE file for details. + +I am providing code in the repository to you under an open source license. Because this is my personal repository, the license you receive to my code is from me and not my employer (Facebook). diff --git a/contrib/python/python-magic/py3/magic/__init__.py b/contrib/python/python-magic/py3/magic/__init__.py new file mode 100644 index 0000000000..bab7c7b122 --- /dev/null +++ b/contrib/python/python-magic/py3/magic/__init__.py @@ -0,0 +1,469 @@ +""" +magic is a wrapper around the libmagic file identification library. + +See README for more information. + +Usage: + +>>> import magic +>>> magic.from_file("testdata/test.pdf") +'PDF document, version 1.2' +>>> magic.from_file("testdata/test.pdf", mime=True) +'application/pdf' +>>> magic.from_buffer(open("testdata/test.pdf").read(1024)) +'PDF document, version 1.2' +>>> + +""" + +import sys +import glob +import ctypes +import ctypes.util +import threading +import logging + +from ctypes import c_char_p, c_int, c_size_t, c_void_p, byref, POINTER + +# avoid shadowing the real open with the version from compat.py +_real_open = open + + +class MagicException(Exception): + def __init__(self, message): + super(Exception, self).__init__(message) + self.message = message + + +class Magic: + """ + Magic is a wrapper around the libmagic C library. + """ + + def __init__(self, mime=False, magic_file=None, mime_encoding=False, + keep_going=False, uncompress=False, raw=False, extension=False): + """ + Create a new libmagic wrapper. + + mime - if True, mimetypes are returned instead of textual descriptions + mime_encoding - if True, codec is returned + magic_file - use a mime database other than the system default + keep_going - don't stop at the first match, keep going + uncompress - Try to look inside compressed files. + raw - Do not try to decode "non-printable" chars. + extension - Print a slash-separated list of valid extensions for the file type found. + """ + self.flags = MAGIC_NONE + if mime: + self.flags |= MAGIC_MIME_TYPE + if mime_encoding: + self.flags |= MAGIC_MIME_ENCODING + if keep_going: + self.flags |= MAGIC_CONTINUE + if uncompress: + self.flags |= MAGIC_COMPRESS + if raw: + self.flags |= MAGIC_RAW + if extension: + self.flags |= MAGIC_EXTENSION + + self.cookie = magic_open(self.flags) + self.lock = threading.Lock() + + magic_load(self.cookie, magic_file) + + # MAGIC_EXTENSION was added in 523 or 524, so bail if + # it doesn't appear to be available + if extension and (not _has_version or version() < 524): + raise NotImplementedError('MAGIC_EXTENSION is not supported in this version of libmagic') + + # For https://github.com/ahupp/python-magic/issues/190 + # libmagic has fixed internal limits that some files exceed, causing + # an error. We can avoid this (at least for the sample file given) + # by bumping the limit up. It's not clear if this is a general solution + # or whether other internal limits should be increased, but given + # the lack of other reports I'll assume this is rare. + if _has_param: + try: + self.setparam(MAGIC_PARAM_NAME_MAX, 64) + except MagicException as e: + # some versions of libmagic fail this call, + # so rather than fail hard just use default behavior + pass + + def from_buffer(self, buf): + """ + Identify the contents of `buf` + """ + with self.lock: + try: + # if we're on python3, convert buf to bytes + # otherwise this string is passed as wchar* + # which is not what libmagic expects + # NEXTBREAK: only take bytes + if type(buf) == str and str != bytes: + buf = buf.encode('utf-8', errors='replace') + return maybe_decode(magic_buffer(self.cookie, buf)) + except MagicException as e: + return self._handle509Bug(e) + + def from_file(self, filename): + # raise FileNotFoundException or IOError if the file does not exist + with _real_open(filename): + pass + + with self.lock: + try: + return maybe_decode(magic_file(self.cookie, filename)) + except MagicException as e: + return self._handle509Bug(e) + + def from_descriptor(self, fd): + with self.lock: + try: + return maybe_decode(magic_descriptor(self.cookie, fd)) + except MagicException as e: + return self._handle509Bug(e) + + def _handle509Bug(self, e): + # libmagic 5.09 has a bug where it might fail to identify the + # mimetype of a file and returns null from magic_file (and + # likely _buffer), but also does not return an error message. + if e.message is None and (self.flags & MAGIC_MIME_TYPE): + return "application/octet-stream" + else: + raise e + + def setparam(self, param, val): + return magic_setparam(self.cookie, param, val) + + def getparam(self, param): + return magic_getparam(self.cookie, param) + + def __del__(self): + # no _thread_check here because there can be no other + # references to this object at this point. + + # during shutdown magic_close may have been cleared already so + # make sure it exists before using it. + + # the self.cookie check should be unnecessary and was an + # incorrect fix for a threading problem, however I'm leaving + # it in because it's harmless and I'm slightly afraid to + # remove it. + if hasattr(self, 'cookie') and self.cookie and magic_close: + magic_close(self.cookie) + self.cookie = None + + +_instances = {} + + +def _get_magic_type(mime): + i = _instances.get(mime) + if i is None: + i = _instances[mime] = Magic(mime=mime) + return i + + +def from_file(filename, mime=False): + """" + Accepts a filename and returns the detected filetype. Return + value is the mimetype if mime=True, otherwise a human readable + name. + + >>> magic.from_file("testdata/test.pdf", mime=True) + 'application/pdf' + """ + m = _get_magic_type(mime) + return m.from_file(filename) + + +def from_buffer(buffer, mime=False): + """ + Accepts a binary string and returns the detected filetype. Return + value is the mimetype if mime=True, otherwise a human readable + name. + + >>> magic.from_buffer(open("testdata/test.pdf").read(1024)) + 'PDF document, version 1.2' + """ + m = _get_magic_type(mime) + return m.from_buffer(buffer) + + +def from_descriptor(fd, mime=False): + """ + Accepts a file descriptor and returns the detected filetype. Return + value is the mimetype if mime=True, otherwise a human readable + name. + + >>> f = open("testdata/test.pdf") + >>> magic.from_descriptor(f.fileno()) + 'PDF document, version 1.2' + """ + m = _get_magic_type(mime) + return m.from_descriptor(fd) + +from . import loader +libmagic = loader.load_lib() + +magic_t = ctypes.c_void_p + + +def errorcheck_null(result, func, args): + if result is None: + err = magic_error(args[0]) + raise MagicException(err) + else: + return result + + +def errorcheck_negative_one(result, func, args): + if result == -1: + err = magic_error(args[0]) + raise MagicException(err) + else: + return result + + +# return str on python3. Don't want to unconditionally +# decode because that results in unicode on python2 +def maybe_decode(s): + # NEXTBREAK: remove + if str == bytes: + return s + else: + # backslashreplace here because sometimes libmagic will return metadata in the charset + # of the file, which is unknown to us (e.g the title of a Word doc) + return s.decode('utf-8', 'backslashreplace') + + +try: + from os import PathLike + def unpath(filename): + if isinstance(filename, PathLike): + return filename.__fspath__() + else: + return filename +except ImportError: + def unpath(filename): + return filename + +def coerce_filename(filename): + if filename is None: + return None + + filename = unpath(filename) + + # ctypes will implicitly convert unicode strings to bytes with + # .encode('ascii'). If you use the filesystem encoding + # then you'll get inconsistent behavior (crashes) depending on the user's + # LANG environment variable + # NEXTBREAK: remove + is_unicode = (sys.version_info[0] <= 2 and + isinstance(filename, unicode)) or \ + (sys.version_info[0] >= 3 and + isinstance(filename, str)) + if is_unicode: + return filename.encode('utf-8', 'surrogateescape') + else: + return filename + + +magic_open = libmagic.magic_open +magic_open.restype = magic_t +magic_open.argtypes = [c_int] + +magic_close = libmagic.magic_close +magic_close.restype = None +magic_close.argtypes = [magic_t] + +magic_error = libmagic.magic_error +magic_error.restype = c_char_p +magic_error.argtypes = [magic_t] + +magic_errno = libmagic.magic_errno +magic_errno.restype = c_int +magic_errno.argtypes = [magic_t] + +_magic_file = libmagic.magic_file +_magic_file.restype = c_char_p +_magic_file.argtypes = [magic_t, c_char_p] +_magic_file.errcheck = errorcheck_null + + +def magic_file(cookie, filename): + return _magic_file(cookie, coerce_filename(filename)) + + +_magic_buffer = libmagic.magic_buffer +_magic_buffer.restype = c_char_p +_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t] +_magic_buffer.errcheck = errorcheck_null + + +def magic_buffer(cookie, buf): + return _magic_buffer(cookie, buf, len(buf)) + + +magic_descriptor = libmagic.magic_descriptor +magic_descriptor.restype = c_char_p +magic_descriptor.argtypes = [magic_t, c_int] +magic_descriptor.errcheck = errorcheck_null + +_magic_descriptor = libmagic.magic_descriptor +_magic_descriptor.restype = c_char_p +_magic_descriptor.argtypes = [magic_t, c_int] +_magic_descriptor.errcheck = errorcheck_null + + +def magic_descriptor(cookie, fd): + return _magic_descriptor(cookie, fd) + + +_magic_load = libmagic.magic_load +_magic_load.restype = c_int +_magic_load.argtypes = [magic_t, c_char_p] +_magic_load.errcheck = errorcheck_negative_one + + +def magic_load(cookie, filename): + return _magic_load(cookie, coerce_filename(filename)) + + +magic_setflags = libmagic.magic_setflags +magic_setflags.restype = c_int +magic_setflags.argtypes = [magic_t, c_int] + +magic_check = libmagic.magic_check +magic_check.restype = c_int +magic_check.argtypes = [magic_t, c_char_p] + +magic_compile = libmagic.magic_compile +magic_compile.restype = c_int +magic_compile.argtypes = [magic_t, c_char_p] + +_has_param = False +if hasattr(libmagic, 'magic_setparam') and hasattr(libmagic, 'magic_getparam'): + _has_param = True + _magic_setparam = libmagic.magic_setparam + _magic_setparam.restype = c_int + _magic_setparam.argtypes = [magic_t, c_int, POINTER(c_size_t)] + _magic_setparam.errcheck = errorcheck_negative_one + + _magic_getparam = libmagic.magic_getparam + _magic_getparam.restype = c_int + _magic_getparam.argtypes = [magic_t, c_int, POINTER(c_size_t)] + _magic_getparam.errcheck = errorcheck_negative_one + + +def magic_setparam(cookie, param, val): + if not _has_param: + raise NotImplementedError("magic_setparam not implemented") + v = c_size_t(val) + return _magic_setparam(cookie, param, byref(v)) + + +def magic_getparam(cookie, param): + if not _has_param: + raise NotImplementedError("magic_getparam not implemented") + val = c_size_t() + _magic_getparam(cookie, param, byref(val)) + return val.value + + +_has_version = False +if hasattr(libmagic, "magic_version"): + _has_version = True + magic_version = libmagic.magic_version + magic_version.restype = c_int + magic_version.argtypes = [] + + +def version(): + if not _has_version: + raise NotImplementedError("magic_version not implemented") + return magic_version() + + +MAGIC_NONE = 0x000000 # No flags +MAGIC_DEBUG = 0x000001 # Turn on debugging +MAGIC_SYMLINK = 0x000002 # Follow symlinks +MAGIC_COMPRESS = 0x000004 # Check inside compressed files +MAGIC_DEVICES = 0x000008 # Look at the contents of devices +MAGIC_MIME_TYPE = 0x000010 # Return a mime string +MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding +# TODO: should be +# MAGIC_MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING +MAGIC_MIME = 0x000010 # Return a mime string +MAGIC_EXTENSION = 0x1000000 # Return a /-separated list of extensions + +MAGIC_CONTINUE = 0x000020 # Return all matches +MAGIC_CHECK = 0x000040 # Print warnings to stderr +MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit +MAGIC_RAW = 0x000100 # Don't translate unprintable chars +MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors + +MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files +MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files +MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries +MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type +MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details +MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files +MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff +MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran +MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens + +MAGIC_PARAM_INDIR_MAX = 0 # Recursion limit for indirect magic +MAGIC_PARAM_NAME_MAX = 1 # Use count limit for name/use magic +MAGIC_PARAM_ELF_PHNUM_MAX = 2 # Max ELF notes processed +MAGIC_PARAM_ELF_SHNUM_MAX = 3 # Max ELF program sections processed +MAGIC_PARAM_ELF_NOTES_MAX = 4 # # Max ELF sections processed +MAGIC_PARAM_REGEX_MAX = 5 # Length limit for regex searches +MAGIC_PARAM_BYTES_MAX = 6 # Max number of bytes to read from file + + +# This package name conflicts with the one provided by upstream +# libmagic. This is a common source of confusion for users. To +# resolve, We ship a copy of that module, and expose it's functions +# wrapped in deprecation warnings. +def _add_compat(to_module): + import warnings, re + from magic import compat + + def deprecation_wrapper(fn): + def _(*args, **kwargs): + warnings.warn( + "Using compatibility mode with libmagic's python binding. " + "See https://github.com/ahupp/python-magic/blob/master/COMPAT.md for details.", + PendingDeprecationWarning) + + return fn(*args, **kwargs) + + return _ + + fn = ['detect_from_filename', + 'detect_from_content', + 'detect_from_fobj', + 'open'] + for fname in fn: + to_module[fname] = deprecation_wrapper(compat.__dict__[fname]) + + # copy constants over, ensuring there's no conflicts + is_const_re = re.compile("^[A-Z_]+$") + allowed_inconsistent = set(['MAGIC_MIME']) + for name, value in compat.__dict__.items(): + if is_const_re.match(name): + if name in to_module: + if name in allowed_inconsistent: + continue + if to_module[name] != value: + raise Exception("inconsistent value for " + name) + else: + continue + else: + to_module[name] = value + + +_add_compat(globals()) diff --git a/contrib/python/python-magic/py3/magic/compat.py b/contrib/python/python-magic/py3/magic/compat.py new file mode 100644 index 0000000000..07fad45a8b --- /dev/null +++ b/contrib/python/python-magic/py3/magic/compat.py @@ -0,0 +1,287 @@ +# coding: utf-8 + +''' +Python bindings for libmagic +''' + +import ctypes + +from collections import namedtuple + +from ctypes import * +from ctypes.util import find_library + + +from . import loader + +_libraries = {} +_libraries['magic'] = loader.load_lib() + +# Flag constants for open and setflags +MAGIC_NONE = NONE = 0 +MAGIC_DEBUG = DEBUG = 1 +MAGIC_SYMLINK = SYMLINK = 2 +MAGIC_COMPRESS = COMPRESS = 4 +MAGIC_DEVICES = DEVICES = 8 +MAGIC_MIME_TYPE = MIME_TYPE = 16 +MAGIC_CONTINUE = CONTINUE = 32 +MAGIC_CHECK = CHECK = 64 +MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128 +MAGIC_RAW = RAW = 256 +MAGIC_ERROR = ERROR = 512 +MAGIC_MIME_ENCODING = MIME_ENCODING = 1024 +MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING +MAGIC_APPLE = APPLE = 2048 + +MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096 +MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192 +MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384 +MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768 +MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536 +MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072 +MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144 +MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576 +MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152 + +MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824 + +FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name')) + + +class magic_set(Structure): + pass + + +magic_set._fields_ = [] +magic_t = POINTER(magic_set) + +_open = _libraries['magic'].magic_open +_open.restype = magic_t +_open.argtypes = [c_int] + +_close = _libraries['magic'].magic_close +_close.restype = None +_close.argtypes = [magic_t] + +_file = _libraries['magic'].magic_file +_file.restype = c_char_p +_file.argtypes = [magic_t, c_char_p] + +_descriptor = _libraries['magic'].magic_descriptor +_descriptor.restype = c_char_p +_descriptor.argtypes = [magic_t, c_int] + +_buffer = _libraries['magic'].magic_buffer +_buffer.restype = c_char_p +_buffer.argtypes = [magic_t, c_void_p, c_size_t] + +_error = _libraries['magic'].magic_error +_error.restype = c_char_p +_error.argtypes = [magic_t] + +_setflags = _libraries['magic'].magic_setflags +_setflags.restype = c_int +_setflags.argtypes = [magic_t, c_int] + +_load = _libraries['magic'].magic_load +_load.restype = c_int +_load.argtypes = [magic_t, c_char_p] + +_compile = _libraries['magic'].magic_compile +_compile.restype = c_int +_compile.argtypes = [magic_t, c_char_p] + +_check = _libraries['magic'].magic_check +_check.restype = c_int +_check.argtypes = [magic_t, c_char_p] + +_list = _libraries['magic'].magic_list +_list.restype = c_int +_list.argtypes = [magic_t, c_char_p] + +_errno = _libraries['magic'].magic_errno +_errno.restype = c_int +_errno.argtypes = [magic_t] + + +class Magic(object): + def __init__(self, ms): + self._magic_t = ms + + def close(self): + """ + Closes the magic database and deallocates any resources used. + """ + _close(self._magic_t) + + @staticmethod + def __tostr(s): + if s is None: + return None + if isinstance(s, str): + return s + try: # keep Python 2 compatibility + return str(s, 'utf-8') + except TypeError: + return str(s) + + @staticmethod + def __tobytes(b): + if b is None: + return None + if isinstance(b, bytes): + return b + try: # keep Python 2 compatibility + return bytes(b, 'utf-8') + except TypeError: + return bytes(b) + + def file(self, filename): + """ + Returns a textual description of the contents of the argument passed + as a filename or None if an error occurred and the MAGIC_ERROR flag + is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename))) + + def descriptor(self, fd): + """ + Returns a textual description of the contents of the argument passed + as a file descriptor or None if an error occurred and the MAGIC_ERROR + flag is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_descriptor(self._magic_t, fd)) + + def buffer(self, buf): + """ + Returns a textual description of the contents of the argument passed + as a buffer or None if an error occurred and the MAGIC_ERROR flag + is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_buffer(self._magic_t, buf, len(buf))) + + def error(self): + """ + Returns a textual explanation of the last error or None + if there was no error. + """ + return Magic.__tostr(_error(self._magic_t)) + + def setflags(self, flags): + """ + Set flags on the magic object which determine how magic checking + behaves; a bitwise OR of the flags described in libmagic(3), but + without the MAGIC_ prefix. + + Returns -1 on systems that don't support utime(2) or utimes(2) + when PRESERVE_ATIME is set. + """ + return _setflags(self._magic_t, flags) + + def load(self, filename=None): + """ + Must be called to load entries in the colon separated list of database + files passed as argument or the default database file if no argument + before any magic queries can be performed. + + Returns 0 on success and -1 on failure. + """ + return _load(self._magic_t, Magic.__tobytes(filename)) + + def compile(self, dbs): + """ + Compile entries in the colon separated list of database files + passed as argument or the default database file if no argument. + The compiled files created are named from the basename(1) of each file + argument with ".mgc" appended to it. + + Returns 0 on success and -1 on failure. + """ + return _compile(self._magic_t, Magic.__tobytes(dbs)) + + def check(self, dbs): + """ + Check the validity of entries in the colon separated list of + database files passed as argument or the default database file + if no argument. + + Returns 0 on success and -1 on failure. + """ + return _check(self._magic_t, Magic.__tobytes(dbs)) + + def list(self, dbs): + """ + Check the validity of entries in the colon separated list of + database files passed as argument or the default database file + if no argument. + + Returns 0 on success and -1 on failure. + """ + return _list(self._magic_t, Magic.__tobytes(dbs)) + + def errno(self): + """ + Returns a numeric error code. If return value is 0, an internal + magic error occurred. If return value is non-zero, the value is + an OS error code. Use the errno module or os.strerror() can be used + to provide detailed error information. + """ + return _errno(self._magic_t) + + +def open(flags): + """ + Returns a magic object on success and None on failure. + Flags argument as for setflags. + """ + return Magic(_open(flags)) + + +# Objects used by `detect_from_` functions +mime_magic = Magic(_open(MAGIC_MIME)) +mime_magic.load() +none_magic = Magic(_open(MAGIC_NONE)) +none_magic.load() + + +def _create_filemagic(mime_detected, type_detected): + splat = mime_detected.split('; ') + mime_type = splat[0] + if len(splat) == 2: + mime_encoding = splat[1] + else: + mime_encoding = '' + + return FileMagic(name=type_detected, mime_type=mime_type, + encoding=mime_encoding.replace('charset=', '')) + + +def detect_from_filename(filename): + '''Detect mime type, encoding and file type from a filename + + Returns a `FileMagic` namedtuple. + ''' + + return _create_filemagic(mime_magic.file(filename), + none_magic.file(filename)) + + +def detect_from_fobj(fobj): + '''Detect mime type, encoding and file type from file-like object + + Returns a `FileMagic` namedtuple. + ''' + + file_descriptor = fobj.fileno() + return _create_filemagic(mime_magic.descriptor(file_descriptor), + none_magic.descriptor(file_descriptor)) + + +def detect_from_content(byte_content): + '''Detect mime type, encoding and file type from bytes + + Returns a `FileMagic` namedtuple. + ''' + + return _create_filemagic(mime_magic.buffer(byte_content), + none_magic.buffer(byte_content)) diff --git a/contrib/python/python-magic/py3/magic/loader.py b/contrib/python/python-magic/py3/magic/loader.py new file mode 100644 index 0000000000..931f16193e --- /dev/null +++ b/contrib/python/python-magic/py3/magic/loader.py @@ -0,0 +1,50 @@ +from ctypes.util import find_library +import ctypes +import sys +import glob +import os.path + +def _lib_candidates(): + + yield find_library('magic') + + if sys.platform == 'darwin': + + paths = [ + '/opt/local/lib', + '/usr/local/lib', + '/opt/homebrew/lib', + ] + glob.glob('/usr/local/Cellar/libmagic/*/lib') + + for i in paths: + yield os.path.join(i, 'libmagic.dylib') + + elif sys.platform in ('win32', 'cygwin'): + + prefixes = ['libmagic', 'magic1', 'cygmagic-1', 'libmagic-1', 'msys-magic-1'] + + for i in prefixes: + # find_library searches in %PATH% but not the current directory, + # so look for both + yield './%s.dll' % (i,) + yield find_library(i) + + elif sys.platform == 'linux': + # This is necessary because alpine is bad + yield 'libmagic.so.1' + + +def load_lib(): + + for lib in _lib_candidates(): + # find_library returns None when lib not found + if lib is None: + continue + try: + return ctypes.CDLL(lib) + except OSError: + pass + else: + # It is better to raise an ImportError since we are importing magic module + raise ImportError('failed to find libmagic. Check your installation') + diff --git a/contrib/python/python-magic/py3/magic/py.typed b/contrib/python/python-magic/py3/magic/py.typed new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/python-magic/py3/magic/py.typed diff --git a/contrib/python/python-magic/py3/ya.make b/contrib/python/python-magic/py3/ya.make new file mode 100644 index 0000000000..f1aba820d5 --- /dev/null +++ b/contrib/python/python-magic/py3/ya.make @@ -0,0 +1,31 @@ +# Generated by devtools/yamaker (pypi). + +PY3_LIBRARY() + +VERSION(0.4.27) + +LICENSE(MIT) + +PEERDIR( + contrib/libs/libmagic + library/python/symbols/libmagic +) + +NO_LINT() + +PY_SRCS( + TOP_LEVEL + magic/__init__.py + magic/__init__.pyi + magic/compat.py + magic/loader.py +) + +RESOURCE_FILES( + PREFIX contrib/python/python-magic/py3/ + .dist-info/METADATA + .dist-info/top_level.txt + magic/py.typed +) + +END() |