diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 01:45:21 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 02:42:50 +0300 |
commit | 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch) | |
tree | 9f88a486917d371d099cd712efd91b4c122d209d /contrib/python/marisa-trie/marisa/base.h | |
parent | 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff) | |
download | ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/python/marisa-trie/marisa/base.h')
-rw-r--r-- | contrib/python/marisa-trie/marisa/base.h | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/contrib/python/marisa-trie/marisa/base.h b/contrib/python/marisa-trie/marisa/base.h new file mode 100644 index 0000000000..5c595dcd2b --- /dev/null +++ b/contrib/python/marisa-trie/marisa/base.h @@ -0,0 +1,196 @@ +#pragma once + +#ifndef MARISA_BASE_H_ +#define MARISA_BASE_H_ + +// Old Visual C++ does not provide stdint.h. +#ifndef _MSC_VER + #include <stdint.h> +#endif // _MSC_VER + +#ifdef __cplusplus + #include <cstddef> +#else // __cplusplus + #include <stddef.h> +#endif // __cplusplus + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifdef _MSC_VER +typedef unsigned __int8 marisa_uint8; +typedef unsigned __int16 marisa_uint16; +typedef unsigned __int32 marisa_uint32; +typedef unsigned __int64 marisa_uint64; +#else // _MSC_VER +typedef uint8_t marisa_uint8; +typedef uint16_t marisa_uint16; +typedef uint32_t marisa_uint32; +typedef uint64_t marisa_uint64; +#endif // _MSC_VER + +#if defined(_WIN64) || defined(__amd64__) || defined(__x86_64__) || \ + defined(__ia64__) || defined(__ppc64__) || defined(__powerpc64__) || \ + defined(__sparc64__) || defined(__mips64__) || defined(__aarch64__) || \ + defined(__s390x__) + #define MARISA_WORD_SIZE 64 +#else // defined(_WIN64), etc. + #define MARISA_WORD_SIZE 32 +#endif // defined(_WIN64), etc. + +//#define MARISA_WORD_SIZE (sizeof(void *) * 8) + +#define MARISA_UINT8_MAX ((marisa_uint8)~(marisa_uint8)0) +#define MARISA_UINT16_MAX ((marisa_uint16)~(marisa_uint16)0) +#define MARISA_UINT32_MAX ((marisa_uint32)~(marisa_uint32)0) +#define MARISA_UINT64_MAX ((marisa_uint64)~(marisa_uint64)0) +#define MARISA_SIZE_MAX ((size_t)~(size_t)0) + +#define MARISA_INVALID_LINK_ID MARISA_UINT32_MAX +#define MARISA_INVALID_KEY_ID MARISA_UINT32_MAX +#define MARISA_INVALID_EXTRA (MARISA_UINT32_MAX >> 8) + +// Error codes are defined as members of marisa_error_code. This library throws +// an exception with one of the error codes when an error occurs. 
typedef enum marisa_error_code_ {
  // MARISA_OK means that a requested operation has succeeded. In practice, an
  // exception never has MARISA_OK because it is not an error.
  MARISA_OK = 0,

  // MARISA_STATE_ERROR means that an object was not ready for a requested
  // operation. For example, an operation to modify a fixed vector throws an
  // exception with MARISA_STATE_ERROR.
  MARISA_STATE_ERROR = 1,

  // MARISA_NULL_ERROR means that an invalid NULL pointer has been given.
  MARISA_NULL_ERROR = 2,

  // MARISA_BOUND_ERROR means that an operation has tried to access an out of
  // range address.
  MARISA_BOUND_ERROR = 3,

  // MARISA_RANGE_ERROR means that an out of range value has appeared in
  // operation.
  MARISA_RANGE_ERROR = 4,

  // MARISA_CODE_ERROR means that an undefined code has appeared in operation.
  MARISA_CODE_ERROR = 5,

  // MARISA_RESET_ERROR means that a smart pointer has tried to reset itself.
  MARISA_RESET_ERROR = 6,

  // MARISA_SIZE_ERROR means that a size has exceeded a library limitation.
  MARISA_SIZE_ERROR = 7,

  // MARISA_MEMORY_ERROR means that a memory allocation has failed.
  MARISA_MEMORY_ERROR = 8,

  // MARISA_IO_ERROR means that an I/O operation has failed.
  MARISA_IO_ERROR = 9,

  // MARISA_FORMAT_ERROR means that input was in invalid format.
  MARISA_FORMAT_ERROR = 10,
} marisa_error_code;

// Min/max values, flags and masks for dictionary settings are defined below.
// Please note that unspecified settings will be replaced with the default
// settings. For example, 0 is equivalent to (MARISA_DEFAULT_NUM_TRIES |
// MARISA_DEFAULT_CACHE | MARISA_DEFAULT_TAIL | MARISA_DEFAULT_ORDER).

// A dictionary consists of 3 tries by default. Usually more tries make a
// dictionary space-efficient but time-inefficient.
+typedef enum marisa_num_tries_ { + MARISA_MIN_NUM_TRIES = 0x00001, + MARISA_MAX_NUM_TRIES = 0x0007F, + MARISA_DEFAULT_NUM_TRIES = 0x00003, +} marisa_num_tries; + +// This library uses a cache technique to accelerate search functions. The +// following enumerated type marisa_cache_level gives a list of available cache +// size options. A larger cache enables faster search but takes a more space. +typedef enum marisa_cache_level_ { + MARISA_HUGE_CACHE = 0x00080, + MARISA_LARGE_CACHE = 0x00100, + MARISA_NORMAL_CACHE = 0x00200, + MARISA_SMALL_CACHE = 0x00400, + MARISA_TINY_CACHE = 0x00800, + MARISA_DEFAULT_CACHE = MARISA_NORMAL_CACHE +} marisa_cache_level; + +// This library provides 2 kinds of TAIL implementations. +typedef enum marisa_tail_mode_ { + // MARISA_TEXT_TAIL merges last labels as zero-terminated strings. So, it is + // available if and only if the last labels do not contain a NULL character. + // If MARISA_TEXT_TAIL is specified and a NULL character exists in the last + // labels, the setting is automatically switched to MARISA_BINARY_TAIL. + MARISA_TEXT_TAIL = 0x01000, + + // MARISA_BINARY_TAIL also merges last labels but as byte sequences. It uses + // a bit vector to detect the end of a sequence, instead of NULL characters. + // So, MARISA_BINARY_TAIL requires a larger space if the average length of + // labels is greater than 8. + MARISA_BINARY_TAIL = 0x02000, + + MARISA_DEFAULT_TAIL = MARISA_TEXT_TAIL, +} marisa_tail_mode; + +// The arrangement of nodes affects the time cost of matching and the order of +// predictive search. +typedef enum marisa_node_order_ { + // MARISA_LABEL_ORDER arranges nodes in ascending label order. + // MARISA_LABEL_ORDER is useful if an application needs to predict keys in + // label order. + MARISA_LABEL_ORDER = 0x10000, + + // MARISA_WEIGHT_ORDER arranges nodes in descending weight order. + // MARISA_WEIGHT_ORDER is generally a better choice because it enables faster + // matching. 
+ MARISA_WEIGHT_ORDER = 0x20000, + + MARISA_DEFAULT_ORDER = MARISA_WEIGHT_ORDER, +} marisa_node_order; + +typedef enum marisa_config_mask_ { + MARISA_NUM_TRIES_MASK = 0x0007F, + MARISA_CACHE_LEVEL_MASK = 0x00F80, + MARISA_TAIL_MODE_MASK = 0x0F000, + MARISA_NODE_ORDER_MASK = 0xF0000, + MARISA_CONFIG_MASK = 0xFFFFF +} marisa_config_mask; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#ifdef __cplusplus +namespace marisa { + +typedef ::marisa_uint8 UInt8; +typedef ::marisa_uint16 UInt16; +typedef ::marisa_uint32 UInt32; +typedef ::marisa_uint64 UInt64; + +typedef ::marisa_error_code ErrorCode; + +typedef ::marisa_cache_level CacheLevel; +typedef ::marisa_tail_mode TailMode; +typedef ::marisa_node_order NodeOrder; + +template <typename T> +inline void swap(T &lhs, T &rhs) { + T temp = lhs; + lhs = rhs; + rhs = temp; +} + +} // namespace marisa +#endif // __cplusplus + +#ifdef __cplusplus + #include "exception.h" + #include "scoped-ptr.h" + #include "scoped-array.h" +#endif // __cplusplus + +#endif // MARISA_BASE_H_ |