diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/containers/atomizer/atomizer.h | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/containers/atomizer/atomizer.h')
-rw-r--r-- | library/cpp/containers/atomizer/atomizer.h | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/library/cpp/containers/atomizer/atomizer.h b/library/cpp/containers/atomizer/atomizer.h new file mode 100644 index 0000000000..5e40f47ab9 --- /dev/null +++ b/library/cpp/containers/atomizer/atomizer.h @@ -0,0 +1,200 @@ +#pragma once + +#include <library/cpp/containers/str_map/str_map.h> + +#include <util/generic/vector.h> +#include <util/generic/utility.h> + +#include <utility> +#include <cstdio> + +template <class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>> +class atomizer; + +template <class T, class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>> +class super_atomizer; + +template <class HashFcn, class EqualTo> +class atomizer: public string_hash<ui32, HashFcn, EqualTo> { +private: + TVector<const char*> order; + +public: + using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator; + using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator; + using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type; + using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type; + using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type; + + using string_hash<ui32, HashFcn, EqualTo>::pool; + using string_hash<ui32, HashFcn, EqualTo>::size; + using string_hash<ui32, HashFcn, EqualTo>::find; + using string_hash<ui32, HashFcn, EqualTo>::end; + using string_hash<ui32, HashFcn, EqualTo>::insert_copy; + using string_hash<ui32, HashFcn, EqualTo>::clear_hash; + + atomizer() { + order.reserve(HASH_SIZE_DEFAULT); + } + atomizer(size_type hash_size, pool_size_type pool_size) + : string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size) + { + order.reserve(hash_size); + } + ~atomizer() = default; + ui32 string_to_atom(const char* key) { + const char* old_begin = pool.Begin(); + const char* old_end = pool.End(); + std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1)); + if (ins.second) { // new? + if (pool.Begin() != old_begin) // repoint? + for (TVector<const char*>::iterator ptr = order.begin(); ptr != order.end(); ++ptr) + if (old_begin <= *ptr && *ptr < old_end) // from old pool? + *ptr += pool.Begin() - old_begin; + order.push_back((*ins.first).first); // copy of 'key' + } + return (ui32)(*ins.first).second; + } + + ui32 perm_string_to_atom(const char* key) { + value_type val(key, ui32(size() + 1)); + std::pair<iterator, bool> ins = this->insert(val); + if (ins.second) + order.push_back((*ins.first).first); // == copy of 'key' + return (ui32)(*ins.first).second; // == size()+1 + } + ui32 find_atom(const char* key) const { + const_iterator it = find(key); + if (it == end()) + return 0; // INVALID_ATOM + else + return (ui32)(*it).second; + } + const char* get_atom_name(ui32 atom) const { + if (atom && atom <= size()) + return order[atom - 1]; + return nullptr; + } + void clear_atomizer() { + clear_hash(); + order.clear(); + } + void SaveC2N(FILE* f) const { // we write sorted file + for (ui32 i = 0; i < order.size(); i++) + if (order[i]) + fprintf(f, "%d\t%s\n", i + 1, order[i]); + } + void LoadC2N(FILE* f) { // but can read unsorted one + long k, km = 0; + char buf[1000]; + char* s; + while (fgets(buf, 1000, f)) { + k = strtol(buf, &s, 10); + char* endl = strchr(s, '\n'); + if (endl) + *endl = 0; + if (k > 0 && k != LONG_MAX) { + km = Max(km, k); + insert_copy(++s, ui32(k)); + } + } + order.resize(km); + memset(&order[0], 0, order.size()); // if some atoms are absent + for (const_iterator I = this->begin(); I != end(); ++I) + order[(*I).second - 1] = (*I).first; + } +}; + +template <class T, class HashFcn, class EqualTo> +class super_atomizer: public string_hash<ui32, HashFcn, EqualTo> { +private: + using TOrder = TVector<std::pair<const char*, T>>; + TOrder order; + +public: + using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator; + using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator; + using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type; + using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type; + using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type; + + using o_iterator = typename TOrder::iterator; + using o_const_iterator = typename TOrder::const_iterator; + using o_value_type = typename TOrder::value_type; + + using string_hash<ui32, HashFcn, EqualTo>::pool; + using string_hash<ui32, HashFcn, EqualTo>::size; + using string_hash<ui32, HashFcn, EqualTo>::find; + using string_hash<ui32, HashFcn, EqualTo>::end; + using string_hash<ui32, HashFcn, EqualTo>::insert_copy; + using string_hash<ui32, HashFcn, EqualTo>::clear_hash; + + super_atomizer() { + order.reserve(HASH_SIZE_DEFAULT); + } + super_atomizer(size_type hash_size, pool_size_type pool_size) + : string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size) + { + order.reserve(hash_size); + } + ~super_atomizer() = default; + ui32 string_to_atom(const char* key, const T* atom_data = NULL) { + const char* old_begin = pool.Begin(); + const char* old_end = pool.End(); + std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1)); + if (ins.second) { // new? + if (pool.Begin() != old_begin) // repoint? + for (typename TOrder::iterator ptr = order.begin(); ptr != order.end(); ++ptr) + if (old_begin <= (*ptr).first && (*ptr).first < old_end) // from old pool? + (*ptr).first += pool.Begin() - old_begin; + order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T())); + } + return (*ins.first).second; + } + + ui32 perm_string_to_atom(const char* key, const T* atom_data = NULL) { + value_type val(key, ui32(size() + 1)); + std::pair<iterator, bool> ins = this->insert(val); + if (ins.second) + order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T())); + return (*ins.first).second; // == size()+1 + } + ui32 find_atom(const char* key) const { + const_iterator it = find(key); + if (it == end()) + return 0; // INVALID_ATOM + else + return (*it).second; + } + const char* get_atom_name(ui32 atom) const { + if (atom && atom <= size()) + return order[atom - 1].first; + return nullptr; + } + const T* get_atom_data(ui32 atom) const { + if (atom && atom <= size()) + return &order[atom - 1].second; + return NULL; + } + T* get_atom_data(ui32 atom) { + if (atom && atom <= size()) + return &order[atom - 1].second; + return NULL; + } + o_iterator o_begin() { + return order.begin(); + } + o_iterator o_end() { + return order.end(); + } + o_const_iterator o_begin() const { + return order.begin(); + } + o_const_iterator o_end() const { + return order.end(); + } + void clear_atomizer() { + clear_hash(); + order.clear(); + } +}; |