aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers/atomizer/atomizer.h
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/containers/atomizer/atomizer.h
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/containers/atomizer/atomizer.h')
-rw-r--r--library/cpp/containers/atomizer/atomizer.h200
1 files changed, 200 insertions, 0 deletions
diff --git a/library/cpp/containers/atomizer/atomizer.h b/library/cpp/containers/atomizer/atomizer.h
new file mode 100644
index 0000000000..5e40f47ab9
--- /dev/null
+++ b/library/cpp/containers/atomizer/atomizer.h
@@ -0,0 +1,200 @@
+#pragma once
+
+#include <library/cpp/containers/str_map/str_map.h>
+
+#include <util/generic/vector.h>
+#include <util/generic/utility.h>
+
+#include <utility>
+#include <cstdio>
+
+template <class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>>
+class atomizer;
+
+template <class T, class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>>
+class super_atomizer;
+
+template <class HashFcn, class EqualTo>
+class atomizer: public string_hash<ui32, HashFcn, EqualTo> {
+private:
+ TVector<const char*> order;
+
+public:
+ using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator;
+ using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator;
+ using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type;
+ using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type;
+ using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type;
+
+ using string_hash<ui32, HashFcn, EqualTo>::pool;
+ using string_hash<ui32, HashFcn, EqualTo>::size;
+ using string_hash<ui32, HashFcn, EqualTo>::find;
+ using string_hash<ui32, HashFcn, EqualTo>::end;
+ using string_hash<ui32, HashFcn, EqualTo>::insert_copy;
+ using string_hash<ui32, HashFcn, EqualTo>::clear_hash;
+
+ atomizer() {
+ order.reserve(HASH_SIZE_DEFAULT);
+ }
+ atomizer(size_type hash_size, pool_size_type pool_size)
+ : string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size)
+ {
+ order.reserve(hash_size);
+ }
+ ~atomizer() = default;
+ ui32 string_to_atom(const char* key) {
+ const char* old_begin = pool.Begin();
+ const char* old_end = pool.End();
+ std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1));
+ if (ins.second) { // new?
+ if (pool.Begin() != old_begin) // repoint?
+ for (TVector<const char*>::iterator ptr = order.begin(); ptr != order.end(); ++ptr)
+ if (old_begin <= *ptr && *ptr < old_end) // from old pool?
+ *ptr += pool.Begin() - old_begin;
+ order.push_back((*ins.first).first); // copy of 'key'
+ }
+ return (ui32)(*ins.first).second;
+ }
+
+ ui32 perm_string_to_atom(const char* key) {
+ value_type val(key, ui32(size() + 1));
+ std::pair<iterator, bool> ins = this->insert(val);
+ if (ins.second)
+ order.push_back((*ins.first).first); // == copy of 'key'
+ return (ui32)(*ins.first).second; // == size()+1
+ }
+ ui32 find_atom(const char* key) const {
+ const_iterator it = find(key);
+ if (it == end())
+ return 0; // INVALID_ATOM
+ else
+ return (ui32)(*it).second;
+ }
+ const char* get_atom_name(ui32 atom) const {
+ if (atom && atom <= size())
+ return order[atom - 1];
+ return nullptr;
+ }
+ void clear_atomizer() {
+ clear_hash();
+ order.clear();
+ }
+ void SaveC2N(FILE* f) const { // we write sorted file
+ for (ui32 i = 0; i < order.size(); i++)
+ if (order[i])
+ fprintf(f, "%d\t%s\n", i + 1, order[i]);
+ }
+ void LoadC2N(FILE* f) { // but can read unsorted one
+ long k, km = 0;
+ char buf[1000];
+ char* s;
+ while (fgets(buf, 1000, f)) {
+ k = strtol(buf, &s, 10);
+ char* endl = strchr(s, '\n');
+ if (endl)
+ *endl = 0;
+ if (k > 0 && k != LONG_MAX) {
+ km = Max(km, k);
+ insert_copy(++s, ui32(k));
+ }
+ }
+ order.resize(km);
+ memset(&order[0], 0, order.size()); // if some atoms are absent
+ for (const_iterator I = this->begin(); I != end(); ++I)
+ order[(*I).second - 1] = (*I).first;
+ }
+};
+
+template <class T, class HashFcn, class EqualTo>
+class super_atomizer: public string_hash<ui32, HashFcn, EqualTo> {
+private:
+ using TOrder = TVector<std::pair<const char*, T>>;
+ TOrder order;
+
+public:
+ using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator;
+ using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator;
+ using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type;
+ using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type;
+ using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type;
+
+ using o_iterator = typename TOrder::iterator;
+ using o_const_iterator = typename TOrder::const_iterator;
+ using o_value_type = typename TOrder::value_type;
+
+ using string_hash<ui32, HashFcn, EqualTo>::pool;
+ using string_hash<ui32, HashFcn, EqualTo>::size;
+ using string_hash<ui32, HashFcn, EqualTo>::find;
+ using string_hash<ui32, HashFcn, EqualTo>::end;
+ using string_hash<ui32, HashFcn, EqualTo>::insert_copy;
+ using string_hash<ui32, HashFcn, EqualTo>::clear_hash;
+
+ super_atomizer() {
+ order.reserve(HASH_SIZE_DEFAULT);
+ }
+ super_atomizer(size_type hash_size, pool_size_type pool_size)
+ : string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size)
+ {
+ order.reserve(hash_size);
+ }
+ ~super_atomizer() = default;
+ ui32 string_to_atom(const char* key, const T* atom_data = NULL) {
+ const char* old_begin = pool.Begin();
+ const char* old_end = pool.End();
+ std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1));
+ if (ins.second) { // new?
+ if (pool.Begin() != old_begin) // repoint?
+ for (typename TOrder::iterator ptr = order.begin(); ptr != order.end(); ++ptr)
+ if (old_begin <= (*ptr).first && (*ptr).first < old_end) // from old pool?
+ (*ptr).first += pool.Begin() - old_begin;
+ order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T()));
+ }
+ return (*ins.first).second;
+ }
+
+ ui32 perm_string_to_atom(const char* key, const T* atom_data = NULL) {
+ value_type val(key, ui32(size() + 1));
+ std::pair<iterator, bool> ins = this->insert(val);
+ if (ins.second)
+ order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T()));
+ return (*ins.first).second; // == size()+1
+ }
+ ui32 find_atom(const char* key) const {
+ const_iterator it = find(key);
+ if (it == end())
+ return 0; // INVALID_ATOM
+ else
+ return (*it).second;
+ }
+ const char* get_atom_name(ui32 atom) const {
+ if (atom && atom <= size())
+ return order[atom - 1].first;
+ return nullptr;
+ }
+ const T* get_atom_data(ui32 atom) const {
+ if (atom && atom <= size())
+ return &order[atom - 1].second;
+ return NULL;
+ }
+ T* get_atom_data(ui32 atom) {
+ if (atom && atom <= size())
+ return &order[atom - 1].second;
+ return NULL;
+ }
+ o_iterator o_begin() {
+ return order.begin();
+ }
+ o_iterator o_end() {
+ return order.end();
+ }
+ o_const_iterator o_begin() const {
+ return order.begin();
+ }
+ o_const_iterator o_end() const {
+ return order.end();
+ }
+ void clear_atomizer() {
+ clear_hash();
+ order.clear();
+ }
+};