#pragma once
#include <library/cpp/containers/str_map/str_map.h>
#include <util/generic/vector.h>
#include <util/generic/utility.h>
#include <utility>
#include <cstdio>
template <class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>>
class atomizer;
template <class T, class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>>
class super_atomizer;
template <class HashFcn, class EqualTo>
class atomizer: public string_hash<ui32, HashFcn, EqualTo> {
private:
TVector<const char*> order;
public:
using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator;
using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator;
using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type;
using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type;
using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type;
using string_hash<ui32, HashFcn, EqualTo>::pool;
using string_hash<ui32, HashFcn, EqualTo>::size;
using string_hash<ui32, HashFcn, EqualTo>::find;
using string_hash<ui32, HashFcn, EqualTo>::end;
using string_hash<ui32, HashFcn, EqualTo>::insert_copy;
using string_hash<ui32, HashFcn, EqualTo>::clear_hash;
atomizer() {
order.reserve(HASH_SIZE_DEFAULT);
}
atomizer(size_type hash_size, pool_size_type pool_size)
: string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size)
{
order.reserve(hash_size);
}
~atomizer() = default;
ui32 string_to_atom(const char* key) {
const char* old_begin = pool.Begin();
const char* old_end = pool.End();
std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1));
if (ins.second) { // new?
if (pool.Begin() != old_begin) // repoint?
for (TVector<const char*>::iterator ptr = order.begin(); ptr != order.end(); ++ptr)
if (old_begin <= *ptr && *ptr < old_end) // from old pool?
*ptr += pool.Begin() - old_begin;
order.push_back((*ins.first).first); // copy of 'key'
}
return (ui32)(*ins.first).second;
}
ui32 perm_string_to_atom(const char* key) {
value_type val(key, ui32(size() + 1));
std::pair<iterator, bool> ins = this->insert(val);
if (ins.second)
order.push_back((*ins.first).first); // == copy of 'key'
return (ui32)(*ins.first).second; // == size()+1
}
ui32 find_atom(const char* key) const {
const_iterator it = find(key);
if (it == end())
return 0; // INVALID_ATOM
else
return (ui32)(*it).second;
}
const char* get_atom_name(ui32 atom) const {
if (atom && atom <= size())
return order[atom - 1];
return nullptr;
}
void clear_atomizer() {
clear_hash();
order.clear();
}
void SaveC2N(FILE* f) const { // we write sorted file
for (ui32 i = 0; i < order.size(); i++)
if (order[i])
fprintf(f, "%d\t%s\n", i + 1, order[i]);
}
void LoadC2N(FILE* f) { // but can read unsorted one
long k, km = 0;
char buf[1000];
char* s;
while (fgets(buf, 1000, f)) {
k = strtol(buf, &s, 10);
char* endl = strchr(s, '\n');
if (endl)
*endl = 0;
if (k > 0 && k != LONG_MAX) {
km = Max(km, k);
insert_copy(++s, ui32(k));
}
}
order.resize(km);
memset(&order[0], 0, order.size()); // if some atoms are absent
for (const_iterator I = this->begin(); I != end(); ++I)
order[(*I).second - 1] = (*I).first;
}
};
template <class T, class HashFcn, class EqualTo>
class super_atomizer: public string_hash<ui32, HashFcn, EqualTo> {
private:
using TOrder = TVector<std::pair<const char*, T>>;
TOrder order;
public:
using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator;
using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator;
using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type;
using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type;
using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type;
using o_iterator = typename TOrder::iterator;
using o_const_iterator = typename TOrder::const_iterator;
using o_value_type = typename TOrder::value_type;
using string_hash<ui32, HashFcn, EqualTo>::pool;
using string_hash<ui32, HashFcn, EqualTo>::size;
using string_hash<ui32, HashFcn, EqualTo>::find;
using string_hash<ui32, HashFcn, EqualTo>::end;
using string_hash<ui32, HashFcn, EqualTo>::insert_copy;
using string_hash<ui32, HashFcn, EqualTo>::clear_hash;
super_atomizer() {
order.reserve(HASH_SIZE_DEFAULT);
}
super_atomizer(size_type hash_size, pool_size_type pool_size)
: string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size)
{
order.reserve(hash_size);
}
~super_atomizer() = default;
ui32 string_to_atom(const char* key, const T* atom_data = NULL) {
const char* old_begin = pool.Begin();
const char* old_end = pool.End();
std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1));
if (ins.second) { // new?
if (pool.Begin() != old_begin) // repoint?
for (typename TOrder::iterator ptr = order.begin(); ptr != order.end(); ++ptr)
if (old_begin <= (*ptr).first && (*ptr).first < old_end) // from old pool?
(*ptr).first += pool.Begin() - old_begin;
order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T()));
}
return (*ins.first).second;
}
ui32 perm_string_to_atom(const char* key, const T* atom_data = NULL) {
value_type val(key, ui32(size() + 1));
std::pair<iterator, bool> ins = this->insert(val);
if (ins.second)
order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T()));
return (*ins.first).second; // == size()+1
}
ui32 find_atom(const char* key) const {
const_iterator it = find(key);
if (it == end())
return 0; // INVALID_ATOM
else
return (*it).second;
}
const char* get_atom_name(ui32 atom) const {
if (atom && atom <= size())
return order[atom - 1].first;
return nullptr;
}
const T* get_atom_data(ui32 atom) const {
if (atom && atom <= size())
return &order[atom - 1].second;
return NULL;
}
T* get_atom_data(ui32 atom) {
if (atom && atom <= size())
return &order[atom - 1].second;
return NULL;
}
o_iterator o_begin() {
return order.begin();
}
o_iterator o_end() {
return order.end();
}
o_const_iterator o_begin() const {
return order.begin();
}
o_const_iterator o_end() const {
return order.end();
}
void clear_atomizer() {
clear_hash();
order.clear();
}
};