aboutsummaryrefslogblamecommitdiffstats
path: root/library/cpp/containers/atomizer/atomizer.h
blob: 5e40f47ab933ad7389d50f72691028f6498a9092 (plain) (tree)
1
2
3
4
5
6
7
8
            
 
                                                   
 
                                 
 
                  






                                                                                             
                                       
                                                            
        
                               
 
       



                                                                                        






                                                           
                
                                         
                                                           
                                                                   

                                 
                          

                                             
                                                                           
                                                      
                                                                                                   
                                                                              
                                                         
                                                                 
                                         
     
                                               
                                                          
                                                                    
                                                               
     
                                           
                                      

                                     
                                      
     
                                                
                                   
                                   
                       
     
                           

                      
                                                         
                                               
                                                        
     
                                                        
                       
                
                                     
                                         

                                         
                                


                                          
                                                                       
                                                               

                                                
 
                                                
                                                                  
        
                                                      
                 
 
       



                                                                                        
 

                                                             






                                                           
                      

                                                                 
                                                                   

                                 
                                

                                                                     
                                                                           
                                                      
                                                                                              
                                                                                              
                                                                 
                                                                                                         


                                   
                                                                          
                                                          
                       
                                                                                                         
                                                  
                                           




                                      
                                                
                                   
                                         
                       

                                             
                                           


                                   
                                           












                                      
                           

                      
  
#pragma once

#include <library/cpp/containers/str_map/str_map.h>

#include <util/generic/vector.h>
#include <util/generic/utility.h>

#include <utility>
#include <cstdio>

template <class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>>
class atomizer;

template <class T, class HashFcn = THash<const char*>, class EqualTo = TEqualTo<const char*>>
class super_atomizer;

template <class HashFcn, class EqualTo>
class atomizer: public string_hash<ui32, HashFcn, EqualTo> {
private:
    TVector<const char*> order;

public:
    using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator;
    using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator;
    using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type;
    using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type;
    using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type;

    using string_hash<ui32, HashFcn, EqualTo>::pool;
    using string_hash<ui32, HashFcn, EqualTo>::size;
    using string_hash<ui32, HashFcn, EqualTo>::find;
    using string_hash<ui32, HashFcn, EqualTo>::end;
    using string_hash<ui32, HashFcn, EqualTo>::insert_copy;
    using string_hash<ui32, HashFcn, EqualTo>::clear_hash;

    atomizer() {
        order.reserve(HASH_SIZE_DEFAULT);
    }
    atomizer(size_type hash_size, pool_size_type pool_size)
        : string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size)
    {
        order.reserve(hash_size);
    }
    ~atomizer() = default;
    ui32 string_to_atom(const char* key) {
        const char* old_begin = pool.Begin();
        const char* old_end = pool.End();
        std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1));
        if (ins.second) {                  // new?
            if (pool.Begin() != old_begin) // repoint?
                for (TVector<const char*>::iterator ptr = order.begin(); ptr != order.end(); ++ptr)
                    if (old_begin <= *ptr && *ptr < old_end) // from old pool?
                        *ptr += pool.Begin() - old_begin;
            order.push_back((*ins.first).first); // copy of 'key'
        }
        return (ui32)(*ins.first).second;
    }

    ui32 perm_string_to_atom(const char* key) {
        value_type val(key, ui32(size() + 1));
        std::pair<iterator, bool> ins = this->insert(val);
        if (ins.second)
            order.push_back((*ins.first).first); // == copy of 'key'
        return (ui32)(*ins.first).second;        // == size()+1
    }
    ui32 find_atom(const char* key) const {
        const_iterator it = find(key);
        if (it == end())
            return 0; // INVALID_ATOM
        else
            return (ui32)(*it).second;
    }
    const char* get_atom_name(ui32 atom) const {
        if (atom && atom <= size())
            return order[atom - 1];
        return nullptr;
    }
    void clear_atomizer() {
        clear_hash();
        order.clear();
    }
    void SaveC2N(FILE* f) const { // we write sorted file
        for (ui32 i = 0; i < order.size(); i++)
            if (order[i])
                fprintf(f, "%d\t%s\n", i + 1, order[i]);
    }
    void LoadC2N(FILE* f) { // but can read unsorted one
        long k, km = 0;
        char buf[1000];
        char* s;
        while (fgets(buf, 1000, f)) {
            k = strtol(buf, &s, 10);
            char* endl = strchr(s, '\n');
            if (endl)
                *endl = 0;
            if (k > 0 && k != LONG_MAX) {
                km = Max(km, k);
                insert_copy(++s, ui32(k));
            }
        }
        order.resize(km);
        memset(&order[0], 0, order.size()); // if some atoms are absent
        for (const_iterator I = this->begin(); I != end(); ++I)
            order[(*I).second - 1] = (*I).first;
    }
};

template <class T, class HashFcn, class EqualTo>
class super_atomizer: public string_hash<ui32, HashFcn, EqualTo> {
private:
    using TOrder = TVector<std::pair<const char*, T>>;
    TOrder order;

public:
    using iterator = typename string_hash<ui32, HashFcn, EqualTo>::iterator;
    using const_iterator = typename string_hash<ui32, HashFcn, EqualTo>::const_iterator;
    using value_type = typename string_hash<ui32, HashFcn, EqualTo>::value_type;
    using size_type = typename string_hash<ui32, HashFcn, EqualTo>::size_type;
    using pool_size_type = typename string_hash<ui32, HashFcn, EqualTo>::pool_size_type;

    using o_iterator = typename TOrder::iterator;
    using o_const_iterator = typename TOrder::const_iterator;
    using o_value_type = typename TOrder::value_type;

    using string_hash<ui32, HashFcn, EqualTo>::pool;
    using string_hash<ui32, HashFcn, EqualTo>::size;
    using string_hash<ui32, HashFcn, EqualTo>::find;
    using string_hash<ui32, HashFcn, EqualTo>::end;
    using string_hash<ui32, HashFcn, EqualTo>::insert_copy;
    using string_hash<ui32, HashFcn, EqualTo>::clear_hash;

    super_atomizer() {
        order.reserve(HASH_SIZE_DEFAULT);
    }
    super_atomizer(size_type hash_size, pool_size_type pool_size)
        : string_hash<ui32, HashFcn, EqualTo>(hash_size, pool_size)
    {
        order.reserve(hash_size);
    }
    ~super_atomizer() = default;
    ui32 string_to_atom(const char* key, const T* atom_data = NULL) {
        const char* old_begin = pool.Begin();
        const char* old_end = pool.End();
        std::pair<iterator, bool> ins = insert_copy(key, ui32(size() + 1));
        if (ins.second) {                  // new?
            if (pool.Begin() != old_begin) // repoint?
                for (typename TOrder::iterator ptr = order.begin(); ptr != order.end(); ++ptr)
                    if (old_begin <= (*ptr).first && (*ptr).first < old_end) // from old pool?
                        (*ptr).first += pool.Begin() - old_begin;
            order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T()));
        }
        return (*ins.first).second;
    }

    ui32 perm_string_to_atom(const char* key, const T* atom_data = NULL) {
        value_type val(key, ui32(size() + 1));
        std::pair<iterator, bool> ins = this->insert(val);
        if (ins.second)
            order.push_back(std::pair<const char*, T>((*ins.first).first, atom_data ? *atom_data : T()));
        return (*ins.first).second; // == size()+1
    }
    ui32 find_atom(const char* key) const {
        const_iterator it = find(key);
        if (it == end())
            return 0; // INVALID_ATOM
        else
            return (*it).second;
    }
    const char* get_atom_name(ui32 atom) const {
        if (atom && atom <= size())
            return order[atom - 1].first;
        return nullptr;
    }
    const T* get_atom_data(ui32 atom) const {
        if (atom && atom <= size())
            return &order[atom - 1].second;
        return NULL;
    }
    T* get_atom_data(ui32 atom) {
        if (atom && atom <= size())
            return &order[atom - 1].second;
        return NULL;
    }
    o_iterator o_begin() {
        return order.begin();
    }
    o_iterator o_end() {
        return order.end();
    }
    o_const_iterator o_begin() const {
        return order.begin();
    }
    o_const_iterator o_end() const {
        return order.end();
    }
    void clear_atomizer() {
        clear_hash();
        order.clear();
    }
};