diff options
| author | vvvv <[email protected]> | 2023-07-31 18:21:04 +0300 | 
|---|---|---|
| committer | vvvv <[email protected]> | 2023-07-31 18:21:04 +0300 | 
| commit | dec41c40e51aa407edef81a3c566a5a15780fc49 (patch) | |
| tree | 4f197b596b32f35eca368121f0dff913419da9af /library/cpp/containers | |
| parent | 3ca8b54c96e09eb2b65be7f09675623438d559c7 (diff) | |
YQL-16239 Move purecalc to public
Diffstat (limited to 'library/cpp/containers')
| -rw-r--r-- | library/cpp/containers/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt | 19 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt | 20 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt | 20 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/CMakeLists.txt | 17 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt | 19 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/str_hash.cpp | 60 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/str_hash.h | 181 | ||||
| -rw-r--r-- | library/cpp/containers/str_hash/ya.make | 12 | 
9 files changed, 349 insertions, 0 deletions
diff --git a/library/cpp/containers/CMakeLists.txt b/library/cpp/containers/CMakeLists.txt index 43fcbe83466..40f50138673 100644 --- a/library/cpp/containers/CMakeLists.txt +++ b/library/cpp/containers/CMakeLists.txt @@ -20,5 +20,6 @@ add_subdirectory(ring_buffer)  add_subdirectory(sorted_vector)  add_subdirectory(stack_array)  add_subdirectory(stack_vector) +add_subdirectory(str_hash)  add_subdirectory(str_map)  add_subdirectory(top_keeper) diff --git a/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..627814f0ed6 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +  cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..cd723cbea23 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC +  contrib-libs-linux-headers +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +  cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..cd723cbea23 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC +  contrib-libs-linux-headers +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +  cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/CMakeLists.txt b/library/cpp/containers/str_hash/CMakeLists.txt new file mode 100644 index 00000000000..f8b31df0c11 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) +  include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") +  include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) +  include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) +  include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt new file mode 100644 index 00000000000..627814f0ed6 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +  cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/str_hash.cpp b/library/cpp/containers/str_hash/str_hash.cpp new file mode 100644 index 00000000000..12986385334 --- /dev/null +++ b/library/cpp/containers/str_hash/str_hash.cpp @@ -0,0 +1,60 @@ +#include "str_hash.h" + +#include <library/cpp/charset/ci_string.h> +#include <util/stream/output.h> +#include <util/stream/input.h> + +HashSet::HashSet(const char** array, size_type size) { +    Resize(size); +    while (*array && **array) +        AddPermanent(*array++); +} + +void HashSet::Read(IInputStream* input) { +    TString s; + +    while (input->ReadLine(s)) { +        AddUniq(TCiString(s).c_str()); +    } +} + +void HashSet::Write(IOutputStream* output) const { +    for (const auto& it : *this) { +        *output << it.first << "\n"; +    } +} + +#ifdef TEST_STRHASH +#include <ctime> +#include <fstream> +#include <cstdio> +#include <cstdlib> + +using namespace std; + +int main(int argc, char* argv[]) { +    if (argc < 2) { +        printf("usage: stoplist <stop-words file ...\n"); +        exit(EXIT_FAILURE); // FreeBSD: EX_USAGE +    } +    Hash hash; +    hash.Read(cin); +    for (--argc, ++argv; argc > 0; --argc, ++argv) { +        ifstream input(argv[0]); +        if (!input.good()) { +            perror(argv[0]); +            continue; +        } +        TCiString s; +        while (input >> s) { +            if (!hash.Has(s)) +                cout << s << "\n"; +            else +                cout << "[[" << s << "]]" +                     << "\n"; +        } +    } +    return EXIT_SUCCESS; // EX_OK +} + +#endif diff --git a/library/cpp/containers/str_hash/str_hash.h b/library/cpp/containers/str_hash/str_hash.h new file mode 100644 index 00000000000..25f960dbb5f --- /dev/null +++ b/library/cpp/containers/str_hash/str_hash.h @@ -0,0 +1,181 @@ +#pragma once + +#include <library/cpp/containers/str_map/str_map.h> +#include <library/cpp/charset/ci_string.h> +#include <util/system/yassert.h> +#include <util/memory/tempbuf.h> + +#include <memory> + +class IInputStream; +class IOutputStream; + +template <class T, class Alloc = std::allocator<const char*>> +class Hash; + +struct yvoid { +    yvoid() = default; +}; + +template <typename T, class Alloc> +class Hash: public string_hash<T, ci_hash, ci_equal_to, Alloc> { +    using ci_string_hash = string_hash<T, ci_hash, ci_equal_to, Alloc>; + +protected: +    using ci_string_hash::pool; + +public: +    using size_type = typename ci_string_hash::size_type; +    using const_iterator = typename ci_string_hash::const_iterator; +    using iterator = typename ci_string_hash::iterator; +    using value_type = typename ci_string_hash::value_type; +    using ci_string_hash::begin; +    using ci_string_hash::end; +    using ci_string_hash::find; +    using ci_string_hash::size; + +    Hash() +        : ci_string_hash() +    { +    } +    explicit Hash(size_type theSize) +        : ci_string_hash(theSize, theSize * AVERAGEWORD_BUF) +    { +    } +    Hash(const char** strings, size_type size = 0, T* = 0); // must end with NULL or "\0" +    virtual ~Hash(); +    bool Has(const char* s, size_t len, T* pp = nullptr) const; +    bool Has(const char* s, T* pp = nullptr) const { +        const_iterator it; +        if ((it = find(s)) == end()) +            return false; +        else if (pp) +            *pp = (*it).second; +        return true; +    } +    void Add(const char* s, T data) { +        // in fact it is the same insert_unique as in AddUnique. +        // it's impossible to have _FAST_ version of insert() in 'hash_map' + +        // you have to use 'hash_mmap' to get the _kind_ of desired effect. +        // BUT still there will be "Checks" inside - +        // to make the same keys close to each other (see insert_equal()) +        this->insert_copy(s, data); +    } +    bool AddUniq(const char* s, T data) { +        return this->insert_copy(s, data).second; +    } +    // new function to get rid of allocations completely! -- e.g. in constructors +    void AddPermanent(const char* s, T data) { +        this->insert(value_type(s, data)); +    } +    T Detach(const char* s) { +        iterator it = find(s); +        if (it == end()) +            return T(); +        T data = (*it).second; +        this->erase(it); +        return data; +    } +    size_type NumEntries() const { +        return size(); +    } +    bool ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie = nullptr); +    void Resize(size_type theSize) { +        this->reserve(theSize); +        // no pool resizing here. +    } +    virtual void Clear(); +    char* Pool() { +        if (pool.Size() < 2 || pool.End()[-2] != '\0') +            pool.Append("\0", 1); +        return pool.Begin(); +    } +}; + +template <class T, class Alloc> +Hash<T, Alloc>::Hash(const char** array, size_type theSize, T* data) { +    // must end with NULL or "\0" +    Y_ASSERT(data != nullptr); +    Resize(theSize); +    while (*array && **array) +        AddPermanent(*array++, *data++); +} + +template <class T, class Alloc> +bool Hash<T, Alloc>::Has(const char* s, size_t len, T* pp) const { +    TTempArray<char> buf(len + 1); +    char* const allocated = buf.Data(); +    memcpy(allocated, s, len); +    allocated[len] = '\x00'; +    return Has(allocated, pp); +} + +template <class T, class Alloc> +Hash<T, Alloc>::~Hash() { +    Clear(); +} + +template <class T, class Alloc> +void Hash<T, Alloc>::Clear() { +    ci_string_hash::clear_hash(); // to make the key pool empty +} + +template <class T, class Alloc> +bool Hash<T, Alloc>::ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie) { +    for (const_iterator it = begin(); it != end(); ++it) +        if (!func((*it).first, (*it).second, cookie)) +            return false; +    return true; +} + +class HashSet: public Hash<yvoid> { +public: +    HashSet(const char** array, size_type size = 0); +    HashSet() +        : Hash<yvoid>() +    { +    } +    void Read(IInputStream* input); +    void Write(IOutputStream* output) const; +    void Add(const char* s) { +        // in fact it is the same insert_unique as in AddUnique. +        // it's impossible to have _FAST_ version of insert() in 'hash_map' + +        // you have to use 'hash_mmap' to get the _kind_ of desired effect. +        // BUT still there will be "Checks" inside - +        // to make the same keys close to each other (see insert_equal()) +        insert_copy(s, yvoid()); +    } +    bool AddUniq(const char* s) { +        return insert_copy(s, yvoid()).second; +    } +    // new function to get rid of allocations completely! -- e.g. in constructors +    void AddPermanent(const char* s) { +        insert(value_type(s, yvoid())); +    } +}; + +template <class T, class HashFcn = THash<T>, class EqualKey = TEqualTo<T>, class Alloc = std::allocator<T>> +class TStaticHash: private THashMap<T, T, HashFcn, EqualKey> { +private: +    using TBase = THashMap<T, T, HashFcn, EqualKey>; + +public: +    TStaticHash(T arr[][2], size_t size) { +        TBase::reserve(size); +        while (size) { +            TBase::insert(typename TBase::value_type(arr[0][0], arr[0][1])); +            arr++; +            size--; +        } +    } +    T operator[](const T& key) const { // !!! it is not lvalue nor it used to be +        typename TBase::const_iterator it = TBase::find(key); +        if (it == TBase::end()) +            return nullptr; +        return it->second; +    } +}; + +using TStHash = TStaticHash<const char*, ci_hash, ci_equal_to>; diff --git a/library/cpp/containers/str_hash/ya.make b/library/cpp/containers/str_hash/ya.make new file mode 100644 index 00000000000..f7e24316b98 --- /dev/null +++ b/library/cpp/containers/str_hash/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( +    library/cpp/charset +    library/cpp/containers/str_map +) + +SRCS( +    str_hash.cpp +) + +END()  | 
