aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers
diff options
context:
space:
mode:
author vvvv <vvvv@ydb.tech> 2023-07-31 18:21:04 +0300
committer vvvv <vvvv@ydb.tech> 2023-07-31 18:21:04 +0300
commit dec41c40e51aa407edef81a3c566a5a15780fc49 (patch)
tree 4f197b596b32f35eca368121f0dff913419da9af /library/cpp/containers
parent 3ca8b54c96e09eb2b65be7f09675623438d559c7 (diff)
download ydb-dec41c40e51aa407edef81a3c566a5a15780fc49.tar.gz
YQL-16239 Move purecalc to public
Diffstat (limited to 'library/cpp/containers')
-rw-r--r-- library/cpp/containers/CMakeLists.txt 1
-rw-r--r-- library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt 19
-rw-r--r-- library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt 20
-rw-r--r-- library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt 20
-rw-r--r-- library/cpp/containers/str_hash/CMakeLists.txt 17
-rw-r--r-- library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt 19
-rw-r--r-- library/cpp/containers/str_hash/str_hash.cpp 60
-rw-r--r-- library/cpp/containers/str_hash/str_hash.h 181
-rw-r--r-- library/cpp/containers/str_hash/ya.make 12
9 files changed, 349 insertions, 0 deletions
diff --git a/library/cpp/containers/CMakeLists.txt b/library/cpp/containers/CMakeLists.txt
index 43fcbe83466..40f50138673 100644
--- a/library/cpp/containers/CMakeLists.txt
+++ b/library/cpp/containers/CMakeLists.txt
@@ -20,5 +20,6 @@ add_subdirectory(ring_buffer)
add_subdirectory(sorted_vector)
add_subdirectory(stack_array)
add_subdirectory(stack_vector)
+add_subdirectory(str_hash)
add_subdirectory(str_map)
add_subdirectory(top_keeper)
diff --git a/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 00000000000..627814f0ed6
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)
+target_link_libraries(cpp-containers-str_hash PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ cpp-containers-str_map
+)
+target_sources(cpp-containers-str_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt
new file mode 100644
index 00000000000..cd723cbea23
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)
+target_link_libraries(cpp-containers-str_hash PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ cpp-containers-str_map
+)
+target_sources(cpp-containers-str_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt
new file mode 100644
index 00000000000..cd723cbea23
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)
+target_link_libraries(cpp-containers-str_hash PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ cpp-containers-str_map
+)
+target_sources(cpp-containers-str_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/CMakeLists.txt b/library/cpp/containers/str_hash/CMakeLists.txt
new file mode 100644
index 00000000000..f8b31df0c11
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt
new file mode 100644
index 00000000000..627814f0ed6
--- /dev/null
+++ b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-containers-str_hash)
+target_link_libraries(cpp-containers-str_hash PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-charset
+ cpp-containers-str_map
+)
+target_sources(cpp-containers-str_hash PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp
+)
diff --git a/library/cpp/containers/str_hash/str_hash.cpp b/library/cpp/containers/str_hash/str_hash.cpp
new file mode 100644
index 00000000000..12986385334
--- /dev/null
+++ b/library/cpp/containers/str_hash/str_hash.cpp
@@ -0,0 +1,60 @@
+#include "str_hash.h"
+
+#include <library/cpp/charset/ci_string.h>
+#include <util/stream/output.h>
+#include <util/stream/input.h>
+
+// Builds a set from a C array of C strings.
+// The array must be terminated by a null pointer or by an empty string;
+// `size` is only forwarded to Resize() before the keys are added.
+// Keys are registered through AddPermanent(), i.e. via insert() rather than
+// insert_copy(); presumably the pointed-to strings must outlive the set --
+// TODO confirm against string_hash::insert.
+HashSet::HashSet(const char** array, size_type size) {
+    Resize(size);
+    for (const char** entry = array; *entry != nullptr && **entry != '\0'; ++entry) {
+        AddPermanent(*entry);
+    }
+}
+
+// Reads `input` line by line with ReadLine() and registers each line through
+// AddUniq(), using a TCiString built from the line to obtain the C string.
+void HashSet::Read(IInputStream* input) {
+    for (TString line; input->ReadLine(line);) {
+        const TCiString key(line);
+        AddUniq(key.c_str());
+    }
+}
+
+// Writes every key of the set to `output`, each followed by "\n",
+// in the container's iteration order.
+void HashSet::Write(IOutputStream* output) const {
+    for (const_iterator it = begin(); it != end(); ++it) {
+        *output << (*it).first << "\n";
+    }
+}
+
+#ifdef TEST_STRHASH
+#include <ctime>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+
+using namespace std;
+
+// Manual test driver, compiled only when TEST_STRHASH is defined.
+// Flow as written: load entries from standard input into the hash, then for
+// each file named on the command line print every token extracted with
+// `input >> s`, wrapping tokens found in the hash in "[[...]]".
+// Files that cannot be opened are reported with perror() and skipped.
+// NOTE(review): this driver looks stale. `Hash` is declared as a class
+// template whose parameter T has no default, Read() is declared on HashSet
+// and takes an IInputStream*, and Has() takes a const char* -- none of which
+// matches the calls below. Confirm whether it still builds before relying on it.
+int main(int argc, char* argv[]) {
+ if (argc < 2) {
+ printf("usage: stoplist <stop-words file ...\n");
+ exit(EXIT_FAILURE); // FreeBSD: EX_USAGE
+ }
+ Hash hash;
+ hash.Read(cin);
+ // Skip argv[0] (program name) and walk the remaining arguments as file names.
+ for (--argc, ++argv; argc > 0; --argc, ++argv) {
+ ifstream input(argv[0]);
+ if (!input.good()) {
+ perror(argv[0]);
+ continue;
+ }
+ TCiString s;
+ while (input >> s) {
+ if (!hash.Has(s))
+ cout << s << "\n";
+ else
+ cout << "[[" << s << "]]"
+ << "\n";
+ }
+ }
+ return EXIT_SUCCESS; // EX_OK
+}
+
+#endif
diff --git a/library/cpp/containers/str_hash/str_hash.h b/library/cpp/containers/str_hash/str_hash.h
new file mode 100644
index 00000000000..25f960dbb5f
--- /dev/null
+++ b/library/cpp/containers/str_hash/str_hash.h
@@ -0,0 +1,181 @@
+#pragma once
+
+#include <library/cpp/containers/str_map/str_map.h>
+#include <library/cpp/charset/ci_string.h>
+#include <util/system/yassert.h>
+#include <util/memory/tempbuf.h>
+
+#include <memory>
+
+class IInputStream;
+class IOutputStream;
+
+template <class T, class Alloc = std::allocator<const char*>>
+class Hash;
+
+// Empty placeholder type. HashSet uses it as the mapped value of Hash<yvoid>,
+// so only the keys of that container carry information.
+struct yvoid {
+ yvoid() = default;
+};
+
+// String-keyed hash map built on string_hash<T, ci_hash, ci_equal_to, Alloc>.
+// Keys are C strings hashed and compared with ci_hash / ci_equal_to
+// (presumably case-insensitive -- confirm in ci_string.h); T is the mapped
+// value type.
+template <typename T, class Alloc>
+class Hash: public string_hash<T, ci_hash, ci_equal_to, Alloc> {
+ using ci_string_hash = string_hash<T, ci_hash, ci_equal_to, Alloc>;
+
+protected:
+ // Key storage inherited from the base; exposed to derived classes and Pool().
+ using ci_string_hash::pool;
+
+public:
+ using size_type = typename ci_string_hash::size_type;
+ using const_iterator = typename ci_string_hash::const_iterator;
+ using iterator = typename ci_string_hash::iterator;
+ using value_type = typename ci_string_hash::value_type;
+ using ci_string_hash::begin;
+ using ci_string_hash::end;
+ using ci_string_hash::find;
+ using ci_string_hash::size;
+
+ // Creates an empty map using the base class defaults.
+ Hash()
+ : ci_string_hash()
+ {
+ }
+ // Forwards theSize and theSize * AVERAGEWORD_BUF to the base constructor
+ // (presumably element count and key-pool size; AVERAGEWORD_BUF is defined
+ // outside this file -- TODO confirm).
+ explicit Hash(size_type theSize)
+ : ci_string_hash(theSize, theSize * AVERAGEWORD_BUF)
+ {
+ }
+ // Builds the map from parallel arrays of keys and values.
+ // NOTE(review): the out-of-line definition Y_ASSERTs that the T* argument is
+ // non-null and dereferences it for every key, so the `= 0` default is only
+ // safe where that assertion is inactive and the key array is empty.
+ Hash(const char** strings, size_type size = 0, T* = 0); // must end with NULL or "\0"
+ virtual ~Hash();
+ // Looks up the first `len` bytes of `s`; `s` need not be NUL-terminated.
+ bool Has(const char* s, size_t len, T* pp = nullptr) const;
+ // Returns true if `s` is present. When found and `pp` is non-null, the
+ // mapped value is copied to *pp.
+ bool Has(const char* s, T* pp = nullptr) const {
+ const_iterator it;
+ if ((it = find(s)) == end())
+ return false;
+ else if (pp)
+ *pp = (*it).second;
+ return true;
+ }
+ // Inserts (s, data) through insert_copy(); the insertion result is ignored.
+ void Add(const char* s, T data) {
+ // in fact it is the same insert_unique as in AddUnique.
+ // it's impossible to have _FAST_ version of insert() in 'hash_map'
+
+ // you have to use 'hash_mmap' to get the _kind_ of desired effect.
+ // BUT still there will be "Checks" inside -
+ // to make the same keys close to each other (see insert_equal())
+ this->insert_copy(s, data);
+ }
+ // Same insertion as Add(), but returns the `.second` member of
+ // insert_copy()'s result (presumably true when the key was newly inserted).
+ bool AddUniq(const char* s, T data) {
+ return this->insert_copy(s, data).second;
+ }
+ // new function to get rid of allocations completely! -- e.g. in constructors
+ // Uses insert() with the caller's pointer instead of insert_copy();
+ // presumably `s` must outlive the container -- TODO confirm.
+ void AddPermanent(const char* s, T data) {
+ this->insert(value_type(s, data));
+ }
+ // Removes the entry for `s` and returns its mapped value; returns a
+ // value-initialized T when `s` is not present.
+ T Detach(const char* s) {
+ iterator it = find(s);
+ if (it == end())
+ return T();
+ T data = (*it).second;
+ this->erase(it);
+ return data;
+ }
+ // Number of entries currently stored (same as size()).
+ size_type NumEntries() const {
+ return size();
+ }
+ // Calls `func` for each entry until it returns false; see the definition.
+ bool ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie = nullptr);
+ // Reserves room for theSize entries in the hash table only.
+ void Resize(size_type theSize) {
+ this->reserve(theSize);
+ // no pool resizing here.
+ }
+ virtual void Clear();
+ // Returns pool.Begin(). Before returning, appends one '\0' byte unless the
+ // pool already holds at least two bytes and its second-to-last byte is '\0'.
+ char* Pool() {
+ if (pool.Size() < 2 || pool.End()[-2] != '\0')
+ pool.Append("\0", 1);
+ return pool.Begin();
+ }
+};
+
+// Builds the map from two parallel arrays: `array` holds the keys and `data`
+// the corresponding values. `array` must end with NULL or "\0"; `data` must
+// be non-null and provide one value per key. `theSize` is forwarded to
+// Resize() before insertion. Entries are added with AddPermanent().
+template <class T, class Alloc>
+Hash<T, Alloc>::Hash(const char** array, size_type theSize, T* data) {
+    Y_ASSERT(data != nullptr);
+    Resize(theSize);
+    for (; *array && **array; ++array, ++data) {
+        AddPermanent(*array, *data);
+    }
+}
+
+// Length-bounded lookup: copies the first `len` bytes of `s` into a temporary
+// buffer, NUL-terminates the copy and delegates to Has(const char*, T*).
+// `s` therefore does not have to be NUL-terminated itself.
+template <class T, class Alloc>
+bool Hash<T, Alloc>::Has(const char* s, size_t len, T* pp) const {
+    TTempArray<char> scratch(len + 1); // one extra byte for the terminator
+    char* const key = scratch.Data();
+    memcpy(key, s, len);
+    key[len] = '\0';
+    return Has(key, pp);
+}
+
+// Destructor: releases the contents by calling Clear().
+// Clear() is virtual, but a virtual call made from a destructor resolves to
+// this class's version, so overrides in derived classes are not invoked here.
+template <class T, class Alloc>
+Hash<T, Alloc>::~Hash() {
+ Clear();
+}
+
+// Removes all entries by delegating to the base class's clear_hash().
+template <class T, class Alloc>
+void Hash<T, Alloc>::Clear() {
+ ci_string_hash::clear_hash(); // to make the key pool empty
+}
+
+// Invokes `func(key, value, cookie)` for every entry in iteration order.
+// Stops at the first call that returns false and returns false; returns true
+// if every call returned true (including when the map is empty).
+template <class T, class Alloc>
+bool Hash<T, Alloc>::ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie) {
+    for (const auto& entry : *this) {
+        const bool keepGoing = func(entry.first, entry.second, cookie);
+        if (!keepGoing) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Set of C-string keys implemented as Hash<yvoid>: the mapped value is the
+// empty yvoid type. The Add/AddUniq/AddPermanent members below take only the
+// key and hide the two-argument versions inherited from Hash<yvoid>.
+class HashSet: public Hash<yvoid> {
+public:
+ // Builds the set from an array of keys; see the definition in str_hash.cpp.
+ HashSet(const char** array, size_type size = 0);
+ // Creates an empty set.
+ HashSet()
+ : Hash<yvoid>()
+ {
+ }
+ // Adds one key per line read from `input`.
+ void Read(IInputStream* input);
+ // Writes the keys to `output`, one per line.
+ void Write(IOutputStream* output) const;
+ // Inserts `s` through insert_copy(); the insertion result is ignored.
+ void Add(const char* s) {
+ // in fact it is the same insert_unique as in AddUnique.
+ // it's impossible to have _FAST_ version of insert() in 'hash_map'
+
+ // you have to use 'hash_mmap' to get the _kind_ of desired effect.
+ // BUT still there will be "Checks" inside -
+ // to make the same keys close to each other (see insert_equal())
+ insert_copy(s, yvoid());
+ }
+ // Same insertion as Add(), but returns the `.second` member of
+ // insert_copy()'s result (presumably true when the key was newly inserted).
+ bool AddUniq(const char* s) {
+ return insert_copy(s, yvoid()).second;
+ }
+ // new function to get rid of allocations completely! -- e.g. in constructors
+ // Uses insert() with the caller's pointer instead of insert_copy();
+ // presumably `s` must outlive the set -- TODO confirm.
+ void AddPermanent(const char* s) {
+ insert(value_type(s, yvoid()));
+ }
+};
+
+// Read-only lookup table from T to T, filled once from an array of
+// {key, value} pairs and backed by a privately inherited THashMap.
+// NOTE: the Alloc template parameter is accepted for interface compatibility
+// but is not forwarded to THashMap.
+template <class T, class HashFcn = THash<T>, class EqualKey = TEqualTo<T>, class Alloc = std::allocator<T>>
+class TStaticHash: private THashMap<T, T, HashFcn, EqualKey> {
+private:
+    using TBase = THashMap<T, T, HashFcn, EqualKey>;
+
+public:
+    // Inserts the first `size` rows of `arr`; in each row element [0] is the
+    // key and element [1] is the value.
+    TStaticHash(T arr[][2], size_t size) {
+        TBase::reserve(size);
+        for (size_t row = 0; row < size; ++row) {
+            TBase::insert(typename TBase::value_type(arr[row][0], arr[row][1]));
+        }
+    }
+
+    // Returns the value mapped to `key`, or a value-initialized T when the key
+    // is absent (a null pointer for pointer types such as the one used by
+    // TStHash). The result is returned by value, never as a reference into
+    // the table.
+    T operator[](const T& key) const { // !!! it is not lvalue nor it used to be
+        const auto it = TBase::find(key);
+        if (it == TBase::end()) {
+            // T() rather than nullptr so that non-pointer T also compiles;
+            // for pointer T the two are the same value.
+            return T();
+        }
+        return it->second;
+    }
+};
+
+// Static C-string -> C-string table hashed and compared with ci_hash /
+// ci_equal_to (presumably case-insensitive keys -- confirm in ci_string.h).
+using TStHash = TStaticHash<const char*, ci_hash, ci_equal_to>;
diff --git a/library/cpp/containers/str_hash/ya.make b/library/cpp/containers/str_hash/ya.make
new file mode 100644
index 00000000000..f7e24316b98
--- /dev/null
+++ b/library/cpp/containers/str_hash/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+
+PEERDIR(
+ library/cpp/charset
+ library/cpp/containers/str_map
+)
+
+SRCS(
+ str_hash.cpp
+)
+
+END()