diff options
author | vvvv <vvvv@ydb.tech> | 2023-07-31 20:07:26 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2023-07-31 20:07:26 +0300 |
commit | f9e4743508b7930e884714cc99985ac45f84ed98 (patch) | |
tree | a1290261a4915a6f607e110e2cc27aee4c205f85 /library/cpp | |
parent | 5cf9beeab3ea847da0b6c414fcb5faa9cb041317 (diff) | |
download | ydb-f9e4743508b7930e884714cc99985ac45f84ed98.tar.gz |
Use UDFs from YDB
Diffstat (limited to 'library/cpp')
286 files changed, 0 insertions, 23598 deletions
diff --git a/library/cpp/CMakeLists.darwin-x86_64.txt b/library/cpp/CMakeLists.darwin-x86_64.txt index 5497fd21be..772027a342 100644 --- a/library/cpp/CMakeLists.darwin-x86_64.txt +++ b/library/cpp/CMakeLists.darwin-x86_64.txt @@ -36,9 +36,6 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) -add_subdirectory(geo) -add_subdirectory(geobase) -add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -47,11 +44,9 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) -add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) -add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -60,7 +55,6 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) -add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -74,8 +68,6 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) -add_subdirectory(reverse_geocoder) -add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -98,7 +90,6 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) -add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/CMakeLists.linux-aarch64.txt b/library/cpp/CMakeLists.linux-aarch64.txt index 5e93629802..cd50b0e3a4 100644 --- a/library/cpp/CMakeLists.linux-aarch64.txt +++ b/library/cpp/CMakeLists.linux-aarch64.txt @@ -35,9 +35,6 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) -add_subdirectory(geo) -add_subdirectory(geobase) -add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -46,11 +43,9 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) -add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) -add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -59,7 +54,6 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) -add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -73,8 +67,6 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) -add_subdirectory(reverse_geocoder) -add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -97,7 +89,6 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) -add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/CMakeLists.linux-x86_64.txt b/library/cpp/CMakeLists.linux-x86_64.txt index 5497fd21be..772027a342 100644 --- a/library/cpp/CMakeLists.linux-x86_64.txt +++ b/library/cpp/CMakeLists.linux-x86_64.txt @@ -36,9 +36,6 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) -add_subdirectory(geo) -add_subdirectory(geobase) -add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -47,11 +44,9 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) -add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) -add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -60,7 +55,6 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) -add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -74,8 +68,6 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) -add_subdirectory(reverse_geocoder) -add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -98,7 +90,6 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) -add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/CMakeLists.windows-x86_64.txt b/library/cpp/CMakeLists.windows-x86_64.txt index 5497fd21be..772027a342 100644 --- a/library/cpp/CMakeLists.windows-x86_64.txt +++ b/library/cpp/CMakeLists.windows-x86_64.txt @@ -36,9 +36,6 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) -add_subdirectory(geo) -add_subdirectory(geobase) -add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -47,11 +44,9 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) -add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) -add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -60,7 +55,6 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) -add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -74,8 +68,6 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) -add_subdirectory(reverse_geocoder) -add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -98,7 +90,6 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) -add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/containers/CMakeLists.txt b/library/cpp/containers/CMakeLists.txt index 40f5013867..43fcbe8346 100644 --- a/library/cpp/containers/CMakeLists.txt +++ b/library/cpp/containers/CMakeLists.txt @@ -20,6 +20,5 @@ add_subdirectory(ring_buffer) add_subdirectory(sorted_vector) add_subdirectory(stack_array) add_subdirectory(stack_vector) -add_subdirectory(str_hash) add_subdirectory(str_map) add_subdirectory(top_keeper) diff --git a/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 627814f0ed..0000000000 --- a/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-containers-str_hash) -target_link_libraries(cpp-containers-str_hash PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-charset - cpp-containers-str_map -) -target_sources(cpp-containers-str_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp -) diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt deleted file mode 100644 index cd723cbea2..0000000000 --- a/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-containers-str_hash) -target_link_libraries(cpp-containers-str_hash PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-charset - cpp-containers-str_map -) -target_sources(cpp-containers-str_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp -) diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt deleted file mode 100644 index cd723cbea2..0000000000 --- a/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-containers-str_hash) -target_link_libraries(cpp-containers-str_hash PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-charset - cpp-containers-str_map -) -target_sources(cpp-containers-str_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp -) diff --git a/library/cpp/containers/str_hash/CMakeLists.txt b/library/cpp/containers/str_hash/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/containers/str_hash/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 627814f0ed..0000000000 --- a/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-containers-str_hash) -target_link_libraries(cpp-containers-str_hash PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-charset - cpp-containers-str_map -) -target_sources(cpp-containers-str_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp -) diff --git a/library/cpp/containers/str_hash/str_hash.cpp b/library/cpp/containers/str_hash/str_hash.cpp deleted file mode 100644 index 1298638533..0000000000 --- a/library/cpp/containers/str_hash/str_hash.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "str_hash.h" - -#include <library/cpp/charset/ci_string.h> -#include <util/stream/output.h> -#include <util/stream/input.h> - -HashSet::HashSet(const char** array, size_type size) { - Resize(size); - while (*array && **array) - AddPermanent(*array++); -} - -void HashSet::Read(IInputStream* input) { - TString s; - - while (input->ReadLine(s)) { - AddUniq(TCiString(s).c_str()); - } -} - -void HashSet::Write(IOutputStream* output) const { - for (const auto& it : *this) { - *output << it.first << "\n"; - } -} - -#ifdef TEST_STRHASH -#include <ctime> -#include <fstream> -#include <cstdio> -#include <cstdlib> - -using namespace std; - -int main(int argc, char* argv[]) { - if (argc < 2) { - printf("usage: stoplist <stop-words file ...\n"); - exit(EXIT_FAILURE); // FreeBSD: EX_USAGE - } - Hash hash; - hash.Read(cin); - for (--argc, ++argv; argc > 0; --argc, ++argv) { - ifstream input(argv[0]); - if (!input.good()) { - perror(argv[0]); - continue; - } - TCiString s; - while (input >> s) { - if (!hash.Has(s)) - cout << s << "\n"; - else - cout << "[[" << s << "]]" - << "\n"; - } - } - return EXIT_SUCCESS; // EX_OK -} - -#endif diff --git a/library/cpp/containers/str_hash/str_hash.h b/library/cpp/containers/str_hash/str_hash.h deleted file mode 100644 index 25f960dbb5..0000000000 --- a/library/cpp/containers/str_hash/str_hash.h +++ /dev/null @@ -1,181 +0,0 @@ -#pragma once - -#include <library/cpp/containers/str_map/str_map.h> -#include <library/cpp/charset/ci_string.h> -#include <util/system/yassert.h> -#include <util/memory/tempbuf.h> - -#include <memory> - -class IInputStream; -class IOutputStream; - -template <class T, class Alloc = std::allocator<const char*>> -class Hash; - -struct yvoid { - yvoid() = default; -}; - -template <typename T, class Alloc> -class Hash: public string_hash<T, ci_hash, ci_equal_to, Alloc> { - using ci_string_hash = string_hash<T, ci_hash, ci_equal_to, Alloc>; - -protected: - using ci_string_hash::pool; - -public: - using size_type = typename ci_string_hash::size_type; - using const_iterator = typename ci_string_hash::const_iterator; - using iterator = typename ci_string_hash::iterator; - using value_type = typename ci_string_hash::value_type; - using ci_string_hash::begin; - using ci_string_hash::end; - using ci_string_hash::find; - using ci_string_hash::size; - - Hash() - : ci_string_hash() - { - } - explicit Hash(size_type theSize) - : ci_string_hash(theSize, theSize * AVERAGEWORD_BUF) - { - } - Hash(const char** strings, size_type size = 0, T* = 0); // must end with NULL or "\0" - virtual ~Hash(); - bool Has(const char* s, size_t len, T* pp = nullptr) const; - bool Has(const char* s, T* pp = nullptr) const { - const_iterator it; - if ((it = find(s)) == end()) - return false; - else if (pp) - *pp = (*it).second; - return true; - } - void Add(const char* s, T data) { - // in fact it is the same insert_unique as in AddUnique. - // it's impossible to have _FAST_ version of insert() in 'hash_map' - - // you have to use 'hash_mmap' to get the _kind_ of desired effect. - // BUT still there will be "Checks" inside - - // to make the same keys close to each other (see insert_equal()) - this->insert_copy(s, data); - } - bool AddUniq(const char* s, T data) { - return this->insert_copy(s, data).second; - } - // new function to get rid of allocations completely! -- e.g. in constructors - void AddPermanent(const char* s, T data) { - this->insert(value_type(s, data)); - } - T Detach(const char* s) { - iterator it = find(s); - if (it == end()) - return T(); - T data = (*it).second; - this->erase(it); - return data; - } - size_type NumEntries() const { - return size(); - } - bool ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie = nullptr); - void Resize(size_type theSize) { - this->reserve(theSize); - // no pool resizing here. - } - virtual void Clear(); - char* Pool() { - if (pool.Size() < 2 || pool.End()[-2] != '\0') - pool.Append("\0", 1); - return pool.Begin(); - } -}; - -template <class T, class Alloc> -Hash<T, Alloc>::Hash(const char** array, size_type theSize, T* data) { - // must end with NULL or "\0" - Y_ASSERT(data != nullptr); - Resize(theSize); - while (*array && **array) - AddPermanent(*array++, *data++); -} - -template <class T, class Alloc> -bool Hash<T, Alloc>::Has(const char* s, size_t len, T* pp) const { - TTempArray<char> buf(len + 1); - char* const allocated = buf.Data(); - memcpy(allocated, s, len); - allocated[len] = '\x00'; - return Has(allocated, pp); -} - -template <class T, class Alloc> -Hash<T, Alloc>::~Hash() { - Clear(); -} - -template <class T, class Alloc> -void Hash<T, Alloc>::Clear() { - ci_string_hash::clear_hash(); // to make the key pool empty -} - -template <class T, class Alloc> -bool Hash<T, Alloc>::ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie) { - for (const_iterator it = begin(); it != end(); ++it) - if (!func((*it).first, (*it).second, cookie)) - return false; - return true; -} - -class HashSet: public Hash<yvoid> { -public: - HashSet(const char** array, size_type size = 0); - HashSet() - : Hash<yvoid>() - { - } - void Read(IInputStream* input); - void Write(IOutputStream* output) const; - void Add(const char* s) { - // in fact it is the same insert_unique as in AddUnique. - // it's impossible to have _FAST_ version of insert() in 'hash_map' - - // you have to use 'hash_mmap' to get the _kind_ of desired effect. - // BUT still there will be "Checks" inside - - // to make the same keys close to each other (see insert_equal()) - insert_copy(s, yvoid()); - } - bool AddUniq(const char* s) { - return insert_copy(s, yvoid()).second; - } - // new function to get rid of allocations completely! -- e.g. in constructors - void AddPermanent(const char* s) { - insert(value_type(s, yvoid())); - } -}; - -template <class T, class HashFcn = THash<T>, class EqualKey = TEqualTo<T>, class Alloc = std::allocator<T>> -class TStaticHash: private THashMap<T, T, HashFcn, EqualKey> { -private: - using TBase = THashMap<T, T, HashFcn, EqualKey>; - -public: - TStaticHash(T arr[][2], size_t size) { - TBase::reserve(size); - while (size) { - TBase::insert(typename TBase::value_type(arr[0][0], arr[0][1])); - arr++; - size--; - } - } - T operator[](const T& key) const { // !!! it is not lvalue nor it used to be - typename TBase::const_iterator it = TBase::find(key); - if (it == TBase::end()) - return nullptr; - return it->second; - } -}; - -using TStHash = TStaticHash<const char*, ci_hash, ci_equal_to>; diff --git a/library/cpp/containers/str_hash/ya.make b/library/cpp/containers/str_hash/ya.make deleted file mode 100644 index f7e24316b9..0000000000 --- a/library/cpp/containers/str_hash/ya.make +++ /dev/null @@ -1,12 +0,0 @@ -LIBRARY() - -PEERDIR( - library/cpp/charset - library/cpp/containers/str_map -) - -SRCS( - str_hash.cpp -) - -END() diff --git a/library/cpp/deprecated/CMakeLists.txt b/library/cpp/deprecated/CMakeLists.txt index 765ea6aad7..ad818e3662 100644 --- a/library/cpp/deprecated/CMakeLists.txt +++ b/library/cpp/deprecated/CMakeLists.txt @@ -8,10 +8,6 @@ add_subdirectory(accessors) add_subdirectory(atomic) -add_subdirectory(autoarray) -add_subdirectory(datafile) add_subdirectory(enum_codegen) -add_subdirectory(fgood) add_subdirectory(kmp) -add_subdirectory(mapped_file) add_subdirectory(split) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index f2a246218c..0000000000 --- a/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-autoarray) -target_link_libraries(cpp-deprecated-autoarray PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-autoarray PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp -) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 2411a48cd3..0000000000 --- a/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-autoarray) -target_link_libraries(cpp-deprecated-autoarray PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-autoarray PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp -) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 2411a48cd3..0000000000 --- a/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-autoarray) -target_link_libraries(cpp-deprecated-autoarray PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-autoarray PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp -) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.txt b/library/cpp/deprecated/autoarray/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/deprecated/autoarray/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt deleted file mode 100644 index f2a246218c..0000000000 --- a/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-autoarray) -target_link_libraries(cpp-deprecated-autoarray PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-autoarray PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp -) diff --git a/library/cpp/deprecated/autoarray/README.md b/library/cpp/deprecated/autoarray/README.md deleted file mode 100644 index 1d83147cee..0000000000 --- a/library/cpp/deprecated/autoarray/README.md +++ /dev/null @@ -1,3 +0,0 @@ -Pre-C++11 vector-like container. - -Just use std::vector. If you need to fill your vector with custom-constructed data, use reserve+emplace_back (but make sure that your elements are movable). diff --git a/library/cpp/deprecated/autoarray/autoarray.cpp b/library/cpp/deprecated/autoarray/autoarray.cpp deleted file mode 100644 index 15167f27f6..0000000000 --- a/library/cpp/deprecated/autoarray/autoarray.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "autoarray.h" diff --git a/library/cpp/deprecated/autoarray/autoarray.h b/library/cpp/deprecated/autoarray/autoarray.h deleted file mode 100644 index 2aa12c5916..0000000000 --- a/library/cpp/deprecated/autoarray/autoarray.h +++ /dev/null @@ -1,264 +0,0 @@ -#pragma once - -#include <util/system/compat.h> -#include <util/system/yassert.h> -#include <util/system/defaults.h> -#include <util/system/sys_alloc.h> - -#include <util/generic/typetraits.h> -#include <utility> - -#include <new> -#include <util/generic/noncopyable.h> - -struct autoarray_getindex { - autoarray_getindex() = default; -}; - -struct aarr_b0 { - aarr_b0() = default; -}; - -struct aarr_nofill { - aarr_nofill() = default; -}; - -template <typename T> -struct ynd_type_traits { - enum { - empty_destructor = TTypeTraits<T>::IsPod, - }; -}; - -template <class T> -class autoarray : TNonCopyable { -protected: - T* arr; - size_t _size; - -private: - void AllocBuf(size_t siz) { - arr = nullptr; - _size = 0; - if (siz) { - arr = (T*)y_allocate(sizeof(T) * siz); - _size = siz; - } - } - -public: - using value_type = T; - using iterator = T*; - using const_iterator = const T*; - - autoarray() - : arr(nullptr) - , _size(0) - { - } - autoarray(size_t siz) { - AllocBuf(siz); - T* curr = arr; - try { - for (T* end = arr + _size; curr != end; ++curr) - new (curr) T(); - } catch (...) { - for (--curr; curr >= arr; --curr) - curr->~T(); - y_deallocate(arr); - throw; - } - } - template <class A> - explicit autoarray(size_t siz, A& fill) { - AllocBuf(siz); - T* curr = arr; - try { - for (T* end = arr + _size; curr != end; ++curr) - new (curr) T(fill); - } catch (...) { - for (--curr; curr >= arr; --curr) - curr->~T(); - y_deallocate(arr); - throw; - } - } - explicit autoarray(size_t siz, autoarray_getindex) { - AllocBuf(siz); - size_t nCurrent = 0; - try { - for (nCurrent = 0; nCurrent < _size; ++nCurrent) - new (&arr[nCurrent]) T(nCurrent); - } catch (...) { - for (size_t n = 0; n < nCurrent; ++n) - arr[n].~T(); - y_deallocate(arr); - throw; - } - } - explicit autoarray(size_t siz, aarr_b0) { - AllocBuf(siz); - memset(arr, 0, _size * sizeof(T)); - } - explicit autoarray(size_t siz, aarr_nofill) { - AllocBuf(siz); - } - template <class A> - explicit autoarray(const A* fill, size_t siz) { - AllocBuf(siz); - size_t nCurrent = 0; - try { - for (nCurrent = 0; nCurrent < _size; ++nCurrent) - new (&arr[nCurrent]) T(fill[nCurrent]); - } catch (...) { - for (size_t n = 0; n < nCurrent; ++n) - arr[n].~T(); - y_deallocate(arr); - throw; - } - } - template <class A, class B> - explicit autoarray(const A* fill, const B* cfill, size_t siz) { - AllocBuf(siz); - size_t nCurrent = 0; - try { - for (nCurrent = 0; nCurrent < _size; ++nCurrent) - new (&arr[nCurrent]) T(fill[nCurrent], cfill); - } catch (...) { - for (size_t n = 0; n < nCurrent; ++n) - arr[n].~T(); - y_deallocate(arr); - throw; - } - } - template <class A> - explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz) { - AllocBuf(fullsiz); - size_t nCurrent = 0; - try { - for (nCurrent = 0; nCurrent < ((initsiz < _size) ? initsiz : _size); ++nCurrent) - new (&arr[nCurrent]) T(fill[nCurrent]); - for (; nCurrent < _size; ++nCurrent) - new (&arr[nCurrent]) T(); - } catch (...) { - for (size_t n = 0; n < nCurrent; ++n) - arr[n].~T(); - y_deallocate(arr); - throw; - } - } - template <class A> - explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz, const T& dummy) { - AllocBuf(fullsiz); - size_t nCurrent = 0; - try { - for (nCurrent = 0; nCurrent < ((initsiz < _size) ? initsiz : _size); ++nCurrent) - new (&arr[nCurrent]) T(fill[nCurrent]); - for (; nCurrent < _size; ++nCurrent) - new (&arr[nCurrent]) T(dummy); - } catch (...) { - for (size_t n = 0; n < nCurrent; ++n) - arr[n].~T(); - y_deallocate(arr); - throw; - } - } - - template <class... R> - explicit autoarray(size_t siz, R&&... fill) { - AllocBuf(siz); - T* curr = arr; - try { - for (T* end = arr + _size; curr != end; ++curr) - new (curr) T(std::forward<R>(fill)...); - } catch (...) { - for (--curr; curr >= arr; --curr) - curr->~T(); - y_deallocate(arr); - throw; - } - } - ~autoarray() { - if (_size) { - if (!ynd_type_traits<T>::empty_destructor) - for (T *curr = arr, *end = arr + _size; curr != end; ++curr) - curr->~T(); - y_deallocate(arr); - } - } - T& operator[](size_t pos) { - Y_ASSERT(pos < _size); - return arr[pos]; - } - const T& operator[](size_t pos) const { - Y_ASSERT(pos < _size); - return arr[pos]; - } - size_t size() const { - return _size; - } - void swap(autoarray& with) { - T* tmp_arr = arr; - size_t tmp_size = _size; - arr = with.arr; - _size = with._size; - with.arr = tmp_arr; - with._size = tmp_size; - } - void resize(size_t siz) { - autoarray<T> tmp(arr, _size, siz); - swap(tmp); - } - void resize(size_t siz, const T& dummy) { - autoarray<T> tmp(arr, _size, siz, dummy); - swap(tmp); - } - T* rawpointer() { - return arr; - } - const T* operator~() const { - return arr; - } - T* begin() { - return arr; - } - T* end() { - return arr + _size; - } - T& back() { - Y_ASSERT(_size); - return arr[_size - 1]; - } - bool empty() const { - return !_size; - } - bool operator!() const { - return !_size; - } - size_t operator+() const { - return _size; - } - const T* begin() const { - return arr; - } - const T* end() const { - return arr + _size; - } - const T& back() const { - Y_ASSERT(_size); - return arr[_size - 1]; - } - //operator T*() { return arr; } -}; - -template <class T> -inline bool operator==(const autoarray<T>& a, const autoarray<T>& b) { - size_t count = a.size(); - if (count != b.size()) - return false; - for (size_t i = 0; i < count; ++i) { - if (a[i] != b[i]) - return false; - } - return true; -} diff --git a/library/cpp/deprecated/autoarray/ya.make b/library/cpp/deprecated/autoarray/ya.make deleted file mode 100644 index 4b055f8c29..0000000000 --- a/library/cpp/deprecated/autoarray/ya.make +++ /dev/null @@ -1,7 +0,0 @@ -LIBRARY() - -SRCS( - autoarray.cpp -) - -END() diff --git a/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 3f88f788da..0000000000 --- a/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-datafile) -target_link_libraries(cpp-deprecated-datafile PUBLIC - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-deprecated-datafile PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp -) diff --git a/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 43da9ae45a..0000000000 --- a/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-datafile) -target_link_libraries(cpp-deprecated-datafile PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-deprecated-datafile PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp -) diff --git a/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 43da9ae45a..0000000000 --- a/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-datafile) -target_link_libraries(cpp-deprecated-datafile PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-deprecated-datafile PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp -) diff --git a/library/cpp/deprecated/datafile/CMakeLists.txt b/library/cpp/deprecated/datafile/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/deprecated/datafile/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 3f88f788da..0000000000 --- a/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-datafile) -target_link_libraries(cpp-deprecated-datafile PUBLIC - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-deprecated-datafile PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp -) diff --git a/library/cpp/deprecated/datafile/README.md b/library/cpp/deprecated/datafile/README.md deleted file mode 100644 index 7f8547108e..0000000000 --- a/library/cpp/deprecated/datafile/README.md +++ /dev/null @@ -1,3 +0,0 @@ -A wrapper on top of some user-defined custom file format. - -Just write your own if you need it. It's going to be way easier than figuring out how to use this one. diff --git a/library/cpp/deprecated/datafile/datafile.cpp b/library/cpp/deprecated/datafile/datafile.cpp deleted file mode 100644 index ff93f11c6b..0000000000 --- a/library/cpp/deprecated/datafile/datafile.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include "datafile.h" - -void TDataFileBase::DoLoad(const char* fname, int loadMode) { - Destroy(); - TFile f(fname, RdOnly); - DoLoad(f, loadMode, nullptr, 0); -} - -void TDataFileBase::DoLoad(TFile& f, int loadMode, void* hdrPtr, size_t hdrSize) { - if (hdrPtr) { - if (loadMode & DLM_EXACT_SIZE && f.GetLength() != (i64)Length) - throw yexception() << f.GetName() << " size does not match its header value"; - } else { - Length = f.GetLength(); - hdrSize = 0; - } - if ((loadMode & DLM_LD_TYPE_MASK) == DLM_READ) { - MemData = TVector<char>(Length); - memcpy(MemData.begin(), hdrPtr, hdrSize); - f.Load(MemData.begin() + hdrSize, Length - hdrSize); - Start = MemData.begin(); - } else { - FileData.init(f); - if (FileData.getSize() < Length) - throw yexception() << f.GetName() << " is smaller than what its header value says"; - if ((loadMode & DLM_LD_TYPE_MASK) == DLM_MMAP_PRC) - FileData.precharge(); - Start = (const char*)FileData.getData(); - } -} - -void TDataFileBase::Destroy() { - TVector<char>().swap(MemData); - FileData.term(); - Start = nullptr; - Length = 0; -} - -void TDataFileBase::Precharge() const { - if (Length && Start == (char*)FileData.getData()) - FileData.precharge(); -} diff --git a/library/cpp/deprecated/datafile/datafile.h b/library/cpp/deprecated/datafile/datafile.h deleted file mode 100644 index a438baceca..0000000000 --- a/library/cpp/deprecated/datafile/datafile.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include "loadmode.h" - -#include <library/cpp/deprecated/mapped_file/mapped_file.h> - -#include <util/generic/vector.h> -#include <util/system/file.h> -#include <util/system/filemap.h> - -/** Simple helper that allows a file to be either mapped or read into malloc'ed memory. - This behaviour is controlled by EDataLoadMode enum defined in loadmode.h. - Unlike TBlob it provides Precharge() function and simple file size - based integrity check. - - To use this code, inherit your class from TDataFile<TFileHeader>. - TFileHeader must be a pod-type structure with byte layout of the file header. - File must start with that header. - TFileHeader must have FileSize() member function that determines expected file size or - length of data that need to be read from the beginning of file. - */ - -class TDataFileBase { -protected: - TVector<char> MemData; - TMappedFile FileData; - - const char* Start; - size_t Length; - - TDataFileBase() - : Start(nullptr) - , Length(0) - { - } - - void DoLoad(TFile& f, int loadMode, void* hdrPtr, size_t hdrSize); - void DoLoad(const char* fname, int loadMode); // just whole file - void Destroy(); - void swap(TDataFileBase& with) { - MemData.swap(with.MemData); - FileData.swap(with.FileData); - DoSwap(Start, with.Start); - DoSwap(Length, with.Length); - } - -public: - void Precharge() const; -}; - -template <class TFileHeader> -class TDataFile: public TDataFileBase { -protected: - void Load(const char* fname, EDataLoadMode loadMode) { - Destroy(); - TFile f(fname, RdOnly | Seq); - TFileHeader hdr; - f.Load(&hdr, sizeof(hdr)); - Length = hdr.FileSize(); - DoLoad(f, (int)loadMode, &hdr, sizeof(hdr)); - } - const TFileHeader& Hdr() const { - return *(TFileHeader*)Start; - } -}; - -// Use: class TFoo: public TDataFileEx<Foo> {...}; -// Additional requrement: TFileHeader must have Validate(fname) function that throws exception. -// Class TUser itself must have Init(fname) function -// Adds Load() function to your class (TUser) -template <class TUser, class TFileHeader> -class TDataFileEx: public TDataFile<TFileHeader> { -private: - using TBase = TDataFile<TFileHeader>; - TUser& User() const { - return *(TUser*)this; - } - -public: - TDataFileEx(const char* fname, EDataLoadMode loadMode = DLM_DEFAULT) { - if (fname) - Load(fname, loadMode); - } - void Load(const char* fname, EDataLoadMode loadMode = DLM_DEFAULT) { - TBase::Load(fname, loadMode); - TBase::Hdr().Validate(fname); - User().Init(fname); - } -}; diff --git a/library/cpp/deprecated/datafile/loadmode.cpp b/library/cpp/deprecated/datafile/loadmode.cpp deleted file mode 100644 index a857830326..0000000000 --- a/library/cpp/deprecated/datafile/loadmode.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "loadmode.h" diff --git a/library/cpp/deprecated/datafile/loadmode.h b/library/cpp/deprecated/datafile/loadmode.h deleted file mode 100644 index f04054dd64..0000000000 --- a/library/cpp/deprecated/datafile/loadmode.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -// It is recommended to support all reasonal value combinations via this enum, -// to let Load() function argument be of EDataLoadMode type, not just int type - -enum EDataLoadMode { - DLM_READ = 0, - DLM_MMAP_PRC = 1, // precharge - DLM_MMAP = 2, // w/o precharge - DLM_MMAP_AUTO_PRC = 3, // precharge automatically (same as DLM_MMAP unless specifically supported) - DLM_LD_TYPE_MASK = 15, - DLM_EXACT_SIZE = 16, // fail if input file is larger than what header says - - DLM_READ_ESZ = DLM_READ | DLM_EXACT_SIZE, - DLM_MMAP_PRC_ESZ = DLM_MMAP_PRC | DLM_EXACT_SIZE, - DLM_MMAP_ESZ = DLM_MMAP | DLM_EXACT_SIZE, - DLM_MMAP_APRC_ESZ = DLM_MMAP_AUTO_PRC | DLM_EXACT_SIZE, - - DLM_DEFAULT = DLM_MMAP_PRC_ESZ, -}; diff --git a/library/cpp/deprecated/datafile/ya.make b/library/cpp/deprecated/datafile/ya.make deleted file mode 100644 index 1ad4fe9bc7..0000000000 --- a/library/cpp/deprecated/datafile/ya.make +++ /dev/null @@ -1,12 +0,0 @@ -LIBRARY() - -SRCS( - datafile.cpp - loadmode.cpp -) - -PEERDIR( - library/cpp/deprecated/mapped_file -) - -END() diff --git a/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index a82750e559..0000000000 --- a/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-fgood) -target_link_libraries(cpp-deprecated-fgood PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-fgood PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp -) diff --git a/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 52e29348fd..0000000000 --- a/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-fgood) -target_link_libraries(cpp-deprecated-fgood PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-fgood PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp -) diff --git a/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 52e29348fd..0000000000 --- a/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-fgood) -target_link_libraries(cpp-deprecated-fgood PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-fgood PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp -) diff --git a/library/cpp/deprecated/fgood/CMakeLists.txt b/library/cpp/deprecated/fgood/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/deprecated/fgood/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt deleted file mode 100644 index a82750e559..0000000000 --- a/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-fgood) -target_link_libraries(cpp-deprecated-fgood PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-fgood PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp -) diff --git a/library/cpp/deprecated/fgood/README.md b/library/cpp/deprecated/fgood/README.md deleted file mode 100644 index 4f66289657..0000000000 --- a/library/cpp/deprecated/fgood/README.md +++ /dev/null @@ -1,15 +0,0 @@ -Some ancient wrappers on top of FILE*, and some string manupulation functions. - -Alternatives are as follows. - -For TFILEPtr. Use TIFStream or TOFStream if you need IO. For some rare use cases a TFileMap might also do. - -For fput/fget/getline. Use streams API. - -For struct ffb and struct prnstr. Just don't use them. Even if you can figure out what they do. - -For sf family of functions and TLineSplitter. Just use Split* from util/string/split.h - -For TSFReader. Use TMapTsvFile. - -For read_or_die family of functions. Use streams API. diff --git a/library/cpp/deprecated/fgood/ffb.cpp b/library/cpp/deprecated/fgood/ffb.cpp deleted file mode 100644 index aa9da861a6..0000000000 --- a/library/cpp/deprecated/fgood/ffb.cpp +++ /dev/null @@ -1,407 +0,0 @@ -#include "ffb.h" - -#include <util/string/util.h> // str_spn -#include <util/system/compat.h> -#include <util/generic/yexception.h> - -#include <cstdio> -#include <algorithm> - -#include <ctype.h> - -#ifdef _win_ -#include <io.h> -#else -#include <unistd.h> -#endif - -ffb::ffb(FILE* file) - : TFILEPtr(file) -{ - if (file && !isatty(fileno(file)) && BUFSIZ < 512 * 1024) - setvbuf(file, nullptr, _IOFBF, 512 * 1024); -} - -void ffb::operator=(FILE* f) { - TFILEPtr::operator=(f); - if (f && !isatty(fileno(f)) && BUFSIZ < 512 * 1024) - setvbuf(f, nullptr, _IOFBF, 512 * 1024); -} - -void ffb::open(const char* name, const char* mode) { - TFILEPtr::open(name, mode); - if (!isatty(fileno(*this)) && BUFSIZ < 512 * 1024) - setvbuf(*this, nullptr, _IOFBF, 512 * 1024); -} - -int sf(char** fb, char* buf) { //don't want to call sf(fb, buf, 32) - if (!(*buf && *buf != 10)) { - *fb = nullptr; - return 0; - } - int n = 1; - fb[0] = buf; - while (*buf && *buf != 10 && n < 31) { - if (*buf == '\t') { - *buf++ = 0; - fb[n++] = buf; - continue; - } - buf++; - } - if (*buf == 10 && buf[-1] == 13) - buf[-1] = 0; - *buf = 0; - fb[n] = nullptr; - return n; -} - -int sf(char** fb, char* buf, size_t fb_sz) { - if (!(*buf && *buf != 10)) { - *fb = nullptr; - return 0; - } - fb_sz--; - int n = 1; - fb[0] = buf; - while (*buf && *buf != 10 && n < (int)fb_sz) { - if (*buf == '\t') { - *buf++ = 0; - fb[n++] = buf; - continue; - } - buf++; - } - if (*buf == 10 && buf[-1] == 13) - buf[-1] = 0; - *buf = 0; - fb[n] = nullptr; - return n; -} - -inline int sf_blank(char** fb, char* buf, size_t fb_sz) { - while (isspace((ui8)*buf)) - buf++; - if (!*buf) { - *fb = nullptr; - return 0; - } - fb_sz--; - int n = 1; - fb[0] = buf; - while (*buf && *buf != 10 && n < (int)fb_sz) { - if (isspace((ui8)*buf)) { - *buf++ = 0; - while (isspace((ui8)*buf)) - buf++; - if (*buf) - fb[n++] = buf; - continue; - } - buf++; - } - if (*buf == 10 && buf[-1] == 13) - buf[-1] = 0; - *buf = 0; - fb[n] = nullptr; - return n; -} - -int sf(char fs, char** fb, char* buf, size_t fb_sz) { - if (fs == ' ') - return sf_blank(fb, buf, fb_sz); - while (*buf == fs) - buf++; - if (!(*buf && *buf != 10)) { - *fb = nullptr; - return 0; - } - fb_sz--; - int n = 1; - fb[0] = buf; - while (*buf && *buf != 10 && n < (int)fb_sz) { - if (*buf == fs) { - *buf++ = 0; - while (*buf == fs) - buf++; - fb[n++] = buf; - continue; - } - buf++; - } - if (*buf == 10 && buf[-1] == 13) - buf[-1] = 0; - *buf = 0; - fb[n] = nullptr; - return n; -} - -int sf(const char* fs, char** fb, char* buf, size_t fb_sz) { - if (!(*buf && *buf != 10)) { - *fb = nullptr; - return 0; - } - int fs_len = strlen(fs); - fb_sz--; - int n = 1; - fb[0] = buf; - while (*buf && *buf != 10 && n < (int)fb_sz) { - if (*buf == *fs && !strncmp(buf + 1, fs + 1, fs_len - 1)) { - *buf = 0; - buf += fs_len; - fb[n++] = buf; - continue; - } - buf++; - } - if (*buf == 10 && buf[-1] == 13) - buf[-1] = 0; - *buf = 0; - fb[n] = nullptr; - return n; -} - -inline bool is_end(const char* p) { - return !p || !p[0]; -} - -int sf(const char* seps, char* buf, char** fb, size_t fb_sz) { - if (fb_sz < 1 || is_end(buf)) { - *fb = nullptr; - return 0; - } - str_spn sseps(seps); - fb[0] = nullptr; - int n = 0; - // skip leading delimeters - buf = sseps.cbrk(buf); - if (is_end(buf)) - return 0; - // store fields - while (n < (int)fb_sz) { - fb[n++] = buf; - // find delimeters - buf = sseps.brk(buf + 1); - if (is_end(buf)) - break; - *buf = 0; - // skip delimiters - buf = sseps.cbrk(buf + 1); - if (is_end(buf)) - break; - } - fb[n] = nullptr; - return n; -} - -void TLineSplitter::operator()(char* p, TVector<char*>& fields) const { - if (!p || !*p) - return; - char* q = p; - while (1) { - p = Sep.brk(p); - if (q && (p - q || !SkipEmpty())) - fields.push_back(q); - q = nullptr; - if (!*p) - break; - if (SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1))) { - *p = 0; - p += SepStrLen; - q = p; - } else - p++; - } -} - -void TLineSplitter::operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const { - if (!p || !*p) - return; - const char* q = p; - while (1) { - p = Sep.brk(p); - if (q && (p - q || !SkipEmpty())) - fields.push_back(std::make_pair(q, p - q)); - q = nullptr; - if (!*p) - break; - if (SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1))) { - p += SepStrLen; - q = p; - } else - p++; - } -} - -TSFReader::TSFReader(const char* fname, char sep, i32 nfrq) // if sep == ' ' isspace will be imitated (for compat) - : Split(str_spn(sep == ' ' ? "\t\n\v\f\r " : TString(1, sep).data()), sep == ' ') - , OpenPipe(false) -{ - Open(fname, nfrq); -} - -TSFReader::TSFReader(const char* fname, const char* sep, i32 nfrq) - : Split(sep, false) - , OpenPipe(false) -{ - Open(fname, nfrq); -} - -TSFReader::TSFReader(const char* fname, const TLineSplitter& spl, i32 nfrq) - : Split(spl) - , OpenPipe(false) -{ - Open(fname, nfrq); -} - -void TSFReader::Open(const char* fname, i32 nfrq, size_t vbuf_size) { - FieldsRequired = nfrq; - NF = NR = 0; - - if (IsOpen()) - File.close(); - - if (!fname) - return; - - if (!strcmp(fname, "/dev/stdin")) { - File.assign(stdin, "/dev/stdin"); - } else { - if (OpenPipe) - File.popen(fname, "r"); - else - File.open(fname, "r"); - } - OpenPipe = false; - if (!isatty(fileno(File))) - setvbuf(File, nullptr, _IOFBF, vbuf_size); -} - -void TSFReader::Popen(const char* pname, i32 nfrq, size_t vbuf_size) { - OpenPipe = true; - Open(pname, nfrq, vbuf_size); -} - -bool TSFReader::NextLine(segmented_string_pool* pool) { - size_t line_len = 0; - -#ifdef __FreeBSD__ - char* ptr = fgetln(File, &line_len); - if (!ptr) - return false; - if (!line_len || ptr[line_len - 1] != '\n') { // last line w/o newline - Buf.AssignNoAlias(ptr, line_len); - ptr = Buf.begin(); - } else { - // can safely replace newline with \0 - ptr[line_len - 1] = 0; - --line_len; - } -#else - if (!getline(File, Buf)) - return false; - char* ptr = Buf.begin(); - line_len = Buf.size(); -#endif - if (line_len && ptr[line_len - 1] == '\r') - ptr[line_len - 1] = 0; - - if (pool) { - char* nptr = pool->append(ptr); - Y_ASSERT(!strcmp(ptr, nptr)); - ptr = nptr; - } - - ++NR; - Fields.clear(); - Split(ptr, Fields); - NF = Fields.size(); - - if (FieldsRequired != -1 && FieldsRequired != (int)NF) - ythrow yexception() << File.name() << " line " << NR << ": " << NF << " fields, expected " << FieldsRequired; - - return true; -} - -int prnstr::f(const char* c, ...) { - va_list params; - int n = asize - pos, k; - va_start(params, c); - while ((k = vsnprintf(buf + pos, n, c, params)) >= n) { - n += asize, asize *= 2; - while (k + pos >= n) - n += asize, asize *= 2; - char* t = new char[asize]; - memcpy(t, buf, pos); - delete[] buf; - buf = t; - va_end(params); - va_start(params, c); - } - pos += k; - va_end(params); - return k; -} -int prnstr::s(const char* c, size_t k) { - if (!c) - return 0; - size_t n = asize - pos; - if (k >= n) { - n += asize, asize *= 2; - while (k + pos >= n) - n += asize, asize *= 2; - char* t = new char[asize]; - memcpy(t, buf, pos); - delete[] buf; - buf = t; - } - memcpy(buf + pos, c, k); - pos += k; - buf[pos] = 0; - return k; -} -void prnstr::clear() { - pos = 0; - if (asize > 32768) { - asize = 32768; - delete[] buf; - buf = new char[asize]; - } -} - -void prnstr::swap(prnstr& w) { - std::swap(buf, w.buf); - std::swap(pos, w.pos); - std::swap(asize, w.asize); -} - -FILE* read_or_die(const char* fname) { - FILE* f = fopen(fname, "rb"); - if (!f) - err(1, "%s", fname); - return f; -} -FILE* write_or_die(const char* fname) { - FILE* f = fopen(fname, "wb"); - if (!f) - err(1, "%s", fname); - return f; -} -FILE* fopen_or_die(const char* fname, const char* mode) { - FILE* f = fopen(fname, mode); - if (!f) - err(1, "%s (mode '%s')", fname, mode); - return f; -} - -FILE* fopen_chk(const char* fname, const char* mode) { - FILE* f = fopen(fname, mode); - if (!f) - ythrow yexception() << fname << " (mode '" << mode << "'): " << LastSystemErrorText(); - return f; -} - -void fclose_chk(FILE* f, const char* fname) { - if (fclose(f)) - ythrow yexception() << "file " << fname << ": " << LastSystemErrorText(); -} diff --git a/library/cpp/deprecated/fgood/ffb.h b/library/cpp/deprecated/fgood/ffb.h deleted file mode 100644 index ca229eb65a..0000000000 --- a/library/cpp/deprecated/fgood/ffb.h +++ /dev/null @@ -1,264 +0,0 @@ -#pragma once - -#include "fgood.h" - -#include <util/string/util.h> // str_spn -#include <util/string/split.h> // str_spn -#include <util/memory/segmented_string_pool.h> -#include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/generic/noncopyable.h> - -#include <utility> - -#include <cstdarg> -#include <cstring> - -struct ffb: public TFILEPtr { - ffb() { - } - ffb(FILE* file); - ffb(const char* name, const char* mode) { - open(name, mode); - } - void operator=(FILE* f); // take ownership - void open(const char* name, const char* mode); - int f(const char* c, ...) { - va_list args; - va_start(args, c); - return vfprintf(*this, c, args); - } - void s(const char* c) { - fsput(c, strlen(c)); - } - void b(const void* cc, int n) { - fsput((const char*)cc, n); - } - void B(const void* cc, int N) { - fsput((const char*)cc, N); - } - void c(char c) { - fputc(c); - } - void cbe(wchar16 c) { // big endian utf-16 - fputc(char(c >> 8)); //Hi8 - fputc(char(c & 255)); //Lo8 - } - void sbe(const wchar16* c) { - for (; *c; c++) - cbe(*c); - } - void fclose() { - close(); - } -}; - -// split fields of tab-delimited line of text -// here and below fb actual size must be fb_sz + 1 to allow fb[fb_sz] be zero -int sf(char** fb, char* buf, size_t fb_sz); -int sf(char** fb, char* buf /* fb_sz == 32 */); - -// split fields of char-delimited line of text -// Achtung: delim = ' ' imitates awk: initial separators are skipped, -// repeated seps treated as one, all chars less than ' ' treated as separators. -int sf(char fs, char** fb, char* buf, size_t fb_sz = 32); - -// split fields of string-delimited line of text (fs is NOT a regexp) -// (usually fs is "@@") -int sf(const char* fs, char** fb, char* buf, size_t fb_sz = 32); - -// split fields of char-delimited line of text, set of char-separators is given -// Achtung: repeated seps treated as one, initial seps are skipped -// newlines are NOT ignored. -int sf(const char* seps, char* buf, char** fb, size_t fb_sz = 32); - -inline char* chomp(char* buf) { - char* c = buf + strlen(buf); - if (c > buf && c[-1] == '\n') { - *--c = 0; -#ifdef _win32_ - if (c > buf && c[-1] == '\r') - *--c = 0; -#endif - } - return buf; -} - -inline char* chomp_cr(char* buf) { - char* c = buf + strlen(buf); - if (c > buf && c[-1] == '\n') - *--c = 0; - if (c > buf && c[-1] == '\r') - *--c = 0; - return buf; -} - -class TLineSplitter { -protected: - enum { // Default: Split string by SepStr - SplitByAnySep = 1, // Split string by Sep - NoEmptyFields = 2 // Skip all empty fields between separators - }; - -private: - ui32 Flags; - const str_spn Sep; // collection of separators - const char* SepStr; // pointer exact string to separate by - size_t SepStrLen; // length of separator string - -public: - TLineSplitter(const char* sep, bool noEmpty) - : Flags(noEmpty ? NoEmptyFields : 0) - , Sep(TString(sep, 1).data()) - , SepStr(sep) - , SepStrLen(strlen(sep)) - { - } - TLineSplitter(const str_spn& sep, bool noEmpty = false) - : Flags(SplitByAnySep | (noEmpty ? NoEmptyFields : 0)) - , Sep(sep) - , SepStr(nullptr) - , SepStrLen(1) - { - } - bool AnySep() const { - return Flags & SplitByAnySep; - } - bool SkipEmpty() const { - return Flags & NoEmptyFields; - } - /// Separates string onto tokens - /// Expecting a zero-terminated string - /// By default returns empty fields between sequential separators - void operator()(char* p, TVector<char*>& fields) const; - /// Same, but for const string - fills vector of pairs (pointer, length) - void operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const; -}; - -/** - * Use library/cpp/map_text_file/map_tsv_file.h instead. - */ -class TSFReader { - TString Buf; // buffer used for non-'\n'-terminated string and for non-freebsd work - TLineSplitter Split; - TVector<char*> Fields; - size_t NF; // Fields.size() - size_t NR; - - TFILEPtr File; - - bool OpenPipe; // internal flag that turns open() to popen() - - i32 FieldsRequired; // if != -1, != nf, terminate program - -public: - // char separator - // Achtung: delim = ' ' imitates awk: initial separators are skipped, - // all chars less than ' ' treated as separators. - TSFReader(const char* fname = nullptr, char sep = '\t', i32 nf_reqired = -1); - // exact string separator - TSFReader(const char* fname, const char* sep, i32 nf_reqired = -1); - // fully customizable - TSFReader(const char* fname, const TLineSplitter& spl, i32 nf_reqired = -1); - - void Open(const char* fname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21); // use "/dev/stdin" for stdin - void Popen(const char* pname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21); - - bool NextLine(segmented_string_pool* pool = nullptr); - - bool IsOpen() const { - return (FILE*)File != nullptr; - } - bool IsEof() const { - return feof(File); - } - void Close() { - File.close(); - } - void Rewind() { - File.seek(0, SEEK_SET); - } - void Seek(i64 offset, int mode = SEEK_SET) { - File.seek(offset, mode); - } - i64 Tell() const { - return ftell(File); - } - char*& operator[](size_t ind) { - //if (ind >= NF) - // throw yexception("Can't return reference to unexisting field %" PRISZT, ind); - return Fields[ind]; - } - const char* operator[](size_t ind) const { - if (ind >= NF) - return nullptr; - return Fields[ind]; - } - operator int() const { // note: empty input line makes 0 fields - return (int)NF; - } - const char* Name() const { - return File.name().data(); - } - size_t Line() const { - return NR; - } - const TVector<char*>& GetFields() const { - return Fields; - } -}; - -struct prnstr { - char* buf; - int pos; - int asize; - prnstr() - : pos(0) - { - asize = 32; - buf = new char[asize]; - } - explicit prnstr(int asz) - : pos(0) - { - asize = asz; - buf = new char[asize]; - } - int f(const char* c, ...); - int s(const char* c1, const char* c2); - int s(const char* c1, const char* c2, const char* c3); - int s(const char* c, size_t len); - //int s(const char *c); - int s(const char* c) { - return c ? s(c, strlen(c)) : 0; - } - int s(const TString& c); - int s_htmesc(const char* c, bool enc_utf = false); - int s_htmesc_w(const char* c); - int c(char c); - int cu(wchar32 c); //for utf-8 - void restart() { - *buf = 0; - pos = 0; - } - const char* operator~() const { - return buf; - } - int operator+() const { - return pos; - } - ~prnstr() { - delete[] buf; - } - void clear(); - void swap(prnstr& w); -}; - -// functions that terminate program upon failure -FILE* read_or_die(const char* fname); -FILE* write_or_die(const char* fname); -FILE* fopen_or_die(const char* fname, const char* mode); - -// functions that throw upon failure -FILE* fopen_chk(const char* fname, const char* mode); -void fclose_chk(FILE* f, const char* fname_dbg); diff --git a/library/cpp/deprecated/fgood/fgood.cpp b/library/cpp/deprecated/fgood/fgood.cpp deleted file mode 100644 index 5d4725bfae..0000000000 --- a/library/cpp/deprecated/fgood/fgood.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include "fgood.h" - -#include <util/generic/cast.h> -#include <util/string/cast.h> -#include <util/system/fstat.h> - -#ifdef _win32_ -#include <io.h> -#endif - -i64 TFILEPtr::length() const { -#ifdef _win32_ - FHANDLE fd = (FHANDLE)_get_osfhandle(fileno(m_file)); -#else - FHANDLE fd = fileno(m_file); -#endif - i64 rv = GetFileLength(fd); - if (rv < 0) - ythrow yexception() << "TFILEPtr::length() " << Name.data() << ": " << LastSystemErrorText(); - return rv; -} - -FILE* OpenFILEOrFail(const TString& name, const char* mode) { - FILE* res = ::fopen(name.data(), mode); - if (!res) { - ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText(); - } - return res; -} - -void TFILECloser::Destroy(FILE* file) { - ::fclose(file); -} - -#ifdef _freebsd_ // fgetln -#define getline getline_alt_4test -#endif // _freebsd_ - -bool getline(TFILEPtr& f, TString& s) { - char buf[4096]; - char* buf_ptr; - if (s.capacity() > sizeof(buf)) { - s.resize(s.capacity()); - if ((buf_ptr = fgets(s.begin(), IntegerCast<int>(s.capacity()), f)) == nullptr) - return false; - } else { - if ((buf_ptr = fgets(buf, sizeof(buf), f)) == nullptr) - return false; - } - size_t buf_len = strlen(buf_ptr); - bool line_complete = buf_len && buf_ptr[buf_len - 1] == '\n'; - if (line_complete) - buf_len--; - if (buf_ptr == s.begin()) - s.resize(buf_len); - else - s.AssignNoAlias(buf, buf_len); - if (line_complete) - return true; - while (fgets(buf, sizeof(buf), f)) { - size_t buf_len2 = strlen(buf); - if (buf_len2 && buf[buf_len2 - 1] == '\n') { - buf[buf_len2 - 1] = 0; - s.append(buf, buf_len2 - 1); - return true; - } - s.append(buf, buf_len2); - } - return true; -} diff --git a/library/cpp/deprecated/fgood/fgood.h b/library/cpp/deprecated/fgood/fgood.h deleted file mode 100644 index 0aaf910c0f..0000000000 --- a/library/cpp/deprecated/fgood/fgood.h +++ /dev/null @@ -1,328 +0,0 @@ -#pragma once - -#include <util/system/yassert.h> -#include <util/system/defaults.h> -#include <util/generic/string.h> -#include <util/generic/yexception.h> -#include <util/generic/ptr.h> - -#include "fput.h" - -#include <cstdio> - -#include <fcntl.h> - -#ifdef _unix_ -extern "C" int __ungetc(int, FILE*); -#endif - -#if (!defined(__FreeBSD__) && !defined(__linux__) && !defined(_darwin_) && !defined(_cygwin_)) || defined(_bionic_) -#define feof_unlocked(_stream) feof(_stream) -#define ferror_unlocked(_stream) ferror(_stream) -#endif - -#ifndef _unix_ -#if defined(_MSC_VER) && (_MSC_VER < 1900) -#define getc_unlocked(_stream) (--(_stream)->_cnt >= 0 ? 0xff & *(_stream)->_ptr++ : _filbuf(_stream)) -#define putc_unlocked(_c, _stream) (--(_stream)->_cnt >= 0 ? 0xff & (*(_stream)->_ptr++ = (char)(_c)) : _flsbuf((_c), (_stream))) -#else -#define getc_unlocked(_stream) getc(_stream) -#define putc_unlocked(_c, _stream) putc(_c, _stream) -#endif -#endif - -inline bool fgood(FILE* f) { - return !feof_unlocked(f) && !ferror_unlocked(f); -} - -#ifdef _win32_ -// These functions will work only with static MSVC runtime linkage. For dynamic linkage, -// fseeki64.c and ftelli64.c from CRT sources should be included in project -extern "C" int __cdecl _fseeki64(FILE*, __int64, int); -extern "C" __int64 __cdecl _ftelli64(FILE*); - -inline i64 ftello(FILE* stream) { - return _ftelli64(stream); -} - -inline int fseeko(FILE* stream, i64 offset, int origin) { - return _fseeki64(stream, offset, origin); -} -#endif - -class TFILEPtr { -private: - enum { SHOULD_CLOSE = 1, - IS_PIPE = 2 }; - FILE* m_file; - int m_Flags; - TString Name; - -public: - TFILEPtr() noexcept { - m_file = nullptr; - m_Flags = 0; - } - TFILEPtr(const TString& name, const char* mode) { - m_file = nullptr; - m_Flags = 0; - open(name, mode); - } - TFILEPtr(const TFILEPtr& src) noexcept { - m_file = src.m_file; - m_Flags = 0; - } - TFILEPtr& operator=(const TFILEPtr& src) { - if (src.m_file != m_file) { - close(); - m_file = src.m_file; - m_Flags = 0; - } - return *this; - } - explicit TFILEPtr(FILE* f) noexcept { // take ownership - m_file = f; - m_Flags = SHOULD_CLOSE; - } - TFILEPtr& operator=(FILE* f) { // take ownership - if (f != m_file) { - close(); - m_file = f; - m_Flags = SHOULD_CLOSE; - } - return *this; - } - const TString& name() const { - return Name; - } - operator FILE*() const noexcept { - return m_file; - } - FILE* operator->() const noexcept { - return m_file; - } - bool operator!() const noexcept { - return m_file == nullptr; - } - bool operator!=(FILE* f) const noexcept { - return m_file != f; - } - bool operator==(FILE* f) const noexcept { - return m_file == f; - } - ~TFILEPtr() { - close(); - } - void Y_PRINTF_FORMAT(2, 3) check(const char* message, ...) const { - if (Y_UNLIKELY(!fgood(m_file))) { - va_list args; - va_start(args, message); - char buf[512]; - vsnprintf(buf, 512, message, args); - // XXX: errno is undefined here - ythrow yexception() << buf << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell(); - } - } - TFILEPtr& assign(FILE* f, const char* name = nullptr) { // take ownership and have a name - *this = f; - if (name) - Name = name; - return *this; - } - void open(const TString& name, const char* mode) { - Y_ASSERT(!name.empty()); - Y_ASSERT(m_file == nullptr); - m_file = ::fopen(name.data(), mode); - if (!m_file) - ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText(); - m_Flags = SHOULD_CLOSE; - Name = name; - } - void popen(const TString& command, const char* mode) { - Y_ASSERT(!command.empty()); - Y_ASSERT(m_file == nullptr); - m_file = ::popen(command.data(), mode); - if (!m_file) - ythrow yexception() << "can't execute \'" << command << "\' with mode \'" << mode << "\': " << LastSystemErrorText(); - m_Flags = IS_PIPE | SHOULD_CLOSE; - Name = command; - } - void close() { - if (m_file != nullptr && (m_Flags & SHOULD_CLOSE)) { - if ((m_Flags & IS_PIPE) ? ::pclose(m_file) : ::fclose(m_file)) { - m_file = nullptr; - m_Flags = 0; - if (!UncaughtException()) - ythrow yexception() << "can't close file " << Name.data() << ": " << LastSystemErrorText(); - } - } - m_file = nullptr; - m_Flags = 0; - Name.clear(); - } - size_t write(const void* buffer, size_t size, size_t count) const { - Y_ASSERT(m_file != nullptr); - size_t r = ::fwrite(buffer, size, count, m_file); - check("can't write %lu bytes", (unsigned long)size * count); - return r; - } - size_t read(void* buffer, size_t size, size_t count) const { - Y_ASSERT(m_file != nullptr); - size_t r = ::fread(buffer, size, count, m_file); - if (ferror_unlocked(m_file)) - ythrow yexception() << "can't read " << (unsigned long)size * count << " bytes: " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell(); - return r; - } - char* fgets(char* buffer, int size) const { - Y_ASSERT(m_file != nullptr); - char* r = ::fgets(buffer, size, m_file); - if (ferror_unlocked(m_file)) - ythrow yexception() << "can't read string of maximum size " << size << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell(); - return r; - } - void Y_PRINTF_FORMAT(2, 3) fprintf(const char* format, ...) { - Y_ASSERT(m_file != nullptr); - va_list args; - va_start(args, format); - vfprintf(m_file, format, args); - check("can't write"); - } - void seek(i64 offset, int origin) const { - Y_ASSERT(m_file != nullptr); -#if defined(_unix_) || defined(_win32_) - if (fseeko(m_file, offset, origin) != 0) -#else - Y_ASSERT(offset == (i64)(i32)offset); - if (::fseek(m_file, (long)offset, origin) != 0) -#endif - ythrow yexception() << "can't seek " << Name.data() << " by " << offset << ": " << LastSystemErrorText(); - } - i64 length() const; // uses various system headers -> in fileptr.cpp - - void setDirect() const { -#if !defined(_win_) && !defined(_darwin_) - if (!m_file) - ythrow yexception() << "file not open"; - if (fcntl(fileno(m_file), F_SETFL, O_DIRECT) == -1) - ythrow yexception() << "Cannot set O_DIRECT flag"; -#endif - } - - // for convenience - - i64 ftell() const noexcept { -#if defined(_unix_) || defined(_win32_) - return ftello(m_file); -#else - return ftell(m_file); -#endif - } - bool eof() const noexcept { - Y_ASSERT(m_file != nullptr); - return feof_unlocked(m_file) != 0; - } - int fputc(int c) { - Y_ASSERT(m_file != nullptr); - return putc_unlocked(c, m_file); - } - size_t fputs(const char* buffer) const { - return write(buffer, strlen(buffer), 1); - } - int fgetc() { - Y_ASSERT(m_file != nullptr); - return getc_unlocked(m_file); - } - int ungetc(int c) { - Y_ASSERT(m_file != nullptr); - return ::ungetc(c, m_file); - } - template <class T> - size_t fput(const T& a) { - Y_ASSERT(m_file != nullptr); - return ::fput(m_file, a); - } - template <class T> - size_t fget(T& a) { - Y_ASSERT(m_file != nullptr); - return ::fget(m_file, a); - } - size_t fsput(const char* s, size_t l) { - Y_ASSERT(m_file != nullptr); - return ::fsput(m_file, s, l); - } - size_t fsget(char* s, size_t l) { - Y_ASSERT(m_file != nullptr); - return ::fsget(m_file, s, l); - } - - void fflush() { - ::fflush(m_file); - } - - /* This block contains some TFile/TStream - compatible names */ - size_t Read(void* bufferIn, size_t numBytes) { - size_t r = fsget((char*)bufferIn, numBytes); - if (Y_UNLIKELY(ferror_unlocked(m_file))) - ythrow yexception() << "can't read " << numBytes << " bytes: " << LastSystemErrorText() << ", " << Name << " at offset " << (i64)ftell(); - return r; - } - void Write(const void* buffer, size_t numBytes) { - write(buffer, 1, numBytes); - } - i64 Seek(i64 offset, int origin /*SeekDir*/) { - seek(offset, origin); - return ftell(); - } - i64 GetPosition() const noexcept { - return ftell(); - } - i64 GetLength() const noexcept { - return length(); - } - bool ReadLine(TString& st); - - /* Similar to TAutoPtr::Release - return pointer and forget about it. */ - FILE* Release() noexcept { - FILE* result = m_file; - m_file = nullptr; - m_Flags = 0; - Name.clear(); - return result; - } -}; - -inline void fclose(TFILEPtr& F) { - F.close(); -} - -inline void fseek(const TFILEPtr& F, i64 offset, int whence) { - F.seek(offset, whence); -} - -#ifdef _freebsd_ // fgetln -inline bool getline(TFILEPtr& f, TString& s) { - size_t len; - char* buf = fgetln(f, &len); - if (!buf) - return false; - if (len && buf[len - 1] == '\n') - len--; - s.AssignNoAlias(buf, len); - return true; -} -#else -bool getline(TFILEPtr& f, TString& s); -#endif //_freebsd_ - -inline bool TFILEPtr::ReadLine(TString& st) { - return getline(*this, st); -} - -FILE* OpenFILEOrFail(const TString& name, const char* mode); - -//Should be used with THolder -struct TFILECloser { - static void Destroy(FILE* file); -}; - -using TFILEHolder = THolder<FILE, TFILECloser>; diff --git a/library/cpp/deprecated/fgood/fput.h b/library/cpp/deprecated/fgood/fput.h deleted file mode 100644 index 690b06332d..0000000000 --- a/library/cpp/deprecated/fgood/fput.h +++ /dev/null @@ -1,79 +0,0 @@ -#pragma once - -#include <util/system/defaults.h> -#include <util/system/valgrind.h> - -#include <cstdio> - -#ifdef __FreeBSD__ -#include <cstring> - -template <class T> -Y_FORCE_INLINE size_t fput(FILE* F, const T& a) { - if (Y_LIKELY(F->_w >= int(sizeof(a)))) { - memcpy(F->_p, &a, sizeof(a)); - F->_p += sizeof(a); - F->_w -= sizeof(a); - return 1; - } else { - return fwrite(&a, sizeof(a), 1, F); - } -} - -template <class T> -Y_FORCE_INLINE size_t fget(FILE* F, T& a) { - if (Y_LIKELY(F->_r >= int(sizeof(a)))) { - memcpy(&a, F->_p, sizeof(a)); - F->_p += sizeof(a); - F->_r -= sizeof(a); - return 1; - } else { - return fread(&a, sizeof(a), 1, F); - } -} - -inline size_t fsput(FILE* F, const char* s, size_t l) { - VALGRIND_CHECK_READABLE(s, l); - - if ((size_t)F->_w >= l) { - memcpy(F->_p, s, l); - F->_p += l; - F->_w -= l; - return l; - } else { - return fwrite(s, 1, l, F); - } -} - -inline size_t fsget(FILE* F, char* s, size_t l) { - if ((size_t)F->_r >= l) { - memcpy(s, F->_p, l); - F->_p += l; - F->_r -= l; - return l; - } else { - return fread(s, 1, l, F); - } -} -#else -template <class T> -Y_FORCE_INLINE size_t fput(FILE* F, const T& a) { - return fwrite(&a, sizeof(a), 1, F); -} - -template <class T> -Y_FORCE_INLINE size_t fget(FILE* F, T& a) { - return fread(&a, sizeof(a), 1, F); -} - -inline size_t fsput(FILE* F, const char* s, size_t l) { -#ifdef WITH_VALGRIND - VALGRIND_CHECK_READABLE(s, l); -#endif - return fwrite(s, 1, l, F); -} - -inline size_t fsget(FILE* F, char* s, size_t l) { - return fread(s, 1, l, F); -} -#endif diff --git a/library/cpp/deprecated/fgood/ya.make b/library/cpp/deprecated/fgood/ya.make deleted file mode 100644 index 2394f9ad7a..0000000000 --- a/library/cpp/deprecated/fgood/ya.make +++ /dev/null @@ -1,8 +0,0 @@ -LIBRARY() - -SRCS( - ffb.cpp - fgood.cpp -) - -END() diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index a00407491d..0000000000 --- a/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-mapped_file) -target_link_libraries(cpp-deprecated-mapped_file PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-mapped_file PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp -) diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 2bb5db017b..0000000000 --- a/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-mapped_file) -target_link_libraries(cpp-deprecated-mapped_file PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-mapped_file PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp -) diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 2bb5db017b..0000000000 --- a/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-mapped_file) -target_link_libraries(cpp-deprecated-mapped_file PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-mapped_file PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp -) diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.txt b/library/cpp/deprecated/mapped_file/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/deprecated/mapped_file/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt deleted file mode 100644 index a00407491d..0000000000 --- a/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-deprecated-mapped_file) -target_link_libraries(cpp-deprecated-mapped_file PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-deprecated-mapped_file PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp -) diff --git a/library/cpp/deprecated/mapped_file/mapped_file.cpp b/library/cpp/deprecated/mapped_file/mapped_file.cpp deleted file mode 100644 index b0e4511299..0000000000 --- a/library/cpp/deprecated/mapped_file/mapped_file.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "mapped_file.h" - -#include <util/generic/yexception.h> -#include <util/system/defaults.h> -#include <util/system/hi_lo.h> -#include <util/system/filemap.h> - -TMappedFile::TMappedFile(TFileMap* map, const char* dbgName) { - Map_ = map; - i64 len = Map_->Length(); - if (Hi32(len) != 0 && sizeof(size_t) <= sizeof(ui32)) - ythrow yexception() << "File '" << dbgName << "' mapping error: " << len << " too large"; - - Map_->Map(0, static_cast<size_t>(len)); -} - -TMappedFile::TMappedFile(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) - : Map_(nullptr) -{ - init(file, om, dbgName); -} - -void TMappedFile::precharge(size_t off, size_t size) const { - if (!Map_) - return; - - Map_->Precharge(off, size); -} - -void TMappedFile::init(const TString& name) { - THolder<TFileMap> map(new TFileMap(name)); - TMappedFile newFile(map.Get(), name.data()); - Y_UNUSED(map.Release()); - newFile.swap(*this); - newFile.term(); -} - -void TMappedFile::init(const TString& name, size_t length, TFileMap::EOpenMode om) { - THolder<TFileMap> map(new TFileMap(name, length, om)); - TMappedFile newFile(map.Get(), name.data()); - Y_UNUSED(map.Release()); - newFile.swap(*this); - newFile.term(); -} - -void TMappedFile::init(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) { - THolder<TFileMap> map(new TFileMap(file, om)); - TMappedFile newFile(map.Get(), dbgName); - Y_UNUSED(map.Release()); - newFile.swap(*this); - newFile.term(); -} - -void TMappedFile::init(const TString& name, TFileMap::EOpenMode om) { - THolder<TFileMap> map(new TFileMap(name, om)); - TMappedFile newFile(map.Get(), name.data()); - Y_UNUSED(map.Release()); - newFile.swap(*this); - newFile.term(); -} - -void TMappedFile::flush() { - Map_->Flush(); -} diff --git a/library/cpp/deprecated/mapped_file/ya.make b/library/cpp/deprecated/mapped_file/ya.make deleted file mode 100644 index 309341f1da..0000000000 --- a/library/cpp/deprecated/mapped_file/ya.make +++ /dev/null @@ -1,7 +0,0 @@ -LIBRARY() - -SRCS( - mapped_file.cpp -) - -END() diff --git a/library/cpp/geo/CMakeLists.darwin-x86_64.txt b/library/cpp/geo/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 87e48b4a71..0000000000 --- a/library/cpp/geo/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,24 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-geo) -target_link_libraries(library-cpp-geo PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(library-cpp-geo PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp -) diff --git a/library/cpp/geo/CMakeLists.linux-aarch64.txt b/library/cpp/geo/CMakeLists.linux-aarch64.txt deleted file mode 100644 index cdad35989a..0000000000 --- a/library/cpp/geo/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,25 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-geo) -target_link_libraries(library-cpp-geo PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(library-cpp-geo PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp -) diff --git a/library/cpp/geo/CMakeLists.linux-x86_64.txt b/library/cpp/geo/CMakeLists.linux-x86_64.txt deleted file mode 100644 index cdad35989a..0000000000 --- a/library/cpp/geo/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,25 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-geo) -target_link_libraries(library-cpp-geo PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(library-cpp-geo PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp -) diff --git a/library/cpp/geo/CMakeLists.txt b/library/cpp/geo/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/geo/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/geo/CMakeLists.windows-x86_64.txt b/library/cpp/geo/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 87e48b4a71..0000000000 --- a/library/cpp/geo/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,24 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-geo) -target_link_libraries(library-cpp-geo PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(library-cpp-geo PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp -) diff --git a/library/cpp/geo/bbox.cpp b/library/cpp/geo/bbox.cpp deleted file mode 100644 index aa4258ac22..0000000000 --- a/library/cpp/geo/bbox.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "bbox.h" diff --git a/library/cpp/geo/bbox.h b/library/cpp/geo/bbox.h deleted file mode 100644 index 7ec7e6f7d6..0000000000 --- a/library/cpp/geo/bbox.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once - -#include <util/generic/utility.h> - -#include "point.h" - -namespace NGeo { - - class TGeoBoundingBox { - public: - TGeoBoundingBox() - - = default; - - TGeoBoundingBox(const TGeoPoint& p1, const TGeoPoint& p2) { - MinX_ = Min(p1.Lon(), p2.Lon()); - MaxX_ = Max(p1.Lon(), p2.Lon()); - MinY_ = Min(p1.Lat(), p2.Lat()); - MaxY_ = Max(p1.Lat(), p2.Lat()); - } - - const double& GetMinX() const { - return MinX_; - } - - const double& GetMaxX() const { - return MaxX_; - } - - const double& GetMinY() const { - return MinY_; - } - - const double& GetMaxY() const { - return MaxY_; - } - - double Width() const { - return MaxX_ - MinX_; - } - - double Height() const { - return MaxY_ - MinY_; - } - - private: - double MinX_{std::numeric_limits<double>::quiet_NaN()}; - double MaxX_{std::numeric_limits<double>::quiet_NaN()}; - double MinY_{std::numeric_limits<double>::quiet_NaN()}; - double MaxY_{std::numeric_limits<double>::quiet_NaN()}; - }; - - inline bool operator==(const TGeoBoundingBox& a, const TGeoBoundingBox& b) { - return a.GetMinX() == b.GetMinX() && - a.GetMinY() == b.GetMinY() && - a.GetMaxX() == b.GetMaxX() && - a.GetMaxY() == b.GetMaxY(); - } -} // namespace NGeo diff --git a/library/cpp/geo/geo.cpp b/library/cpp/geo/geo.cpp deleted file mode 100644 index 37adc5c62c..0000000000 --- a/library/cpp/geo/geo.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "geo.h" diff --git a/library/cpp/geo/geo.h b/library/cpp/geo/geo.h deleted file mode 100644 index 1aebacab5c..0000000000 --- a/library/cpp/geo/geo.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include "bbox.h" -#include "point.h" -#include "polygon.h" -#include "size.h" -#include "util.h" -#include "window.h" diff --git a/library/cpp/geo/load_save_helper.cpp b/library/cpp/geo/load_save_helper.cpp deleted file mode 100644 index 13fa7ac6df..0000000000 --- a/library/cpp/geo/load_save_helper.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "load_save_helper.h" -#include <util/stream/input.h> - -void TSerializer<NGeo::TGeoPoint>::Save(IOutputStream* out, const NGeo::TGeoPoint& point) { - double lon = static_cast<double>(point.Lon()); - double lat = static_cast<double>(point.Lat()); - ::Save(out, lon); - ::Save(out, lat); -} - -void TSerializer<NGeo::TGeoPoint>::Load(IInputStream* in, NGeo::TGeoPoint& point) { - double lon = std::numeric_limits<double>::quiet_NaN(); - double lat = std::numeric_limits<double>::quiet_NaN(); - ::Load(in, lon); - ::Load(in, lat); - point = {lon, lat}; -} - -void TSerializer<NGeo::TGeoWindow>::Save(IOutputStream* out, const NGeo::TGeoWindow& window) { - const auto& center = window.GetCenter(); - const auto& size = window.GetSize(); - ::Save(out, center); - ::Save(out, size); -} - -void TSerializer<NGeo::TGeoWindow>::Load(IInputStream* in, NGeo::TGeoWindow& window) { - NGeo::TSize size{}; - NGeo::TGeoPoint center{}; - - ::Load(in, center); - ::Load(in, size); - - window = {center, size}; -} - -void TSerializer<NGeo::TSize>::Save(IOutputStream* out, const NGeo::TSize& size) { - double width = static_cast<double>(size.GetWidth()); - double height = static_cast<double>(size.GetHeight()); - ::Save(out, width); - ::Save(out, height); -} - -void TSerializer<NGeo::TSize>::Load(IInputStream* in, NGeo::TSize& size) { - double width = std::numeric_limits<double>::quiet_NaN(); - double height = std::numeric_limits<double>::quiet_NaN(); - ::Load(in, width); - ::Load(in, height); - size = {width, height}; -} diff --git a/library/cpp/geo/load_save_helper.h b/library/cpp/geo/load_save_helper.h deleted file mode 100644 index 4a5fceea18..0000000000 --- a/library/cpp/geo/load_save_helper.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include <library/cpp/geo/window.h> -#include <util/stream/input.h> -#include <util/ysaveload.h> - -template <> -struct TSerializer<NGeo::TGeoPoint> { - static void Save(IOutputStream*, const NGeo::TGeoPoint&); - static void Load(IInputStream*, NGeo::TGeoPoint&); -}; - -template <> -struct TSerializer<NGeo::TGeoWindow> { - static void Save(IOutputStream*, const NGeo::TGeoWindow&); - static void Load(IInputStream*, NGeo::TGeoWindow&); -}; - -template <> -struct TSerializer<NGeo::TSize> { - static void Save(IOutputStream*, const NGeo::TSize&); - static void Load(IInputStream*, NGeo::TSize&); -}; diff --git a/library/cpp/geo/point.cpp b/library/cpp/geo/point.cpp deleted file mode 100644 index 1d227c967f..0000000000 --- a/library/cpp/geo/point.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include "point.h" -#include "util.h" - -#include <util/generic/ylimits.h> -#include <util/generic/ymath.h> - -#include <cstdlib> -#include <utility> - -namespace NGeo { - namespace { - bool IsNonDegeneratePoint(double lon, double lat) { - return (MIN_LONGITUDE - WORLD_WIDTH < lon && lon < MAX_LONGITUDE + WORLD_WIDTH) && - (MIN_LATITUDE < lat && lat < MAX_LATITUDE); - } - } // namespace - - float TGeoPoint::Distance(const TGeoPoint& p) const noexcept { - auto dp = p - (*this); - return sqrtf(Sqr(GetWidthAtEquator(dp.GetWidth(), (Lat_ + p.Lat()) * 0.5)) + Sqr(dp.GetHeight())); - } - - bool TGeoPoint::IsPole() const noexcept { - return Lat_ <= MIN_LATITUDE || MAX_LATITUDE <= Lat_; - } - - bool TGeoPoint::IsVisibleOnMap() const noexcept { - return -VISIBLE_LATITUDE_BOUND <= Lat_ && Lat_ <= VISIBLE_LATITUDE_BOUND; - } - - TGeoPoint TGeoPoint::Parse(TStringBuf s, TStringBuf delimiter) { - const auto& [lon, lat] = PairFromString(s, delimiter); - Y_ENSURE_EX(IsNonDegeneratePoint(lon, lat), TBadCastException() << "Invalid point: (" << lon << ", " << lat << ")"); - return {lon, lat}; - } - - TMaybe<TGeoPoint> TGeoPoint::TryParse(TStringBuf s, TStringBuf delimiter) { - std::pair<double, double> lonLat; - if (!TryPairFromString(lonLat, s, delimiter)) { - return {}; - } - if (!IsNonDegeneratePoint(lonLat.first, lonLat.second)) { - return {}; - } - return TGeoPoint(lonLat.first, lonLat.second); - } - - TSize operator-(const TGeoPoint& p1, const TGeoPoint& p2) { - return {p1.Lon() - p2.Lon(), p1.Lat() - p2.Lat()}; - } - - /* - Conversion code was imported from http://wiki.yandex-team.ru/YandexMobile/maps/Algorithm/mapengine/coordtransforms - */ - namespace WGS84 { - /* Isometric to geodetic latitude parameters, default to WGS 84 */ - const double ab = 0.00335655146887969400; - const double bb = 0.00000657187271079536; - const double cb = 0.00000001764564338702; - const double db = 0.00000000005328478445; - - const double _a = R; - const double _f = 1.0 / 298.257223563; - const double _b = _a - _f * _a; - const double _e = sqrt(1 - pow(_b / _a, 2)); - const double _e2 = _e * _e; - const double _g = sqrt(1.0 - _e2); - const double _gR2 = _g * R * 2.0; - } // namespace WGS84 - - TGeoPoint MercatorToLL(TMercatorPoint pt) { - using namespace WGS84; - - // Y_ENSURE(pt.IsDefined(), "Point is not defined"); - - /* Isometric latitude*/ - const double xphi = PI / 2.0 - 2.0 * atan(exp(-pt.Y_ / R)); - - double latitude = xphi + ab * sin(2.0 * xphi) + bb * sin(4.0 * xphi) + cb * sin(6.0 * xphi) + db * sin(8.0 * xphi); - double longitude = pt.X_ / R; - - return TGeoPoint{Rad2deg(longitude), Rad2deg(latitude)}; - } - - double GetMercatorY(const TGeoPoint& ll) { - if (Y_UNLIKELY(ll.Lat() == 0.)) { - // shortcut for common case, avoiding floating point errors - return 0.; - } - if (Y_UNLIKELY(ll.Lat() == MIN_LATITUDE)) { - return -std::numeric_limits<double>::infinity(); - } - if (Y_UNLIKELY(ll.Lat() == MAX_LATITUDE)) { - return +std::numeric_limits<double>::infinity(); - } - double lat = Deg2rad(ll.Lat()); - double esinLat = WGS84::_e * sin(lat); - - double tan_temp = tan(PI / 4.e0 + lat / 2.e0); - double pow_temp = pow(tan(PI / 4.e0 + asin(esinLat) / 2), WGS84::_e); - double U = tan_temp / pow_temp; - return WGS84::R * log(U); - } - - TMercatorPoint LLToMercator(TGeoPoint ll) { - // Y_ENSURE(ll.IsValid(), "Point is not defined"); - - // Y_ENSURE(-90. <= ll.Lat() && ll.Lat() <= +90., "Latitude is out of range [-90, 90]"); - - double lon = Deg2rad(ll.Lon()); - double x = WGS84::R * lon; - double y = GetMercatorY(ll); - - return TMercatorPoint{x, y}; - } - - double GeodeticDistance(TGeoPoint p1, TGeoPoint p2) { - using namespace WGS84; - - constexpr double deg2HalfRad = PI / 360.0; - - const double lon1Half = p1.Lon() * deg2HalfRad; - const double lon2Half = p2.Lon() * deg2HalfRad; - - const double lat1Half = p1.Lat() * deg2HalfRad; - const double lat2Half = p2.Lat() * deg2HalfRad; - - const double diffLatHalf = fabs(lat1Half - lat2Half); - const double diffLonHalf = fabs(lon1Half - lon2Half); - - if (diffLatHalf < 0.5e-8 && diffLonHalf < 0.5e-8) { - return 0; - } - - double s = sin(lat1Half + lat2Half); - double s2 = s * s; - double m = _gR2 / (1.0 - _e2 * s2); - - const double w = sin(diffLatHalf); - const double w2 = w * w; - const double cc = Max(1.0 - s2 - w2, 0.0); // cos(lat1Half * 2) * cos(lat2Half * 2) - const double z = sin(diffLonHalf); - - return m * asin(sqrt(w2 + cc * z * z)); - } -} // namespace NGeo diff --git a/library/cpp/geo/point.h b/library/cpp/geo/point.h deleted file mode 100644 index 70c91ab2dd..0000000000 --- a/library/cpp/geo/point.h +++ /dev/null @@ -1,198 +0,0 @@ -#pragma once - -#include <util/generic/string.h> -#include <util/stream/output.h> -#include <util/string/cast.h> -#include <util/generic/maybe.h> - -#include <algorithm> -#include <cmath> - -namespace NGeo { - class TSize; - - class TGeoPoint { - public: - TGeoPoint(double lon, double lat) noexcept - : Lon_(lon) - , Lat_(lat) - { - } - - TGeoPoint() noexcept - : Lon_(BadX) - , Lat_(BadY) - { - } - - double Lon() const noexcept { - return Lon_; - } - - double Lat() const noexcept { - return Lat_; - } - - float Distance(const TGeoPoint& p) const noexcept; - - void swap(TGeoPoint& p) noexcept { - std::swap(Lon_, p.Lon_); - std::swap(Lat_, p.Lat_); - } - - bool IsValid() const { - return (Lon_ != BadX) && (Lat_ != BadY); - } - - /// Returns true if the point represents either North or South Pole - bool IsPole() const noexcept; - - /// Returns true if the point may be shown on the Yandex Map (fits into the valid range of latitudes) - bool IsVisibleOnMap() const noexcept; - - bool operator!() const { - return !IsValid(); - } - - TString ToCgiStr() const { - return ToString(); - } - - TString ToString(const char* delimiter = ",") const { - return TString::Join(::ToString(Lon_), delimiter, ::ToString(Lat_)); - } - - /** - * \note Parsing functions work is safe way. They discard invalid points: - * 1) on the Poles and 'beyond' the Poles; - * 2) not belonging to the 'main' world and +/-1 world to the left or to the right. - * If you need such cases, construct the TGeoPoint manually. - */ - - /// Throws TBadCastException on error - static TGeoPoint Parse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); - - /// Returns Nothing() on error - static TMaybe<TGeoPoint> TryParse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); - - private: - double Lon_; - double Lat_; - - static constexpr double BadX{361.}; - static constexpr double BadY{181.}; - }; - - double GeodeticDistance(TGeoPoint p1, TGeoPoint p2); - - /** - * \class TMercatorPoint - * - * Represents a point in EPSG:3395 projection - * (WGS 84 / World Mercator) - */ - class TMercatorPoint { - public: - friend class TMercatorWindow; - friend TGeoPoint MercatorToLL(TMercatorPoint); - - /** - * Constructs a point with the given coordinates. - */ - constexpr TMercatorPoint(double x, double y) noexcept - : X_{x} - , Y_{y} - { - } - - /** - * Constructs a point with two NaN coordinates. - * - * Should not be called directly. - * If your `point` variable might be undefined, - * declare it explicitly as TMaybe<TMercatorPoint>. - */ - constexpr TMercatorPoint() noexcept - : X_{std::numeric_limits<double>::quiet_NaN()} - , Y_{std::numeric_limits<double>::quiet_NaN()} - { - } - - /** - * Returns the X_ coordinate. - * - * The line X_ == 0 corresponds to the Prime meridian. - */ - constexpr double X() const noexcept { - return X_; - } - - /** - * Returns the Y_ coordinate. - * - * The line Y_ == 0 corresponds to the Equator. - */ - constexpr double Y() const noexcept { - return Y_; - } - - private: - bool IsDefined() const noexcept { - return !std::isnan(X_) && !std::isnan(Y_); - } - - private: - double X_; - double Y_; - }; - - /** - * Operators - */ - - inline bool operator==(const TGeoPoint& p1, const TGeoPoint& p2) { - return p1.Lon() == p2.Lon() && p1.Lat() == p2.Lat(); - } - - inline bool operator==(const TMercatorPoint& p1, const TMercatorPoint& p2) { - return p1.X() == p2.X() && p1.Y() == p2.Y(); - } - - inline bool operator<(const TGeoPoint& p1, const TGeoPoint& p2) { - if (p1.Lon() != p2.Lon()) { - return p1.Lon() < p2.Lon(); - } - return p1.Lat() < p2.Lat(); - } - - /** - * Conversion - */ - - namespace WGS84 { - /* Radius of reference ellipsoid, default to WGS 84 */ - const double R = 6378137.0; - } // namespace WGS84 - - using TPointLL = TGeoPoint; - using TPointXY = TMercatorPoint; - - TGeoPoint MercatorToLL(TMercatorPoint); - TMercatorPoint LLToMercator(TGeoPoint); - - /** - * Input/output - */ - - TSize operator-(const TGeoPoint& p1, const TGeoPoint& p2); -} // namespace NGeo - -template <> -inline void Out<NGeo::TGeoPoint>(IOutputStream& o, const NGeo::TGeoPoint& p) { - o << '[' << p.Lon() << ", " << p.Lat() << ']'; -} - -template <> -inline void Out<NGeo::TMercatorPoint>(IOutputStream& o, const NGeo::TMercatorPoint& p) { - o << '[' << p.X() << ", " << p.Y() << ']'; -} diff --git a/library/cpp/geo/polygon.cpp b/library/cpp/geo/polygon.cpp deleted file mode 100644 index 44e5c38b5f..0000000000 --- a/library/cpp/geo/polygon.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "polygon.h" -namespace NGeo { - TMaybe<TGeoPolygon> TGeoPolygon::TryParse(TStringBuf s, TStringBuf llDelimiter, TStringBuf pointsDelimiter) { - TVector<TGeoPoint> points; - - for (const auto& pointString : StringSplitter(s).SplitByString(pointsDelimiter).SkipEmpty()) { - auto curPoint = TGeoPoint::TryParse(pointString.Token(), llDelimiter); - if (!curPoint) { - return {}; - } - points.push_back(*curPoint); - } - - if (points.size() < 3) { - return {}; - } - - return TGeoPolygon(points); - } - - TGeoPolygon TGeoPolygon::Parse(TStringBuf s, TStringBuf llDelimiter, TStringBuf pointsDelimiter) { - auto res = TGeoPolygon::TryParse(s, llDelimiter, pointsDelimiter); - if (!res) { - ythrow yexception() << "Can't parse polygon from input string: " << s; - } - return *res; - } -} // namespace NGeo diff --git a/library/cpp/geo/polygon.h b/library/cpp/geo/polygon.h deleted file mode 100644 index 1528345fec..0000000000 --- a/library/cpp/geo/polygon.h +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once - -#include "point.h" -#include "window.h" - -#include <util/ysaveload.h> -#include <util/generic/algorithm.h> -#include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/generic/yexception.h> -#include <util/stream/output.h> -#include <util/string/cast.h> -#include <util/string/join.h> -#include <util/string/split.h> - -#include <algorithm> -#include <functional> - -namespace NGeo { - class TGeoPolygon { - private: - TVector<TGeoPoint> Points_; - TGeoWindow Window_; - - public: - TGeoPolygon() = default; - - explicit TGeoPolygon(const TVector<TGeoPoint>& points) - : Points_(points) - { - CalcWindow(); - } - - const TVector<TGeoPoint>& GetPoints() const { - return Points_; - } - - const TGeoWindow& GetWindow() const { - return Window_; - } - - void swap(TGeoPolygon& o) noexcept { - Points_.swap(o.Points_); - Window_.swap(o.Window_); - } - - bool IsValid() const noexcept { - return !Points_.empty() && Window_.IsValid(); - } - - bool operator!() const { - return !IsValid(); - } - - /** - * try to parse TGeoPolygon from string which stores points - * coords are separated by llDelimiter, points are separated by pointsDelimiter - * return parsed TGeoPolygon on success, otherwise throw exception - */ - static TGeoPolygon Parse(TStringBuf s, TStringBuf llDelimiter = ",", TStringBuf pointsDelimiter = TStringBuf(" ")); - - /** - * try to parse TGeoPolygon from string which stores points - * coords are separated by llDelimiter, points are separated by pointsDelimiter - * return TMaybe of parsed TGeoPolygon on success, otherwise return empty TMaybe - */ - static TMaybe<TGeoPolygon> TryParse(TStringBuf s, TStringBuf llDelimiter = ",", TStringBuf pointsDelimiter = TStringBuf(" ")); - - private: - void CalcWindow() { - auto getLon = std::mem_fn(&TGeoPoint::Lon); - double lowerX = MinElementBy(Points_.begin(), Points_.end(), getLon)->Lon(); - double upperX = MaxElementBy(Points_.begin(), Points_.end(), getLon)->Lon(); - - auto getLat = std::mem_fn(&TGeoPoint::Lat); - double lowerY = MinElementBy(Points_.begin(), Points_.end(), getLat)->Lat(); - double upperY = MaxElementBy(Points_.begin(), Points_.end(), getLat)->Lat(); - - Window_ = TGeoWindow{TGeoPoint{lowerX, lowerY}, TGeoPoint{upperX, upperY}}; - } - }; - - inline bool operator==(const TGeoPolygon& p1, const TGeoPolygon& p2) { - return p1.GetPoints() == p2.GetPoints(); - } - - inline bool operator!=(const TGeoPolygon& p1, const TGeoPolygon& p2) { - return !(p1 == p2); - } -} // namespace NGeo diff --git a/library/cpp/geo/size.cpp b/library/cpp/geo/size.cpp deleted file mode 100644 index f1bd8ab763..0000000000 --- a/library/cpp/geo/size.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "size.h" - -#include "util.h" - -namespace NGeo { - const double TSize::BadWidth = -1.; - const double TSize::BadHeight = -1.; - - namespace { - bool IsNonNegativeSize(double width, double height) { - return width >= 0. && height >= 0.; - } - } // namespace - - TSize TSize::Parse(TStringBuf s, TStringBuf delimiter) { - const auto& [width, height] = PairFromString(s, delimiter); - Y_ENSURE_EX(IsNonNegativeSize(width, height), TBadCastException() << "Negative window size"); - return {width, height}; - } - - TMaybe<TSize> TSize::TryParse(TStringBuf s, TStringBuf delimiter) { - std::pair<double, double> lonLat; - if (!TryPairFromString(lonLat, s, delimiter)) { - return {}; - } - if (!IsNonNegativeSize(lonLat.first, lonLat.second)) { - return {}; - } - return TSize{lonLat.first, lonLat.second}; - } -} // namespace NGeo diff --git a/library/cpp/geo/size.h b/library/cpp/geo/size.h deleted file mode 100644 index b619c6d899..0000000000 --- a/library/cpp/geo/size.h +++ /dev/null @@ -1,93 +0,0 @@ -#pragma once - -#include <util/generic/string.h> -#include <util/stream/output.h> -#include <util/string/cast.h> - -namespace NGeo { - class TSize { - public: - TSize(double width, double height) noexcept - : Width_(width) - , Height_(height) - { - } - - explicit TSize(double size) noexcept - : Width_(size) - , Height_(size) - { - } - - TSize() noexcept - : Width_(BadWidth) - , Height_(BadHeight) - { - } - - double GetWidth() const noexcept { - return Width_; - } - - double GetHeight() const noexcept { - return Height_; - } - - void swap(TSize& s) noexcept { - std::swap(Width_, s.Width_); - std::swap(Height_, s.Height_); - } - - bool IsValid() const { - return (Width_ != BadWidth) && (Height_ != BadHeight); - } - - void Stretch(double multiplier) { - Width_ *= multiplier; - Height_ *= multiplier; - } - - void Inflate(double additionX, double additionY) { - Width_ += additionX; - Height_ += additionY; - } - - bool operator!() const { - return !IsValid(); - } - - TString ToCgiStr() const { - TString s = ToString(Width_); - s.append(','); - s.append(ToString(Height_)); - return s; - } - - /** - * try to parse TSize - * return parsed TSize on success, otherwise throw exception - */ - static TSize Parse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); - - /** - * try to parse TSize - * return TMaybe of parsed TSize on success, otherwise return empty TMaybe - */ - static TMaybe<TSize> TryParse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); - - private: - double Width_; - double Height_; - static const double BadWidth; - static const double BadHeight; - }; - - inline bool operator==(const TSize& p1, const TSize& p2) { - return p1.GetHeight() == p2.GetHeight() && p1.GetWidth() == p2.GetWidth(); - } -} // namespace NGeo - -template <> -inline void Out<NGeo::TSize>(IOutputStream& o, const NGeo::TSize& s) { - o << '<' << s.GetWidth() << ", " << s.GetHeight() << '>'; -} diff --git a/library/cpp/geo/style/ya.make b/library/cpp/geo/style/ya.make deleted file mode 100644 index f72d50f27e..0000000000 --- a/library/cpp/geo/style/ya.make +++ /dev/null @@ -1,8 +0,0 @@ -CPP_STYLE_TEST_14() - -STYLE( - library/cpp/geo/**/*.cpp - library/cpp/geo/**/*.h -) - -END() diff --git a/library/cpp/geo/ut/load_save_helper_ut.cpp b/library/cpp/geo/ut/load_save_helper_ut.cpp deleted file mode 100644 index f251f56630..0000000000 --- a/library/cpp/geo/ut/load_save_helper_ut.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "load_save_helper.h" -#include "point.h" - -#include <library/cpp/testing/unittest/registar.h> -#include <util/stream/str.h> -#include <util/ysaveload.h> - -namespace { - void CheckSave(const NGeo::TGeoPoint& point) { - TStringStream output; - ::Save(&output, point); - TStringStream answer; - ::Save(&answer, static_cast<double>(point.Lon())); - ::Save(&answer, static_cast<double>(point.Lat())); - UNIT_ASSERT_EQUAL(output.Str(), answer.Str()); - } - - void CheckLoad(const double x, const double y) { - TStringStream input; - ::Save(&input, x); - ::Save(&input, y); - NGeo::TGeoPoint output; - ::Load(&input, output); - - const double eps = 1.E-8; - UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lon()), x, eps); - UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lat()), y, eps); - } - - void CheckLoadAfterSavePointLL(double x, double y) { - NGeo::TGeoPoint answer = {x, y}; - TStringStream iostream; - ::Save(&iostream, answer); - NGeo::TGeoPoint output; - ::Load(&iostream, output); - - const double eps = 1.E-8; - UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lon()), x, eps); - UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lat()), y, eps); - } - - void CheckLoadAfterSaveWindowLL(NGeo::TGeoPoint center, NGeo::TSize size) { - NGeo::TGeoWindow answer = {center, size}; - TStringStream iostream; - ::Save(&iostream, answer); - NGeo::TGeoWindow output; - ::Load(&iostream, output); - UNIT_ASSERT_EQUAL(output.GetCenter(), answer.GetCenter()); - UNIT_ASSERT_EQUAL(output.GetSize(), answer.GetSize()); - } -} // namespace - -Y_UNIT_TEST_SUITE(TSaveLoadForPointLL) { - Y_UNIT_TEST(TestSave) { - // {27.561481, 53.902496} Minsk Lon and Lat - CheckSave({27.561481, 53.902496}); - CheckSave({-27.561481, 53.902496}); - CheckSave({27.561481, -53.902496}); - CheckSave({-27.561481, -53.902496}); - } - - Y_UNIT_TEST(TestLoad) { - CheckLoad(27.561481, 53.902496); - CheckLoad(-27.561481, 53.902496); - CheckLoad(27.561481, -53.902496); - CheckLoad(-27.561481, -53.902496); - } - - Y_UNIT_TEST(TestSaveLoad) { - CheckLoadAfterSavePointLL(27.561481, 53.902496); - CheckLoadAfterSavePointLL(-27.561481, 53.902496); - CheckLoadAfterSavePointLL(27.561481, -53.902496); - CheckLoadAfterSavePointLL(-27.561481, -53.902496); - CheckLoadAfterSavePointLL(0, 0); - } -} - -Y_UNIT_TEST_SUITE(TSaveLoadForWindowLL) { - Y_UNIT_TEST(TestSave) { - CheckLoadAfterSaveWindowLL({27.561481, 53.902496}, {1, 2}); - CheckLoadAfterSaveWindowLL({27.561481, 53.902496}, {2, 1}); - CheckLoadAfterSaveWindowLL({-27.561481, 53.902496}, {1, 2}); - CheckLoadAfterSaveWindowLL({-27.561481, 53.902496}, {2, 1}); - CheckLoadAfterSaveWindowLL({27.561481, -53.902496}, {1, 2}); - CheckLoadAfterSaveWindowLL({27.561481, -53.902496}, {2, 1}); - CheckLoadAfterSaveWindowLL({-27.561481, -53.902496}, {1, 2}); - CheckLoadAfterSaveWindowLL({-27.561481, -53.902496}, {2, 1}); - CheckLoadAfterSaveWindowLL({0, 0}, {0, 0}); - } -} diff --git a/library/cpp/geo/ut/point_ut.cpp b/library/cpp/geo/ut/point_ut.cpp deleted file mode 100644 index bbf8f32cea..0000000000 --- a/library/cpp/geo/ut/point_ut.cpp +++ /dev/null @@ -1,171 +0,0 @@ -#include "point.h" - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NGeo; - -namespace { - void CheckMercator(TGeoPoint input, TMercatorPoint answer, double eps = 1.e-8) { - auto output = LLToMercator(input); - UNIT_ASSERT_DOUBLES_EQUAL(output.X(), answer.X(), eps); - UNIT_ASSERT_DOUBLES_EQUAL(output.Y(), answer.Y(), eps); - } - - void CheckGeo(TMercatorPoint input, TGeoPoint answer, double eps = 1.e-8) { - auto output = MercatorToLL(input); - UNIT_ASSERT_DOUBLES_EQUAL(output.Lon(), answer.Lon(), eps); - UNIT_ASSERT_DOUBLES_EQUAL(output.Lat(), answer.Lat(), eps); - } -} // namespace - -Y_UNIT_TEST_SUITE(TPointTest) { - Y_UNIT_TEST(TestGeoPointFromString) { - UNIT_ASSERT_EQUAL(TGeoPoint::Parse("0.15,0.67"), - TGeoPoint(0.15, 0.67)); - UNIT_ASSERT_EQUAL(TGeoPoint::Parse("-52.,-27."), - TGeoPoint(-52., -27.)); - UNIT_ASSERT_EQUAL(TGeoPoint::Parse("0.15 0.67", " "), - TGeoPoint(0.15, 0.67)); - UNIT_ASSERT_EQUAL(TGeoPoint::Parse("-27. -52", " "), - TGeoPoint(-27., -52.)); - UNIT_ASSERT_EQUAL(TGeoPoint::Parse("182,55"), - TGeoPoint(182., 55.)); - - // current behavior - UNIT_ASSERT(TGeoPoint::TryParse(TString{}).Empty()); - UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("Hello,world"), TBadCastException); - UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("640 17", " "), TBadCastException); - UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("50.,100"), TBadCastException); - UNIT_ASSERT_EQUAL(TGeoPoint::Parse(" 0.01, 0.01"), TGeoPoint(0.01, 0.01)); - UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("0.01 , 0.01"), TBadCastException); - UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("0.01, 0.01 "), TBadCastException); - } -} - -Y_UNIT_TEST_SUITE(TConversionTest) { - Y_UNIT_TEST(TestConversionGeoToMercator) { - // test data is obtained using PostGIS: - // SELECT ST_AsText(ST_Transform(ST_SetSRID(ST_MakePoint(lon, lat), 4326), 3395)) - - CheckMercator({27.547028, 53.893962}, {3066521.12982805, 7115552.47353991}); - CheckMercator({-70.862782, -53.002613}, {-7888408.80843475, -6949331.55685883}); - CheckMercator({37.588536, 55.734004}, {4184336.68718463, 7470303.90973406}); - CheckMercator({0., 0.}, {0, 0}); - } - - Y_UNIT_TEST(TestConversionMercatorToGeo) { - // test data is obtained using PostGIS: - // SELECT ST_AsText(ST_Transform(ST_SetSRID(ST_MakePoint(X, Y), 3395), 4326)) - - CheckGeo({3066521, 7115552}, {27.5470268337348, 53.8939594873943}); - CheckGeo({-7888409, -6949332}, {-70.8627837208599, -53.0026154014032}); - CheckGeo({4184336, 7470304}, {37.5885298269154, 55.734004457522}); - CheckGeo({0, 0}, {0., 0.}); - } - - Y_UNIT_TEST(TestExactConversion) { - // Zero maps to zero with no epsilons - UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0., 0.}).X(), 0.); - UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0., 0.}).Y(), 0.); - UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0., 0.}).Lon(), 0.); - UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0., 0.}).Lat(), 0.); - } - - Y_UNIT_TEST(TestPoles) { - UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0, 90}).Y(), std::numeric_limits<double>::infinity()); - UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0, -90}).Y(), -std::numeric_limits<double>::infinity()); - - UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0, std::numeric_limits<double>::infinity()}).Lat(), 90.); - UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0, -std::numeric_limits<double>::infinity()}).Lat(), -90.); - } - - Y_UNIT_TEST(TestNearPoles) { - // Reference values were obtained using mpmath library (floating-point arithmetic with arbitrary precision) - CheckMercator({0., 89.9}, {0., 44884542.157175040}, 1.e-6); - CheckMercator({0., 89.99}, {0., 59570746.872518855}, 1.e-5); - CheckMercator({0., 89.999}, {0., 74256950.065173316}, 1.e-4); - CheckMercator({0., 89.9999}, {0., 88943153.242600886}, 1.e-3); - CheckMercator({0., 89.99999}, {0., 103629356.41987618}, 1.e-1); - CheckMercator({0., 89.999999}, {0., 118315559.59714996}, 1.e-1); - CheckMercator({0., 89.9999999}, {0., 133001762.77442373}, 1.e-0); - CheckMercator({0., 89.99999999}, {0., 147687965.95169749}, 1.e+1); - CheckMercator({0., 89.9999999999999857891452847979962825775146484375}, {0., 233563773.75716050}, 1.e+7); - - CheckGeo({0., 233563773.75716050}, {0., 89.9999999999999857891452847979962825775146484375}, 1.e-15); - CheckGeo({0., 147687965.95169749}, {0., 89.99999999}, 1.e-13); - CheckGeo({0., 133001762.77442373}, {0., 89.9999999}, 1.e-13); - CheckGeo({0., 118315559.59714996}, {0., 89.999999}, 1.e-13); - CheckGeo({0., 103629356.41987618}, {0., 89.99999}, 1.e-13); - CheckGeo({0., 88943153.242600886}, {0., 89.9999}, 1.e-13); - CheckGeo({0., 74256950.065173316}, {0., 89.999}, 1.e-13); - CheckGeo({0., 59570746.872518855}, {0., 89.99}, 1.e-13); - CheckGeo({0., 44884542.157175040}, {0., 89.9}, 1.e-13); - } - - Y_UNIT_TEST(TestVisibleRange) { - UNIT_ASSERT(TGeoPoint(37., 55.).IsVisibleOnMap()); - UNIT_ASSERT(!TGeoPoint(37., 86.).IsVisibleOnMap()); - UNIT_ASSERT(TGeoPoint(37., -85.).IsVisibleOnMap()); - UNIT_ASSERT(!TGeoPoint(37., -90.).IsVisibleOnMap()); - } - - Y_UNIT_TEST(TestRoundTripGeoMercatorGeo) { - auto check = [](double longitude, double latitude) { - auto pt = MercatorToLL(LLToMercator(TGeoPoint{longitude, latitude})); - UNIT_ASSERT_DOUBLES_EQUAL_C(longitude, pt.Lon(), 1.e-12, "longitude for point (" << longitude << ", " << latitude << ")"); - UNIT_ASSERT_DOUBLES_EQUAL_C(latitude, pt.Lat(), 1.e-8, "latitude for point (" << longitude << ", " << latitude << ")"); - }; - - check(37., 55.); - check(0.1, 0.1); - check(0.2, 89.9); - check(181., -42.); - check(362., -43.); - check(-183., -87.); - check(1000., -77.); - } - - Y_UNIT_TEST(TestRoundTripMercatorGeoMercator) { - auto check = [](double x, double y) { - auto pt = LLToMercator(MercatorToLL(TMercatorPoint{x, y})); - UNIT_ASSERT_DOUBLES_EQUAL_C(x, pt.X(), 1.e-4, "x for point (" << x << ", " << y << ")"); - UNIT_ASSERT_DOUBLES_EQUAL_C(y, pt.Y(), 1.e-4, "y for point (" << x << ", " << y << ")"); - }; - - check(100., 200.); - check(-123456., 654321.); - check(5.e7, 1.23456789); - check(1.e8, -2.e7); - } -} - -Y_UNIT_TEST_SUITE(TestDistance) { - Y_UNIT_TEST(TestGeodeticDistance) { - const TGeoPoint minsk(27.55, 53.916667); - const TGeoPoint moscow(37.617778, 55.755833); - const TGeoPoint newYork(-73.994167, 40.728333); - const TGeoPoint sydney(151.208333, -33.869444); - - const double eps = 1.E-6; // absolute error - - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, minsk), 0.0, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, moscow), 677190.08871321136, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, newYork), 7129091.7536358498, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, sydney), 15110861.267782301, eps); - - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, minsk), 677190.08871321136, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, moscow), 0.0, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, newYork), 7519517.2469277605, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, sydney), 14467193.188083574, eps); - - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, minsk), 7129091.7536358498, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, moscow), 7519517.2469277605, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, newYork), 0.0, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, sydney), 15954603.669226252, eps); - - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, minsk), 15110861.267782301, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, moscow), 14467193.188083574, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, newYork), 15954603.669226252, eps); - UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, sydney), 0.0, eps); - } -} diff --git a/library/cpp/geo/ut/polygon_ut.cpp b/library/cpp/geo/ut/polygon_ut.cpp deleted file mode 100644 index cd9dee9759..0000000000 --- a/library/cpp/geo/ut/polygon_ut.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "polygon.h" - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NGeo; - -Y_UNIT_TEST_SUITE(TGeoPolygonTest) { - Y_UNIT_TEST(TestEmptyPolygon) { - TGeoPolygon empty; - UNIT_ASSERT(!empty); - UNIT_ASSERT(!empty.IsValid()); - } - - Y_UNIT_TEST(TestPolygon) { - TGeoPolygon polygon({{1., 2.}, {2., 1.}, {2., 4.}, {1., 3.}}); - UNIT_ASSERT(polygon.IsValid()); - UNIT_ASSERT_EQUAL(polygon.GetWindow(), - TGeoWindow(TGeoPoint(1., 1.), TGeoPoint(2., 4.))); - } - - Y_UNIT_TEST(TestParse) { - UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23,5.67 7.89,10.11 11.10,9.87"}), - NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}})); - UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23,5.67 7.89,10.11 11.10,9.87 6.54,3.21"}), - NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}, {6.54, 3.21}})); - - UNIT_ASSERT(TGeoPolygon::TryParse(TString{"1.23,5.67 7.89,10.11"}).Empty()); - UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23+5.67~7.89+10.11~11.10+9.87"}, "+", "~"), - NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}})); - - UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23+5.67+~7.89+10.11+~11.10+9.87"}, "+", "+~"), - NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}})); - } -} diff --git a/library/cpp/geo/ut/size_ut.cpp b/library/cpp/geo/ut/size_ut.cpp deleted file mode 100644 index 41b4a2c257..0000000000 --- a/library/cpp/geo/ut/size_ut.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "size.h" - -#include <library/cpp/testing/unittest/registar.h> -#include <util/generic/maybe.h> - -using namespace NGeo; - -Y_UNIT_TEST_SUITE(TSizeTest) { - Y_UNIT_TEST(TestFromString) { - UNIT_ASSERT_EQUAL(TSize::Parse("0.15,0.67"), TSize(0.15, 0.67)); - UNIT_ASSERT_EQUAL(TSize::Parse("0.15 0.67", " "), TSize(0.15, 0.67)); - - UNIT_ASSERT_EXCEPTION(TSize::Parse(""), TBadCastException); - UNIT_ASSERT_EXCEPTION(TSize::Parse("Hello,world"), TBadCastException); - UNIT_ASSERT_EXCEPTION(TSize::Parse("-1,-1"), TBadCastException); - - UNIT_ASSERT_EQUAL(TSize::Parse("424242 50", " "), TSize(424242., 50.)); - UNIT_ASSERT_EQUAL(TSize::Parse("50.,424242"), TSize(50., 424242.)); - UNIT_ASSERT_EQUAL(TSize::Parse(" 0.01, 0.01"), TSize(0.01, 0.01)); - UNIT_ASSERT_EXCEPTION(TSize::Parse("0.01 ,0.01"), TBadCastException); - UNIT_ASSERT_EXCEPTION(TSize::Parse("0.01,0.01 "), TBadCastException); - } - - Y_UNIT_TEST(TestTryFromString) { - UNIT_ASSERT(TSize::TryParse("1,2")); - UNIT_ASSERT(!TSize::TryParse("-1,-2")); - UNIT_ASSERT(!TSize::TryParse("1,2a")); - } -} diff --git a/library/cpp/geo/ut/util_ut.cpp b/library/cpp/geo/ut/util_ut.cpp deleted file mode 100644 index ebd86cfbd8..0000000000 --- a/library/cpp/geo/ut/util_ut.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include <library/cpp/geo/util.h> - -#include <library/cpp/testing/unittest/registar.h> - -using namespace NGeo; - -Y_UNIT_TEST_SUITE(TGeoUtilTest) { - Y_UNIT_TEST(TestPointFromString) { - UNIT_ASSERT_EQUAL(PairFromString("27.56,53.90"), (std::pair<double, double>(27.56, 53.90))); - UNIT_ASSERT_EQUAL(PairFromString("27.56 53.90", " "), (std::pair<double, double>(27.56, 53.90))); - UNIT_ASSERT_EQUAL(PairFromString("27.56@@53.90", "@@"), (std::pair<double, double>(27.56, 53.90))); - UNIT_ASSERT_EXCEPTION(PairFromString("27.56@@53.90", "@"), TBadCastException); - UNIT_ASSERT_EXCEPTION(PairFromString(""), TBadCastException); - } - - Y_UNIT_TEST(TestTryPointFromString) { - std::pair<double, double> point; - - UNIT_ASSERT(TryPairFromString(point, "27.56,53.90")); - UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90))); - - UNIT_ASSERT(TryPairFromString(point, "27.56 53.90", " ")); - UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90))); - - UNIT_ASSERT(TryPairFromString(point, "27.56@@53.90", "@@")); - UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90))); - - UNIT_ASSERT(!TryPairFromString(point, "27.56@@53.90", "@")); - UNIT_ASSERT(!TryPairFromString(point, "")); - } - - Y_UNIT_TEST(TestVisibleMapBound) { - const double expectedLat = MercatorToLL(TMercatorPoint(0., LLToMercator(TGeoPoint(180., 0.)).X())).Lat(); - UNIT_ASSERT_DOUBLES_EQUAL(VISIBLE_LATITUDE_BOUND, expectedLat, 1.e-14); - } -} diff --git a/library/cpp/geo/ut/window_ut.cpp b/library/cpp/geo/ut/window_ut.cpp deleted file mode 100644 index 194fb4e735..0000000000 --- a/library/cpp/geo/ut/window_ut.cpp +++ /dev/null @@ -1,547 +0,0 @@ -#include "window.h" -#include <library/cpp/testing/unittest/registar.h> -#include <util/generic/ymath.h> - -using namespace NGeo; - -namespace { - constexpr double DEFAULT_EPS = 1.E-5; - - bool CheckGeoPointEqual(const TGeoPoint& found, const TGeoPoint& expected, const double eps = DEFAULT_EPS) { - if (std::isnan(found.Lon()) || std::isnan(found.Lat())) { - Cerr << "NaNs found: (" << found.Lon() << ", " << found.Lat() << ")" << Endl; - return false; - } - if (Abs(found.Lon() - expected.Lon()) > eps) { - Cerr << "longitude differs: " << found.Lon() << " found, " << expected.Lon() << " expected" << Endl; - return false; - } - if (Abs(found.Lat() - expected.Lat()) > eps) { - Cerr << "latitude differs: " << found.Lat() << " found, " << expected.Lat() << " expected" << Endl; - return false; - } - return true; - } - - bool CheckSizeEqual(const TSize& found, const TSize& expected, const double eps = DEFAULT_EPS) { - if (std::isnan(found.GetWidth()) || std::isnan(found.GetHeight())) { - Cerr << "NaNs found: (" << found.GetWidth() << ", " << found.GetHeight() << ")" << Endl; - return false; - } - if (Abs(found.GetWidth() - expected.GetWidth()) > eps) { - Cerr << "width differs: " << found.GetWidth() << " found, " << expected.GetWidth() << " expected" << Endl; - return false; - } - if (Abs(found.GetHeight() - expected.GetHeight()) > eps) { - Cerr << "height differs: " << found.GetHeight() << " found, " << expected.GetHeight() << " expected" << Endl; - return false; - } - return true; - } - - bool CheckGeoWindowEqual(const TGeoWindow& lhs, const TGeoWindow& rhs, const double eps = DEFAULT_EPS) { - return CheckGeoPointEqual(lhs.GetCenter(), rhs.GetCenter(), eps) && CheckSizeEqual(lhs.GetSize(), rhs.GetSize(), eps); - } -} // namespace - -/** - * TGeoWindow - */ -Y_UNIT_TEST_SUITE(TGeoWindowTest) { - Y_UNIT_TEST(TestParser) { - UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromCornersPoints("1.23,5.67", "7.65,3.21"), - TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67))); - UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromCornersPoints("1.23~5.67", "7.65~3.21", "~"), - TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67))); - UNIT_ASSERT_EXCEPTION(TGeoWindow::ParseFromCornersPoints("1.23~5.67", "7.65~3.21"), TBadCastException); - - UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23~5.67", "7.65~3.21").Empty()); - UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23,5.67", "7.65,3.21").Defined()); - UNIT_ASSERT_EQUAL(TGeoWindow::TryParseFromCornersPoints("1.23,5.67", "7.65,3.21").GetRef(), - TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67))); - UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23+++5.67+", "7.65+++3.21+", "+++").Empty()); - - UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromLlAndSpn("1.23,5.67", "0.1,0.2"), - TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2))); - UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~"), - TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2))); - UNIT_ASSERT_EXCEPTION(TGeoWindow::ParseFromLlAndSpn("1.23~5.67", "0.1~0.2"), TBadCastException); - UNIT_ASSERT(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2").Empty()); - UNIT_ASSERT(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~").Defined()); - UNIT_ASSERT_EQUAL(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~").GetRef(), - TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2))); - } - - Y_UNIT_TEST(TestConstructor) { - TGeoPoint center{55.50, 82.50}; - TSize size{5.00, 3.00}; - TGeoWindow window(center, size); - - UNIT_ASSERT_EQUAL(window.GetCenter(), center); - UNIT_ASSERT_EQUAL(window.GetSize(), size); - } - - Y_UNIT_TEST(TestPoles) { - { - TGeoWindow northPole{TGeoPoint{180., 90.}, TSize{1.5, 1.5}}; - UNIT_ASSERT(CheckGeoPointEqual(northPole.GetCenter(), TGeoPoint{180., 90.})); - UNIT_ASSERT(CheckGeoPointEqual(northPole.GetLowerLeftCorner(), TGeoPoint{179.25, 88.5})); - UNIT_ASSERT(CheckGeoPointEqual(northPole.GetUpperRightCorner(), TGeoPoint{180.75, 90.0})); - } - { - TGeoWindow tallWindow{TGeoPoint{37., 55.}, TSize{10., 180.}}; - UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetCenter(), TGeoPoint{37., 55.})); - UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetLowerLeftCorner(), TGeoPoint{32., -90.})); - UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetUpperRightCorner(), TGeoPoint{42., 90.})); - } - { - TGeoWindow world{TGeoPoint{0., 0.}, TSize{360., 180.}}; - UNIT_ASSERT(CheckGeoPointEqual(world.GetCenter(), TGeoPoint{0., 0.})); - UNIT_ASSERT(CheckGeoPointEqual(world.GetLowerLeftCorner(), TGeoPoint{-180., -90.})); - UNIT_ASSERT(CheckGeoPointEqual(world.GetUpperRightCorner(), TGeoPoint{180., 90.})); - } - { - TGeoWindow world{TGeoPoint{0., 0.}, TSize{360., 360.}}; - UNIT_ASSERT(CheckGeoPointEqual(world.GetCenter(), TGeoPoint{0., 0.})); - UNIT_ASSERT(CheckGeoPointEqual(world.GetLowerLeftCorner(), TGeoPoint{-180., -90.})); - UNIT_ASSERT(CheckGeoPointEqual(world.GetUpperRightCorner(), TGeoPoint{180., 90.})); - } - } - - Y_UNIT_TEST(TestBigSize) { - { - TGeoWindow w{TGeoPoint{37., 55.}, TSize{100., 179.}}; - UNIT_ASSERT(CheckGeoPointEqual(w.GetCenter(), TGeoPoint{37., 55.})); - UNIT_ASSERT(CheckGeoPointEqual(w.GetLowerLeftCorner(), TGeoPoint{-13., -89.09540675})); - UNIT_ASSERT(CheckGeoPointEqual(w.GetUpperRightCorner(), TGeoPoint{87., 89.90907637})); - } - } - - Y_UNIT_TEST(TestCenterWhenInitWithCorners) { - UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{5.00, 40.00}, TGeoPoint{25.00, 80.00}).GetCenter(), TGeoPoint{15.00, 67.17797})); - UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{-5.00, -40.00}, TGeoPoint{-25.00, -80.00}).GetCenter(), TGeoPoint{-15.00, -67.17797})); - } - - Y_UNIT_TEST(TestCornersWhenInitWithCenter) { - // check lat calc - UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, 50.00}, TSize{10.00, 10.00}).GetLowerLeftCorner().Lat(), 44.73927, DEFAULT_EPS); - - // lat equals to 90 - UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, 50.00}, TSize{10.00, 179.99999}).GetUpperRightCorner().Lat(), 90, DEFAULT_EPS); - - // lat equals to -90 - UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, -50.00}, TSize{10.00, -179.99999}).GetUpperRightCorner().Lat(), -90, DEFAULT_EPS); - - // check naive lon calc - UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{10, 10}, TSize{10, 5}).GetLowerLeftCorner().Lon(), 5, DEFAULT_EPS); - - // check lon equals to 190 (no wrapping) - UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{20, 0}, TSize{340, 5}).GetUpperRightCorner().Lon(), 190, DEFAULT_EPS); - - UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{-40, 0}, TSize{-280, 5}).GetUpperRightCorner().Lon(), -180, DEFAULT_EPS); - - // naive calculating when point is (0, 0) - UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{0, 0}, TSize{160, 160}).GetLowerLeftCorner(), TGeoPoint{-80, -80}, DEFAULT_EPS)); - UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{0, 0}, TSize{160, 160}).GetUpperRightCorner(), TGeoPoint{80, 80}, DEFAULT_EPS)); - } - - Y_UNIT_TEST(TestCenterSetter) { - TGeoPoint center{27.56, 53.90}; - TGeoWindow window{}; - window.SetCenter(center); - UNIT_ASSERT_EQUAL(window.GetCenter(), center); - } - - Y_UNIT_TEST(TestEqualOperator) { - TGeoWindow window{TGeoPoint{27.56, 53.90}, TGeoPoint{30.35, 56.89}}; - UNIT_ASSERT(window == window); - - TGeoWindow anotherWindow{TGeoPoint{60.10, 57.90}, TGeoPoint{60.70, 58.25}}; - UNIT_ASSERT(!(window == anotherWindow)); - } - - Y_UNIT_TEST(TestAssignmentOperator) { - TGeoWindow lhs{TGeoPoint{27.56, 53.90}, TGeoPoint{30.35, 53.89}}; - TGeoWindow rhs{}; - rhs = lhs; - UNIT_ASSERT_EQUAL(lhs, rhs); - } - - Y_UNIT_TEST(TestContainsMethod) { - // you could see cases here https://tech.yandex.ru/maps/jsbox/2.1/rectangle - // (pay attention that the first coord is lat and the second one is lon) - TGeoWindow window{TGeoPoint{27.45, 53.82}, TGeoPoint{27.65, 53.97}}; - - // point is inside the window - UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.90})); - - // point is to the right of the window - UNIT_ASSERT(!window.Contains(TGeoPoint{27.66, 53.95})); - - // point is to the left of the window - UNIT_ASSERT(!window.Contains(TGeoPoint{27.44, 53.95})); - - // point is under the window - UNIT_ASSERT(!window.Contains(TGeoPoint{27.50, 53.81})); - - // point is above the window - UNIT_ASSERT(!window.Contains(TGeoPoint{27.50, 53.98})); - - // point is on border - UNIT_ASSERT(window.Contains(TGeoPoint{27.45, 53.86})); - UNIT_ASSERT(window.Contains(TGeoPoint{27.65, 53.86})); - UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.82})); - UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.97})); - - // negate coord - UNIT_ASSERT(TGeoWindow(TGeoPoint{-72.17, -38.82}, TGeoPoint{-68.95, -36.70}).Contains(TGeoPoint{-70.40, -37.75})); - - // special cases - UNIT_ASSERT(!TGeoWindow{}.Contains(TGeoPoint{60.09, 57.90})); - - UNIT_ASSERT(TGeoWindow(TGeoPoint{}, TGeoPoint{27.55, 53.90}).Contains(TGeoPoint{27.55, 53.90})); - UNIT_ASSERT(TGeoWindow(TGeoPoint{27.55, 53.90}, TGeoPoint{}).Contains(TGeoPoint{27.55, 53.90})); - } - - Y_UNIT_TEST(TestIntersectsMethod) { - // intersect only by lat - UNIT_ASSERT( - !Intersects( - TGeoWindow{TGeoPoint{27.60, 53.90}, TGeoPoint{27.80, 53.95}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); - - // intersect only by lon - UNIT_ASSERT( - !Intersects( - TGeoWindow{TGeoPoint{27.35, 54}, TGeoPoint{27.45, 54.10}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); - - // one inside another - UNIT_ASSERT( - Intersects( - TGeoWindow{TGeoPoint{27.35, 53.90}, TGeoPoint{27.45, 53.95}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); - - // intersection is point - UNIT_ASSERT( - !Intersects( - TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.70, 54.00}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); - - // intersection is segment - UNIT_ASSERT( - !Intersects( - TGeoWindow{TGeoPoint{27.40, 53.98}, TGeoPoint{27.70, 54.00}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); - - // intersection is area - UNIT_ASSERT( - Intersects( - TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.70, 54.00}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); - - // equal windows - TGeoWindow window{TGeoPoint{27.60, 53.88}, TGeoPoint{27.80, 53.98}}; - UNIT_ASSERT(Intersects(window, window)); - } - - Y_UNIT_TEST(TestIntersectionMethod) { - // non-intersecting window - UNIT_ASSERT( - !(Intersection( - TGeoWindow{TGeoPoint{37.66, 55.66}, TGeoPoint{37.53, 55.64}}, - TGeoWindow{TGeoPoint{37.67, 55.66}, TGeoPoint{37.69, 55.71}}))); - - // one inside another - UNIT_ASSERT(CheckGeoWindowEqual( - Intersection( - TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{10.00, 10.00}}, - TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}}) - .GetRef(), - (TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}}))); - - // cross - UNIT_ASSERT(CheckGeoWindowEqual( - Intersection( - TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{10.00, 2.00}}, - TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 10.00}}) - .GetRef(), - (TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}}))); - - // intersection is a point - UNIT_ASSERT(CheckGeoWindowEqual( - Intersection( - TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.70, 54.00}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}) - .GetRef(), - (TGeoWindow{TGeoPoint{27.50, 53.98}, TSize{0, 0}}))); - - // intersection is a segment - UNIT_ASSERT(CheckGeoWindowEqual( - Intersection( - TGeoWindow{TGeoPoint{27.40, 53.98}, TGeoPoint{27.70, 54.00}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}) - .GetRef(), - (TGeoWindow{TGeoPoint{27.45, 53.98}, TSize{0.10, 0}}))); - - // intersection is area - UNIT_ASSERT(CheckGeoWindowEqual( - Intersection( - TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.70, 54.00}}, - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}) - .GetRef(), - (TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.50, 53.98}}))); - - // special cases - UNIT_ASSERT( - !(Intersection( - TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}, - TGeoWindow{}))); - } - - Y_UNIT_TEST(TestDistanceMethod) { - // one window inside another - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.80, 54.10}}) - .Distance(TGeoWindow{TGeoPoint{27.55, 54.00}, TGeoPoint{27.70, 54.07}}), - 0, - 1.E-5); - - // gap only by lon - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}}) - .Distance(TGeoWindow{TGeoPoint{27.69, 54.10}, TGeoPoint{27.90, 54.20}}), - 0.052773, - 1.E-5); - - // gap only by lat - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}}) - .Distance(TGeoWindow{TGeoPoint{27.50, 54.20}, TGeoPoint{27.70, 54.30}}), - 0.1, - 1.E-5); - - // gap by lot and lat, you can calculate answer using two previous tests - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}} - .Distance(TGeoWindow{TGeoPoint{27.69, 54.20}, TGeoPoint{27.70, 54.30}})), - 0.11304, - 1.E-5); - - // negate coord - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{-27.50, -53.98}, TGeoPoint{-27.60, -54.10}} - .Distance(TGeoWindow{TGeoPoint{-27.69, -54.20}, TGeoPoint{-27.70, -54.30}})), - 0.11304, - 1.E-5); - } - - Y_UNIT_TEST(TestApproxDistanceMethod) { - // point inside - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.80, 54.10}}) - .GetApproxDistance(TGeoPoint{27.60, 54.05}), - 0, - 1.E-5); - - // gap only by lon - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}}) - .GetApproxDistance(TGeoPoint{27.70, 54.05}), - 6535.3, - 0.1); - - // gap only by lat - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}}) - .GetApproxDistance(TGeoPoint{27.55, 53.95}), - 5566.0, - 0.1); - - // gap by lot and lat - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}}) - .GetApproxDistance(TGeoPoint{27.70, 54.20}), - 12900.6, - 0.1); - - // negate coord - UNIT_ASSERT_DOUBLES_EQUAL( - (TGeoWindow{TGeoPoint{-27.50, -54.00}, TGeoPoint{-27.60, -54.10}}) - .GetApproxDistance(TGeoPoint{-27.70, -54.20}), - 12900.6, - 0.1); - } - - Y_UNIT_TEST(TestUnionMethod) { - // one inside another - UNIT_ASSERT(CheckGeoWindowEqual( - Union( - TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 3.00}}, - TGeoWindow{TGeoPoint{37.10, 55.20}, TSize{1.50, 1.00}}), - TGeoWindow(TGeoPoint{37.00, 55.00}, TSize{2.00, 3.00}))); - - // non-intersecting windows - UNIT_ASSERT(CheckGeoWindowEqual( - Union( - TGeoWindow{TGeoPoint{37.00, 55.00}, TGeoPoint{37.10, 55.10}}, - TGeoWindow{TGeoPoint{37.20, 55.20}, TGeoPoint{37.30, 55.30}}), - TGeoWindow(TGeoPoint{37.00, 55.00}, TGeoPoint{37.30, 55.30}))); - - // negate coords, one inside another - UNIT_ASSERT(CheckGeoWindowEqual( - Union( - TGeoWindow{TGeoPoint{-57.62, -20.64}, TSize{2.00, 4.00}}, - TGeoWindow{TGeoPoint{-57.62, -20.64}, TSize{12.00, 10.00}}), - TGeoWindow(TGeoPoint{-57.62, -20.64}, TSize{12.00, 10.00}), 1.E-2)); - - // cross - UNIT_ASSERT(CheckGeoWindowEqual( - Union( - TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}}, - TGeoWindow{TGeoPoint{-1.5, 4.20}, TGeoPoint{-0.5, 7.13}}), - TGeoWindow(TGeoPoint{-3.82, 4.20}, TGeoPoint{0.10, 7.13}))); - - // special cases - UNIT_ASSERT(CheckGeoWindowEqual( - Union( - TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}}, - TGeoWindow{}), - TGeoWindow(TGeoPoint{-3.82, 5.52}, TGeoPoint{361., 181.}))); - - UNIT_ASSERT(CheckGeoWindowEqual( - Union( - TGeoWindow{}, - TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}}), - TGeoWindow(TGeoPoint{-3.82, 5.52}, TGeoPoint{361., 181.}))); - } - - Y_UNIT_TEST(TestStretchMethod) { - TSize size{0.5, 1}; - TGeoPoint center{27.40, 53.90}; - TGeoWindow window{}; - double multiplier = 0; - - // multiplier is less than 1. - window = {center, size}; - multiplier = 0.5; - - UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.14999, 53.39699})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.65000, 54.39699})); - - window.Stretch(multiplier); - UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{0.25, 0.5}})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.27499, 53.64925})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.52500, 54.14924})); - - // multiplier is greater than 1. - window = {center, size}; - multiplier = 2.2; - - window.Stretch(multiplier); - UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{1.1, 2.2}})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{26.84999, 52.78545})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.95000, 54.98545})); - - // invalid multiplier - window = {center, size}; - multiplier = 100.; - - window.Stretch(multiplier); - UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{50, 100}})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{2.40000, -18.88352})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{52.39999, 81.26212})); - - // invalid multiplier - window = {center, size}; - multiplier = 0; - - window.Stretch(multiplier); - UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{0, 0}})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.39999, 53.90000})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.39999, 53.90000})); - - // invalid multiplier - window = {center, size}; - multiplier = -5.; - - window.Stretch(multiplier); - UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{-2.5, -5}})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{28.64999, 56.32495})); - UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{26.15000, 51.32491})); - } -} - -/** - * TMercatorWindow - */ -Y_UNIT_TEST_SUITE(TMercatorWindowTest) { - Y_UNIT_TEST(TestConstructor) { - // init with two corners - TMercatorPoint lowerLeft{5, 3}; - TMercatorPoint upperRight{10, 20}; - TMercatorWindow window{lowerLeft, upperRight}; - - UNIT_ASSERT_EQUAL(window.GetWidth(), 5.); - UNIT_ASSERT_EQUAL(window.GetHeight(), 17.); - UNIT_ASSERT_EQUAL(window.GetCenter(), (TMercatorPoint{7.5, 11.5})); - - TMercatorPoint center{8, 12}; - TSize size{5, 17}; - window = {center, size}; - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner().X(), 10.5); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner().Y(), 20.5); - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner().X(), 5.5); - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner().Y(), 3.5); - } - - Y_UNIT_TEST(TestInflateMethod) { - TSize size{200, 500}; - TMercatorPoint center{441, 688}; - TMercatorWindow window{}; - int add = 10; - - window = {center, size}; - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(341, 438)); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(541, 938)); - window.Inflate(add); - UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{220, 520})); - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(331, 428)); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(551, 948)); - - // negate coords - center = {-441, -688}; - window = {center, size}; - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-541, -938)); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-341, -438)); - window.Inflate(add); - UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{220, 520})); - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-551, -948)); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-331, -428)); - - // size becomes negate - size = {6, 12}; - center = {0, 0}; - window = {center, size}; - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-3, -6)); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(3, 6)); - - add = -20; - window.Inflate(add); - UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{-34, -28})); - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(17, 14)); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-17, -14)); - UNIT_ASSERT_EQUAL(window.GetSize(), TSize(-34, -28)); - - // big add param - size = {10, 15}; - center = {5, 10}; - window = {center, size}; - - add = static_cast<int>(1E5); - window.Inflate(add); - UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{200'010, 200'015})); - UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-100'000, -99'997.5)); - UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(100'010, 100'017.5)); - } -} diff --git a/library/cpp/geo/ut/ya.make b/library/cpp/geo/ut/ya.make deleted file mode 100644 index 5bd891db1f..0000000000 --- a/library/cpp/geo/ut/ya.make +++ /dev/null @@ -1,12 +0,0 @@ -UNITTEST_FOR(library/cpp/geo) - -SRCS( - load_save_helper_ut.cpp - polygon_ut.cpp - point_ut.cpp - size_ut.cpp - util_ut.cpp - window_ut.cpp -) - -END() diff --git a/library/cpp/geo/util.cpp b/library/cpp/geo/util.cpp deleted file mode 100644 index e8d0fc378e..0000000000 --- a/library/cpp/geo/util.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "util.h" - -#include <math.h> -#include <util/generic/cast.h> -#include <util/generic/string.h> -#include <util/string/cast.h> -#include <utility> - -namespace NGeo { - bool TryPairFromString(std::pair<double, double>& res, TStringBuf inputStr, TStringBuf delimiter) { - TStringBuf lhsStr; - TStringBuf rhsStr; - - double lhs = NAN; - double rhs = NAN; - if ( - !inputStr.TrySplit(delimiter, lhsStr, rhsStr) || - !TryFromString<double>(lhsStr, lhs) || - !TryFromString<double>(rhsStr, rhs)) { - return false; - } - - res = {lhs, rhs}; - return true; - } - - std::pair<double, double> PairFromString(TStringBuf inputStr, TStringBuf delimiter) { - std::pair<double, double> res; - if (!TryPairFromString(res, inputStr, delimiter)) { - ythrow TBadCastException() << "Wrong point string: " << inputStr; - } - return res; - } -} // namespace NGeo diff --git a/library/cpp/geo/util.h b/library/cpp/geo/util.h deleted file mode 100644 index 18b411e6a4..0000000000 --- a/library/cpp/geo/util.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once - -#include "point.h" -#include "size.h" -#include "window.h" - -#include <util/generic/ymath.h> - -namespace NGeo { - constexpr double MIN_LATITUDE = -90.; - constexpr double MAX_LATITUDE = +90.; - constexpr double MIN_LONGITUDE = -180.; - constexpr double MAX_LONGITUDE = +180.; - constexpr double WORLD_WIDTH = MAX_LONGITUDE - MIN_LONGITUDE; - constexpr double WORLD_HEIGHT = MAX_LATITUDE - MIN_LATITUDE; - - // The Mercator projection is truncated at certain latitude so that the visible world forms a square. The poles are not shown. - constexpr double VISIBLE_LATITUDE_BOUND = 85.084059050109785; - - inline double Deg2rad(double d) { - return d * PI / 180; - } - - inline double Rad2deg(double d) { - return d * 180 / PI; - } - - inline double GetLongitudeFromMetersAtEquator(double meters) { - return Rad2deg(meters * (1. / WGS84::R)); - } - - inline double GetMetersFromDeg(double angle) { - return Deg2rad(angle) * NGeo::WGS84::R; - } - - inline double GetLatCos(double latDegree) { - return cos(Deg2rad(latDegree)); - } - - /** - * Get Inversed cosinus of latitude - * It is more precise, than division of two big doubles - * It is safe for lattitue at 90 degrees - */ - inline double GetInversedLatCosSafe(double latDegree) { - return 1. / Max(0.001, cos(Deg2rad(latDegree))); - } - - /** - * Gets Lontitude width for given width at equator and latitude - */ - inline double GetWidthAtLatitude(double widthEquator, double latDegree) { - return widthEquator * GetInversedLatCosSafe(latDegree); - } - - inline double GetWidthAtLatitude(double widthEquator, const TGeoPoint& p) { - return GetWidthAtLatitude(widthEquator, p.Lat()); - } - - /* - * Returns Normalised width at equator for specified width at latitude and latitude - */ - - inline double GetWidthAtEquator(double widthAtLatitude, double latDegree) { - return widthAtLatitude * GetLatCos(latDegree); - } - - inline double GetWidthAtEquator(double widthAtLatitude, const TGeoPoint& p) { - return GetWidthAtEquator(widthAtLatitude, p.Lat()); - } - - /* - * Same for size - */ - - inline TSize GetSizeAtLatitude(const TSize& sizeAtEquator, const TGeoPoint& at) { - return TSize(GetWidthAtLatitude(sizeAtEquator.GetWidth(), at), sizeAtEquator.GetHeight()); - } - - inline TSize GetSizeAtEquator(const TSize& sizeAtLatitude, const TGeoPoint& at) { - return TSize(GetWidthAtEquator(sizeAtLatitude.GetWidth(), at), sizeAtLatitude.GetHeight()); - } - - inline TGeoWindow ConstructWindowFromEquatorSize(const TGeoPoint& center, const TSize& sizeAtEquator) { - return TGeoWindow(center, GetSizeAtLatitude(sizeAtEquator, center)); - } - - inline double SquaredDiagonal(const NGeo::TSize& size, double latitude) { - return Sqr(NGeo::GetWidthAtEquator(size.GetWidth(), latitude)) + Sqr(size.GetHeight()); - } - - inline double Diagonal(const NGeo::TSize& size, double latitude) { - return sqrt(SquaredDiagonal(size, latitude)); - } - - /** - * try to parse two coords from string - * return pair of coords on success, otherwise throw exception - */ - std::pair<double, double> PairFromString(TStringBuf inputStr, TStringBuf delimiter = TStringBuf(",")); - - /** - * try to parse two coords from string - * write result to first param and return true on success, otherwise return false - */ - bool TryPairFromString(std::pair<double, double>& res, TStringBuf inputStr, TStringBuf delimiter = TStringBuf(",")); -} // namespace NGeo diff --git a/library/cpp/geo/window.cpp b/library/cpp/geo/window.cpp deleted file mode 100644 index 2ad2b61b71..0000000000 --- a/library/cpp/geo/window.cpp +++ /dev/null @@ -1,297 +0,0 @@ -#include "window.h" - -#include "util.h" - -#include <util/generic/ylimits.h> -#include <util/generic/ymath.h> -#include <util/generic/maybe.h> - -#include <cstdlib> -#include <utility> - -namespace NGeo { - namespace { - TMercatorPoint GetMiddlePoint(const TMercatorPoint& p1, const TMercatorPoint& p2) { - return TMercatorPoint{(p1.X() + p2.X()) / 2, (p1.Y() + p2.Y()) / 2}; - } - - struct TLatBounds { - double LatMin; - double LatMax; - }; - } // namespace - - bool TrySpan2LatitudeDegenerateCases(double ll, double lspan, TLatBounds& result) { - // TODO(sobols@): Compare with eps? - if (Y_UNLIKELY(lspan >= 180.)) { - result.LatMin = -90.; - result.LatMax = +90.; - return true; - } - if (Y_UNLIKELY(ll == +90.)) { - result.LatMin = ll - lspan; - result.LatMax = ll; - return true; - } - if (Y_UNLIKELY(ll == -90.)) { - result.LatMin = ll; - result.LatMax = ll + lspan; - return true; - } - return false; - } - - /** - * Finds such latitudes lmin, lmax that: - * 1) lmin <= ll <= lmax, - * 2) lmax - lmin == lspan, - * 3) MercatorY(ll) - MercatorY(lmin) == MercatorY(lmax) - MercatorY(ll) - * (the ll parallel is a center between lmin and lmax parallels in Mercator projection) - * - * \returns a pair (lmin, lmax) - */ - TLatBounds Span2Latitude(double ll, double lspan) { - TLatBounds result{}; - if (TrySpan2LatitudeDegenerateCases(ll, lspan, result)) { - return result; - } - - const double lc = Deg2rad(ll); - const double h = Deg2rad(lspan); - - // Spherical (Pseudo) Mercator: - // MercatorY(lc) = R * ln(tan(lc / 2 + PI / 4)). - // Note that - // ln(a) - ln(b) = ln(a / b) - // That'a why - // MercatorY(lc) - MercatorY(lmin) == MercatorY(lmin + h) - MercatorY(lc) <=> - // <=> tan(lc / 2 + PI / 4) / tan(lmin / 2 + PI / 4) == - // == tan(lmin / 2 + h / 2 + PI / 4) / tan(lc / 2 + PI / 4). - // Also note that - // tan(x + y) == (tan(x) + tan(y)) / (1 - tan(x) * tan(y)), - // so - // tan(lmin / 2 + h / 2 + PI / 4) == - // == (tan(lmin / 2 + PI / 4) + tan(h / 2)) / (1 - tan(lmin / 2 + PI / 4) * tan(h / 2)) - - const double yx = tan(lc / 2 + PI / 4); - - // Let x be tan(lmin / 2 + PI / 4), - // then - // yx / x == (x + tan(h / 2)) / ((1 - x * tan(h / 2)) * yx), - // or - // yx^2 * (1 - x * tan(h / 2)) == (x + tan(h / 2)) * x. - // Now we solve a quadratic equation: - // x^2 + bx + c == 0 - - const double C = yx * yx; - - const double b = (C + 1) * tan(h / 2), c = -C; - const double D = b * b - 4 * c; - const double root = (-b + sqrt(D)) / 2; - - result.LatMin = Rad2deg((atan(root) - PI / 4) * 2); - result.LatMax = result.LatMin + lspan; - return result; - } - - void TGeoWindow::CalcCorners() { - if (!IsValid()) { - return; - } - const TLatBounds latBounds = Span2Latitude(Center_.Lat(), Size_.GetHeight()); - - if (-90. < latBounds.LatMin && latBounds.LatMax < +90.) { - TMercatorPoint lowerLeftCornerM = LLToMercator(TGeoPoint(Center_.Lon() - (Size_.GetWidth() / 2), latBounds.LatMin)); - TMercatorPoint upperRightCornerM = LLToMercator(TGeoPoint(Center_.Lon() + (Size_.GetWidth() / 2), latBounds.LatMax)); - TMercatorPoint centerM = LLToMercator(Center_); - - double w = upperRightCornerM.X() - lowerLeftCornerM.X(); - double h = upperRightCornerM.Y() - lowerLeftCornerM.Y(); - - LowerLeftCorner_ = MercatorToLL(TMercatorPoint(centerM.X() - w / 2, centerM.Y() - h / 2)); - UpperRightCorner_ = MercatorToLL(TMercatorPoint(centerM.X() + w / 2, centerM.Y() + h / 2)); - } else { - LowerLeftCorner_ = TGeoPoint(Center_.Lon() - (Size_.GetWidth() / 2), latBounds.LatMin); - UpperRightCorner_ = TGeoPoint(Center_.Lon() + (Size_.GetWidth() / 2), latBounds.LatMax); - } - } - - void TGeoWindow::CalcCenterAndSpan() { - if (!LowerLeftCorner_ || !UpperRightCorner_) { - return; - } - - TMercatorPoint lower = LLToMercator(LowerLeftCorner_); - TMercatorPoint upper = LLToMercator(UpperRightCorner_); - TMercatorPoint center = GetMiddlePoint(lower, upper); - Center_ = MercatorToLL(center); - - Size_ = TSize(UpperRightCorner_.Lon() - LowerLeftCorner_.Lon(), - UpperRightCorner_.Lat() - LowerLeftCorner_.Lat()); - } - - bool TGeoWindow::Contains(const TGeoPoint& p) const { - return LowerLeftCorner_.Lon() <= p.Lon() && p.Lon() <= UpperRightCorner_.Lon() && - LowerLeftCorner_.Lat() <= p.Lat() && p.Lat() <= UpperRightCorner_.Lat(); - } - - double TGeoWindow::Diameter() const { - return Diagonal(Size_, Center_.Lat()); - } - - double TGeoWindow::Distance(const TGeoWindow& w) const { - const double minX = Max(GetLowerLeftCorner().Lon(), w.GetLowerLeftCorner().Lon()); - const double maxX = Min(GetUpperRightCorner().Lon(), w.GetUpperRightCorner().Lon()); - const double minY = Max(GetLowerLeftCorner().Lat(), w.GetLowerLeftCorner().Lat()); - const double maxY = Min(GetUpperRightCorner().Lat(), w.GetUpperRightCorner().Lat()); - double xGap = minX > maxX ? (minX - maxX) : 0.; - double yGap = minY > maxY ? (minY - maxY) : 0.; - return sqrtf(Sqr(xGap * cos((minY + maxY) * 0.5 * PI / 180)) + Sqr(yGap)); - } - - double TWindowLL::GetApproxDistance(const TPointLL& point) const { - const double metresInDegree = WGS84::R * PI / 180; - return Distance(TWindowLL{point, point}) * metresInDegree; - } - - TGeoWindow TGeoWindow::ParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter) { - auto leftCorner = TGeoPoint::Parse(leftCornerStr, delimiter); - auto rightCorner = TGeoPoint::Parse(rightCornerStr, delimiter); - - return {leftCorner, rightCorner}; - } - - TMaybe<TGeoWindow> TGeoWindow::TryParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter) { - auto leftCorner = TGeoPoint::TryParse(leftCornerStr, delimiter); - auto rightCorner = TGeoPoint::TryParse(rightCornerStr, delimiter); - if (!leftCorner || !rightCorner) { - return {}; - } - - return TGeoWindow{*leftCorner, *rightCorner}; - } - - TGeoWindow TGeoWindow::ParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter) { - TGeoPoint ll = TGeoPoint::Parse(llStr, delimiter); - TSize spn = TSize::Parse(spnStr, delimiter); - - return {ll, spn}; - } - - TMaybe<TGeoWindow> TGeoWindow::TryParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter) { - auto ll = TGeoPoint::TryParse(llStr, delimiter); - auto spn = TSize::TryParse(spnStr, delimiter); - - if (!ll || !spn) { - return {}; - } - - return TGeoWindow{*ll, *spn}; - } - /** - * TMercatorWindow - */ - - TMercatorWindow::TMercatorWindow() noexcept - : HalfWidth_{std::numeric_limits<double>::quiet_NaN()} - , HalfHeight_{std::numeric_limits<double>::quiet_NaN()} - { - } - - TMercatorWindow::TMercatorWindow(const TMercatorPoint& center, const TSize& size) noexcept - : Center_{center} - , HalfWidth_{size.GetWidth() / 2} - , HalfHeight_{size.GetHeight() / 2} - { - } - - TMercatorWindow::TMercatorWindow(const TMercatorPoint& firstPoint, const TMercatorPoint& secondPoint) noexcept - : Center_{GetMiddlePoint(firstPoint, secondPoint)} - , HalfWidth_{Abs(secondPoint.X() - firstPoint.X()) / 2} - , HalfHeight_{Abs(secondPoint.Y() - firstPoint.Y()) / 2} - { - } - - bool TMercatorWindow::Contains(const TMercatorPoint& pt) const noexcept { - return (Center_.X() - HalfWidth_ <= pt.X()) && - (pt.X() <= Center_.X() + HalfWidth_) && - (Center_.Y() - HalfHeight_ <= pt.Y()) && - (pt.Y() <= Center_.Y() + HalfHeight_); - } - - /** - * Conversion - */ - - TMercatorWindow LLToMercator(const TGeoWindow& window) { - return TMercatorWindow{LLToMercator(window.GetLowerLeftCorner()), LLToMercator(window.GetUpperRightCorner())}; - } - - TGeoWindow MercatorToLL(const TMercatorWindow& window) { - return TGeoWindow{MercatorToLL(window.GetLowerLeftCorner()), MercatorToLL(window.GetUpperRightCorner())}; - } - - /** - * Operators - */ - - TMaybe<TGeoWindow> Intersection(const TGeoWindow& lhs, const TGeoWindow& rhs) { - const double minX = Max(lhs.GetLowerLeftCorner().Lon(), rhs.GetLowerLeftCorner().Lon()); - const double maxX = Min(lhs.GetUpperRightCorner().Lon(), rhs.GetUpperRightCorner().Lon()); - const double minY = Max(lhs.GetLowerLeftCorner().Lat(), rhs.GetLowerLeftCorner().Lat()); - const double maxY = Min(lhs.GetUpperRightCorner().Lat(), rhs.GetUpperRightCorner().Lat()); - if (minX > maxX || minY > maxY) { - return {}; - } - return TGeoWindow(TGeoPoint(minX, minY), TGeoPoint(maxX, maxY)); - } - - TMaybe<TGeoWindow> Intersection(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) { - if (!lhs || !rhs) { - return {}; - } - return Intersection(*lhs, *rhs); - } - - TGeoWindow Union(const TGeoWindow& lhs, const TGeoWindow& rhs) { - const double minX = Min(lhs.GetLowerLeftCorner().Lon(), rhs.GetLowerLeftCorner().Lon()); - const double maxX = Max(lhs.GetUpperRightCorner().Lon(), rhs.GetUpperRightCorner().Lon()); - const double minY = Min(lhs.GetLowerLeftCorner().Lat(), rhs.GetLowerLeftCorner().Lat()); - const double maxY = Max(lhs.GetUpperRightCorner().Lat(), rhs.GetUpperRightCorner().Lat()); - return TGeoWindow{TGeoPoint{minX, minY}, TGeoPoint{maxX, maxY}}; - } - - TMaybe<TGeoWindow> Union(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) { - if (!lhs) { - return rhs; - } - if (!rhs) { - return lhs; - } - return Union(*lhs, *rhs); - } - - bool Contains(const TMaybe<TGeoWindow>& window, const TGeoPoint& point) { - if (!window) { - return false; - } - return window.GetRef().Contains(point); - } - - bool Intersects(const TGeoWindow& lhs, const TGeoWindow& rhs) { - bool haveHorizIntersection = - !(lhs.GetUpperRightCorner().Lon() <= rhs.GetLowerLeftCorner().Lon() || - rhs.GetUpperRightCorner().Lon() <= lhs.GetLowerLeftCorner().Lon()); - bool haveVertIntersection = - !(lhs.GetUpperRightCorner().Lat() <= rhs.GetLowerLeftCorner().Lat() || - rhs.GetUpperRightCorner().Lat() <= lhs.GetLowerLeftCorner().Lat()); - return haveHorizIntersection && haveVertIntersection; - } - - bool Intersects(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) { - if (!lhs || !rhs) { - return false; - } - return Intersects(*lhs, *rhs); - } -} // namespace NGeo diff --git a/library/cpp/geo/window.h b/library/cpp/geo/window.h deleted file mode 100644 index 1205d8351b..0000000000 --- a/library/cpp/geo/window.h +++ /dev/null @@ -1,264 +0,0 @@ -#pragma once - -#include "point.h" -#include "size.h" -#include <util/generic/string.h> -#include <util/generic/yexception.h> -#include <util/string/cast.h> -#include <util/generic/maybe.h> - -#include <algorithm> - -namespace NGeo { - class TGeoWindow { - public: - TGeoWindow() noexcept - - = default; - - TGeoWindow(const TGeoPoint& center, const TSize& size) noexcept - : Center_(center) - , Size_(size) - { - CalcCorners(); - } - - TGeoWindow(const TGeoPoint& firstPoint, const TGeoPoint& secondPoint) noexcept - : LowerLeftCorner_{std::min(firstPoint.Lon(), secondPoint.Lon()), - std::min(firstPoint.Lat(), secondPoint.Lat())} - , UpperRightCorner_{std::max(firstPoint.Lon(), secondPoint.Lon()), - std::max(firstPoint.Lat(), secondPoint.Lat())} - { - CalcCenterAndSpan(); - } - - const TGeoPoint& GetCenter() const noexcept { - return Center_; - } - - void SetCenter(const TGeoPoint& newCenter) { - Center_ = newCenter; - CalcCorners(); - } - - const TSize& GetSize() const noexcept { - return Size_; - } - - void SetSize(const TSize& newSize) { - Size_ = newSize; - CalcCorners(); - } - - const TGeoPoint& GetLowerLeftCorner() const noexcept { - return LowerLeftCorner_; - } - - const TGeoPoint& GetUpperRightCorner() const noexcept { - return UpperRightCorner_; - } - - void swap(TGeoWindow& o) noexcept { - Center_.swap(o.Center_); - Size_.swap(o.Size_); - LowerLeftCorner_.swap(o.LowerLeftCorner_); - UpperRightCorner_.swap(o.UpperRightCorner_); - } - - bool IsValid() const noexcept { - return Center_.IsValid() && Size_.IsValid(); - } - - bool Contains(const TGeoPoint&) const; - - bool Contains(const TGeoWindow& w) const { - return Contains(w.LowerLeftCorner_) && Contains(w.UpperRightCorner_); - } - - void Stretch(double multiplier) { - Size_.Stretch(multiplier); - CalcCorners(); - } - - void Inflate(double additionX, double additionY) { - Size_.Inflate(additionX * 2, additionY * 2); - CalcCorners(); - } - - void Inflate(double addition) { - Inflate(addition, addition); - } - - bool operator!() const { - return !IsValid(); - } - - double Diameter() const; - - double Area() const { - return Size_.GetHeight() * Size_.GetWidth(); - } - - double Distance(const TGeoWindow&) const; - - double GetApproxDistance(const TPointLL& point) const; - - /** - * try to parse TGeoWindow from center and span - * return parsed TGeoWindow on success, otherwise throw exception - */ - static TGeoWindow ParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter = TStringBuf(",")); - - /** - * try to parse TGeoWindow from two corners - * return parsed TGeoWindow on success, otherwise throw exception - */ - static TGeoWindow ParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter = TStringBuf(",")); - - /** - * try to parse TGeoWindow from center and span - * return TMaybe of parsed TGeoWindow on success, otherwise return empty TMaybe - */ - static TMaybe<TGeoWindow> TryParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter = TStringBuf(",")); - - /** - * try to parse TGeoWindow from two corners - * return TMaybe of parsed TGeoWindow on success, otherwise return empty TMaybe - */ - static TMaybe<TGeoWindow> TryParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter = TStringBuf(",")); - - private: - TGeoPoint Center_; - TSize Size_; - TGeoPoint LowerLeftCorner_; - TGeoPoint UpperRightCorner_; - - void CalcCorners(); - void CalcCenterAndSpan(); - }; - - inline bool operator==(const TGeoWindow& lhs, const TGeoWindow& rhs) { - return lhs.GetCenter() == rhs.GetCenter() && lhs.GetSize() == rhs.GetSize(); - } - - inline bool operator!=(const TGeoWindow& p1, const TGeoWindow& p2) { - return !(p1 == p2); - } - - /** - * \class TMercatorWindow - * - * Represents a window in EPSG:3395 projection - * (WGS 84 / World Mercator) - */ - class TMercatorWindow { - public: - TMercatorWindow() noexcept; - TMercatorWindow(const TMercatorPoint& center, const TSize& size) noexcept; - TMercatorWindow(const TMercatorPoint& firstPoint, const TMercatorPoint& secondPoint) noexcept; - - const TMercatorPoint& GetCenter() const noexcept { - return Center_; - } - - TSize GetHalfSize() const noexcept { - return {HalfWidth_, HalfHeight_}; - } - - TSize GetSize() const noexcept { - return {GetWidth(), GetHeight()}; - } - - double GetWidth() const noexcept { - return HalfWidth_ * 2; - } - - double GetHeight() const noexcept { - return HalfHeight_ * 2; - } - - TMercatorPoint GetLowerLeftCorner() const noexcept { - return TMercatorPoint{Center_.X() - HalfWidth_, Center_.Y() - HalfHeight_}; - } - - TMercatorPoint GetUpperRightCorner() const noexcept { - return TMercatorPoint{Center_.X() + HalfWidth_, Center_.Y() + HalfHeight_}; - } - - bool Contains(const TMercatorPoint& pt) const noexcept; - - bool Contains(const TMercatorWindow& w) const { - return Contains(w.GetLowerLeftCorner()) && Contains(w.GetUpperRightCorner()); - } - - void Stretch(double multiplier) { - HalfWidth_ *= multiplier; - HalfHeight_ *= multiplier; - } - - void Inflate(double additionX, double additionY) { - HalfWidth_ += additionX; - HalfHeight_ += additionY; - } - - void Inflate(double addition) { - Inflate(addition, addition); - } - - double Area() const { - return GetHeight() * GetWidth(); - } - - private: - bool IsDefined() const { - return Center_.IsDefined() && !std::isnan(HalfWidth_) && !std::isnan(HalfHeight_); - } - - private: - TMercatorPoint Center_; - double HalfWidth_; - double HalfHeight_; - }; - - inline bool operator==(const TMercatorWindow& lhs, const TMercatorWindow& rhs) { - return lhs.GetCenter() == rhs.GetCenter() && lhs.GetHalfSize() == rhs.GetHalfSize(); - } - - inline bool operator!=(const TMercatorWindow& p1, const TMercatorWindow& p2) { - return !(p1 == p2); - } - - /** - * Typedefs - * TODO(sobols@): remove - */ - - using TWindowLL = TGeoWindow; - - /** - * Conversion - */ - - TMercatorWindow LLToMercator(const TGeoWindow&); - TGeoWindow MercatorToLL(const TMercatorWindow&); - - /** - * Utility functions - */ - - bool Contains(const TMaybe<TGeoWindow>& window, const TGeoPoint& point); - - TMaybe<TGeoWindow> Union(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs); - TGeoWindow Union(const TGeoWindow& lhs, const TGeoWindow& rhs); - - TMaybe<TGeoWindow> Intersection(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs); - TMaybe<TGeoWindow> Intersection(const TGeoWindow& lhs, const TGeoWindow& rhs); - - bool Intersects(const TGeoWindow& lhs, const TGeoWindow& rhs); - bool Intersects(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs); -} // namespace NGeo - -template <> -inline void Out<NGeo::TGeoWindow>(IOutputStream& o, const NGeo::TGeoWindow& obj) { - o << '{' << obj.GetCenter() << ", " << obj.GetSize() << ", " << obj.GetLowerLeftCorner() << ", " << obj.GetUpperRightCorner() << "}"; -} diff --git a/library/cpp/geo/ya.make b/library/cpp/geo/ya.make deleted file mode 100644 index 1d36003c5c..0000000000 --- a/library/cpp/geo/ya.make +++ /dev/null @@ -1,19 +0,0 @@ -LIBRARY() - -SRCS( - bbox.cpp - geo.cpp - point.cpp - polygon.cpp - load_save_helper.cpp - size.cpp - util.cpp - window.cpp -) - -END() - -RECURSE_FOR_TESTS( - ut - style - ) diff --git a/library/cpp/geobase/CMakeLists.darwin-x86_64.txt b/library/cpp/geobase/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index b316e54e8a..0000000000 --- a/library/cpp/geobase/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,30 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geobase) -target_link_libraries(library-cpp-geobase PUBLIC - contrib-libs-cxxsupp - yutil - geobase-library - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geobase PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp -) -generate_enum_serilization(library-cpp-geobase - ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp - INCLUDE_HEADERS - geobase/include/structs.hpp -) diff --git a/library/cpp/geobase/CMakeLists.linux-aarch64.txt b/library/cpp/geobase/CMakeLists.linux-aarch64.txt deleted file mode 100644 index ab3962970d..0000000000 --- a/library/cpp/geobase/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,31 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geobase) -target_link_libraries(library-cpp-geobase PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - geobase-library - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geobase PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp -) -generate_enum_serilization(library-cpp-geobase - ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp - INCLUDE_HEADERS - geobase/include/structs.hpp -) diff --git a/library/cpp/geobase/CMakeLists.linux-x86_64.txt b/library/cpp/geobase/CMakeLists.linux-x86_64.txt deleted file mode 100644 index ab3962970d..0000000000 --- a/library/cpp/geobase/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,31 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geobase) -target_link_libraries(library-cpp-geobase PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - geobase-library - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geobase PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp -) -generate_enum_serilization(library-cpp-geobase - ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp - INCLUDE_HEADERS - geobase/include/structs.hpp -) diff --git a/library/cpp/geobase/CMakeLists.txt b/library/cpp/geobase/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/geobase/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/geobase/CMakeLists.windows-x86_64.txt b/library/cpp/geobase/CMakeLists.windows-x86_64.txt deleted file mode 100644 index b316e54e8a..0000000000 --- a/library/cpp/geobase/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,30 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geobase) -target_link_libraries(library-cpp-geobase PUBLIC - contrib-libs-cxxsupp - yutil - geobase-library - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geobase PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp -) -generate_enum_serilization(library-cpp-geobase - ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp - INCLUDE_HEADERS - geobase/include/structs.hpp -) diff --git a/library/cpp/geobase/geobase.cpp b/library/cpp/geobase/geobase.cpp deleted file mode 100644 index 24086c67a9..0000000000 --- a/library/cpp/geobase/geobase.cpp +++ /dev/null @@ -1,3 +0,0 @@ -#include <library/cpp/geobase/lookup.hpp> -#include <library/cpp/geobase/timezone_getter.hpp> -#include <library/cpp/geobase/service_getter.hpp> diff --git a/library/cpp/geobase/lookup.hpp b/library/cpp/geobase/lookup.hpp deleted file mode 100644 index f663750ab2..0000000000 --- a/library/cpp/geobase/lookup.hpp +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include <geobase/include/lookup.hpp> -#include <geobase/include/lookup_wrapper.hpp> -#include <geobase/include/structs.hpp> - -namespace NGeobase { - using TInitTraits = NImpl::TLookup::TInitTraits; - - class TLookup: public NImpl::TLookup { - public: - using parent = NImpl::TLookup; - - explicit TLookup(const std::string& datafile, const TInitTraits traits = {}) - : parent(datafile, traits) - { - } - explicit TLookup(const TInitTraits traits) - : parent(traits) - { - } - explicit TLookup(const void* pData, size_t len) - : parent(pData, len) - { - } - - ~TLookup() { - } - }; - - using TRegion = NImpl::TRegion; - using TGeolocation = NImpl::TGeolocation; - using TLinguistics = NImpl::TLinguistics; - using TGeoPoint = NImpl::TGeoPoint; - - using TLookupWrapper = NImpl::TLookupWrapper; - - using TId = NImpl::Id; - using TIdsList = NImpl::IdsList; - using TRegionsList = NImpl::TRegionsList; - - using TIpBasicTraits = NImpl::TIpBasicTraits; - using TIpTraits = NImpl::TIpTraits; -} diff --git a/library/cpp/geobase/service_getter.hpp b/library/cpp/geobase/service_getter.hpp deleted file mode 100644 index e088081706..0000000000 --- a/library/cpp/geobase/service_getter.hpp +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include <geobase/include/service_getter.hpp> - -namespace NGeobase { - using TServiceGetter = NImpl::TServiceGetter; -} diff --git a/library/cpp/geobase/timezone_getter.hpp b/library/cpp/geobase/timezone_getter.hpp deleted file mode 100644 index 5749f1e3d6..0000000000 --- a/library/cpp/geobase/timezone_getter.hpp +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include <geobase/include/timezone_getter.hpp> -#include <geobase/include/structs.hpp> - -namespace NGeobase { - using TTimezone = NImpl::TTimezone; - using TTimezoneGetter = NImpl::TTimezoneGetter; -} diff --git a/library/cpp/geobase/ya.make b/library/cpp/geobase/ya.make deleted file mode 100644 index 4a73974903..0000000000 --- a/library/cpp/geobase/ya.make +++ /dev/null @@ -1,13 +0,0 @@ -LIBRARY() - -SRCS( - library/cpp/geobase/geobase.cpp -) - -PEERDIR( - geobase/library -) - -GENERATE_ENUM_SERIALIZATION(geobase/include/structs.hpp) - -END() diff --git a/library/cpp/geohash/CMakeLists.darwin-x86_64.txt b/library/cpp/geohash/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index dfcb278a1f..0000000000 --- a/library/cpp/geohash/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,32 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geohash) -target_link_libraries(library-cpp-geohash PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-geo - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geohash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp -) -generate_enum_serilization(library-cpp-geohash - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h - GEN_HEADER - ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h - INCLUDE_HEADERS - library/cpp/geohash/direction.h -) diff --git a/library/cpp/geohash/CMakeLists.linux-aarch64.txt b/library/cpp/geohash/CMakeLists.linux-aarch64.txt deleted file mode 100644 index a907311df0..0000000000 --- a/library/cpp/geohash/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,33 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geohash) -target_link_libraries(library-cpp-geohash PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-geo - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geohash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp -) -generate_enum_serilization(library-cpp-geohash - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h - GEN_HEADER - ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h - INCLUDE_HEADERS - library/cpp/geohash/direction.h -) diff --git a/library/cpp/geohash/CMakeLists.linux-x86_64.txt b/library/cpp/geohash/CMakeLists.linux-x86_64.txt deleted file mode 100644 index a907311df0..0000000000 --- a/library/cpp/geohash/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,33 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geohash) -target_link_libraries(library-cpp-geohash PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-geo - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geohash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp -) -generate_enum_serilization(library-cpp-geohash - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h - GEN_HEADER - ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h - INCLUDE_HEADERS - library/cpp/geohash/direction.h -) diff --git a/library/cpp/geohash/CMakeLists.txt b/library/cpp/geohash/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/geohash/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/geohash/CMakeLists.windows-x86_64.txt b/library/cpp/geohash/CMakeLists.windows-x86_64.txt deleted file mode 100644 index dfcb278a1f..0000000000 --- a/library/cpp/geohash/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,32 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-geohash) -target_link_libraries(library-cpp-geohash PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-geo - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-geohash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp -) -generate_enum_serilization(library-cpp-geohash - ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h - GEN_HEADER - ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h - INCLUDE_HEADERS - library/cpp/geohash/direction.h -) diff --git a/library/cpp/geohash/direction.h b/library/cpp/geohash/direction.h deleted file mode 100644 index 88a3e6061d..0000000000 --- a/library/cpp/geohash/direction.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -namespace NGeoHash { - enum EDirection { - NORTH = 0, - NORTH_EAST, - EAST, - SOUTH_EAST, - SOUTH, - SOUTH_WEST, - WEST, - NORTH_WEST, - }; -} diff --git a/library/cpp/geohash/geohash.cpp b/library/cpp/geohash/geohash.cpp deleted file mode 100644 index 6c6d65acab..0000000000 --- a/library/cpp/geohash/geohash.cpp +++ /dev/null @@ -1,413 +0,0 @@ -#include "geohash.h" - -#include <util/generic/xrange.h> - -namespace { - using TNeighbourDescriptors = NGeoHash::TNeighbours<TMaybe<NGeoHash::TGeoHashDescriptor>>; - const auto directions = GetEnumAllValues<NGeoHash::EDirection>(); - - const auto doubleEps = std::numeric_limits<double>::epsilon(); - - const NGeoHash::TBoundingBoxLL& GetGlobalBBox() { - static const NGeoHash::TBoundingBoxLL globalLimits({-180, -90}, {180, 90}); - return globalLimits; - } - - const TStringBuf base32EncodeTable = "0123456789bcdefghjkmnpqrstuvwxyz"; - - const ui64 base32DecodeMask = 0x1F; - constexpr int base32DecodeTableSize = 128; - - using TBase32DecodeTable = std::array<TMaybe<i8>, base32DecodeTableSize>; - - TBase32DecodeTable MakeBase32DecodeTable() { - TBase32DecodeTable result; - result.fill(Nothing()); - for (auto i : xrange(base32EncodeTable.size())) { - result[base32EncodeTable[i]] = i; - } - return result; - } - - const TBase32DecodeTable base32DecodeTable = MakeBase32DecodeTable(); -} - -namespace NGeoHash { - static const ui8 maxSteps = 62; - static const ui8 maxPrecision = TGeoHashDescriptor::StepsToPrecision(maxSteps); // 12 - - static const TNeighbours<std::pair<i8, i8>> neighborBitMoves = { - {1, 0}, // NORTH - {1, 1}, - {0, 1}, - {-1, 1}, - {-1, 0}, - {-1, -1}, - {0, -1}, - {1, -1}, - }; - - ui8 TGeoHashDescriptor::StepsToPrecision(ui8 steps) { - return steps / StepsPerPrecisionUnit; - } - - ui8 TGeoHashDescriptor::PrecisionToSteps(ui8 precision) { - return precision * StepsPerPrecisionUnit; - } - - /* Steps interleave starting from lon so for 5 steps 3 are lon-steps and 2 are lat-steps. - * Thus there are ceil(step/2) lon-steps and floor(step/2) lat-steps */ - std::pair<ui8, ui8> TGeoHashDescriptor::LatLonSteps() const { - return std::make_pair<ui8, ui8>(Steps / 2, (Steps + 1) / 2); - } - - struct TMagicNumber { - ui64 Mask; - ui8 Shift; - }; - - /* Interleave lower bits of x and y, so the bits of x - * are in the even positions and bits from y in the odd. - * e.g. Interleave64(0b101, 0b110) => 0b111001 - * From: https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN - */ - ui64 TGeoHashDescriptor::Interleave64(ui32 x, ui32 y) { - // attention: magic numbers - constexpr TMagicNumber mortonMagicNumbers[] = { - {0x0000FFFF0000FFFF, 16}, - {0x00FF00FF00FF00FF, 8}, - {0x0F0F0F0F0F0F0F0F, 4}, - {0x3333333333333333, 2}, - {0x5555555555555555, 1}}; - - ui64 x64 = x; - ui64 y64 = y; - - for (const auto& magicNumber : mortonMagicNumbers) { - x64 = (x64 | (x64 << magicNumber.Shift)) & magicNumber.Mask; - y64 = (y64 | (y64 << magicNumber.Shift)) & magicNumber.Mask; - } - return x64 | (y64 << 1); - } - - /* Reverse the interleave process - * Deinterleave64(0b111001) => 0b101110 - * derived from http://stackoverflow.com/questions/4909263 */ - std::pair<ui32, ui32> TGeoHashDescriptor::Deinterleave64(ui64 z) { - constexpr TMagicNumber demortonMagicNumbers[] = { - {0x5555555555555555ULL, 0}, - {0x3333333333333333ULL, 1}, - {0x0F0F0F0F0F0F0F0FULL, 2}, - {0x00FF00FF00FF00FFULL, 4}, - {0x0000FFFF0000FFFFULL, 8}, - {0x00000000FFFFFFFFULL, 16}}; - - ui64 x = z; - ui64 y = z >> 1; - - for (const auto& magicNumber : demortonMagicNumbers) { - x = (x | (x >> magicNumber.Shift)) & magicNumber.Mask; - y = (y | (y >> magicNumber.Shift)) & magicNumber.Mask; - } - - return std::make_pair(x, y); - } - - std::pair<ui32, ui32> TGeoHashDescriptor::LatLonBits() const { - auto deinterleaved = Deinterleave64(Bits); - - if (Steps % 2) { - DoSwap(deinterleaved.first, deinterleaved.second); - } - return deinterleaved; - } - - void TGeoHashDescriptor::SetLatLonBits(ui32 latBits, ui32 lonBits) { - if (Steps % 2) { - Bits = Interleave64(lonBits, latBits); - } else { - Bits = Interleave64(latBits, lonBits); - } - } - - void TGeoHashDescriptor::InitFromLatLon(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps) { - Steps = steps; - if (Steps > maxSteps) { - ythrow yexception() << "Invalid steps: available values: 0.." << ::ToString(maxSteps); - } - - if (limits.Width() < doubleEps || limits.Height() < doubleEps) { - ythrow yexception() << "Invalid limits: min/max for one of coordinates are equal"; - } - - if (latitude < limits.GetMinY() || latitude > limits.GetMaxY() || longitude < limits.GetMinX() || longitude > limits.GetMaxX()) { - ythrow yexception() << "Invalid point (" << latitude << ", " << longitude << "): outside of limits"; - } - - double lat01 = (latitude - limits.GetMinY()) / limits.Height(); - double lon01 = (longitude - limits.GetMinX()) / limits.Width(); - - auto llSteps = LatLonSteps(); - - /* convert to fixed point based on the step size */ - lat01 *= (1 << llSteps.first); - lon01 *= (1 << llSteps.second); - - /* If lon_steps > lat_step, last bit is lon-bit, otherwise last bit is lat-bit*/ - SetLatLonBits(lat01, lon01); - } - - TGeoHashDescriptor::TGeoHashDescriptor(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps) { - InitFromLatLon(latitude, longitude, limits, steps); - } - - TGeoHashDescriptor::TGeoHashDescriptor(double latitude, double longitude, ui8 steps) { - InitFromLatLon(latitude, longitude, GetGlobalBBox(), steps); - } - - TGeoHashDescriptor::TGeoHashDescriptor(const NGeo::TPointLL& point, const TBoundingBoxLL& limits, ui8 steps) { - InitFromLatLon(point.Lat(), point.Lon(), limits, steps); - } - - TGeoHashDescriptor::TGeoHashDescriptor(const NGeo::TPointLL& point, ui8 steps) { - InitFromLatLon(point.Lat(), point.Lon(), GetGlobalBBox(), steps); - } - - TGeoHashDescriptor::TGeoHashDescriptor(const TString& hashString) { - if (hashString.size() > maxPrecision) { - ythrow yexception() << "hashString is too long: max length is " << ::ToString(maxPrecision); - } - - Bits = 0; - for (auto c : hashString) { - Bits <<= StepsPerPrecisionUnit; - Y_ENSURE(c >= 0); - const auto decodedChar = base32DecodeTable[c]; - Y_ENSURE(decodedChar.Defined()); - Bits |= decodedChar.GetRef(); - } - - Steps = PrecisionToSteps(hashString.size()); - } - - ui64 TGeoHashDescriptor::GetBits() const { - return Bits; - } - - ui8 TGeoHashDescriptor::GetSteps() const { - return Steps; - } - - TString TGeoHashDescriptor::ToString() const { - auto precision = StepsToPrecision(Steps); - - TStringStream stream; - - auto bits = Bits; - auto activeSteps = PrecisionToSteps(precision); - - bits >>= (Steps - activeSteps); - for (auto i : xrange(precision)) { - auto ix = (bits >> (StepsPerPrecisionUnit * ((precision - i - 1)))) & base32DecodeMask; - stream << base32EncodeTable[ix]; - } - - return stream.Str(); - } - - TBoundingBoxLL TGeoHashDescriptor::ToBoundingBox(const TBoundingBoxLL& limits) const { - auto llBits = LatLonBits(); - auto llSteps = LatLonSteps(); - - double latMultiplier = limits.Height() / (1ull << llSteps.first); - double lonMultiplier = limits.Width() / (1ull << llSteps.second); - - return { - { - limits.GetMinX() + lonMultiplier * llBits.second, - limits.GetMinY() + latMultiplier * llBits.first, - }, - { - limits.GetMinX() + lonMultiplier * (llBits.second + 1), - limits.GetMinY() + latMultiplier * (llBits.first + 1), - }}; - } - - TBoundingBoxLL TGeoHashDescriptor::ToBoundingBox() const { - return ToBoundingBox(GetGlobalBBox()); - } - - NGeo::TPointLL TGeoHashDescriptor::ToPoint(const TBoundingBoxLL& limits) const { - auto boundingBox = ToBoundingBox(limits); - return { - boundingBox.GetMinX() + boundingBox.Width() / 2, - boundingBox.GetMinY() + boundingBox.Height() / 2}; - } - - NGeo::TPointLL TGeoHashDescriptor::ToPoint() const { - return ToPoint(GetGlobalBBox()); - } - - TMaybe<TGeoHashDescriptor> TGeoHashDescriptor::GetNeighbour(EDirection direction) const { - TGeoHashDescriptor result(0, Steps); - auto llBits = LatLonBits(); - auto llSteps = LatLonSteps(); - std::pair<i8, i8> bitMove = neighborBitMoves[direction]; - - auto newLatBits = llBits.first + bitMove.first; - auto newLonBits = llBits.second + bitMove.second; - - // Overflow in lat means polar, so return Nothing - if (newLatBits >> llSteps.first != 0) { - return Nothing(); - } - - // Overflow in lon means 180-meridian, so just remove overflowed bits - newLonBits &= ((1 << llSteps.second) - 1); - result.SetLatLonBits(newLatBits, newLonBits); - return result; - } - - TNeighbourDescriptors TGeoHashDescriptor::GetNeighbours() const { - TNeighbourDescriptors result; - auto llBits = LatLonBits(); - auto llSteps = LatLonSteps(); - std::pair<i8, i8> bitMove; - - for (auto direction : directions) { - bitMove = neighborBitMoves[direction]; - - auto newLatBits = llBits.first + bitMove.first; - auto newLonBits = llBits.second + bitMove.second; - - // Overflow in lat means polar, so put Nothing - if (newLatBits >> llSteps.first != 0) { - result[direction] = Nothing(); - } else { - result[direction] = TGeoHashDescriptor(0, Steps); - // Overflow in lon means 180-meridian, so just remove overflowed bits - newLonBits &= ((1 << llSteps.second) - 1); - result[direction]->SetLatLonBits(newLatBits, newLonBits); - } - } - - return result; - } - - TVector<TGeoHashDescriptor> TGeoHashDescriptor::GetChildren(ui8 steps = StepsPerPrecisionUnit) const { - TVector<TGeoHashDescriptor> children(Reserve(1 << steps)); - ui8 childrenSteps = steps + Steps; - auto parentBits = Bits << steps; - if (childrenSteps > maxSteps) { - ythrow yexception() << "Resulting geohash steps are too big, available values: 0.." << ::ToString(maxSteps); - } - for (auto residue : xrange(1 << steps)) { - children.emplace_back(parentBits | residue, childrenSteps); - } - return children; - } - - /* Functions */ - - ui64 Encode(double latitude, double longitude, ui8 precision) { - auto descr = TGeoHashDescriptor( - latitude, longitude, TGeoHashDescriptor::PrecisionToSteps(precision)); - return descr.GetBits(); - } - ui64 Encode(const NGeo::TPointLL& point, ui8 precision) { - return TGeoHashDescriptor( - point, TGeoHashDescriptor::PrecisionToSteps(precision)) - .GetBits(); - } - - TString EncodeToString(double latitude, double longitude, ui8 precision) { - return TGeoHashDescriptor( - latitude, longitude, TGeoHashDescriptor::PrecisionToSteps(precision)) - .ToString(); - } - TString EncodeToString(const NGeo::TPointLL& point, ui8 precision) { - return TGeoHashDescriptor( - point, TGeoHashDescriptor::PrecisionToSteps(precision)) - .ToString(); - } - - NGeo::TPointLL DecodeToPoint(const TString& hashString) { - return TGeoHashDescriptor(hashString).ToPoint(); - } - NGeo::TPointLL DecodeToPoint(ui64 hash, ui8 precision) { - return TGeoHashDescriptor(hash, TGeoHashDescriptor::PrecisionToSteps(precision)).ToPoint(); - } - - TBoundingBoxLL DecodeToBoundingBox(const TString& hashString) { - return TGeoHashDescriptor(hashString).ToBoundingBox(); - } - - TBoundingBoxLL DecodeToBoundingBox(ui64 hash, ui8 precision) { - return TGeoHashDescriptor(hash, TGeoHashDescriptor::PrecisionToSteps(precision)).ToBoundingBox(); - } - - TMaybe<ui64> GetNeighbour(ui64 hash, EDirection direction, ui8 precision) { - auto neighbour = TGeoHashDescriptor( - hash, TGeoHashDescriptor::PrecisionToSteps(precision)) - .GetNeighbour(direction); - - if (neighbour.Defined()) { - return neighbour->GetBits(); - } else { - return Nothing(); - } - } - - TMaybe<TString> GetNeighbour(const TString& hashString, EDirection direction) { - auto neighbour = TGeoHashDescriptor(hashString).GetNeighbour(direction); - if (neighbour.Defined()) { - return neighbour->ToString(); - } else { - return Nothing(); - } - } - - TGeoHashBitsNeighbours GetNeighbours(ui64 hash, ui8 precision) { - TGeoHashBitsNeighbours result; - - auto neighbours = TGeoHashDescriptor( - hash, TGeoHashDescriptor::PrecisionToSteps(precision)) - .GetNeighbours(); - - for (auto direction : directions) { - if (neighbours[direction].Defined()) { - result[direction] = neighbours[direction]->GetBits(); - } else { - result[direction] = Nothing(); - } - } - - return result; - } - - TGeoHashStringNeighbours GetNeighbours(const TString& hashString) { - TGeoHashStringNeighbours result; - - auto neighbours = TGeoHashDescriptor( - hashString) - .GetNeighbours(); - - for (auto direction : directions) { - if (neighbours[direction].Defined()) { - result[direction] = neighbours[direction]->ToString(); - } else { - result[direction] = Nothing(); - } - } - return result; - } - - TVector<TString> GetChildren(const TString& hashString) { - TVector<TString> result(Reserve(base32EncodeTable.size())); - - for (auto ch : base32EncodeTable) { - result.push_back(hashString + ch); - } - return result; - } -} diff --git a/library/cpp/geohash/geohash.h b/library/cpp/geohash/geohash.h deleted file mode 100644 index 7d270612e8..0000000000 --- a/library/cpp/geohash/geohash.h +++ /dev/null @@ -1,123 +0,0 @@ -#pragma once - -/** - * @file - * @brief Strong (because it works) and independent (of contrib/libs/geohash) GeoHash implementation - * GeoHash algo: https://en.wikipedia.org/wiki/Geohash - * Useful links: - * 1. http://geohash.org - Main Site - * 2. https://dou.ua/lenta/articles/geohash - Geohash-based geopoints clusterization - * 3. http://www.movable-type.co.uk/scripts/geohash.html - bidirectional encoding and visualization - */ -#include <library/cpp/geohash/direction.h> -#include <library/cpp/geohash/direction.h_serialized.h> - -#include <library/cpp/geo/geo.h> - -#include <util/generic/maybe.h> -#include <util/generic/string.h> -#include <util/system/types.h> - -#include <array> - -namespace NGeoHash { - using TBoundingBoxLL = NGeo::TGeoBoundingBox; - static constexpr auto directionsCount = GetEnumItemsCount<EDirection>(); - - template <class T> - class TNeighbours: public std::array<T, directionsCount> { - public: - TNeighbours() = default; - - TNeighbours(std::initializer_list<T> list) { - Y_ASSERT(list.size() == directionsCount); - std::copy(list.begin(), list.end(), std::array<T, directionsCount>::begin()); - } - - const T& operator[](EDirection direction) const { - return std::array<T, directionsCount>::operator[](static_cast<size_t>(direction)); - } - - T& operator[](EDirection direction) { - return std::array<T, directionsCount>::operator[](static_cast<size_t>(direction)); - } - }; - - class TGeoHashDescriptor { - public: - TGeoHashDescriptor() noexcept - : Bits(0) - , Steps(0) - { - } - - TGeoHashDescriptor(ui64 bits, ui8 steps) noexcept - : Bits(bits) - , Steps(steps) - { - } - - TGeoHashDescriptor(double latitude, double longitude, ui8 steps); - TGeoHashDescriptor(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps); - TGeoHashDescriptor(const NGeo::TPointLL& point, ui8 steps); - TGeoHashDescriptor(const NGeo::TPointLL& point, const TBoundingBoxLL& limits, ui8 steps); - - explicit TGeoHashDescriptor(const TString& hashString); - - ui64 GetBits() const; - ui8 GetSteps() const; - - TString ToString() const; - - NGeo::TPointLL ToPoint(const TBoundingBoxLL& limits) const; - NGeo::TPointLL ToPoint() const; - - TBoundingBoxLL ToBoundingBox(const TBoundingBoxLL& limits) const; - TBoundingBoxLL ToBoundingBox() const; - - TMaybe<TGeoHashDescriptor> GetNeighbour(EDirection direction) const; - TNeighbours<TMaybe<TGeoHashDescriptor>> GetNeighbours() const; - - TVector<TGeoHashDescriptor> GetChildren(ui8 steps) const; - - static ui8 StepsToPrecision(ui8 steps); - static ui8 PrecisionToSteps(ui8 precision); - - private: - void InitFromLatLon(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps); - std::pair<ui8, ui8> LatLonSteps() const; - std::pair<ui32, ui32> LatLonBits() const; - void SetLatLonBits(ui32 latBits, ui32 lonBits); - static ui64 Interleave64(ui32 x, ui32 y); - static std::pair<ui32, ui32> Deinterleave64(ui64 interleaved); - - private: - static const ui8 StepsPerPrecisionUnit = 5; - ui64 Bits; - ui8 Steps; - }; - - ui64 Encode(double latitude, double longitude, ui8 precision); - ui64 Encode(const NGeo::TPointLL& point, ui8 precision); - - TString EncodeToString(double latitude, double longitude, ui8 precision); - TString EncodeToString(const NGeo::TPointLL& point, ui8 precision); - - NGeo::TPointLL DecodeToPoint(const TString& hashString); - NGeo::TPointLL DecodeToPoint(ui64 hash, ui8 precision); - - TBoundingBoxLL DecodeToBoundingBox(const TString& hashString); - TBoundingBoxLL DecodeToBoundingBox(ui64 hash, ui8 precision); - - TMaybe<ui64> GetNeighbour(ui64 hash, EDirection direction, ui8 precision); - TMaybe<TString> GetNeighbour(const TString& hashString, EDirection direction); - - using TGeoHashBitsNeighbours = TNeighbours<TMaybe<ui64>>; - using TGeoHashStringNeighbours = TNeighbours<TMaybe<TString>>; - - TGeoHashBitsNeighbours GetNeighbours(ui64 hash, ui8 precision); - TGeoHashStringNeighbours GetNeighbours(const TString& hashString); - - TVector<TString> GetChildren(const TString& hashString); - -} /* namespace NGeoHash */ diff --git a/library/cpp/geohash/ya.make b/library/cpp/geohash/ya.make deleted file mode 100644 index 3350ca1cc6..0000000000 --- a/library/cpp/geohash/ya.make +++ /dev/null @@ -1,13 +0,0 @@ -LIBRARY() - -PEERDIR( - library/cpp/geo -) - -SRCS( - geohash.cpp -) - -GENERATE_ENUM_SERIALIZATION_WITH_HEADER(direction.h) - -END() diff --git a/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt b/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 05b000b7da..0000000000 --- a/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,53 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-ipreg) -target_link_libraries(library-cpp-ipreg PUBLIC - contrib-libs-cxxsupp - yutil - cpp-getopt-small - library-cpp-json - library-cpp-geobase - library-cpp-int128 - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-ipreg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h - INCLUDE_HEADERS - library/cpp/ipreg/address.h -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h - INCLUDE_HEADERS - library/cpp/ipreg/sources.h -) diff --git a/library/cpp/ipreg/CMakeLists.linux-aarch64.txt b/library/cpp/ipreg/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 5e76739840..0000000000 --- a/library/cpp/ipreg/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,54 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-ipreg) -target_link_libraries(library-cpp-ipreg PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-getopt-small - library-cpp-json - library-cpp-geobase - library-cpp-int128 - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-ipreg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h - INCLUDE_HEADERS - library/cpp/ipreg/address.h -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h - INCLUDE_HEADERS - library/cpp/ipreg/sources.h -) diff --git a/library/cpp/ipreg/CMakeLists.linux-x86_64.txt b/library/cpp/ipreg/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 5e76739840..0000000000 --- a/library/cpp/ipreg/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,54 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-ipreg) -target_link_libraries(library-cpp-ipreg PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-getopt-small - library-cpp-json - library-cpp-geobase - library-cpp-int128 - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-ipreg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h - INCLUDE_HEADERS - library/cpp/ipreg/address.h -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h - INCLUDE_HEADERS - library/cpp/ipreg/sources.h -) diff --git a/library/cpp/ipreg/CMakeLists.txt b/library/cpp/ipreg/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/ipreg/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/ipreg/CMakeLists.windows-x86_64.txt b/library/cpp/ipreg/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 05b000b7da..0000000000 --- a/library/cpp/ipreg/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,53 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) - -add_library(library-cpp-ipreg) -target_link_libraries(library-cpp-ipreg PUBLIC - contrib-libs-cxxsupp - yutil - cpp-getopt-small - library-cpp-json - library-cpp-geobase - library-cpp-int128 - tools-enum_parser-enum_serialization_runtime -) -target_sources(library-cpp-ipreg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h - INCLUDE_HEADERS - library/cpp/ipreg/address.h -) -generate_enum_serilization(library-cpp-ipreg - ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h - INCLUDE_HEADERS - library/cpp/ipreg/sources.h -) diff --git a/library/cpp/ipreg/address.cpp b/library/cpp/ipreg/address.cpp deleted file mode 100644 index 83880ccbae..0000000000 --- a/library/cpp/ipreg/address.cpp +++ /dev/null @@ -1,365 +0,0 @@ -#include "address.h" - -#include <util/generic/mem_copy.h> -#include <util/stream/format.h> -#include <util/string/cast.h> -#include <util/string/hex.h> -#include <util/string/printf.h> -#include <util/string/split.h> -#include <util/string/type.h> -#include <util/string/vector.h> -#include <util/system/byteorder.h> -#include <util/network/socket.h> - -#include <sstream> - -namespace NIPREG { - -TAddress TAddress::ParseAny(TStringBuf str) { - if (str.find(':') != TStringBuf::npos) { - return ParseIPv6(str); - } else if (str.find('.') != TStringBuf::npos) { - return ParseIPv4(str); - } else if (IsNumber(str)) { - return ParseIPv4Num(str); // TODO(dieash@) IPv6Num - } - - ythrow yexception() << "Unrecognized IPREG address format: " << str; -} - -TAddress TAddress::ParseIPv6(TStringBuf str) { - TAddress addr; - if (inet_pton(AF_INET6, TString(str).c_str(), &addr.Data) != 1) - ythrow yexception() << "Failed to parse IPREG address " << str << " as IPv6"; - - return addr; -} - -TAddress TAddress::ParseIPv4(TStringBuf str) { - struct in_addr ipv4; - if (inet_aton(TString(str).c_str(), &ipv4) != 1) - ythrow yexception() << "Failed to parse IPREG address " << str << " as IPv4"; - - return FromIPv4Num(InetToHost(ipv4.s_addr)); -} - -TAddress TAddress::ParseIPv4Num(TStringBuf str) { - return FromIPv4Num(FromString<ui32>(str)); -} - -TAddress TAddress::ParseIPv6Num(TStringBuf str) { - return FromUint128(FromString<ui128>(str)); -} - -TAddress TAddress::FromBinary(unsigned char const * const data) { - TAddress addr; - MemCopy<unsigned char>(addr.Data, data, sizeof(addr.Data)); - return addr; -} - -TAddress TAddress::FromBinaryIPv4(unsigned char const * const data) { - return TAddress::FromIPv4Num( - (static_cast<ui32>(data[0]) << 24) | - (static_cast<ui32>(data[1]) << 16) | - (static_cast<ui32>(data[2]) << 8) | - (static_cast<ui32>(data[3])) - ); -} - -TAddress TAddress::FromIPv4Num(ui32 num) { - TAddress addr; - memset((void*)&addr.Data, 0x00, 10); - addr.Data[10] = 0xff; - addr.Data[11] = 0xff; - addr.Data[12] = (num >> 24) & 0xff; - addr.Data[13] = (num >> 16) & 0xff; - addr.Data[14] = (num >> 8) & 0xff; - addr.Data[15] = (num) & 0xff; - return addr; -} - -TAddress TAddress::FromUint128(ui128 intAddr) { - const auto hiBE = HostToInet(GetHigh(intAddr)); - const auto loBE = HostToInet(GetLow(intAddr)); - - TAddress addr; - ui64* dataPtr = reinterpret_cast<ui64*>(addr.Data); - MemCopy<ui64>(dataPtr, &hiBE, 1); - MemCopy<ui64>(dataPtr + 1, &loBE, 1); - - return addr; -} - -namespace { - void SetHostsBits(TAddress& addr, char value) { - addr.Data[ 8] = value; - addr.Data[ 9] = value; - addr.Data[10] = value; - addr.Data[11] = value; - addr.Data[12] = value; - addr.Data[13] = value; - addr.Data[14] = value; - addr.Data[15] = value; - } -} // anon-ns - -TAddress TAddress::MakeNet64Broadcast(TAddress base) { - SetHostsBits(base, 0xff); - return base; -} - -TAddress TAddress::MakeNet64Prefix(TAddress base) { - SetHostsBits(base, 0x00); - return base; -} - -const TAddress& TAddress::Lowest() { - static const TAddress first{{}}; - return first; -} - -const TAddress& TAddress::Highest() { - static const TAddress last{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - return last; -} - -TString TAddress::AsIPv4() const { - return ToString(Data[12]) + "." + ToString(Data[13]) + "." + ToString(Data[14]) + "." + ToString(Data[15]); -} - -TString TAddress::AsIPv4Num() const { - ui32 addr = (ui32)Data[12] << 24 | (ui32)Data[13] << 16 | (ui32)Data[14] << 8 | Data[15]; - return ToString(addr); -} - -TString TAddress::AsIPv6() const { - TStringStream ss; - - for (size_t octet = 0; octet < sizeof(Data); octet++) { - ss << Hex(Data[octet], HF_FULL); - if (octet < 15 && octet & 1) - ss << ':'; - } - - TString s = ss.Str(); - s.to_lower(); - - return s; -} - -TString TAddress::AsIPv6Num() const { - return ToString(AsUint128()); -} - -TString TAddress::GetTextFromNetOrder() const { - char buf[INET6_ADDRSTRLEN]; - if (inet_ntop(AF_INET6, (void*)(&Data), buf, sizeof(buf)) == NULL) - ythrow yexception() << "Failed to stringify IPREG address"; - - return buf; -} - -namespace { - TString GetHexStr(ui64 v) { - return HexEncode(reinterpret_cast<const char*>(&v), sizeof(v)); - } - - void HexDumpToStream(std::stringstream& ss, ui64 beData) { - const auto dataHexStr = GetHexStr(beData); - const auto hostData = InetToHost(beData); - const auto hostDataStr = GetHexStr(hostData); - ss << "\t/big-end[" << beData << " / " << dataHexStr << "]\t/host[" << hostData << " / " << hostDataStr << "]\n"; - } -} // anon-ns - -TString TAddress::GetHexString(const bool deepView) const { - std::stringstream ss; - ss << HexEncode(TStringBuf(reinterpret_cast<const char*>(Data), 16)); - if (deepView) { - const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); - - const auto hi = *dataPtr; - ss << "\nhigh-data"; HexDumpToStream(ss, hi); - - const auto lo = *(dataPtr + 1); - ss << "\nlow-data"; HexDumpToStream(ss, lo); - } - return ss.str().c_str(); -} - -TString TAddress::AsShortIP() const { - if (IsIPv4()) - return AsIPv4(); - else - return GetTextFromNetOrder(); -} - -TString TAddress::AsShortIPv6() const { - if (IsIPv4()) - return Sprintf("::ffff:%x:%x", (ui32)Data[12] << 8 | (ui32)Data[13], (ui32)Data[14] << 8 | (ui32)Data[15]); - else - return GetTextFromNetOrder(); -} - -TString TAddress::AsLongIP() const { - if (IsIPv4()) - return AsIPv4(); - else - return AsIPv6(); -} - -ui128 TAddress::AsUint128() const { - const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); - return ui128(InetToHost(*dataPtr), InetToHost(*(dataPtr + 1))); -} - -ui64 TAddress::GetHigh64() const { - const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); - return *dataPtr; -} - -ui64 TAddress::GetLow64() const { - const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); - return *(dataPtr + 1); -} - -ui64 TAddress::GetHigh64LE() const { - return InetToHost(GetHigh64()); -} - -ui64 TAddress::GetLow64LE() const { - return InetToHost(GetLow64()); -} - -bool TAddress::IsNet64Broadcast() const { - static const auto NET64_HOSTS_MASK = TAddress::ParseAny("::ffff:ffff:ffff:ffff").GetLow64(); - const auto ownHostsBits = GetLow64(); - return ownHostsBits == NET64_HOSTS_MASK; -} - -bool TAddress::IsNet64Host() const { - const auto isSomeOwnHostsBitsOn = GetLow64() > 0; - return isSomeOwnHostsBitsOn && !IsNet64Broadcast(); -} - -TString TAddress::Format(EAddressFormat format) const { - switch (format) { - case EAddressFormat::IPV6: - return AsIPv6(); - case EAddressFormat::LONG_IP: - return AsLongIP(); - case EAddressFormat::SHORT_IP: - return AsShortIP(); - case EAddressFormat::NUMERIC_IPV4: - return AsIPv4Num(); - case EAddressFormat::NUMERIC_IPV6: - return AsIPv6Num(); - case EAddressFormat::NTOA: - return GetTextFromNetOrder(); - case EAddressFormat::SHORT_IPV6: - return AsShortIPv6(); - } -} - -bool TAddress::IsIPv4() const { - static const unsigned char mask[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff }; - return memcmp(Data, mask, sizeof(mask)) == 0; -} - -TAddress TAddress::Next() const { - if (Highest() == *this) { - return Highest(); - } - - TAddress addr; - bool carry = 1; - for (ssize_t octet = 15; octet >= 0; octet--) { - addr.Data[octet] = Data[octet] + carry; - carry = carry && !addr.Data[octet]; - } - - return addr; -} - -TAddress TAddress::Prev() const { - if (Lowest() == *this) { - return Lowest(); - } - - TAddress addr{}; - bool carry = 1; - for (ssize_t octet = 15; octet >= 0; octet--) { - addr.Data[octet] = Data[octet] - carry; - carry = carry && !Data[octet]; - } - - return addr; -} - -double TAddress::operator-(const TAddress& rhs) const { - double diff = 0.0; - for (ssize_t octet = 0; octet < 16; octet++) { - diff = diff * 256.0 + (static_cast<int>(Data[octet]) - static_cast<int>(rhs.Data[octet])); - } - return diff; -} - -ui128 TAddress::Distance(const TAddress& a, const TAddress& b) { - const auto& intA = a.AsUint128(); - const auto& intB = b.AsUint128(); - return (a > b) ? (intA - intB) : (intB - intA); -} - -namespace { - constexpr size_t MAX_IPV6_MASK_LEN = 16 * 8; - constexpr size_t MAX_IPV4_MASK_LEN = 4 * 8; - constexpr size_t IPV4_IN6_MASK_BASE = MAX_IPV6_MASK_LEN - MAX_IPV4_MASK_LEN; - - TAddress SetMaskBits(const TAddress& addr, const size_t wantedMaskLen) { - auto maskLen = wantedMaskLen; - if (addr.IsIPv4() && maskLen && maskLen <= MAX_IPV4_MASK_LEN) { - maskLen += IPV4_IN6_MASK_BASE; - } - - if (maskLen == 0 || maskLen > MAX_IPV6_MASK_LEN || (addr.IsIPv4() && maskLen < IPV4_IN6_MASK_BASE)) { - ythrow yexception() << "strange mask (calc/wanted) " << maskLen << "/" << wantedMaskLen << "; " << addr; - } - - const int octetsForUpdate = (MAX_IPV6_MASK_LEN - maskLen) / 8; - const int bitsForUpdate = (MAX_IPV6_MASK_LEN - maskLen) % 8; - - size_t currOctet = 15; - TAddress addrWithMask = addr; - - for (int octetNum = 0; octetNum != octetsForUpdate; ++octetNum) { - addrWithMask.Data[currOctet--] = 0xff; - } - - for (int bitNum = 0; bitNum != bitsForUpdate; ++bitNum) { - addrWithMask.Data[currOctet] ^= 1 << bitNum; - } - - return addrWithMask; - } -} // anon-ns - -TNetwork::TNetwork(const TString& str) - : TNetwork(static_cast<TVector<TString>>(StringSplitter(str).Split('/').SkipEmpty())) -{} - -TNetwork::TNetwork(const TVector<TString>& data) - : TNetwork(data.size() ? data[0] : "", - data.size() > 1 ? FromStringWithDefault<size_t>(data[1]) : 0) -{} - -TNetwork::TNetwork(const TString& net, size_t maskLen) - : begin(TAddress::ParseAny(net)) - , end(SetMaskBits(begin, maskLen)) -{} - -} - -IOutputStream& operator<<(IOutputStream& output, const NIPREG::TAddress& addr) { - output << addr.AsShortIPv6(); - return output; -} diff --git a/library/cpp/ipreg/address.h b/library/cpp/ipreg/address.h deleted file mode 100644 index 9071418d5b..0000000000 --- a/library/cpp/ipreg/address.h +++ /dev/null @@ -1,137 +0,0 @@ -#pragma once - -#include <library/cpp/int128/int128.h> - -#include <util/generic/string.h> -#include <util/digest/murmur.h> -#include <util/string/cast.h> - -namespace NIPREG { - -struct TAddress { - enum class EAddressFormat { - IPV6 = 0x00 /* "ipv6" */, - LONG_IP = 0x01 /* "long" */, - SHORT_IP = 0x02 /* "short" */, - NUMERIC_IPV4 = 0x03 /* "num4" */, - NTOA = 0x04 /* "n2a" */, - SHORT_IPV6 = 0x05 /* "short-ipv6" */, - NUMERIC_IPV6 = 0x06 /* "num" */, - }; - - unsigned char Data[16] = {0}; // NOTA BENE: network byte order (Big-Endian) - - // Comparison - bool operator==(const TAddress& other) const { - return memcmp(Data, other.Data, sizeof(Data)) == 0; - } - - bool operator<(const TAddress& other) const { - return memcmp(Data, other.Data, sizeof(Data)) < 0; - } - - bool operator>(const TAddress& other) const { - return memcmp(Data, other.Data, sizeof(Data)) > 0; - } - - bool operator!=(const TAddress& other) const { - return !(*this == other); - } - - bool operator<=(const TAddress& other) const { - return !(*this > other); - } - - bool operator>=(const TAddress& other) const { - return !(*this < other); - } - - double operator-(const TAddress& rhs) const; - - // Parsing - static TAddress ParseAny(TStringBuf str); - - static TAddress ParseIPv6(TStringBuf str); - static TAddress ParseIPv4(TStringBuf str); - static TAddress ParseIPv4Num(TStringBuf str); - static TAddress ParseIPv6Num(TStringBuf str); - - static TAddress FromIPv4Num(ui32 num); - static TAddress FromUint128(ui128 addr); - static TAddress FromBinary(unsigned char const * data); - static TAddress FromBinaryIPv4(unsigned char const * const data); - - static TAddress MakeNet64Broadcast(TAddress base); - static TAddress MakeNet64Prefix(TAddress base); - - static const TAddress& Lowest(); - static const TAddress& Highest(); - - // Inspecting - TString AsIPv4() const; - TString AsIPv4Num() const; - TString AsIPv6() const; - TString AsIPv6Num() const; - TString GetTextFromNetOrder() const; - TString GetHexString(bool deepView = false) const; - - TString AsShortIP() const; - TString AsShortIPv6() const; - TString AsLongIP() const; - - ui128 AsUint128() const; - ui64 GetHigh64() const; - ui64 GetLow64() const; - ui64 GetHigh64LE() const; - ui64 GetLow64LE() const; - - bool IsNet64Broadcast() const; - bool IsNet64Host() const; - - TAddress GetNet64() const { - return TAddress::FromUint128(ui128{GetHigh64LE()} << 64); - } - - TAddress GetPrevNet64() const { - return TAddress::FromUint128(ui128{GetHigh64LE() - 1} << 64); - } - - TAddress GetNextNet64() const { - return TAddress::FromUint128(ui128{GetHigh64LE() + 1} << 64); - } - - TString Format(EAddressFormat format) const; - - int GetType() const { return IsIPv4() ? 4 : 6; } - bool IsIPv4() const; - - // Mutating - TAddress Next() const; - TAddress Prev() const; - - static ui128 Distance(const TAddress& a, const TAddress& b); -}; - -using EAddressFormat = TAddress::EAddressFormat; - -struct TNetwork { - TAddress begin; - TAddress end; - - TNetwork(const TString& str = "0.0.0.0/32"); - -private: - TNetwork(const TVector<TString>& data); - TNetwork(const TString& net, size_t mask); -}; - -} // NIPREG - -template <> -struct THash<NIPREG::TAddress> { - inline size_t operator()(const NIPREG::TAddress& address) const { - return MurmurHash<size_t>((const void*)address.Data, 16); - } -}; - -IOutputStream& operator<<(IOutputStream& output, const NIPREG::TAddress& addr); diff --git a/library/cpp/ipreg/checker.cpp b/library/cpp/ipreg/checker.cpp deleted file mode 100644 index 9c41d27dc0..0000000000 --- a/library/cpp/ipreg/checker.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include "checker.h" - -namespace NIPREG { - -void TChecker::CheckNextFatal(const TAddress& first, const TAddress& last) { - if (!CheckNext(first, last)) - ythrow yexception() << "IPREG format error: " << first.AsIPv6() << " - " << last.AsIPv6(); -} - -TFlatChecker::TFlatChecker() : HasState(false) { -} - -bool TFlatChecker::CheckNext(const TAddress& first, const TAddress& last) { - bool result = true; - - if (first > last) - result = false; - - if (HasState && first <= PrevLast) - result = false; - - PrevLast = last; - HasState = true; - - return result; -} - -TIntersectingChecker::TIntersectingChecker() : HasState(false) { -} - -bool TIntersectingChecker::CheckNext(const TAddress& first, const TAddress& last) { - bool result = true; - - if (first > last) - result = false; - - if (HasState && (first < PrevFirst || (first == PrevFirst && last < PrevLast))) - result = false; - - PrevFirst = first; - PrevLast = last; - HasState = true; - - return result; -} - -} diff --git a/library/cpp/ipreg/checker.h b/library/cpp/ipreg/checker.h deleted file mode 100644 index 1a04e62e77..0000000000 --- a/library/cpp/ipreg/checker.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include "address.h" - -namespace NIPREG { - -class TChecker { -public: - virtual ~TChecker() {} - - virtual bool CheckNext(const TAddress& first, const TAddress& last) = 0; - - void CheckNextFatal(const TAddress& first, const TAddress& last); -}; - -class TFlatChecker: public TChecker { -private: - TAddress PrevLast; - bool HasState; - -public: - TFlatChecker(); - virtual bool CheckNext(const TAddress& first, const TAddress& last); -}; - -class TIntersectingChecker: public TChecker { -private: - TAddress PrevFirst; - TAddress PrevLast; - bool HasState; - -public: - TIntersectingChecker(); - virtual bool CheckNext(const TAddress& first, const TAddress& last); -}; - -} diff --git a/library/cpp/ipreg/merge.cpp b/library/cpp/ipreg/merge.cpp deleted file mode 100644 index d31e9dce5d..0000000000 --- a/library/cpp/ipreg/merge.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "merge.h" - -namespace NIPREG { - -void MergeIPREGS(TReader &a, TReader& b, std::function<void(const TAddress& first, const TAddress& last, const TString *a, const TString *b)>&& proc) { - bool hasA = a.Next(); - bool hasB = b.Next(); - - TAddress top = TAddress::Lowest(); - TAddress bottom; - - do { - // tweak ranges we've passed - if (hasA && top > a.Get().Last) - hasA = a.Next(); - if (hasB && top > b.Get().Last) - hasB = b.Next(); - - if (!hasA && !hasB) { - // both rangesets have ended - bottom = TAddress::Highest(); - proc(top, bottom, nullptr, nullptr); - break; - } - - const bool inA = hasA && a.Get().First <= top; - const bool inB = hasB && b.Get().First <= top; - - if (!hasA) { - // rangeset a has ended - if (inB) { - bottom = b.Get().Last; - proc(top, bottom, nullptr, &b.Get().Data); - } else { - bottom = b.Get().First.Prev(); - proc(top, bottom, nullptr, nullptr); - } - } else if (!hasB) { - // rangeset b has ended - if (inA) { - bottom = a.Get().Last; - proc(top, bottom, &a.Get().Data, nullptr); - } else { - bottom = a.Get().First.Prev(); - proc(top, bottom, nullptr, nullptr); - } - } else if (inA && inB) { - // inside both ranges - bottom = Min(a.Get().Last, b.Get().Last); - proc(top, bottom, &a.Get().Data, &b.Get().Data); - } else if (inA) { - // only in range a - bottom = Min(a.Get().Last, b.Get().First.Prev()); - proc(top, bottom, &a.Get().Data, nullptr); - } else if (inB) { - // only in range b - bottom = Min(b.Get().Last, a.Get().First.Prev()); - proc(top, bottom, nullptr, &b.Get().Data); - } else { - // outside both ranges - bottom = Min(a.Get().First.Prev(), a.Get().First.Prev()); - proc(top, bottom, nullptr, nullptr); - } - - top = bottom.Next(); - } while (bottom != TAddress::Highest()); -} - -} diff --git a/library/cpp/ipreg/merge.h b/library/cpp/ipreg/merge.h deleted file mode 100644 index 123b88276c..0000000000 --- a/library/cpp/ipreg/merge.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include "reader.h" - -#include <functional> - -namespace NIPREG { - -void MergeIPREGS(TReader &a, TReader& b, std::function<void(const TAddress& first, const TAddress& last, const TString *a, const TString *b)>&& proc); - -} diff --git a/library/cpp/ipreg/range.cpp b/library/cpp/ipreg/range.cpp deleted file mode 100644 index 1b90022482..0000000000 --- a/library/cpp/ipreg/range.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "range.h" - -#include "util_helpers.h" - -#include <library/cpp/int128/int128.h> -#include <util/generic/maybe.h> -#include <util/string/split.h> -#include <util/string/vector.h> - -#include <stdexcept> - -namespace NIPREG { - -namespace { - EAddressFormat CurrentFormat = EAddressFormat::SHORT_IPV6; - - void throwExceptionWithFormat(const TString& line) { - throw yexception() << "wanted format: ${ip-begin}-${ip-end}[\t${data}]; $input := '" << line << "'"; - } - - void throwIfReverseOrder(TAddress first, TAddress last) { - if (first > last) { - const TString err_msg = "reverse order of addresses (first / last) => " + first.AsIPv6() + " / " + last.AsIPv6(); - throw std::runtime_error(err_msg.data()); - } - } -} // anon-ns - -TRange::TRange(TAddress first, TAddress last, const TString& data) - : First(first) - , Last(last) - , Data(data) -{ - throwIfReverseOrder(First, Last); -} - -TRange::TRange(const TNetwork& net, const TString& data) - : TRange(net.begin, net.end, data) -{ -} - -ui128 TRange::GetAddrsQty() const { - return TAddress::Distance(First, Last) + 1; -} - -TRange TRange::BuildRange(const TString& line, bool isEmptyData, const TString& dataDelim) { - const TVector<TString> parts = StringSplitter(line).SplitBySet(dataDelim.data()).SkipEmpty(); - if (parts.empty()) { - throwExceptionWithFormat(line); - } - - if (TString::npos != parts[0].find('/')) { - const auto data = (2 == parts.size()) ? parts[1] : ""; - return TRange(TNetwork(parts[0]), data); - } - - const TVector<TString> range_parts = StringSplitter(parts[0]).SplitBySet(" -\t").SkipEmpty(); - if (2 != range_parts.size() || range_parts[0].empty() || range_parts[1].empty()) { - throwExceptionWithFormat(line); - } - - if (!isEmptyData && (2 != parts.size() || parts[1].empty())) { - throwExceptionWithFormat(line); - } - - const auto& data = (2 == parts.size()) ? parts[1] : ""; - return TRange(TAddress::ParseAny(range_parts[0]), TAddress::ParseAny(range_parts[1]), data); -} - -bool TRange::Contains(const TRange& range) const { - return First <= range.First && range.Last <= Last; -} - -bool TRange::Contains(const TAddress& ip) const { - return First <= ip && ip <= Last; -} - -void SetIpFullOutFormat() { - CurrentFormat = EAddressFormat::IPV6; -} - -void SetIpShortOutFormat() { - CurrentFormat = EAddressFormat::SHORT_IPV6; -} - -void TRange::DumpTo(IOutputStream& output, bool withData, EAddressFormat format) const { - output << First.Format(format) << '-' << Last.Format(format); - if (withData) { - output << '\t' << Data; - } -} - -bool TRange::IsIpv6Only() const { - return 6 == First.GetType() && 6 == Last.GetType(); -} - -bool TRange::IsIpv4Only() const { - return 4 == First.GetType() && 4 == Last.GetType(); -} - -bool TRange::IsRangeInSingleNet64() const { - return First.GetHigh64() == Last.GetHigh64(); -} - -TRange TRange::BuildRangeByFirst(const TRange& range, int prefix) { - Y_UNUSED(prefix); - return TRange(TAddress::MakeNet64Prefix(range.First), - TAddress::MakeNet64Broadcast(range.IsRangeInSingleNet64() ? range.Last : range.Last.GetPrevNet64()) , - range.Data - ); -} - -TRange TRange::BuildRangeByLast(const TRange& range, int prefix) { - Y_UNUSED(prefix); - const auto prevLast = TAddress::MakeNet64Broadcast(range.Last.GetPrevNet64()); - return TRange(range.First, prevLast, range.Data); -// const auto prevLast = TAddress::MakeNet64Broadcast(range.Last); -// return TRange(TAddress::MakeNet64Prefix(range.First), prevLast, range.Data); -} - -TVector<TRange> SplitRangeNets(const TRange& origRange, bool addOrigSize, int maskLen) { - Y_UNUSED(maskLen); - - static const auto firstCheckedIpv6Prefix = TAddress::ParseAny("2000::"); - - const auto& CalcNetSize = [&](const TRange& range) { - static const auto MAX_FOR_DIGITS_ANSWER = ui128{1 << 30}; - const auto netSize = range.GetAddrsQty(); - return (netSize < MAX_FOR_DIGITS_ANSWER) ? ToString(netSize) : "huge"; - }; - - const auto& AddSizeField = [&](TRange& changedRange, const TRange& origAddrRange) { - if (addOrigSize) { - changedRange.Data = AddJsonAttrs({"orig_net_size"}, changedRange.Data, TMaybe<TString>(CalcNetSize(origAddrRange))); - } - }; - - if (origRange.Last <= firstCheckedIpv6Prefix) { - return {origRange}; - } - - if (origRange.IsRangeInSingleNet64()) { - TRange theOne{ - TAddress::MakeNet64Prefix(origRange.First), - TAddress::MakeNet64Broadcast(origRange.Last), - origRange.Data - }; - AddSizeField(theOne, origRange); - return {theOne}; - } - - TRange range{origRange}; - TVector<TRange> result; { - // 1st - TRange byFirst{TAddress::MakeNet64Prefix(range.First),TAddress::MakeNet64Broadcast(range.First), range.Data}; - AddSizeField(byFirst, {range.First, byFirst.Last, ""}); - result.push_back(byFirst); - - // maybe 2nd - range.First = byFirst.Last.Next(); - if (!range.IsRangeInSingleNet64()) { - const TAddress lastPrefix = TAddress::MakeNet64Prefix(range.Last); - - TRange inTheMiddle{TAddress::MakeNet64Prefix(range.First), lastPrefix.Prev(), range.Data}; - AddSizeField(inTheMiddle, inTheMiddle); - result.push_back(inTheMiddle); - - range.First = lastPrefix; - } - - // the last - TRange byLast{range.First, TAddress::MakeNet64Broadcast(range.Last), range.Data}; - AddSizeField(byLast, {byLast.First, range.Last, ""}); - result.push_back(byLast); - } - return result; -} - -bool operator==(const TRange& lhs, const TRange& rhs) { - return lhs.First == rhs.First && lhs.Last == rhs.Last; -} - -} // ns IPREG - -IInputStream& operator>>(IInputStream& input, NIPREG::TRange& range) { - TString line; - if (!input.ReadLine(line)) { - throw std::runtime_error("unable to load data from stream"); - } - range = NIPREG::TRange::BuildRange(line); - return input; -} - -IOutputStream& operator<<(IOutputStream& output, const NIPREG::TRange& range) { - range.DumpTo(output, true, NIPREG::CurrentFormat); - output << "\n"; - return output; -} diff --git a/library/cpp/ipreg/range.h b/library/cpp/ipreg/range.h deleted file mode 100644 index 15b2c693b0..0000000000 --- a/library/cpp/ipreg/range.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include "address.h" - -#include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/stream/input.h> -#include <util/stream/output.h> - -#include <stdexcept> - -namespace NIPREG { - -struct TRange { - TAddress First; - TAddress Last; - TString Data; - - TRange() = default; - TRange(TAddress first, TAddress last, const TString& data); - TRange(const TNetwork& net, const TString& data); - - ui128 GetAddrsQty() const; - void DumpTo(IOutputStream& output, bool withData = true, EAddressFormat format = EAddressFormat::SHORT_IP) const; - - static TRange BuildRange(const TString& line, bool isEmptyData = false, const TString& dataDelim = "\t"); - bool Contains(const TRange& range) const; - bool Contains(const TAddress& ip) const; - - static TRange BuildRangeByFirst(const TRange& range, int prefix = 64); - static TRange BuildRangeByLast(const TRange& range, int prefix = 64); - - bool IsIpv6Only() const; - bool IsIpv4Only() const; - - bool IsRangeInSingleNet64() const; -}; -using TGenericEntry = TRange; - -void SetIpFullOutFormat(); -void SetIpShortOutFormat(); - -TVector<TRange> SplitRangeNets(const TRange& range, bool addOrigSize = false, int maskLen = 64); - -bool operator==(const TRange& lhs, const TRange& rhs); -inline bool operator!=(const TRange& lhs, const TRange& rhs) { return !(lhs == rhs); } -} // ns NIPREG - -IInputStream& operator>>(IInputStream& input, NIPREG::TRange& range); -IOutputStream& operator<<(IOutputStream& output, const NIPREG::TRange& range); diff --git a/library/cpp/ipreg/reader.cpp b/library/cpp/ipreg/reader.cpp deleted file mode 100644 index 2e4ae1b178..0000000000 --- a/library/cpp/ipreg/reader.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "reader.h" - -#include <util/stream/file.h> - -namespace NIPREG { - -namespace { - const TString DASH_FNAME = "-"; -} - -TReader::TReader(const TString& filename, bool isEmptyData, const TString& dataDelim) - : OwnedStreamPtr((filename.empty() || filename == DASH_FNAME) ? nullptr : new TFileInput(filename)) - , Stream(OwnedStreamPtr ? *OwnedStreamPtr.Get() : Cin) - , IsEmptyData(isEmptyData) - , DataDelim(dataDelim) -{ -} - -TReader::TReader(IInputStream& stream, bool isEmptyData, const TString& dataDelim) - : Stream(stream) - , IsEmptyData(isEmptyData) - , DataDelim(dataDelim) -{ -} - -bool TReader::Next() { - TString line; - if (!Stream.ReadLine(line)) - return false; - - CurrentEntry = TRange::BuildRange(line, IsEmptyData, DataDelim); - if (CurrentEntry.Data.empty()) { - if (!IsEmptyData) { - throw yexception() << "empty data part detected for [" << line << "]"; - } - CurrentEntry.Data = ""; - } - return true; -} - -TReverseByLastIpReader::TReverseByLastIpReader(const TString& filename, bool isEmptyData, const TString& dataDelim) - : TParent(filename, isEmptyData, dataDelim) -{ - Valid = TParent::Next(); -} - -TReverseByLastIpReader::TReverseByLastIpReader(IInputStream& stream, bool isEmptyData, const TString& dataDelim) - : TParent(stream, isEmptyData, dataDelim) -{ - Valid = TParent::Next(); -} - -bool TReverseByLastIpReader::Next() { - if (!CurrentEntries.empty()) { - CurrentEntries.pop_back(); - } - - if (CurrentEntries.empty()) { - return PrepareNextEntries(); - } else { - return true; - } -} - -const TGenericEntry& TReverseByLastIpReader::Get() const { - return CurrentEntries.back(); -} - -bool TReverseByLastIpReader::PrepareNextEntries() { - if (!Valid) { - return false; - } - - do { - CurrentEntries.push_back(TParent::Get()); - Valid = TParent::Next(); - } while (Valid && TParent::Get().First == CurrentEntries.back().First); - - return true; -} - -} // NIPREG diff --git a/library/cpp/ipreg/reader.h b/library/cpp/ipreg/reader.h deleted file mode 100644 index b68faedcf9..0000000000 --- a/library/cpp/ipreg/reader.h +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include "range.h" - -#include <util/generic/ptr.h> -#include <util/generic/string.h> -#include <util/stream/input.h> - -namespace NIPREG { - -class TReader { -public: - TReader(const TString& filename = "", bool isEmptyData = false, const TString& dataDelim = "\t"); - TReader(IInputStream& stream, bool isEmptyData = false, const TString& dataDelim = "\t"); - - virtual bool Next(); - - virtual const TGenericEntry& Get() const { - return CurrentEntry; - } - - operator IInputStream&() { - return Stream; - } - - virtual ~TReader() = default; - -private: - TAutoPtr<IInputStream> OwnedStreamPtr; - IInputStream& Stream; - - bool IsEmptyData = false; - const TString DataDelim; - - TGenericEntry CurrentEntry; -}; - -class TReverseByLastIpReader : public TReader { -public: - using TParent = TReader; - - explicit TReverseByLastIpReader(const TString& filename = "", bool isEmptyData = false, const TString& dataDelim = "\t"); - explicit TReverseByLastIpReader(IInputStream& stream, bool isEmptyData = false, const TString& dataDelim = "\t"); - - bool Next() override; - - const TGenericEntry& Get() const override; - -private: - bool PrepareNextEntries(); - -private: - bool Valid = false; - TVector<TGenericEntry> CurrentEntries; -}; - -} // NIPREG diff --git a/library/cpp/ipreg/sources.cpp b/library/cpp/ipreg/sources.cpp deleted file mode 100644 index 70e4b2a6da..0000000000 --- a/library/cpp/ipreg/sources.cpp +++ /dev/null @@ -1,100 +0,0 @@ -#include "sources.h" - -#include <cstdint> -#include <stdexcept> - -namespace NIPREG { - -const ui32 ML_COEFF_DEFAULT = 50000; -ui32 ML_COEFFICIENT = ML_COEFF_DEFAULT; - -void SetCoefficient(ui32 type, ui32 value) { - switch (type) { - case SOURCE_ML: - ML_COEFFICIENT = value; - break; - default: - throw std::runtime_error("unsupported setcoeff-type"); - } -} - -double GetSourceCoefficient(ui32 type) { - switch (type) { - case SOURCE_MAIL: return 1; - case SOURCE_PHONE: return 3; - case SOURCE_GEO: return 4; - case SOURCE_COUNTRY: return 100; - case SOURCE_DOMAIN_NAME: return 1; - case SOURCE_MANUAL: return 1; - case SOURCE_YANDEX_NETWORK: return 1000; // NB: in yandex_noc source weight := 10K - case SOURCE_SPECIAL_NETWORK: return 1000000; - case SOURCE_PROVIDERS: return 50; - case SOURCE_MAXMIND: return 4; - case SOURCE_UNITED_UID_YANDEX_MAPS: return 0.7; - case SOURCE_RELIABILITY_AROUND: return 1; - case SOURCE_UNITED_UID_WEATHER: return 0.9; - case SOURCE_UNITED_UID_YANDEX_GID: return 1; - case SOURCE_UNITED_UID_SEARCH_QUERY: return 1.5; - case SOURCE_UNITED_UID_SEARCH_IN_REG: return 2; - case SOURCE_BGP_ASPATH_COMMUNITY: return 10; - case SOURCE_ML: return ML_COEFFICIENT; - } - return 0; -} - -bool SourceWantApplyDepthCoeff(ui32 source_type) { - switch (source_type) { - case SOURCE_MAIL: - case SOURCE_PHONE: - case SOURCE_GEO: - case SOURCE_COUNTRY: - case SOURCE_DOMAIN_NAME: - return true; - default: - return false; - } -} - -bool SourceWantApplyNetsizeCoeff(ui32 source_type) { - return SourceWantApplyDepthCoeff(source_type); -} - -bool SourceIsHuman(ui32 source_type) { - switch (source_type) { - case SOURCE_UNITED_UID_SEARCH_QUERY: - case SOURCE_UNITED_UID_SEARCH_IN_REG: - case SOURCE_UNITED_UID_WEATHER: - case SOURCE_UNITED_UID_YANDEX_GID: - case SOURCE_UNITED_UID_YANDEX_MAPS: - return true; - default: - return false; - } -} - -bool SourceIsForRegionNormalize(ui32 source_type) { - return SourceIsHuman(source_type); -} - -bool SourceIsForEnoughHumanData(ui32 source_type) { - switch (source_type) { - case SOURCE_COUNTRY: - case SOURCE_MANUAL: - case SOURCE_PROVIDERS: - case SOURCE_YANDEX_NETWORK: - case SOURCE_SPECIAL_NETWORK: - return true; - default: - return SourceIsHuman(source_type); - } -} - -bool SourceIsForFewHumanData(ui32 source_type) { - return !SourceIsHuman(source_type); -} - -bool SourceIsForReliability(ui32 source_type) { - return SourceIsHuman(source_type) || SOURCE_YANDEX_NETWORK == source_type; -} - -} // NIPREG diff --git a/library/cpp/ipreg/sources.h b/library/cpp/ipreg/sources.h deleted file mode 100644 index a517e57cb8..0000000000 --- a/library/cpp/ipreg/sources.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -#include <util/system/types.h> - -namespace NIPREG { - -// TODO(dieash@) make some automation/spicification via enabled sources (with full list) -enum ESourceType { - // TODO(dieash@) full list of known src-types in choice-region-data: - // https://yql.yandex-team.ru/Operations/XEo-amim9Z2_PCkcZgQ0Wu-sqXAm1K8NMPesswuPzbk= - SOURCE_UNKNOWN = 0, // stub - SOURCE_MAIL = 1 /* "MAIL" */, // ripe src - SOURCE_PHONE = 2 /* "PHONE" */, // ripe src - SOURCE_GEO = 3 /* "GEO" */, // ripe src - SOURCE_COUNTRY = 4 /* "COUNTRY" */, // ripe, delegated, maxmind src - SOURCE_DOMAIN_NAME = 5 /* "DOMAIN_NAME" */, // ripe src - SOURCE_MANUAL = 6 /* "MANUAL" */, // manual src - SOURCE_YANDEX_NETWORK = 9 /* "YANDEX_NETWORK" */, // yandex-noc src - SOURCE_SPECIAL_NETWORK = 10 /* "SPECIAL_NETWORK" */, // spec-net src - SOURCE_PROVIDERS = 15 /* "PROVIDERS" */, // ripe src - SOURCE_MAXMIND = 17 /* "MAXMIND" */, // maxmind src - SOURCE_UNITED_UID_YANDEX_MAPS = 19 /* "UNITED_UID_YANDEX_MAPS" */, // uuid src - SOURCE_RELIABILITY_AROUND = 20 /* "RELIABILITY_AROUND" */, // rel-around src - SOURCE_UNITED_UID_WEATHER = 21 /* "UNITED_UID_WEATHER" */, // uuid src - SOURCE_UNITED_UID_YANDEX_GID = 22 /* "UNITED_UID_YANDEX_GID" */, // uuid src - SOURCE_UNITED_UID_SEARCH_QUERY = 23 /* "UNITED_UID_SEARCH_QUERY" */, // uuid src - SOURCE_UNITED_UID_SEARCH_IN_REG = 24 /* "UNITED_UID_SEARCH_IN_REG" */, // uuid src - SOURCE_BGP_ASPATH_COMMUNITY = 25 /* "BGP_ASPATH_COMMUNITY" */, // bgp src // NOTA BENE: clash with https://st.yandex-team.ru/IPREG-3722#5b367ec214778c001a5a3f7c - SOURCE_ML_INT_26 = 26 /* "ML_INT_26" */, - SOURCE_ML_INT_27 = 27 /* "ML_INT_27" */, - SOURCE_ML_INT_28 = 28 /* "ML_INT_28" */, - SOURCE_ML_INT_29 = 29 /* "ML_INT_29" */, - SOURCE_ML_INT_30 = 30 /* "ML_INT_30" */, - SOURCE_ML_INT_31 = 31 /* "ML_INT_31" */, - SOURCE_ML_INT_32 = 32 /* "ML_INT_32" */, - SOURCE_ML_INT_33 = 33 /* "ML_INT_33" */, - SOURCE_ML_INT_34 = 34 /* "ML_INT_34" */, - SOURCE_PRECISE_GEO_ML = 35 /* "ML_INT_35" */, - SOURCE_ML = 36 /* "ML" */, // ml src -}; - -double GetSourceCoefficient(ui32 type); -bool SourceWantApplyDepthCoeff(ui32 source_type); -bool SourceWantApplyNetsizeCoeff(ui32 source_type); -bool SourceIsHuman(ui32 source_type); -bool SourceExcludeFromReliability(ui32 source_type); -bool SourceIsForRegionNormalize(ui32 source_type); -bool SourceIsForEnoughHumanData(ui32 source_type); -bool SourceIsForFewHumanData(ui32 source_type); -bool SourceIsForReliability(ui32 source_type); - -void SetCoefficient(ui32 type, ui32 value); -} // namespace NIPREG diff --git a/library/cpp/ipreg/split.cpp b/library/cpp/ipreg/split.cpp deleted file mode 100644 index 19b7b85d51..0000000000 --- a/library/cpp/ipreg/split.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "split.h" - -#include <util/generic/list.h> -#include <util/generic/vector.h> - -namespace NIPREG { - -void SplitIPREG(TReader &reader, std::function<void(const TAddress& first, const TAddress& last, const TVector<TString>& data)>&& proc) { - TList<TGenericEntry> prevEntries; - - bool end; - do { - end = !reader.Next(); - - while (!prevEntries.empty() && (end || prevEntries.front().First < reader.Get().First)) { - // find smallest common range to process - TAddress first = prevEntries.front().First; - TAddress last = end ? TAddress::Highest() : reader.Get().First.Prev(); - - for (const auto& entry: prevEntries) - last = Min(last, entry.Last); - - // extract data for the range - TVector<TString> strings; - auto item = prevEntries.begin(); - while (item != prevEntries.end()) { - Y_ASSERT(item->First == first); - strings.push_back(item->Data); - - if (item->Last == last) { - // item completely processed, remove - auto victim = item; - item++; - prevEntries.erase(victim); - } else { - // item still have part of range left, update it - item->First = last.Next(); - item++; - } - } - - proc(first, last, strings); - } - - if (!end) { - if (!prevEntries.empty()) { - Y_ASSERT(prevEntries.front().First == reader.Get().First); - } - prevEntries.push_back(reader.Get()); - } - } while (!end); -} - -} diff --git a/library/cpp/ipreg/split.h b/library/cpp/ipreg/split.h deleted file mode 100644 index 9710ff5f6d..0000000000 --- a/library/cpp/ipreg/split.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "reader.h" - -#include <util/generic/vector.h> - -#include <functional> - -namespace NIPREG { - -void SplitIPREG(TReader &reader, std::function<void(const TAddress& first, const TAddress& last, const TVector<TString>& data)>&& proc); - -} diff --git a/library/cpp/ipreg/stopwatch.cpp b/library/cpp/ipreg/stopwatch.cpp deleted file mode 100644 index 31d99d2758..0000000000 --- a/library/cpp/ipreg/stopwatch.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "stopwatch.h" - -#include <util/stream/str.h> - -namespace NIPREG { - -TStopWatch::TStopWatch() { - Start = TInstant::Now(); -} - -TStopWatch::~TStopWatch() { - try { - if (TaskRunning) - StopTask(); - - Cerr << "Everything done in " << FormatTime(TInstant::Now() - Start) << Endl; - } catch (...) { - // not much problem if we can't write the summary - } -} - -void TStopWatch::StartTask(const TString& message) { - StopTask(); - - ++TaskOrdNum; - TaskStart = TInstant::Now(); - TaskRunning = true; - Cerr << TaskOrdNum << ". " << message << "...\n"; -} - -void TStopWatch::StopTask() { - if (TaskRunning) { - Cerr << "Done in " << FormatTime(TInstant::Now() - TaskStart) << Endl; - TaskRunning = false; - } -} - -TString TStopWatch::FormatTime(const TDuration& dur) { - auto sec = dur.Seconds(); - - TStringStream ss; - - if (sec < 60) - ss << sec << "s"; - else if (sec < 3600) - ss << sec / 60 << "m " << sec % 60 << "s"; - else - ss << sec / 3600 << "h " << (sec / 60) % 60 << "m"; - - return ss.Str(); -} - -} diff --git a/library/cpp/ipreg/stopwatch.h b/library/cpp/ipreg/stopwatch.h deleted file mode 100644 index 0873a638f6..0000000000 --- a/library/cpp/ipreg/stopwatch.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include <util/datetime/base.h> - -namespace NIPREG { - -class TStopWatch { -private: - TInstant Start; - TInstant TaskStart; - bool TaskRunning = false; - ui32 TaskOrdNum = 0; - -private: - TString FormatTime(const TDuration& dur); - -public: - TStopWatch(); - ~TStopWatch(); - - void StartTask(const TString& message); - void StopTask(); -}; - -} diff --git a/library/cpp/ipreg/util_helpers.cpp b/library/cpp/ipreg/util_helpers.cpp deleted file mode 100644 index 1b64baef55..0000000000 --- a/library/cpp/ipreg/util_helpers.cpp +++ /dev/null @@ -1,705 +0,0 @@ -#include "util_helpers.h" - -#include <library/cpp/ipreg/reader.h> - -#include <library/cpp/json/json_reader.h> -#include <library/cpp/json/json_value.h> -#include <library/cpp/json/json_writer.h> - -#include <library/cpp/geobase/lookup.hpp> - -#include <util/generic/ptr.h> -#include <util/generic/vector.h> -#include <util/stream/file.h> -#include <util/stream/format.h> -#include <util/string/split.h> -#include <util/string/vector.h> -#include <util/stream/str.h> - -namespace NIPREG { - namespace { - double FindNearestCoarsedCoeff(double baseValue) { - using ValueStepPair = std::pair<double, double>; - static const double fix = 0.01; - static const TVector<ValueStepPair> limits = { - { 100., 20. + fix }, - { 500., 50. + fix }, - { 2500., 100. + fix }, - { 10000., 1000. + fix }, - { 50000., 10000. + fix } - }; - - double last_step{}; - for (const auto& pair : limits) { - last_step = pair.second; - if (baseValue <= pair.first) { - break; - } - } - return last_step; - } - - double CalcCoarsedValue(double baseValue) { - if (baseValue < 0.) { - ythrow yexception() << "negative value detected: " << baseValue; - } - - // TODO(dieash) some "strange" calculation below - const auto coarsedCoeff = FindNearestCoarsedCoeff(baseValue); - const double fixedValue = coarsedCoeff * static_cast<int>((baseValue + coarsedCoeff / 2) / coarsedCoeff); - return fixedValue; - } - - const char * const REL_FIELD = "reliability"; - const char * const REG_FIELD = "region_id"; - - void CorrectReliability(NJson::TJsonValue& jsonData, const TString& data) { - jsonData = ParseJsonString(data); - auto& jsonMap = jsonData.GetMapSafe(); - - auto& reliabilityField = jsonMap[REL_FIELD]; - reliabilityField = CalcCoarsedValue(reliabilityField.GetDouble()); - } - - TString SortJson(const TString& data) { - NJson::TJsonValue json = ParseJsonString(data); - return SortJsonData(json); - } - - static TString MergeJsonsData(const TString& data1, const TString& data2, bool sortKeys = false, bool countMerge = false) { - static const char* MERGE_QTY = "_mrg_qty_"; - - auto json1 = ParseJsonString(data1); - const auto& json2 = ParseJsonString(data2); - - if (countMerge && !json1.Has(MERGE_QTY)) { - json1.InsertValue(MERGE_QTY, 1); - } - - for (const auto& item : json2.GetMapSafe()) { - json1.InsertValue(item.first, item.second); - } - - if (countMerge) { - json1.InsertValue(MERGE_QTY, (json1[MERGE_QTY].GetInteger() + 1)); - } - - const auto NoFormat = false; - return NJson::WriteJson(json1, NoFormat, sortKeys); - } - - bool IsJsonEquals(const TVector<TString>& excludeFieldsList, const TString& data1, const TString& data2) { - if (excludeFieldsList.empty()) { - return data1 == data2; - } - - auto json1 = ParseJsonString(data1); - auto json2 = ParseJsonString(data2); - - for (const auto& excludeField : excludeFieldsList) { - json1.EraseValue(excludeField); - json2.EraseValue(excludeField); - } - - return json1 == json2; - } - - class Patcher { - public: - Patcher(TReader& base, TReader& patch, IOutputStream& output, bool sortData) - : BaseStream(base) - , PatchStream(patch) - , Output(output) - , SortData(sortData) - { - GetNext(BaseStream, BaseRangePtr); - GetNext(PatchStream, PatchRangePtr); - } - - void Process() { - while (BaseRangePtr || PatchRangePtr) { - if ( CheckPatch() - || OnlySecond(BaseRangePtr, PatchRangePtr, PatchStream) - || OnlySecond(PatchRangePtr, BaseRangePtr, BaseStream) - || Range1BeforeRange2(BaseRangePtr, PatchRangePtr, BaseStream) - || Range1BeforeRange2(PatchRangePtr, BaseRangePtr, PatchStream) - || FirstEndInSecond(BaseRangePtr, PatchRangePtr) - || FirstEndInSecond(PatchRangePtr, BaseRangePtr) - || FirstStartInSecond(BaseRangePtr, PatchRangePtr, BaseStream, PatchStream)) - { - continue; - } - } - } - - private: - void GetNext(TReader& stream, TAutoPtr<TRange>& rangePtr) { - if (stream.Next()) { - if (rangePtr) { - *rangePtr = stream.Get(); - } else { - rangePtr.Reset(new TRange(stream.Get())); - } - } - else { - rangePtr.Reset(); - } - } - - void Print(const TRange& range) const { - Output << range; - } - - void PrintSorted(const TRange& range) const { - const TRange sortedCopy{range.First, range.Last, SortJson(range.Data)}; - Output << sortedCopy; - } - - bool CheckPatch() { - if (PatchRangePtr && PatchRangePtr->First > PatchRangePtr->Last) { - GetNext(PatchStream, PatchRangePtr); - return true; - } - return false; - } - - bool OnlySecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream) { - if (!first && second) { - Print(*second); - GetNext(stream, second); - return true; - } - return false; - } - - bool Range1BeforeRange2(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream) { - if (first->Last < second->First) { - Print(*first); - GetNext(stream, first); - return true; - } - return false; - } - - bool FirstEndInSecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second) { - if (first->First < second->First) { - auto leftBaseRange = *first; - leftBaseRange.Last = second->First.Prev(); - Print(leftBaseRange); - - first->First = second->First; - return true; - } - return false; - } - - bool FirstStartInSecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream1, TReader& stream2) { - if (first->First >= second->First) { - auto leftBaseRange = *first; - leftBaseRange.Data = MergeJsonsData(first->Data, second->Data); - - if (first->Last <= second->Last) { - second->First = first->Last.Next(); - GetNext(stream1, first); - if (second->First == TAddress::Highest()) { - GetNext(stream2, second); - } - } else { - leftBaseRange.Last = second->Last; - first->First = second->Last.Next(); - GetNext(stream2, second); - } - - SortData ? PrintSorted(leftBaseRange) : Print(leftBaseRange); - return true; - } - return false; - } - - private: - TAutoPtr<TRange> BaseRangePtr; - TAutoPtr<TRange> PatchRangePtr; - - TReader& BaseStream; - TReader& PatchStream; - IOutputStream& Output; - const bool SortData = false; - }; - - struct IpChecker { - static void LessOrEqual(const size_t row, const TAddress& lastIp, const TAddress& checkedIp) { - if (lastIp <= checkedIp) { - return; - } - GenErr(row, " <= ", lastIp, checkedIp); - } - - static void Less(const size_t row, const TAddress& lastIp, const TAddress& checkedIp) { - if (lastIp < checkedIp) { - return; - } - GenErr(row, " < ", lastIp, checkedIp); - } - - static void GenErr(const size_t row, const char* msg, const TAddress& lastIp, const TAddress& checkedIp) { - const TString& errMsg = ">>> row#" + ToString(row) + "; " + lastIp.AsIPv6() + msg + checkedIp.AsIPv6(); - throw std::runtime_error(errMsg.data()); - } - }; - - class MergerBy3 { - public: - MergerBy3(const TString& geodataPath, IOutputStream& output) - : Geobase(geodataPath) - , Out(output) - {} - - void Process(TReader& input, bool ByRegsOnly, bool silentMode) { - while (input.Next()) { - Trio.push_back(input.Get()); - if (3 > Trio.size()) { - continue; - } - - auto& range2Data = (++Trio.begin())->Data; - if (range2Data.npos != range2Data.find("\"is_placeholder\":1")) { - PrintAndDrop1stRange(); - PrintAndDrop1stRange(); - continue; - } - - const auto range1RegId = GetRegionId(Trio.begin()->Data); - const auto range3RegId = GetRegionId(Trio.rbegin()->Data); - if (range1RegId != range3RegId) { - PrintAndDrop1stRange(); - continue; - } - - const auto range2RegId = GetRegionId(range2Data); - const auto& parentsIds = Geobase.GetParentsIds(range1RegId); - if (parentsIds.end() == std::find(parentsIds.begin() + 1, parentsIds.end(), range2RegId)) { - PrintAndDrop1stRange(); - continue; - } - - if (!ByRegsOnly) { - const auto range1Size = Trio.begin()->GetAddrsQty(); - const auto range2Size = (++Trio.begin())->GetAddrsQty(); - const auto range3Size = Trio.rbegin()->GetAddrsQty(); - - if (range2Size > (range1Size + range3Size)) { - PrintAndDrop1stRange(); - continue; - } - } - - range2Data = SubstRegionId(range2Data, range1RegId); - if (!silentMode) { - PrintSubstNote(range2RegId, range1RegId); - } - - PrintAndDrop1stRange(); // 1st - PrintAndDrop1stRange(); // 2nd - } - - while (Trio.end() != Trio.begin()) { - PrintAndDrop1stRange(); - } - } - private: - void PrintAndDrop1stRange() { - Out << *Trio.begin(); - Trio.erase(Trio.begin()); - } - - void PrintSubstNote(const int oldId, const int newId) { - const bool NoData = false; - Cerr << "s/" << oldId << "/" << newId << "/: ["; - - Trio.begin()->DumpTo(Cerr, NoData); - Cerr << "/" << Trio.begin()->GetAddrsQty() << " | "; - - const auto& range2nd = *(++Trio.begin()); - range2nd.DumpTo(Cerr, NoData); - Cerr << "/" << range2nd.GetAddrsQty() << " | "; - - Trio.rbegin()->DumpTo(Cerr, NoData); - Cerr << "/" << Trio.rbegin()->GetAddrsQty() << "]\n"; - } - - - static int GetRegionId(const TString& data) { - const auto& json = ParseJsonString(data); - auto reg_id = json["region_id"].GetIntegerSafe(0); - return 99999 == reg_id ? 10000 : reg_id; - } - - static TString SubstRegionId(const TString& data, const int newId) { - auto json = ParseJsonString(data); - json.InsertValue("region_id", newId); - return SortJsonData(json); - } - - const NGeobase::TLookup Geobase; - IOutputStream& Out; - TList<TRange> Trio; - }; - } // anon-ns - - void DoCoarsening(IInputStream& input, IOutputStream& output) { - TString line; - while (input.ReadLine(line)) { - TVector<TString> parts; - StringSplitter(line).Split('\t').AddTo(&parts); - - NJson::TJsonValue jsonData; - CorrectReliability(jsonData, parts[1]); - output << parts[0] << "\t" << "{\"" - << REG_FIELD << "\":" << jsonData[REG_FIELD] << ",\"" - << REL_FIELD << "\":" << Prec(jsonData[REL_FIELD].GetDouble(), PREC_POINT_DIGITS_STRIP_ZEROES, 2) - << "}\n"; - } - } - - void DoMergeEqualsRange(TReader& input, IOutputStream& output) { - // TODO(dieash@) may be check region for parent/child relation - // , const TString& geodataPath - // NGeobase::TLookup geoLookup(geodataPath); - - TVector<TString> rangeDataList; - TRange lastRange{}; - - const char* REG_ID_ATTR = "region_id"; - const char* ORG_NET_ATTR = "orig_net_size"; - const char* HUGE_SIZE_VALUE = "huge"; - - const int HUGE_SIZE_COEFF = 100; - - const auto CalcRegionBinding = [&]() { - if (rangeDataList.empty()) { - throw std::runtime_error("empty data list"); - } - - if (1 == rangeDataList.size()) { - return rangeDataList[0]; - } - - size_t maxAmount{}; - NJson::TJsonValue maxData; - - THashMap<NGeobase::TId, size_t> reg2amount; - for (const auto& data : rangeDataList) { - const auto& json = ParseJsonString(data); - - const auto id = json[REG_ID_ATTR].GetInteger(); - const auto amount = (json.Has(ORG_NET_ATTR) && HUGE_SIZE_VALUE == json[ORG_NET_ATTR].GetString()) ? HUGE_SIZE_COEFF : FromString<int>(json[ORG_NET_ATTR].GetString()); - reg2amount[id] += amount; - - if (reg2amount[id] > maxAmount) { - maxData = json; - } - } - - maxData.EraseValue(ORG_NET_ATTR); - return SortJsonData(maxData); - }; - - const auto PrintRow = [&]() { - if (rangeDataList.empty()) { - return; - } - lastRange.Data = CalcRegionBinding(); - output << lastRange; - }; - - while (input.Next()) { - auto currRange = input.Get(); - if (currRange != lastRange) { - PrintRow(); - - lastRange = currRange; - rangeDataList = {}; - } - - rangeDataList.push_back(currRange.Data); - } - PrintRow(); - } - - void DoMerging(TReader& input, IOutputStream& output, const MergeTraits& traits) { - if (!input.Next()) { - return; // empty file here - } - - const bool IsJsonData = traits.ConcatSep.empty(); - - TRange joinedRange = input.Get(); - if (traits.SortData) { - joinedRange.Data = SortJson(joinedRange.Data); - } - - while (input.Next()) { - auto currRange = input.Get(); - if (traits.SortData) { - currRange.Data = SortJson(currRange.Data); - } - - if (currRange.Contains(joinedRange) && joinedRange.Data == currRange.Data) { - joinedRange = currRange; - continue; - } - - if (traits.JoinNestedRanges && joinedRange.Contains(currRange) && joinedRange.Data == currRange.Data) { - continue; - } - - if ( currRange.First != joinedRange.Last.Next() - || ( IsJsonData && !IsJsonEquals(traits.ExcludeFieldsList, currRange.Data, joinedRange.Data)) - || (!IsJsonData && currRange.Data != joinedRange.Data)) - { - output << joinedRange; - joinedRange = currRange; - } else { - if (IsJsonData) { - joinedRange.Data = MergeJsonsData(currRange.Data, joinedRange.Data, traits.SortData, traits.CountMerges); - } else { - joinedRange.Data = (joinedRange.Data == currRange.Data) ? joinedRange.Data : (joinedRange.Data + traits.ConcatSep + currRange.Data); - } - joinedRange.Last = currRange.Last; - } - } - - output << joinedRange; - } - - void DoMerging3(TReader& input, IOutputStream& output, const TString& geodata, bool ByRegsOnly, bool silentMode) { - MergerBy3 merger(geodata, output); - merger.Process(input, ByRegsOnly, silentMode); - } - - void DoPatching(TReader& base, TReader& patch, IOutputStream& output, bool sortData) { - Patcher(base, patch, output, sortData).Process(); - } - - const TString STUB_DATA{"{\"is_placeholder\":1,\"region_id\":10000,\"reliability\":0}"}; - - void AddStubRanges(TReader& input, IOutputStream& output) { - TRange stub{ - TAddress::Lowest(), - TAddress::Lowest(), - STUB_DATA - }; - - while (input.Next()) { - const auto& currRange = input.Get(); - - if (stub.First > currRange.First) { - const TString& errMsg = ">>> bad ranges ($stub.begin > $next.begin) // " + stub.First.AsShortIPv6() + " | " + currRange.First.AsShortIPv6(); - throw std::runtime_error(errMsg.data()); - } - - if (stub.First < currRange.First) { - stub.Last = currRange.First.Prev(); - output << stub; - } - - output << currRange; - stub.First = currRange.Last.Next(); - } - - if (stub.First != TAddress::Highest()) { - stub.Last = TAddress::Highest(); - output << stub; - } - } - - void CheckAddressSpaceForCompleteness(IInputStream& input, IOutputStream& output) { - TAddress lastIp = TAddress::Lowest(); - size_t row_number = 0; - - TString line; - while (input.ReadLine(line)) { - ++row_number; - output << line << "\n"; - - const auto& currRange = TRange::BuildRange(line); - if (row_number == 1) { - if (currRange.First != TAddress::Lowest()) { - const TString err_msg = "bad first addr (ip / wanted_ip) => " + currRange.First.AsIPv6() + " / " + TAddress::Lowest().AsIPv6(); - throw std::runtime_error(err_msg); - } - lastIp = currRange.Last; - continue; - } - - if (lastIp == currRange.First || lastIp.Next() != currRange.First) { - const TString err_msg = ">>> row#" + ToString(row_number) + " bad pair (last_ip / next_ip) => " + lastIp.AsIPv6() + " / " + currRange.First.AsIPv6(); - throw std::runtime_error(err_msg); - } - - lastIp = currRange.Last; - } - - if (lastIp != TAddress::Highest()) { - const TString err_msg = "bad last addr (last_ip / wanted_ip) => " + lastIp.AsIPv6() + " / " + TAddress::Highest().AsIPv6(); - throw std::runtime_error(err_msg); - } - } - - void CheckRangesForMonotonicSequence(IInputStream& input, IOutputStream& output, bool IsStrict) { - TAddress lastIp = TAddress::Lowest(); - - size_t row = 0; - TString line; - while (input.ReadLine(line)) { - ++row; - output << line << "\n"; - - const auto& currRange = TRange::BuildRange(line); - if (row == 1) { - lastIp = currRange.Last; - continue; - } - - if (IsStrict) { - IpChecker::Less(row, lastIp, currRange.First); - } else { - IpChecker::LessOrEqual(row, lastIp, currRange.First); - } - lastIp = currRange.Last; - } - } - - NJson::TJsonValue ParseJsonString(const TString& data) { - const auto throwIfError = true; - - NJson::TJsonValue json; - NJson::ReadJsonFastTree(data, &json, throwIfError); - return json; - } - - TString SortJsonData(const NJson::TJsonValue& json) { - const auto NoFormat = false; - const auto SortKeys = true; - - return NJson::WriteJson(json, NoFormat, SortKeys); - } - - TString SortJsonData(const TString& jsonStr) { - return SortJsonData(ParseJsonString(jsonStr)); - } - - TString AddJsonAttrs(const TVector<TString>& addFieldsList, const TString& jsonStr, const TMaybe<TString>& attrValue) { - if (addFieldsList.empty()) { - return jsonStr; - } - - auto json = ParseJsonString(jsonStr); - for (const auto& newField : addFieldsList) { - if (!newField.empty()) { - if (attrValue) { - json.InsertValue(newField, *attrValue); - } else { - json.InsertValue(newField, 1); - } - } - } - return json.GetStringRobust(); - } - - TString ExcludeJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr) { - if (excludeFieldsList.empty()) { - return jsonStr; - } - - auto json = ParseJsonString(jsonStr); - for (const auto& excludeField : excludeFieldsList) { - if (!excludeField.empty()) { - json.EraseValue(excludeField); - } - } - return json.GetStringRobust(); - } - - TString ExtractJsonAttrs(const TVector<TString>& extractFieldsList, const TString& jsonStr) { - if (extractFieldsList.empty()) { - return jsonStr; - } - - auto json = ParseJsonString(jsonStr); - NJson::TJsonValue newJson; - for (const auto& field : extractFieldsList) { - if (json.Has(field)) { - newJson.InsertValue(field, json[field]); - } - } - if (!newJson.IsDefined()) { - return {}; - } - return newJson.GetStringRobust(); - } - - namespace CliParamsDesc { - const TString InputFnameParam = "input-data"; - const TString OutputFnameParam = "output-data"; - const TString OutputFullIpParam = "show-full-ip"; - const TString PrintStatsParam = "print-stats"; - const TString PrintYtStatsParam = "yt-stats"; - - const TString InputFnameParamDesc = "path to input IPREG-data; leave empty or use '-' for stdin"; - const TString OutputFnameParamDesc = "path to file for output results; leave empty for stdout"; - const TString OutputFullIpParamDesc = "print full ipv6 (by default - short)"; - const TString PrintStatsParamDesc = "print internal statistics; @stderr"; - const TString PrintYtStatsParamDesc = "print YT-stats (by default, file-descriptor 5)"; - } // ns CliParamsDesc - - DefaultCliParams::DefaultCliParams() { - using namespace CliParamsDesc; - - Opts.SetFreeArgsMax(0); - Opts.AddHelpOption('h'); - - Opts.AddLongOption('i', InputFnameParam) - .RequiredArgument("filename") - .DefaultValue(InputFname) - .StoreResult(&InputFname).Help(InputFnameParamDesc); - - Opts.AddLongOption('o', OutputFnameParam) - .RequiredArgument("filename") - .DefaultValue(OutputFname) - .StoreResult(&OutputFname).Help(OutputFnameParamDesc); - - Opts.AddLongOption('f', OutputFullIpParam) - .Optional() - .NoArgument() - .DefaultValue("0") - .OptionalValue("1") - .StoreResult(&OutputFullIp).Help(OutputFullIpParamDesc); - - Opts.AddLongOption(PrintStatsParam) - .Optional() - .NoArgument() - .DefaultValue("0") - .OptionalValue("1") - .StoreResult(&PrintStats).Help(PrintStatsParamDesc); - - Opts.AddLongOption(PrintYtStatsParam) - .Optional() - .NoArgument() - .DefaultValue("0") - .OptionalValue("1") - .StoreResult(&PrintYtStats).Help(PrintYtStatsParamDesc); - } - - void DefaultCliParams::ApplyFlags() const { - if (OutputFullIp) { - SetIpFullOutFormat(); - } - } - - void DefaultCliParams::Parse(int argc, const char **argv) { - NLastGetopt::TOptsParseResult optRes(&GetOpts(), argc, argv); - ApplyFlags(); - } - -} // NIPREG diff --git a/library/cpp/ipreg/util_helpers.h b/library/cpp/ipreg/util_helpers.h deleted file mode 100644 index eab2dfb320..0000000000 --- a/library/cpp/ipreg/util_helpers.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include <library/cpp/getopt/opt.h> -#include <util/generic/string.h> -#include <util/generic/maybe.h> - -class IInputStream; -class IOutputStream; - -namespace NJson { - class TJsonValue; -} - -namespace NIPREG { - class TReader; - - // @input any form of range+payload - // @output $ip.begin-$ip.end \t {"region_id":$reg,"reliability":$rel} - void DoCoarsening(IInputStream& input, IOutputStream& output); - - struct MergeTraits { - const TVector<TString> ExcludeFieldsList; - TString ConcatSep; - bool SortData{}; - bool CountMerges{}; - bool JoinNestedRanges{}; - }; - - void DoMerging(TReader& input, IOutputStream& output, const MergeTraits& traits); - void DoMerging3(TReader& input, IOutputStream& output, const TString& geodata, bool ByRegsOnly = false, bool silentMode = false); - void DoMergeEqualsRange(TReader& input, IOutputStream& output); - - void DoPatching(TReader& base, TReader& patch, IOutputStream& output, bool sortData = false); - - void AddStubRanges(TReader& input, IOutputStream& output); - - void CheckAddressSpaceForCompleteness(IInputStream& input, IOutputStream& output); - void CheckRangesForMonotonicSequence(IInputStream& input, IOutputStream& output, bool IsStrict = false); - - NJson::TJsonValue ParseJsonString(const TString& data); - TString SortJsonData(const NJson::TJsonValue& json); - TString SortJsonData(const TString& json); - - TString AddJsonAttrs(const TVector<TString>& addFieldsList, const TString& jsonStr, const TMaybe<TString>& attrValue); - TString ExcludeJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr); - TString ExtractJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr); - - extern const TString STUB_DATA; - - struct DefaultCliParams { - DefaultCliParams(); - - NLastGetopt::TOpts& GetOpts() { return Opts; } - void Parse(int argc, const char **argv); - void ApplyFlags() const; - - TString InputFname = "-"; - TString OutputFname = ""; - bool OutputFullIp = false; - bool PrintStats = false; - bool PrintYtStats = false; - - NLastGetopt::TOpts Opts; - }; -} // NIPREG diff --git a/library/cpp/ipreg/writer.cpp b/library/cpp/ipreg/writer.cpp deleted file mode 100644 index 89f8c8b629..0000000000 --- a/library/cpp/ipreg/writer.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "writer.h" - -#include <util/stream/file.h> - -namespace NIPREG { - -TWriter::TWriter(const TString& fname) - : OwnedStreamPtr(fname.empty() ? nullptr : new TFileOutput(fname)) - , Stream(OwnedStreamPtr ? *OwnedStreamPtr.Get() : Cout) - , AddrSeparator(ADDR_SEP) - , DataSeparator(DATA_SEP) - , SplitMixed(false) -{ -} - -TWriter::TWriter(IOutputStream& stream, EAddressFormat addressFormat, const TString& addrSep, const TString& dataSep, const bool splitMixed) - : Stream(stream) - , AddressFormat(addressFormat) - , AddrSeparator(addrSep) - , DataSeparator(dataSep) - , SplitMixed(splitMixed) -{ -} - -namespace { - const TAddress IPv4Start = TAddress::ParseIPv4("0.0.0.0"); - const TAddress IPv4End = TAddress::ParseIPv4("255.255.255.255"); - - const TAddress IPv6BeforeV4 = IPv4Start.Prev(); - const TAddress IPv6AfterV4 = IPv4End.Next(); -} - -void TWriter::Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange) { - if (SplitMixed) { - if (first < IPv4Start && IPv4Start < last) { - Write(first, IPv6BeforeV4, data, printRange); - Write(IPv4Start, last, data, printRange); - return; - } - - if (first < IPv4End && IPv4End < last) { - Write(first, IPv4End, data, printRange); - Write(IPv6AfterV4, last, data, printRange); - return; - } - } - WriteImpl(first, last, data, printRange); -} - -void TWriter::WriteImpl(const TAddress& first, const TAddress& last, const TString& data, bool printRange) { - if (printRange) { - Stream << first.Format(AddressFormat) << AddrSeparator << last.Format(AddressFormat); - } - if (!data.empty()) { - if (printRange) { - Stream << DataSeparator; - } - Stream << data; - } - if (!data.empty() || printRange) { - Stream << "\n"; - } -} - -void TWriter::Finalize() { -} - -TMergingWriter::TMergingWriter(IOutputStream& stream, EAddressFormat addressFormat, const TString& addrSep, const TString& dataSep, const bool splitMixed) - : TWriter(stream, addressFormat, addrSep, dataSep, splitMixed) { -} - -void TMergingWriter::Write(const TAddress& first, const TAddress& last, const TString& data, bool) { - if (Initialized && data == StoredData && first == StoredLast.Next()) { - StoredLast = last; - } else { - if (Initialized) - TWriter::Write(StoredFirst, StoredLast, StoredData); - StoredFirst = first; - StoredLast = last; - StoredData = data; - Initialized = true; - } -} - -void TMergingWriter::Finalize() { - if (Initialized) - TWriter::Write(StoredFirst, StoredLast, StoredData); - Initialized = false; -} - -} // NIPREG diff --git a/library/cpp/ipreg/writer.h b/library/cpp/ipreg/writer.h deleted file mode 100644 index a4232a89a6..0000000000 --- a/library/cpp/ipreg/writer.h +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once - -#include "range.h" - -#include <util/generic/ptr.h> -#include <util/generic/string.h> -#include <util/stream/output.h> - -namespace NIPREG { - -class TWriter { -public: - static constexpr char const * const ADDR_SEP = "-"; - static constexpr char const * const DATA_SEP = "\t"; - -public: - TWriter(const TString& filename = ""); - TWriter(IOutputStream& stream, EAddressFormat addressFormat = EAddressFormat::IPV6, const TString& addrSep = ADDR_SEP, const TString& dataSep = DATA_SEP, const bool splitMixed = false); - TWriter(IOutputStream& stream, const TString& addrSep, EAddressFormat addressFormat) - : TWriter(stream, addressFormat, addrSep, addrSep) - {} - virtual ~TWriter() {} - - void Write(const TGenericEntry& entry, bool printRange = true) { - Write(entry.First, entry.Last, entry.Data, printRange); - } - virtual void Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange = true); - virtual void Finalize(); - - operator IOutputStream&() { - return Stream; - } - -private: - void WriteImpl(const TAddress& first, const TAddress& last, const TString& data, bool printRange); - - TAutoPtr<IOutputStream> OwnedStreamPtr; - IOutputStream& Stream; - - EAddressFormat AddressFormat = EAddressFormat::IPV6; - const TString AddrSeparator = ADDR_SEP; - const TString DataSeparator = DATA_SEP; - const bool SplitMixed; -}; - -class TMergingWriter : public TWriter { -public: - TMergingWriter(IOutputStream& stream, EAddressFormat addressFormat = EAddressFormat::IPV6, const TString& addrSep = ADDR_SEP, const TString& dataSep = DATA_SEP, const bool splitMixed = false); - TMergingWriter(IOutputStream& stream, const TString& addrSep, EAddressFormat addressFormat) - : TWriter(stream, addressFormat, addrSep, addrSep) - {} - void Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange = true) final override; - void Finalize() final; - -private: - TAddress StoredFirst; - TAddress StoredLast; - TString StoredData; - bool Initialized = false; -}; - -} // NIPREG diff --git a/library/cpp/ipreg/ya.make b/library/cpp/ipreg/ya.make deleted file mode 100644 index b03720f761..0000000000 --- a/library/cpp/ipreg/ya.make +++ /dev/null @@ -1,26 +0,0 @@ -LIBRARY() - -SRCS( - address.cpp - checker.cpp - merge.cpp - range.cpp - reader.cpp - sources.cpp - split.cpp - stopwatch.cpp - writer.cpp - util_helpers.cpp -) - -PEERDIR( - library/cpp/getopt/small - library/cpp/json - library/cpp/geobase - library/cpp/int128 -) - -GENERATE_ENUM_SERIALIZATION(address.h) -GENERATE_ENUM_SERIALIZATION(sources.h) - -END() diff --git a/library/cpp/langmask/CMakeLists.txt b/library/cpp/langmask/CMakeLists.txt deleted file mode 100644 index 499930c4b0..0000000000 --- a/library/cpp/langmask/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -add_subdirectory(proto) diff --git a/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index e9f692d0f2..0000000000 --- a/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,43 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-langmask-proto) -target_link_libraries(cpp-langmask-proto PUBLIC - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-langmask-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto -) -target_proto_addincls(cpp-langmask-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-langmask-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt b/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 61f975983e..0000000000 --- a/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,44 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-langmask-proto) -target_link_libraries(cpp-langmask-proto PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-langmask-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto -) -target_proto_addincls(cpp-langmask-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-langmask-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 61f975983e..0000000000 --- a/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,44 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-langmask-proto) -target_link_libraries(cpp-langmask-proto PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-langmask-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto -) -target_proto_addincls(cpp-langmask-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-langmask-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/langmask/proto/CMakeLists.txt b/library/cpp/langmask/proto/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/langmask/proto/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt deleted file mode 100644 index e9f692d0f2..0000000000 --- a/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,43 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-langmask-proto) -target_link_libraries(cpp-langmask-proto PUBLIC - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-langmask-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto -) -target_proto_addincls(cpp-langmask-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-langmask-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/langmask/proto/langmask.proto b/library/cpp/langmask/proto/langmask.proto deleted file mode 100644 index be23ecfbba..0000000000 --- a/library/cpp/langmask/proto/langmask.proto +++ /dev/null @@ -1,6 +0,0 @@ -package NProto; - -message TLangMask { - repeated uint32 Bits = 1; // binary - optional string Names = 2; // human readable -} diff --git a/library/cpp/langmask/proto/ya.make b/library/cpp/langmask/proto/ya.make deleted file mode 100644 index 823a0ad261..0000000000 --- a/library/cpp/langmask/proto/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -PROTO_LIBRARY() - -SRCS( - langmask.proto -) - -IF (NOT PY_PROTOS_FOR) - EXCLUDE_TAGS(GO_PROTO) -ENDIF() - -END() diff --git a/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt b/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index c4d2e9d3a4..0000000000 --- a/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,56 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -find_package(ZLIB REQUIRED) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(library-cpp-microbdb) -target_link_libraries(library-cpp-microbdb PUBLIC - contrib-libs-cxxsupp - yutil - contrib-libs-fastlz - contrib-libs-libc_compat - contrib-libs-protobuf - contrib-libs-snappy - ZLIB::ZLIB - cpp-deprecated-fgood - cpp-on_disk-st_hash - library-cpp-packedtypes -) -target_proto_messages(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto -) -target_sources(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp -) -target_proto_addincls(library-cpp-microbdb - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(library-cpp-microbdb - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/microbdb/CMakeLists.linux-aarch64.txt b/library/cpp/microbdb/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 302dbd03cd..0000000000 --- a/library/cpp/microbdb/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,57 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -find_package(ZLIB REQUIRED) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(library-cpp-microbdb) -target_link_libraries(library-cpp-microbdb PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - contrib-libs-fastlz - contrib-libs-libc_compat - contrib-libs-protobuf - contrib-libs-snappy - ZLIB::ZLIB - cpp-deprecated-fgood - cpp-on_disk-st_hash - library-cpp-packedtypes -) -target_proto_messages(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto -) -target_sources(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp -) -target_proto_addincls(library-cpp-microbdb - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(library-cpp-microbdb - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/microbdb/CMakeLists.linux-x86_64.txt b/library/cpp/microbdb/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 302dbd03cd..0000000000 --- a/library/cpp/microbdb/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,57 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -find_package(ZLIB REQUIRED) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(library-cpp-microbdb) -target_link_libraries(library-cpp-microbdb PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - contrib-libs-fastlz - contrib-libs-libc_compat - contrib-libs-protobuf - contrib-libs-snappy - ZLIB::ZLIB - cpp-deprecated-fgood - cpp-on_disk-st_hash - library-cpp-packedtypes -) -target_proto_messages(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto -) -target_sources(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp -) -target_proto_addincls(library-cpp-microbdb - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(library-cpp-microbdb - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/microbdb/CMakeLists.txt b/library/cpp/microbdb/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/microbdb/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/microbdb/CMakeLists.windows-x86_64.txt b/library/cpp/microbdb/CMakeLists.windows-x86_64.txt deleted file mode 100644 index c4d2e9d3a4..0000000000 --- a/library/cpp/microbdb/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,56 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -find_package(ZLIB REQUIRED) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(library-cpp-microbdb) -target_link_libraries(library-cpp-microbdb PUBLIC - contrib-libs-cxxsupp - yutil - contrib-libs-fastlz - contrib-libs-libc_compat - contrib-libs-protobuf - contrib-libs-snappy - ZLIB::ZLIB - cpp-deprecated-fgood - cpp-on_disk-st_hash - library-cpp-packedtypes -) -target_proto_messages(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto -) -target_sources(library-cpp-microbdb PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp -) -target_proto_addincls(library-cpp-microbdb - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(library-cpp-microbdb - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/microbdb/align.h b/library/cpp/microbdb/align.h deleted file mode 100644 index 2f8567f134..0000000000 --- a/library/cpp/microbdb/align.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include <util/system/defaults.h> - -using TDatAlign = int; - -static inline size_t DatFloor(size_t size) { - return (size - 1) & ~(sizeof(TDatAlign) - 1); -} - -static inline size_t DatCeil(size_t size) { - return DatFloor(size) + sizeof(TDatAlign); -} - -static inline void DatSet(void* ptr, size_t size) { - *(TDatAlign*)((char*)ptr + DatFloor(size)) = 0; -} diff --git a/library/cpp/microbdb/compressed.h b/library/cpp/microbdb/compressed.h deleted file mode 100644 index f0c9edfa92..0000000000 --- a/library/cpp/microbdb/compressed.h +++ /dev/null @@ -1,520 +0,0 @@ -#pragma once - -#include <util/stream/zlib.h> - -#include "microbdb.h" -#include "safeopen.h" - -class TCompressedInputFileManip: public TInputFileManip { -public: - inline i64 GetLength() const { - return -1; // Some microbdb logic rely on unknown size of compressed files - } - - inline i64 Seek(i64 offset, int whence) { - i64 oldPos = DoGetPosition(); - i64 newPos = offset; - switch (whence) { - case SEEK_CUR: - newPos += oldPos; - [[fallthrough]]; // Complier happy. Please fix it! - case SEEK_SET: - break; - default: - return -1L; - } - if (oldPos > newPos) { - VerifyRandomAccess(); - DoSeek(0, SEEK_SET, IsStreamOpen()); - oldPos = 0; - } - const size_t bufsize = 1 << 12; - char buf[bufsize]; - for (i64 i = oldPos; i < newPos; i += bufsize) - InputStream->Read(buf, (i + (i64)bufsize < newPos) ? bufsize : (size_t)(newPos - i)); - return newPos; - } - - i64 RealSeek(i64 offset, int whence) { - InputStream.Destroy(); - i64 ret = DoSeek(offset, whence, !!CompressedInput); - if (ret != -1) - DoStreamOpen(DoCreateStream(), true); - return ret; - } - -protected: - IInputStream* CreateStream(const TFile& file) override { - CompressedInput.Reset(new TUnbufferedFileInput(file)); - return DoCreateStream(); - } - inline IInputStream* DoCreateStream() { - return new TZLibDecompress(CompressedInput.Get(), ZLib::GZip); - //return new TLzqDecompress(CompressedInput.Get()); - } - THolder<IInputStream> CompressedInput; -}; - -class TCompressedBufferedInputFileManip: public TCompressedInputFileManip { -protected: - IInputStream* CreateStream(const TFile& file) override { - CompressedInput.Reset(new TFileInput(file, 0x100000)); - return DoCreateStream(); - } -}; - -using TCompressedInputPageFile = TInputPageFileImpl<TCompressedInputFileManip>; -using TCompressedBufferedInputPageFile = TInputPageFileImpl<TCompressedBufferedInputFileManip>; - -template <class TVal> -struct TGzKey { - ui64 Offset; - TVal Key; - - static const ui32 RecordSig = TVal::RecordSig + 0x50495a47; - - TGzKey() { - } - - TGzKey(ui64 offset, const TVal& key) - : Offset(offset) - , Key(key) - { - } - - size_t SizeOf() const { - if (this) - return sizeof(Offset) + ::SizeOf(&Key); - else { - size_t sizeOfKey = ::SizeOf((TVal*)NULL); - return sizeOfKey ? (sizeof(Offset) + sizeOfKey) : 0; - } - } -}; - -template <class TVal> -class TInZIndexFile: protected TInDatFileImpl<TGzKey<TVal>> { - typedef TInDatFileImpl<TGzKey<TVal>> TDatFile; - typedef TGzKey<TVal> TGzVal; - typedef typename TDatFile::TRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - -public: - TInZIndexFile() - : Index0(nullptr) - { - } - - int Open(const char* fname, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { - int ret = TDatFile::Open(fname, pages, pagesOrBytes, gotRecordSig); - if (ret) - return ret; - if (!(Index0 = (TDatPage*)malloc(TPageIter::GetPageSize()))) { - TDatFile::Close(); - return MBDB_NO_MEMORY; - } - if (SizeOf((TGzVal*)NULL)) - RecsOnPage = (TPageIter::GetPageSize() - sizeof(TDatPage)) / DatCeil(SizeOf((TGzVal*)NULL)); - TDatFile::Next(); - memcpy(Index0, TPageIter::Current(), TPageIter::GetPageSize()); - return 0; - } - - int Close() { - free(Index0); - Index0 = NULL; - return TDatFile::Close(); - } - - inline int GetError() const { - return TDatFile::GetError(); - } - - int FindKey(const TVal* akey, const typename TExtInfoType<TVal>::TResult* = NULL) { - assert(IsOpen()); - if (!SizeOf((TVal*)NULL)) - return FindVszKey(akey); - int pageno; - i64 offset; - FindKeyOnPage(pageno, offset, Index0, akey); - TDatPage* page = TPageIter::GotoPage(pageno + 1); - int num_add = (int)offset; - FindKeyOnPage(pageno, offset, page, akey); - return pageno + num_add; - } - - using TDatFile::IsOpen; - - int FindVszKey(const TVal* akey, const typename TExtInfoType<TVal>::TResult* = NULL) { - int pageno; - i64 offset; - FindVszKeyOnPage(pageno, offset, Index0, akey); - TDatPage* page = TPageIter::GotoPage(pageno + 1); - int num_add = (int)offset; - FindVszKeyOnPage(pageno, offset, page, akey); - return pageno + num_add; - } - - i64 FindPage(int pageno) { - if (!SizeOf((TVal*)NULL)) - return FindVszPage(pageno); - int recsize = DatCeil(SizeOf((TGzVal*)NULL)); - TDatPage* page = TPageIter::GotoPage(1 + pageno / RecsOnPage); - if (!page) // can happen if pageno is beyond EOF - return -1; - unsigned int localpageno = pageno % RecsOnPage; - if (localpageno >= page->RecNum) // can happen if pageno is beyond EOF - return -1; - TGzVal* v = (TGzVal*)((char*)page + sizeof(TDatPage) + localpageno * recsize); - return v->Offset; - } - - i64 FindVszPage(int pageno) { - TGzVal* cur = (TGzVal*)((char*)Index0 + sizeof(TDatPage)); - TGzVal* prev = cur; - unsigned int n = 0; - while (n < Index0->RecNum && cur->Offset <= (unsigned int)pageno) { - prev = cur; - cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur))); - n++; - } - TDatPage* page = TPageIter::GotoPage(n); - unsigned int num_add = (unsigned int)(prev->Offset); - n = 0; - cur = (TGzVal*)((char*)page + sizeof(TDatPage)); - while (n < page->RecNum && n + num_add < (unsigned int)pageno) { - cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur))); - n++; - } - if (n == page->RecNum) // can happen if pageno is beyond EOF - return -1; - return cur->Offset; - } - -protected: - void FindKeyOnPage(int& pageno, i64& offset, TDatPage* page, const TVal* Key) { - int left = 0; - int right = page->RecNum - 1; - int recsize = DatCeil(SizeOf((TGzVal*)NULL)); - while (left < right) { - int middle = (left + right) >> 1; - if (((TGzVal*)((char*)page + sizeof(TDatPage) + middle * recsize))->Key < *Key) - left = middle + 1; - else - right = middle; - } - //borders check (left and right) - pageno = (left == 0 || ((TGzVal*)((char*)page + sizeof(TDatPage) + left * recsize))->Key < *Key) ? left : left - 1; - offset = ((TGzVal*)((char*)page + sizeof(TDatPage) + pageno * recsize))->Offset; - } - - void FindVszKeyOnPage(int& pageno, i64& offset, TDatPage* page, const TVal* key) { - TGzVal* cur = (TGzVal*)((char*)page + sizeof(TDatPage)); - ui32 RecordSig = page->RecNum; - i64 tmpoffset = cur->Offset; - for (; RecordSig > 0 && cur->Key < *key; --RecordSig) { - tmpoffset = cur->Offset; - cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur))); - } - int idx = page->RecNum - RecordSig - 1; - pageno = (idx >= 0) ? idx : 0; - offset = tmpoffset; - } - - TDatPage* Index0; - int RecsOnPage; -}; - -template <class TKey> -class TCompressedIndexedInputPageFile: public TCompressedInputPageFile { -public: - int GotoPage(int pageno); - -protected: - TInZIndexFile<TKey> KeyFile; -}; - -template <class TVal, class TKey> -class TDirectCompressedInDatFile: public TDirectInDatFile<TVal, TKey, - TInDatFileImpl<TVal, TInputRecordIterator<TVal, - TInputPageIterator<TCompressedIndexedInputPageFile<TKey>>>>> { -}; - -class TCompressedOutputFileManip: public TOutputFileManip { -public: - inline i64 GetLength() const { - return -1; // Some microbdb logic rely on unknown size of compressed files - } - - inline i64 Seek(i64 offset, int whence) { - i64 oldPos = DoGetPosition(); - i64 newPos = offset; - switch (whence) { - case SEEK_CUR: - newPos += oldPos; - [[fallthrough]]; // Compler happy. Please fix it! - case SEEK_SET: - break; - default: - return -1L; - } - if (oldPos > newPos) - return -1L; - - const size_t bufsize = 1 << 12; - char buf[bufsize] = {0}; - for (i64 i = oldPos; i < newPos; i += bufsize) - OutputStream->Write(buf, (i + (i64)bufsize < newPos) ? bufsize : (size_t)(newPos - i)); - return newPos; - } - - i64 RealSeek(i64 offset, int whence) { - OutputStream.Destroy(); - i64 ret = DoSeek(offset, whence, !!CompressedOutput); - if (ret != -1) - DoStreamOpen(DoCreateStream(), true); - return ret; - } - -protected: - IOutputStream* CreateStream(const TFile& file) override { - CompressedOutput.Reset(new TUnbufferedFileOutput(file)); - return DoCreateStream(); - } - inline IOutputStream* DoCreateStream() { - return new TZLibCompress(CompressedOutput.Get(), ZLib::GZip, 1); - } - THolder<IOutputStream> CompressedOutput; -}; - -class TCompressedBufferedOutputFileManip: public TCompressedOutputFileManip { -protected: - IOutputStream* CreateStream(const TFile& file) override { - CompressedOutput.Reset(new TUnbufferedFileOutput(file)); - return DoCreateStream(); - } - inline IOutputStream* DoCreateStream() { - return new TZLibCompress(CompressedOutput.Get(), ZLib::GZip, 1, 0x100000); - } -}; - -using TCompressedOutputPageFile = TOutputPageFileImpl<TCompressedOutputFileManip>; -using TCompressedBufferedOutputPageFile = TOutputPageFileImpl<TCompressedBufferedOutputFileManip>; - -template <class TVal> -class TOutZIndexFile: public TOutDatFileImpl< - TGzKey<TVal>, - TOutputRecordIterator<TGzKey<TVal>, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer>> { - typedef TOutDatFileImpl< - TGzKey<TVal>, - TOutputRecordIterator<TGzKey<TVal>, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer>> - TDatFile; - typedef TOutZIndexFile<TVal> TMyType; - typedef TGzKey<TVal> TGzVal; - typedef typename TDatFile::TRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - typedef typename TRecIter::TIndexer TIndexer; - -public: - TOutZIndexFile() { - TotalRecNum = 0; - TIndexer::SetCallback(this, DispatchCallback); - } - - int Open(const char* fname, size_t pagesize, size_t pages, int pagesOrBytes = 1) { - int ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes); - if (ret) - return ret; - if ((ret = TRecIter::GotoPage(1))) - TDatFile::Close(); - return ret; - } - - int Close() { - TPageIter::Unfreeze(); - if (TRecIter::RecNum) - NextPage(TPageIter::Current()); - int ret = 0; - if (Index0.size() && !(ret = TRecIter::GotoPage(0))) { - typename std::vector<TGzVal>::iterator it, end = Index0.end(); - for (it = Index0.begin(); it != end; ++it) - TRecIter::Push(&*it); - ret = (TPageIter::GetPageNum() != 0) ? MBDB_PAGE_OVERFLOW : TPageIter::GetError(); - } - Index0.clear(); - int ret1 = TDatFile::Close(); - return ret ? ret : ret1; - } - -protected: - int TotalRecNum; // should be enough because we have GotoPage(int) - std::vector<TGzVal> Index0; - - void NextPage(const TDatPage* page) { - TGzVal* rec = (TGzVal*)((char*)page + sizeof(TDatPage)); - Index0.push_back(TGzVal(TotalRecNum, rec->Key)); - TotalRecNum += TRecIter::RecNum; - } - - static void DispatchCallback(void* This, const TDatPage* page) { - ((TMyType*)This)->NextPage(page); - } -}; - -template <class TVal, class TKey, class TPageFile = TCompressedOutputPageFile> -class TOutDirectCompressedFileImpl: public TOutDatFileImpl< - TVal, - TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer>> { - typedef TOutDatFileImpl< - TVal, - TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer>> - TDatFile; - typedef TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> TMyType; - typedef typename TDatFile::TRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - typedef typename TRecIter::TIndexer TIndexer; - typedef TGzKey<TKey> TMyKey; - typedef TOutZIndexFile<TKey> TKeyFile; - -protected: - using TDatFile::Tell; - -public: - TOutDirectCompressedFileImpl() { - TIndexer::SetCallback(this, DispatchCallback); - } - - int Open(const char* fname, size_t pagesize, size_t ipagesize = 0) { - char iname[FILENAME_MAX]; - int ret; - if (ipagesize == 0) - ipagesize = pagesize; - - ret = TDatFile::Open(fname, pagesize, 1, 1); - ret = ret ? ret : DatNameToIdx(iname, fname); - ret = ret ? ret : KeyFile.Open(iname, ipagesize, 1, 1); - if (ret) - TDatFile::Close(); - return ret; - } - - int Close() { - if (TRecIter::RecNum) - NextPage(TPageIter::Current()); - int ret = KeyFile.Close(); - int ret1 = TDatFile::Close(); - return ret1 ? ret1 : ret; - } - - int GetError() const { - return TDatFile::GetError() ? TDatFile::GetError() : KeyFile.GetError(); - } - -protected: - TKeyFile KeyFile; - - void NextPage(const TDatPage* page) { - size_t sz = SizeOf((TMyKey*)NULL); - TMyKey* rec = KeyFile.Reserve(sz ? sz : MaxSizeOf<TMyKey>()); - if (rec) { - rec->Offset = Tell(); - rec->Key = *(TVal*)((char*)page + sizeof(TDatPage)); - KeyFile.ResetDat(); - } - } - - static void DispatchCallback(void* This, const TDatPage* page) { - ((TMyType*)This)->NextPage(page); - } -}; - -template <class TKey> -int TCompressedIndexedInputPageFile<TKey>::GotoPage(int pageno) { - if (Error) - return Error; - - Eof = 0; - - i64 offset = KeyFile.FindPage(pageno); - if (!offset) - return Error = MBDB_BAD_FILE_SIZE; - - if (offset != FileManip.RealSeek(offset, SEEK_SET)) - Error = MBDB_BAD_FILE_SIZE; - - return Error; -} - -template <typename TVal> -class TCompressedInDatFile: public TInDatFile<TVal, TCompressedInputPageFile> { -public: - TCompressedInDatFile(const char* name, size_t pages, int pagesOrBytes = 1) - : TInDatFile<TVal, TCompressedInputPageFile>(name, pages, pagesOrBytes) - { - } -}; - -template <typename TVal> -class TCompressedOutDatFile: public TOutDatFile<TVal, TFakeCompression, TCompressedOutputPageFile> { -public: - TCompressedOutDatFile(const char* name, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : TOutDatFile<TVal, TFakeCompression, TCompressedOutputPageFile>(name, pagesize, pages, pagesOrBytes) - { - } -}; - -template <typename TVal, typename TKey, typename TPageFile = TCompressedOutputPageFile> -class TOutDirectCompressedFile: protected TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> { - typedef TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> TBase; - -public: - TOutDirectCompressedFile(const char* name, size_t pagesize, size_t ipagesize = 0) - : Name(strdup(name)) - , PageSize(pagesize) - , IdxPageSize(ipagesize) - { - } - - ~TOutDirectCompressedFile() { - Close(); - free(Name); - Name = NULL; - } - - void Open(const char* fname) { - int ret = TBase::Open(fname, PageSize, IdxPageSize); - if (ret) - ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname); - free(Name); - Name = strdup(fname); - } - - void Close() { - int ret; - if ((ret = TBase::GetError())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name); - if ((ret = TBase::Close())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name); - } - - const char* GetName() const { - return Name; - } - - using TBase::Freeze; - using TBase::Push; - using TBase::Reserve; - using TBase::Unfreeze; - -protected: - char* Name; - size_t PageSize, IdxPageSize; -}; - -class TCompressedInterFileTypes { -public: - typedef TCompressedBufferedOutputPageFile TOutPageFile; - typedef TCompressedBufferedInputPageFile TInPageFile; -}; diff --git a/library/cpp/microbdb/extinfo.h b/library/cpp/microbdb/extinfo.h deleted file mode 100644 index c8389e783c..0000000000 --- a/library/cpp/microbdb/extinfo.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -#include "header.h" - -#include <library/cpp/packedtypes/longs.h> - -#include <util/generic/typetraits.h> - -#include <library/cpp/microbdb/noextinfo.pb.h> - -inline bool operator<(const TNoExtInfo&, const TNoExtInfo&) { - return false; -} - -namespace NMicroBDB { - Y_HAS_MEMBER(TExtInfo); - - template <class, bool> - struct TSelectExtInfo; - - template <class T> - struct TSelectExtInfo<T, false> { - typedef TNoExtInfo TExtInfo; - }; - - template <class T> - struct TSelectExtInfo<T, true> { - typedef typename T::TExtInfo TExtInfo; - }; - - template <class T> - class TExtInfoType { - public: - static const bool Exists = THasTExtInfo<T>::value; - typedef typename TSelectExtInfo<T, Exists>::TExtInfo TResult; - }; - - Y_HAS_MEMBER(MakeExtKey); - - template <class, class, bool> - struct TSelectMakeExtKey; - - template <class TVal, class TKey> - struct TSelectMakeExtKey<TVal, TKey, false> { - static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult*, const TVal* from, const typename TExtInfoType<TVal>::TResult*) { - *to = *from; - } - }; - - template <class TVal, class TKey> - struct TSelectMakeExtKey<TVal, TKey, true> { - static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult* toExt, const TVal* from, const typename TExtInfoType<TVal>::TResult* fromExt) { - TVal::MakeExtKey(to, toExt, from, fromExt); - } - }; - - template <typename T> - inline size_t SizeOfExt(const T* rec, size_t* /*out*/ extLenSize = nullptr, size_t* /*out*/ extSize = nullptr) { - if (!TExtInfoType<T>::Exists) { - if (extLenSize) - *extLenSize = 0; - if (extSize) - *extSize = 0; - return SizeOf(rec); - } else { - size_t sz = SizeOf(rec); - i64 l; - int els = in_long(l, (const char*)rec + sz); - if (extLenSize) - *extLenSize = static_cast<size_t>(els); - if (extSize) - *extSize = static_cast<size_t>(l); - return sz; - } - } - - template <class T> - bool GetExtInfo(const T* rec, typename TExtInfoType<T>::TResult* extInfo) { - Y_VERIFY(TExtInfoType<T>::Exists, "GetExtInfo should only be used with extended records"); - if (!rec) - return false; - size_t els; - size_t es; - size_t s = SizeOfExt(rec, &els, &es); - const ui8* raw = (const ui8*)rec + s + els; - return extInfo->ParseFromArray(raw, es); - } - - template <class T> - const ui8* GetExtInfoRaw(const T* rec, size_t* len) { - Y_VERIFY(TExtInfoType<T>::Exists, "GetExtInfo should only be used with extended records"); - if (!rec) { - *len = 0; - return nullptr; - } - size_t els; - size_t es; - size_t s = SizeOfExt(rec, &els, &es); - *len = els + es; - return (const ui8*)rec + s; - } - - // Compares serialized extInfo (e.g. for stable sort) - template <class T> - int CompareExtInfo(const T* a, const T* b) { - Y_VERIFY(TExtInfoType<T>::Exists, "CompareExtInfo should only be used with extended records"); - size_t elsA, esA; - size_t elsB, esB; - SizeOfExt(a, &elsA, &esA); - SizeOfExt(a, &elsB, &esB); - if (esA != esB) - return esA - esB; - else - return memcmp((const ui8*)a + elsA, (const ui8*)b + elsB, esA); - } - -} - -using NMicroBDB::TExtInfoType; - -template <class TVal, class TKey> -struct TMakeExtKey { - static const bool Exists = NMicroBDB::THasMakeExtKey<TVal>::value; - static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult* toExt, const TVal* from, const typename TExtInfoType<TVal>::TResult* fromExt) { - NMicroBDB::TSelectMakeExtKey<TVal, TKey, Exists>::Make(to, toExt, from, fromExt); - } -}; diff --git a/library/cpp/microbdb/file.cpp b/library/cpp/microbdb/file.cpp deleted file mode 100644 index 599a7301a0..0000000000 --- a/library/cpp/microbdb/file.cpp +++ /dev/null @@ -1,220 +0,0 @@ -#include "file.h" - -#include <fcntl.h> -#include <errno.h> -#include <sys/stat.h> - -#ifdef _win32_ -#define S_ISREG(x) !!(x & S_IFREG) -#endif - -TFileManipBase::TFileManipBase() - : FileBased(true) -{ -} - -i64 TFileManipBase::DoSeek(i64 offset, int whence, bool isStreamOpen) { - if (!isStreamOpen) - return -1; - VerifyRandomAccess(); - return File.Seek(offset, (SeekDir)whence); -} - -int TFileManipBase::DoFileOpen(const TFile& file) { - File = file; - SetFileBased(IsFileBased()); - return (File.IsOpen()) ? 0 : MBDB_OPEN_ERROR; -} - -int TFileManipBase::DoFileClose() { - if (File.IsOpen()) { - File.Close(); - return MBDB_ALREADY_INITIALIZED; - } - return 0; -} - -int TFileManipBase::IsFileBased() const { - bool fileBased = true; -#if defined(_win_) -#elif defined(_unix_) - FHANDLE h = File.GetHandle(); - struct stat sb; - fileBased = false; - if (h != INVALID_FHANDLE && !::fstat(h, &sb) && S_ISREG(sb.st_mode)) { - fileBased = true; - } -#else -#error -#endif - return fileBased; -} - -TInputFileManip::TInputFileManip() - : InputStream(nullptr) -{ -} - -int TInputFileManip::Open(const char* fname, bool direct) { - int ret; - return (ret = DoClose()) ? ret : DoStreamOpen(TFile(fname, RdOnly | (direct ? DirectAligned : EOpenMode()))); -} - -int TInputFileManip::Open(IInputStream& input) { - int ret; - return (ret = DoClose()) ? ret : DoStreamOpen(&input); -} - -int TInputFileManip::Open(TAutoPtr<IInputStream> input) { - int ret; - return (ret = DoClose()) ? ret : DoStreamOpen(input.Release()); -} - -int TInputFileManip::Init(const TFile& file) { - int ret; - if (ret = DoClose()) - return ret; - DoStreamOpen(file); - return 0; -} - -int TInputFileManip::Close() { - DoClose(); - return 0; -} - -ssize_t TInputFileManip::Read(void* buf, unsigned len) { - if (!IsStreamOpen()) - return -1; - return InputStream->Load(buf, len); -} - -IInputStream* TInputFileManip::CreateStream(const TFile& file) { - return new TUnbufferedFileInput(file); -} - -TMappedInputPageFile::TMappedInputPageFile() - : Pagesize(0) - , Error(0) - , Pagenum(0) - , Recordsig(0) - , Open(false) -{ - Term(); -} - -TMappedInputPageFile::~TMappedInputPageFile() { - Term(); -} - -int TMappedInputPageFile::Init(const char* fname, ui32 recsig, ui32* gotRecordSig, bool) { - Mappedfile.init(fname); - Open = true; - - TDatMetaPage* meta = (TDatMetaPage*)Mappedfile.getData(); - if (gotRecordSig) - *gotRecordSig = meta->RecordSig; - - if (meta->MetaSig != METASIG) - Error = MBDB_BAD_METAPAGE; - else if (meta->RecordSig != recsig) - Error = MBDB_BAD_RECORDSIG; - - if (Error) { - Mappedfile.term(); - return Error; - } - - size_t fsize = Mappedfile.getSize(); - if (fsize < METASIZE) - return Error = MBDB_BAD_FILE_SIZE; - fsize -= METASIZE; - if (fsize % meta->PageSize) - return Error = MBDB_BAD_FILE_SIZE; - Pagenum = (int)(fsize / meta->PageSize); - Pagesize = meta->PageSize; - Recordsig = meta->RecordSig; - Error = 0; - return Error; -} - -int TMappedInputPageFile::Term() { - Mappedfile.term(); - Open = false; - return 0; -} - -TOutputFileManip::TOutputFileManip() - : OutputStream(nullptr) -{ -} - -int TOutputFileManip::Open(const char* fname, EOpenMode mode) { - if (IsStreamOpen()) { - return MBDB_ALREADY_INITIALIZED; // should it be closed as TInputFileManip - } - - try { - if (unlink(fname) && errno != ENOENT) { - if (strncmp(fname, "/dev/std", 8)) - return MBDB_OPEN_ERROR; - } - TFile file(fname, mode); - DoStreamOpen(file); - } catch (const TFileError&) { - return MBDB_OPEN_ERROR; - } - return 0; -} - -int TOutputFileManip::Open(IOutputStream& output) { - if (IsStreamOpen()) - return MBDB_ALREADY_INITIALIZED; - DoStreamOpen(&output); - return 0; -} - -int TOutputFileManip::Open(TAutoPtr<IOutputStream> output) { - if (IsStreamOpen()) - return MBDB_ALREADY_INITIALIZED; - DoStreamOpen(output.Release()); - return 0; -} - -int TOutputFileManip::Init(const TFile& file) { - if (IsStreamOpen()) - return MBDB_ALREADY_INITIALIZED; // should it be closed as TInputFileManip - DoStreamOpen(file); - return 0; -} - -int TOutputFileManip::Rotate(const char* newfname) { - if (!IsStreamOpen()) { - return MBDB_NOT_INITIALIZED; - } - - try { - TFile file(newfname, WrOnly | OpenAlways | TruncExisting | ARW | AWOther); - DoClose(); - DoStreamOpen(file); - } catch (const TFileError&) { - return MBDB_OPEN_ERROR; - } - return 0; -} - -int TOutputFileManip::Close() { - DoClose(); - return 0; -} - -int TOutputFileManip::Write(const void* buf, unsigned len) { - if (!IsStreamOpen()) - return -1; - OutputStream->Write(buf, len); - return len; -} - -IOutputStream* TOutputFileManip::CreateStream(const TFile& file) { - return new TUnbufferedFileOutput(file); -} diff --git a/library/cpp/microbdb/file.h b/library/cpp/microbdb/file.h deleted file mode 100644 index f7c7818375..0000000000 --- a/library/cpp/microbdb/file.h +++ /dev/null @@ -1,225 +0,0 @@ -#pragma once - -#include "header.h" - -#include <library/cpp/deprecated/mapped_file/mapped_file.h> - -#include <util/generic/noncopyable.h> -#include <util/stream/file.h> -#include <util/system/filemap.h> - -#define FS_BLOCK_SIZE 512 - -class TFileManipBase { -protected: - TFileManipBase(); - - virtual ~TFileManipBase() { - } - - i64 DoSeek(i64 offset, int whence, bool isStreamOpen); - - int DoFileOpen(const TFile& file); - - int DoFileClose(); - - int IsFileBased() const; - - inline void SetFileBased(bool fileBased) { - FileBased = fileBased; - } - - inline i64 DoGetPosition() const { - Y_ASSERT(FileBased); - return File.GetPosition(); - } - - inline i64 DoGetLength() const { - return (FileBased) ? File.GetLength() : -1; - } - - inline void VerifyRandomAccess() const { - Y_VERIFY(FileBased, "non-file stream can not be accessed randomly"); - } - - inline i64 GetPosition() const { - return (i64)File.GetPosition(); - } - -private: - TFile File; - bool FileBased; -}; - -class TInputFileManip: public TFileManipBase { -public: - using TFileManipBase::GetPosition; - - TInputFileManip(); - - int Open(const char* fname, bool direct = false); - - int Open(IInputStream& input); - - int Open(TAutoPtr<IInputStream> input); - - int Init(const TFile& file); - - int Close(); - - ssize_t Read(void* buf, unsigned len); - - inline bool IsOpen() const { - return IsStreamOpen(); - } - - inline i64 GetLength() const { - return DoGetLength(); - } - - inline i64 Seek(i64 offset, int whence) { - return DoSeek(offset, whence, IsStreamOpen()); - } - - inline i64 RealSeek(i64 offset, int whence) { - return Seek(offset, whence); - } - -protected: - inline bool IsStreamOpen() const { - return !!InputStream; - } - - inline int DoStreamOpen(IInputStream* input, bool fileBased = false) { - InputStream.Reset(input); - SetFileBased(fileBased); - return 0; - } - - inline int DoStreamOpen(const TFile& file) { - int ret; - return (ret = DoFileOpen(file)) ? ret : DoStreamOpen(CreateStream(file), IsFileBased()); - } - - virtual IInputStream* CreateStream(const TFile& file); - - inline bool DoClose() { - if (IsStreamOpen()) { - InputStream.Destroy(); - return DoFileClose(); - } - return 0; - } - - THolder<IInputStream> InputStream; -}; - -class TMappedInputPageFile: private TNonCopyable { -public: - TMappedInputPageFile(); - - ~TMappedInputPageFile(); - - inline int GetError() const { - return Error; - } - - inline size_t GetPageSize() const { - return Pagesize; - } - - inline int GetLastPage() const { - return Pagenum; - } - - inline ui32 GetRecordSig() const { - return Recordsig; - } - - inline bool IsOpen() const { - return Open; - } - - inline char* GetData() const { - return Open ? (char*)Mappedfile.getData() : nullptr; - } - - inline size_t GetSize() const { - return Open ? Mappedfile.getSize() : 0; - } - -protected: - int Init(const char* fname, ui32 recsig, ui32* gotRecordSig = nullptr, bool direct = false); - - int Term(); - - TMappedFile Mappedfile; - size_t Pagesize; - int Error; - int Pagenum; - ui32 Recordsig; - bool Open; -}; - -class TOutputFileManip: public TFileManipBase { -public: - TOutputFileManip(); - - int Open(const char* fname, EOpenMode mode = WrOnly | CreateAlways | ARW | AWOther); - - int Open(IOutputStream& output); - - int Open(TAutoPtr<IOutputStream> output); - - int Init(const TFile& file); - - int Rotate(const char* newfname); - - int Write(const void* buf, unsigned len); - - int Close(); - - inline bool IsOpen() const { - return IsStreamOpen(); - } - - inline i64 GetLength() const { - return DoGetLength(); - } - - inline i64 Seek(i64 offset, int whence) { - return DoSeek(offset, whence, IsStreamOpen()); - } - - inline i64 RealSeek(i64 offset, int whence) { - return Seek(offset, whence); - } - -protected: - inline bool IsStreamOpen() const { - return !!OutputStream; - } - - inline int DoStreamOpen(IOutputStream* output, bool fileBased = false) { - OutputStream.Reset(output); - SetFileBased(fileBased); - return 0; - } - - inline int DoStreamOpen(const TFile& file) { - int ret; - return (ret = DoFileOpen(file)) ? ret : DoStreamOpen(CreateStream(file), true); - } - - virtual IOutputStream* CreateStream(const TFile& file); - - inline bool DoClose() { - if (IsStreamOpen()) { - OutputStream.Destroy(); - return DoFileClose(); - } - return 0; - } - - THolder<IOutputStream> OutputStream; -}; diff --git a/library/cpp/microbdb/hashes.h b/library/cpp/microbdb/hashes.h deleted file mode 100644 index bfd113c3ba..0000000000 --- a/library/cpp/microbdb/hashes.h +++ /dev/null @@ -1,250 +0,0 @@ -#pragma once - -#include <library/cpp/on_disk/st_hash/static_hash.h> -#include <util/system/sysstat.h> -#include <util/stream/mem.h> -#include <util/string/printf.h> -#include <library/cpp/deprecated/fgood/fgood.h> - -#include "safeopen.h" - -/** This file currently implements creation of mappable read-only hash file. - Basic usage of these "static hashes" is defined in util/static_hash.h (see docs there). - Additional useful wrappers are available in util/static_hash_map.h - - There are two ways to create mappable hash file: - - A) Fill an THashMap/set structure in RAM, then dump it to disk. - This is usually done by save_hash_to_file* functions defined in static_hash.h - (see description in static_hash.h). - - B) Prepare all data using external sorter, then create hash file straight on disk. - This approach is necessary when there isn't enough RAM to hold entire original THashMap. - Implemented in this file as TStaticHashBuilder class. - - Current implementation's major drawback is that the size of the hash must be estimated - before the hash is built (bucketCount), which is not always possible. - Separate implementation with two sort passes is yet to be done. - - Another problem is that maximum stored size of the element (maxRecSize) must also be - known in advance, because we use TDatSorterMemo, etc. - */ - -template <class SizeType> -struct TSthashTmpRec { - SizeType HashVal; - SizeType RecSize; - char Buf[1]; - size_t SizeOf() const { - return &Buf[RecSize] - (char*)this; - } - bool operator<(const TSthashTmpRec& than) const { - return HashVal < than.HashVal; - } - static const ui32 RecordSig = 20100124 + sizeof(SizeType) - 4; -}; - -template <typename T> -struct TReplaceMerger { - T operator()(const T& oldRecord, const T& newRecord) const { - Y_UNUSED(oldRecord); - return newRecord; - } -}; - -/** TStaticHashBuilder template parameters: - HashType - THashMap map/set type for which we construct corresponding mappable hash; - SizeType - type used to store offsets and length in resulting hash; - MergerType - type of object to process records with equal key (see TReplaceMerger for example); - */ - -template <class HashType, class SizeType, class MergerType = TReplaceMerger<typename HashType::mapped_type>> -struct TStaticHashBuilder { - const size_t SrtIOPageSz; - const size_t WrBufSz; - typedef TSthashTmpRec<SizeType> TIoRec; - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, SizeType> TKeySaver; - typedef typename HashType::value_type TValueType; - typedef typename HashType::mapped_type TMappedType; - typedef typename HashType::key_type TKeyType; - - TDatSorterMemo<TIoRec, TCompareByLess> Srt; - TBuffer IoRec, CurrentBlockRecs; - TKeySaver KeySaver; - typename HashType::hasher Hasher; - typename HashType::key_equal Equals; - MergerType merger; - TString HashFileName; - TString OurTmpDir; - size_t BucketCount; - int FreeBits; - - // memSz is the Sorter buffer size; - // maxRecSize is the maximum size (as reported by size_for_st) of our record(s) - TStaticHashBuilder(size_t memSz, size_t maxRecSize) - : SrtIOPageSz((maxRecSize * 16 + 65535) & ~size_t(65535)) - , WrBufSz(memSz / 16 >= SrtIOPageSz ? memSz / 16 : SrtIOPageSz) - , Srt("unused", memSz, SrtIOPageSz, WrBufSz, 0) - , IoRec(sizeof(TIoRec) + maxRecSize) - , CurrentBlockRecs(sizeof(TIoRec) + maxRecSize) - , BucketCount(0) - , FreeBits(0) - { - } - - ~TStaticHashBuilder() { - Close(); - } - - // if tmpDir is supplied, it must exist; - // bucketCount should be HashBucketCount() of the (estimated) element count - void Open(const char* fname, size_t bucketCount, const char* tmpDir = nullptr) { - if (!tmpDir) - tmpDir = ~(OurTmpDir = Sprintf("%s.temp", fname)); - Mkdir(tmpDir, MODE0775); - Srt.Open(tmpDir); - HashFileName = fname; - BucketCount = bucketCount; - int bitCount = 0; - while (((size_t)1 << bitCount) <= BucketCount && bitCount < int(8 * sizeof(size_t))) - ++bitCount; - FreeBits = 8 * sizeof(size_t) - bitCount; - } - - void Push(const TValueType& rec) { - TIoRec* ioRec = MakeIoRec(rec); - Srt.Push(ioRec); - } - TIoRec* MakeIoRec(const TValueType& rec) { - TIoRec* ioRec = (TIoRec*)IoRec.Data(); - size_t mask = (1 << FreeBits) - 1; - size_t hash = Hasher(rec.first); - ioRec->HashVal = ((hash % BucketCount) << FreeBits) + ((hash / BucketCount) & mask); - - TMemoryOutput output(ioRec->Buf, IoRec.Capacity() - offsetof(TIoRec, Buf)); - KeySaver.SaveRecord(&output, rec); - ioRec->RecSize = output.Buf() - ioRec->Buf; - return ioRec; - } - - bool Merge(TVector<std::pair<TKeyType, TMappedType>>& records, size_t newRecordSize) { - TSthashIterator<const TKeyType, const TMappedType, typename HashType::hasher, - typename HashType::key_equal> - newPtr(CurrentBlockRecs.End() - newRecordSize); - for (size_t i = 0; i < records.size(); ++i) { - if (newPtr.KeyEquals(Equals, records[i].first)) { - TMappedType oldValue = records[i].second; - TMappedType newValue = newPtr.Value(); - newValue = merger(oldValue, newValue); - records[i].second = newValue; - return true; - } - } - records.push_back(std::make_pair(newPtr.Key(), newPtr.Value())); - return false; - } - - void PutRecord(const char* buf, size_t rec_size, TFILEPtr& f, SizeType& cur_off) { - f.fsput(buf, rec_size); - cur_off += rec_size; - } - - void Finish() { - Srt.Sort(); - // We use variant 1. - // Variant 1: read sorter once, write records, fseeks to write buckets - // (this doesn't allow fname to be stdout) - // Variant 2: read sorter (probably temp. file) twice: write buckets, then write records - // (this allows fname to be stdout but seems to be longer) - TFILEPtr f(HashFileName, "wb"); - setvbuf(f, nullptr, _IOFBF, WrBufSz); - TVector<SizeType> bucketsBuf(WrBufSz, 0); - // prepare header (note: this code must be unified with save_stl.h) - typedef sthashtable_nvm_sv<typename HashType::hasher, typename HashType::key_equal, SizeType> sv_type; - sv_type sv = {Hasher, Equals, BucketCount, 0, 0}; - // to do: m.b. use just the size of corresponding object? - SizeType cur_off = sizeof(sv_type) + - (sv.num_buckets + 1) * sizeof(SizeType); - SizeType bkt_wroff = sizeof(sv_type), bkt_bufpos = 0, prev_bkt = 0, prev_hash = (SizeType)-1; - bucketsBuf[bkt_bufpos++] = cur_off; - // if might me better to write many zeroes here - f.seek(cur_off, SEEK_SET); - TVector<std::pair<TKeyType, TMappedType>> currentBlock; - bool emptyFile = true; - size_t prevRecSize = 0; - // seek forward - while (true) { - const TIoRec* rec = Srt.Next(); - if (currentBlock.empty() && !emptyFile) { - if (rec && prev_hash == rec->HashVal) { - Merge(currentBlock, prevRecSize); - } else { - // if there is only one record with this hash, don't recode it, just write - PutRecord(CurrentBlockRecs.Data(), prevRecSize, f, cur_off); - sv.num_elements++; - } - } - if (!rec || prev_hash != rec->HashVal) { - // write buckets table - for (size_t i = 0; i < currentBlock.size(); ++i) { - TIoRec* ioRec = MakeIoRec(TValueType(currentBlock[i])); - PutRecord(ioRec->Buf, ioRec->RecSize, f, cur_off); - } - sv.num_elements += currentBlock.size(); - currentBlock.clear(); - CurrentBlockRecs.Clear(); - if (rec) { - prev_hash = rec->HashVal; - } - } - // note: prev_bkt's semantics here is 'cur_bkt - 1', thus we are actually cycling - // until cur_bkt == rec->HashVal *inclusively* - while (!rec || prev_bkt != (rec->HashVal >> FreeBits)) { - bucketsBuf[bkt_bufpos++] = cur_off; - if (bkt_bufpos == bucketsBuf.size()) { - f.seek(bkt_wroff, SEEK_SET); - size_t sz = bkt_bufpos * sizeof(bucketsBuf[0]); - if (f.write(bucketsBuf.begin(), 1, sz) != sz) - throw yexception() << "could not write " << sz << " bytes to " << ~HashFileName; - bkt_wroff += sz; - bkt_bufpos = 0; - f.seek(cur_off, SEEK_SET); - } - prev_bkt++; - if (!rec) { - break; - } - assert(prev_bkt < BucketCount); - } - if (!rec) { - break; - } - emptyFile = false; - CurrentBlockRecs.Append(rec->Buf, rec->RecSize); - if (!currentBlock.empty()) { - Merge(currentBlock, rec->RecSize); - } else { - prevRecSize = rec->RecSize; - } - } - // finish buckets table - f.seek(bkt_wroff, SEEK_SET); - size_t sz = bkt_bufpos * sizeof(bucketsBuf[0]); - if (sz && f.write(bucketsBuf.begin(), 1, sz) != sz) - throw yexception() << "could not write " << sz << " bytes to " << ~HashFileName; - bkt_wroff += sz; - for (; prev_bkt < BucketCount; prev_bkt++) - f.fput(cur_off); - // finally write header - sv.data_end_off = cur_off; - f.seek(0, SEEK_SET); - f.fput(sv); - f.close(); - } - - void Close() { - Srt.Close(); - if (+OurTmpDir) - rmdir(~OurTmpDir); - } -}; diff --git a/library/cpp/microbdb/header.cpp b/library/cpp/microbdb/header.cpp deleted file mode 100644 index f4511d6fb6..0000000000 --- a/library/cpp/microbdb/header.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "header.h" - -#include <util/stream/output.h> -#include <util/stream/format.h> - -TString ToString(EMbdbErrors error) { - TString ret; - switch (error) { - case MBDB_ALREADY_INITIALIZED: - ret = "already initialized"; - break; - case MBDB_NOT_INITIALIZED: - ret = "not initialized"; - break; - case MBDB_BAD_DESCRIPTOR: - ret = "bad descriptor"; - break; - case MBDB_OPEN_ERROR: - ret = "open error"; - break; - case MBDB_READ_ERROR: - ret = "read error"; - break; - case MBDB_WRITE_ERROR: - ret = "write error"; - break; - case MBDB_CLOSE_ERROR: - ret = "close error"; - break; - case MBDB_EXPECTED_EOF: - ret = "expected eof"; - break; - case MBDB_UNEXPECTED_EOF: - ret = "unxepected eof"; - break; - case MBDB_BAD_FILENAME: - ret = "bad filename"; - break; - case MBDB_BAD_METAPAGE: - ret = "bad metapage"; - break; - case MBDB_BAD_RECORDSIG: - ret = "bad recordsig"; - break; - case MBDB_BAD_FILE_SIZE: - ret = "bad file size"; - break; - case MBDB_BAD_PAGESIG: - ret = "bad pagesig"; - break; - case MBDB_BAD_PAGESIZE: - ret = "bad pagesize"; - break; - case MBDB_BAD_PARM: - ret = "bad parm"; - break; - case MBDB_BAD_SYNC: - ret = "bad sync"; - break; - case MBDB_PAGE_OVERFLOW: - ret = "page overflow"; - break; - case MBDB_NO_MEMORY: - ret = "no memory"; - break; - case MBDB_MEMORY_LEAK: - ret = "memory leak"; - break; - case MBDB_NOT_SUPPORTED: - ret = "not supported"; - break; - default: - ret = "unknown"; - break; - } - return ret; -} - -TString ErrorMessage(int error, const TString& text, const TString& path, ui32 recordSig, ui32 gotRecordSig) { - TStringStream str; - str << text; - if (path.size()) - str << " '" << path << "'"; - str << ": " << ToString(static_cast<EMbdbErrors>(error)); - if (recordSig && (!gotRecordSig || recordSig != gotRecordSig)) - str << ". Expected RecordSig: " << Hex(recordSig, HF_ADDX); - if (recordSig && gotRecordSig && recordSig != gotRecordSig) - str << ", got: " << Hex(gotRecordSig, HF_ADDX); - str << ". Last system error text: " << LastSystemErrorText(); - return str.Str(); -} diff --git a/library/cpp/microbdb/header.h b/library/cpp/microbdb/header.h deleted file mode 100644 index 0951d610ea..0000000000 --- a/library/cpp/microbdb/header.h +++ /dev/null @@ -1,159 +0,0 @@ -#pragma once - -#include <util/system/defaults.h> -#include <util/generic/typetraits.h> -#include <util/generic/string.h> -#include <util/str_stl.h> - -#include <stdio.h> - -#define METASIZE (1u << 12) -#define METASIG 0x12345678u -#define PAGESIG 0x87654321u - -enum EMbdbErrors { - MBDB_ALREADY_INITIALIZED = 200, - MBDB_NOT_INITIALIZED = 201, - MBDB_BAD_DESCRIPTOR = 202, - MBDB_OPEN_ERROR = 203, - MBDB_READ_ERROR = 204, - MBDB_WRITE_ERROR = 205, - MBDB_CLOSE_ERROR = 206, - MBDB_EXPECTED_EOF = 207, - MBDB_UNEXPECTED_EOF = 208, - MBDB_BAD_FILENAME = 209, - MBDB_BAD_METAPAGE = 210, - MBDB_BAD_RECORDSIG = 211, - MBDB_BAD_FILE_SIZE = 212, - MBDB_BAD_PAGESIG = 213, - MBDB_BAD_PAGESIZE = 214, - MBDB_BAD_PARM = 215, - MBDB_BAD_SYNC = 216, - MBDB_PAGE_OVERFLOW = 217, - MBDB_NO_MEMORY = 218, - MBDB_MEMORY_LEAK = 219, - MBDB_NOT_SUPPORTED = 220 -}; - -TString ToString(EMbdbErrors error); -TString ErrorMessage(int error, const TString& text, const TString& path = TString(), ui32 recordSig = 0, ui32 gotRecordSig = 0); - -enum EPageFormat { - MBDB_FORMAT_RAW = 0, - MBDB_FORMAT_COMPRESSED = 1, - MBDB_FORMAT_NULL = 255 -}; - -enum ECompressionAlgorithm { - MBDB_COMPRESSION_ZLIB = 1, - MBDB_COMPRESSION_FASTLZ = 2, - MBDB_COMPRESSION_SNAPPY = 3 -}; - -struct TDatMetaPage { - ui32 MetaSig; - ui32 RecordSig; - ui32 PageSize; -}; - -struct TDatPage { - ui32 RecNum; //!< number of records on this page - ui32 PageSig; - ui32 Format : 2; //!< one of EPageFormat - ui32 Reserved : 30; -}; - -/// Additional page header with compression info -struct TCompressedPage { - ui32 BlockCount; - ui32 Algorithm : 4; - ui32 Version : 4; - ui32 Reserved : 24; -}; - -namespace NMicroBDB { - /// Header of compressed block - struct TCompressedHeader { - ui32 Compressed; - ui32 Original; /// original size of block - ui32 Count; /// number of records in block - ui32 Reserved; - }; - - Y_HAS_MEMBER(AssertValid); - - template <typename T, bool TVal> - struct TAssertValid { - void operator()(const T*) { - } - }; - - template <typename T> - struct TAssertValid<T, true> { - void operator()(const T* rec) { - return rec->AssertValid(); - } - }; - - template <typename T> - void AssertValid(const T* rec) { - return NMicroBDB::TAssertValid<T, NMicroBDB::THasAssertValid<T>::value>()(rec); - } - - Y_HAS_MEMBER(SizeOf); - - template <typename T, bool TVal> - struct TGetSizeOf; - - template <typename T> - struct TGetSizeOf<T, true> { - size_t operator()(const T* rec) { - return rec->SizeOf(); - } - }; - - template <typename T> - struct TGetSizeOf<T, false> { - size_t operator()(const T*) { - return sizeof(T); - } - }; - - inline char* GetFirstRecord(const TDatPage* page) { - switch (page->Format) { - case MBDB_FORMAT_RAW: - return (char*)page + sizeof(TDatPage); - case MBDB_FORMAT_COMPRESSED: - // Первая запись на сжатой странице сохраняется несжатой - // сразу же после всех заголовков. - // Алгоритм сохранения смотреть в TOutputRecordIterator::FlushBuffer - return (char*)page + sizeof(TDatPage) + sizeof(TCompressedPage) + sizeof(NMicroBDB::TCompressedHeader); - } - return (char*)nullptr; - } -} - -template <typename T> -size_t SizeOf(const T* rec) { - return NMicroBDB::TGetSizeOf<T, NMicroBDB::THasSizeOf<T>::value>()(rec); -} - -template <typename T> -size_t MaxSizeOf() { - return sizeof(T); -} - -static inline int DatNameToIdx(char iname[/*FILENAME_MAX*/], const char* dname) { - if (!dname || !*dname) - return MBDB_BAD_FILENAME; - const char* ptr; - if (!(ptr = strrchr(dname, '/'))) - ptr = dname; - if (!(ptr = strrchr(ptr, '.'))) - ptr = strchr(dname, 0); - if (ptr - dname > FILENAME_MAX - 5) - return MBDB_BAD_FILENAME; - memcpy(iname, dname, ptr - dname); - strcpy(iname + (ptr - dname), ".idx"); - return 0; -} diff --git a/library/cpp/microbdb/heap.h b/library/cpp/microbdb/heap.h deleted file mode 100644 index ef5a53534c..0000000000 --- a/library/cpp/microbdb/heap.h +++ /dev/null @@ -1,143 +0,0 @@ -#pragma once - -#include "header.h" -#include "extinfo.h" - -#include <util/generic/vector.h> - -#include <errno.h> - -/////////////////////////////////////////////////////////////////////////////// - -/// Default comparator -template <class TVal> -struct TCompareByLess { - inline bool operator()(const TVal* a, const TVal* b) const { - return TLess<TVal>()(*a, *b); - } -}; - -/////////////////////////////////////////////////////////////////////////////// - -template <class TVal, class TIterator, class TCompare = TCompareByLess<TVal>> -class THeapIter { -public: - int Init(TIterator** iters, int count) { - Term(); - if (!count) - return 0; - if (!(Heap = (TIterator**)malloc(count * sizeof(TIterator*)))) - return ENOMEM; - - Count = count; - count = 0; - while (count < Count) - if (count && !(*iters)->Next()) { //here first TIterator is NOT initialized! - Count--; - iters++; - } else { - Heap[count++] = *iters++; - } - count = Count / 2; - while (--count > 0) //Heap[0] is not changed! - Sift(count, Count); //do not try to replace this code by make_heap - return 0; - } - - int Init(TIterator* iters, int count) { - TVector<TIterator*> a(count); - for (int i = 0; i < count; ++i) - a[i] = &iters[i]; - return Init(&a[0], count); - } - - THeapIter() - : Heap(nullptr) - , Count(0) - { - } - - THeapIter(TIterator* a, TIterator* b) - : Heap(nullptr) - , Count(0) - { - TIterator* arr[] = {a, b}; - if (Init(arr, 2)) - ythrow yexception() << "can't Init THeapIter"; - } - - THeapIter(TVector<TIterator>& v) - : Heap(nullptr) - , Count(0) - { - if (Init(&v[0], v.size())) { - ythrow yexception() << "can't Init THeapIter"; - } - } - - ~THeapIter() { - Term(); - } - - inline const TVal* Current() const { - if (!Count) - return nullptr; - return (*Heap)->Current(); - } - - inline const TIterator* CurrentIter() const { - return *Heap; - } - - //for ends of last file will use Heap[0] = Heap[0] ! and - //returns Current of eof so Current of eof MUST return NULL - //possible this is bug and need fixing - const TVal* Next() { - if (!Count) - return nullptr; - if (!(*Heap)->Next()) //on first call unitialized first TIterator - *Heap = Heap[--Count]; //will be correctly initialized - - if (Count == 2) { - if (TCompare()(Heap[1]->Current(), Heap[0]->Current())) - DoSwap(Heap[1], Heap[0]); - } else - Sift(0, Count); - - return Current(); - } - - inline bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { - return (*Heap)->GetExtInfo(extInfo); - } - - inline const ui8* GetExtInfoRaw(size_t* len) const { - return (*Heap)->GetExtInfoRaw(len); - } - - void Term() { - ::free(Heap); - Heap = nullptr; - Count = 0; - } - -protected: - void Sift(int node, int end) { - TIterator* x = Heap[node]; - int son; - for (son = 2 * node + 1; son < end; node = son, son = 2 * node + 1) { - if (son < (end - 1) && TCompare()(Heap[son + 1]->Current(), Heap[son]->Current())) - son++; - if (TCompare()(Heap[son]->Current(), x->Current())) - Heap[node] = Heap[son]; - else - break; - } - Heap[node] = x; - } - - TIterator** Heap; - int Count; -}; - -/////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/microbdb/input.h b/library/cpp/microbdb/input.h deleted file mode 100644 index a214ba6e8a..0000000000 --- a/library/cpp/microbdb/input.h +++ /dev/null @@ -1,1027 +0,0 @@ -#pragma once - -#include "header.h" -#include "file.h" -#include "reader.h" - -#include <util/system/maxlen.h> -#include <util/system/event.h> -#include <util/system/thread.h> - -#include <thread> - -#include <sys/uio.h> - -#include <errno.h> - -template <class TFileManip> -inline ssize_t Readv(TFileManip& fileManip, const struct iovec* iov, int iovcnt) { - ssize_t read_count = 0; - for (int n = 0; n < iovcnt; n++) { - ssize_t last_read = fileManip.Read(iov[n].iov_base, iov[n].iov_len); - if (last_read < 0) - return -1; - read_count += last_read; - } - return read_count; -} - -template <class TVal, typename TBasePageIter> -class TInputRecordIterator: public TBasePageIter { - typedef THolder<NMicroBDB::IBasePageReader<TVal>> TReaderHolder; - -public: - typedef TBasePageIter TPageIter; - - TInputRecordIterator() { - Init(); - } - - ~TInputRecordIterator() { - Term(); - } - - const TVal* Current() const { - return Rec; - } - - bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { - if (!Rec) - return false; - return Reader->GetExtInfo(extInfo); - } - - const ui8* GetExtInfoRaw(size_t* len) const { - if (!Rec) - return nullptr; - return Reader->GetExtInfoRaw(len); - } - - size_t GetRecSize() const { - return Reader->GetRecSize(); - } - - size_t GetExtSize() const { - return Reader->GetExtSize(); - } - - const TVal* Next() { - if (RecNum) - --RecNum; - else { - TDatPage* page = TPageIter::Next(); - if (!page) { - if (TPageIter::IsFrozen() && Reader.Get()) - Reader->SetClearFlag(); - return Rec = nullptr; - } else if (!!SelectReader()) - return Rec = nullptr; - RecNum = TPageIter::Current()->RecNum - 1; - } - return Rec = Reader->Next(); - } - - // Skip(0) == Current(); Skip(1) == Next() - const TVal* Skip(int& num) { - // Y_ASSERT(num >= 0); ? otherwise it gets into infinite loop - while (num > RecNum) { - num -= RecNum + 1; - if (!TPageIter::Next() || !!SelectReader()) { - RecNum = 0; - return Rec = nullptr; - } - RecNum = TPageIter::Current()->RecNum - 1; - Rec = Reader->Next(); - } - ++num; - while (--num) - Next(); - return Rec; - } - - // begin reading from next page - void Reset() { - Rec = NULL; - RecNum = 0; - if (Reader.Get()) - Reader->Reset(); - } - -protected: - int Init() { - Rec = nullptr; - RecNum = 0; - Format = MBDB_FORMAT_NULL; - return 0; - } - - int Term() { - Reader.Reset(nullptr); - Format = MBDB_FORMAT_NULL; - Rec = nullptr; - RecNum = 0; - return 0; - } - - const TVal* GotoPage(int pageno) { - if (!TPageIter::GotoPage(pageno) || !!SelectReader()) - return Rec = nullptr; - RecNum = TPageIter::Current()->RecNum - 1; - return Rec = Reader->Next(); - } - - int SelectReader() { - if (!TPageIter::Current()) - return MBDB_UNEXPECTED_EOF; - if (ui32(Format) != TPageIter::Current()->Format) { - switch (TPageIter::Current()->Format) { - case MBDB_FORMAT_RAW: - Reader.Reset(new NMicroBDB::TRawPageReader<TVal, TPageIter>(this)); - break; - case MBDB_FORMAT_COMPRESSED: - Reader.Reset(new NMicroBDB::TCompressedReader<TVal, TPageIter>(this)); - break; - default: - return MBDB_NOT_SUPPORTED; - } - Format = EPageFormat(TPageIter::Current()->Format); - } else { - Y_ASSERT(Reader.Get() != nullptr); - Reader->Reset(); - } - return 0; - } - - const TVal* Rec; - TReaderHolder Reader; - int RecNum; //!< number of records on the current page after the current record - EPageFormat Format; -}; - -template <class TBaseReader> -class TInputPageIterator: public TBaseReader { -public: - typedef TBaseReader TReader; - - TInputPageIterator() - : Buf(nullptr) - { - Term(); - } - - ~TInputPageIterator() { - Term(); - } - - TDatPage* Current() { - return CurPage; - } - - int Freeze() { - return (Frozen = (PageNum == -1) ? 0 : PageNum); - } - - void Unfreeze() { - Frozen = -1; - } - - inline int IsFrozen() const { - return Frozen + 1; - } - - inline size_t GetPageSize() const { - return TReader::GetPageSize(); - } - - inline int GetPageNum() const { - return PageNum; - } - - inline int IsEof() const { - return Eof; - } - - TDatPage* Next() { - if (PageNum >= Maxpage && ReadBuf()) { - Eof = Eof ? Eof : TReader::IsEof(); - return CurPage = nullptr; - } - return CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize()); - } - - TDatPage* GotoPage(int pageno) { - if (pageno <= Maxpage && pageno >= (Maxpage - Pages + 1)) { - PageNum = pageno; - return CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize()); - } - if (IsFrozen() || TReader::GotoPage(pageno)) - return nullptr; - Maxpage = PageNum = pageno - 1; - Eof = 0; - return Next(); - } - -protected: - int Init(size_t pages, int pagesOrBytes) { - Term(); - if (pagesOrBytes == -1) - Bufpages = TReader::GetLastPage(); - else if (pagesOrBytes) - Bufpages = pages; - else - Bufpages = pages / GetPageSize(); - if (!TReader::GetLastPage()) { - Bufpages = 0; - assert(Eof == 1); - return 0; - } - int lastPage = TReader::GetLastPage(); - if (lastPage >= 0) - Bufpages = (int)Min(lastPage, Bufpages); - Bufpages = Max(2, Bufpages); - Eof = 0; - ABuf.Alloc(Bufpages * GetPageSize()); - return (Buf = ABuf.Begin()) ? 0 : ENOMEM; - // return (Buf = (char*)malloc(Bufpages * GetPageSize())) ? 0 : ENOMEM; - } - - int Term() { - // free(Buf); - ABuf.Dealloc(); - Buf = nullptr; - Maxpage = PageNum = Frozen = -1; - Bufpages = 0; - Pages = 0; - Eof = 1; - CurPage = nullptr; - return 0; - } - - int ReadBuf() { - int nvec; - iovec vec[2]; - int maxpage = (Frozen == -1 ? Maxpage + 1 : Frozen) + Bufpages - 1; - int minpage = Maxpage + 1; - if (maxpage < minpage) - return EAGAIN; - minpage %= Bufpages; - maxpage %= Bufpages; - if (maxpage < minpage) { - vec[0].iov_base = Buf + GetPageSize() * minpage; - vec[0].iov_len = GetPageSize() * (Bufpages - minpage); - vec[1].iov_base = Buf; - vec[1].iov_len = GetPageSize() * (maxpage + 1); - nvec = 2; - } else { - vec[0].iov_base = Buf + GetPageSize() * minpage; - vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1); - nvec = 1; - } - TReader::ReadPages(vec, nvec, &Pages); - Maxpage += Pages; - return !Pages; - } - - int Maxpage, PageNum, Frozen, Bufpages, Eof, Pages; - TDatPage* CurPage; - // TMappedArray<char> ABuf; - TMappedAllocation ABuf; - char* Buf; -}; - -template <class TBaseReader> -class TInputPageIteratorMT: public TBaseReader { -public: - typedef TBaseReader TReader; - - TInputPageIteratorMT() - : CurBuf(0) - , CurReadBuf(0) - , Buf(nullptr) - { - Term(); - } - - ~TInputPageIteratorMT() { - Term(); - } - - TDatPage* Current() { - return CurPage; - } - - int Freeze() { - return (Frozen = (PageNum == -1) ? 0 : PageNum); - } - - void Unfreeze() { - Frozen = -1; - } - - inline int IsFrozen() const { - return Frozen + 1; - } - - inline size_t GetPageSize() const { - return TReader::GetPageSize(); - } - - inline int GetPageNum() const { - return PageNum; - } - - inline int IsEof() const { - return Eof; - } - - TDatPage* Next() { - if (Eof) - return CurPage = nullptr; - if (PageNum >= Maxpage && ReadBuf()) { - Eof = Eof ? Eof : TReader::IsEof(); - return CurPage = nullptr; - } - return CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize()); - } - - TDatPage* GotoPage(int pageno) { - if (pageno <= Maxpage && pageno >= (Maxpage - Pages + 1)) { - PageNum = pageno; - return CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize()); - } - if (IsFrozen() || TReader::GotoPage(pageno)) - return nullptr; - Maxpage = PageNum = pageno - 1; - Eof = 0; - return Next(); - } - - void ReadPages() { - // fprintf(stderr, "ReadPages started\n"); - bool eof = false; - while (!eof) { - QEvent[CurBuf].Wait(); - if (Finish) - return; - int pages = ReadCurBuf(Bufs[CurBuf]); - PagesM[CurBuf] = pages; - eof = !pages; - AEvent[CurBuf].Signal(); - CurBuf ^= 1; - } - } - -protected: - int Init(size_t pages, int pagesOrBytes) { - Term(); - if (pagesOrBytes == -1) - Bufpages = TReader::GetLastPage(); - else if (pagesOrBytes) - Bufpages = pages; - else - Bufpages = pages / GetPageSize(); - if (!TReader::GetLastPage()) { - Bufpages = 0; - assert(Eof == 1); - return 0; - } - int lastPage = TReader::GetLastPage(); - if (lastPage >= 0) - Bufpages = (int)Min(lastPage, Bufpages); - Bufpages = Max(2, Bufpages); - Eof = 0; - ABuf.Alloc(Bufpages * GetPageSize() * 2); - Bufs[0] = ABuf.Begin(); - Bufs[1] = Bufs[0] + Bufpages * GetPageSize(); - // return (Buf = (char*)malloc(Bufpages * GetPageSize())) ? 0 : ENOMEM; - Finish = false; - ReadThread = std::thread([this]() { - TThread::SetCurrentThreadName("DatReader"); - ReadPages(); - }); - QEvent[0].Signal(); - return Bufs[0] ? 0 : ENOMEM; - } - - void StopThread() { - Finish = true; - QEvent[0].Signal(); - QEvent[1].Signal(); - ReadThread.join(); - } - - int Term() { - // free(Buf); - if (ReadThread.joinable()) - StopThread(); - ABuf.Dealloc(); - Buf = nullptr; - Bufs[0] = nullptr; - Bufs[1] = nullptr; - Maxpage = MaxpageR = PageNum = Frozen = -1; - Bufpages = 0; - Pages = 0; - Eof = 1; - CurPage = nullptr; - return 0; - } - - int ReadCurBuf(char* buf) { - int nvec; - iovec vec[2]; - int maxpage = (Frozen == -1 ? MaxpageR + 1 : Frozen) + Bufpages - 1; - int minpage = MaxpageR + 1; - if (maxpage < minpage) - return EAGAIN; - minpage %= Bufpages; - maxpage %= Bufpages; - if (maxpage < minpage) { - vec[0].iov_base = buf + GetPageSize() * minpage; - vec[0].iov_len = GetPageSize() * (Bufpages - minpage); - vec[1].iov_base = buf; - vec[1].iov_len = GetPageSize() * (maxpage + 1); - nvec = 2; - } else { - vec[0].iov_base = buf + GetPageSize() * minpage; - vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1); - nvec = 1; - } - int pages; - TReader::ReadPages(vec, nvec, &pages); - MaxpageR += pages; - return pages; - } - - int ReadBuf() { - QEvent[CurReadBuf ^ 1].Signal(); - AEvent[CurReadBuf].Wait(); - Buf = Bufs[CurReadBuf]; - Maxpage += (Pages = PagesM[CurReadBuf]); - CurReadBuf ^= 1; - return !Pages; - } - - int Maxpage, MaxpageR, PageNum, Frozen, Bufpages, Eof, Pages; - TDatPage* CurPage; - // TMappedArray<char> ABuf; - ui32 CurBuf; - ui32 CurReadBuf; - TMappedAllocation ABuf; - char* Buf; - char* Bufs[2]; - ui32 PagesM[2]; - TAutoEvent QEvent[2]; - TAutoEvent AEvent[2]; - std::thread ReadThread; - bool Finish; -}; - -template <typename TFileManip> -class TInputPageFileImpl: private TNonCopyable { -protected: - TFileManip FileManip; - -public: - TInputPageFileImpl() - : Pagesize(0) - , Fd(-1) - , Eof(1) - , Error(0) - , Pagenum(0) - , Recordsig(0) - { - Term(); - } - - ~TInputPageFileImpl() { - Term(); - } - - inline int IsEof() const { - return Eof; - } - - inline int GetError() const { - return Error; - } - - inline size_t GetPageSize() const { - return Pagesize; - } - - inline int GetLastPage() const { - return Pagenum; - } - - inline ui32 GetRecordSig() const { - return Recordsig; - } - - inline bool IsOpen() const { - return FileManip.IsOpen(); - } - -protected: - int Init(const char* fname, ui32 recsig, ui32* gotrecsig = nullptr, bool direct = false) { - Error = FileManip.Open(fname, direct); - return Error ? Error : Init(TFile(), recsig, gotrecsig); - } - - int Init(const TFile& file, ui32 recsig, ui32* gotrecsig = nullptr) { - if (!file.IsOpen() && !FileManip.IsOpen()) - return MBDB_NOT_INITIALIZED; - if (file.IsOpen() && FileManip.IsOpen()) - return MBDB_ALREADY_INITIALIZED; - if (file.IsOpen()) { - Error = FileManip.Init(file); - if (Error) - return Error; - } - - // TArrayHolder<ui8> buf(new ui8[METASIZE + FS_BLOCK_SIZE]); - // ui8* ptr = (buf.Get() + FS_BLOCK_SIZE - ((ui64)buf.Get() & (FS_BLOCK_SIZE - 1))); - TMappedArray<ui8> buf; - buf.Create(METASIZE); - ui8* ptr = &buf[0]; - TDatMetaPage* meta = (TDatMetaPage*)ptr; - ssize_t size = METASIZE; - ssize_t ret; - while (size && (ret = FileManip.Read(ptr, (unsigned)size)) > 0) { - Y_ASSERT(ret <= size); - size -= ret; - ptr += ret; - } - if (size) { - FileManip.Close(); - return Error = MBDB_BAD_METAPAGE; - } - if (gotrecsig) - *gotrecsig = meta->RecordSig; - return Init(TFile(), meta, recsig); - } - - int Init(TAutoPtr<IInputStream> input, ui32 recsig, ui32* gotrecsig = nullptr) { - if (!input && !FileManip.IsOpen()) - return MBDB_NOT_INITIALIZED; - if (FileManip.IsOpen()) - return MBDB_ALREADY_INITIALIZED; - - Error = FileManip.Open(input); - if (Error) - return Error; - - TArrayHolder<ui8> buf(new ui8[METASIZE]); - ui8* ptr = buf.Get(); - ssize_t size = METASIZE; - ssize_t ret; - while (size && (ret = FileManip.Read(ptr, (unsigned)size)) > 0) { - Y_ASSERT(ret <= size); - size -= ret; - ptr += ret; - } - if (size) { - FileManip.Close(); - return Error = MBDB_BAD_METAPAGE; - } - TDatMetaPage* meta = (TDatMetaPage*)buf.Get(); - if (gotrecsig) - *gotrecsig = meta->RecordSig; - return Init(TFile(), meta, recsig); - } - - int Init(const TFile& file, const TDatMetaPage* meta, ui32 recsig) { - if (!file.IsOpen() && !FileManip.IsOpen()) - return MBDB_NOT_INITIALIZED; - if (file.IsOpen() && FileManip.IsOpen()) - return MBDB_ALREADY_INITIALIZED; - if (file.IsOpen()) { - Error = FileManip.Init(file); - if (Error) - return Error; - } - - if (meta->MetaSig != METASIG) - Error = MBDB_BAD_METAPAGE; - else if (meta->RecordSig != recsig) - Error = MBDB_BAD_RECORDSIG; - - if (Error) { - FileManip.Close(); - return Error; - } - - i64 flength = FileManip.GetLength(); - if (flength >= 0) { - i64 fsize = flength; - fsize -= METASIZE; - if (fsize % meta->PageSize) - return Error = MBDB_BAD_FILE_SIZE; - Pagenum = (int)(fsize / meta->PageSize); - } else { - Pagenum = -1; - } - Pagesize = meta->PageSize; - Recordsig = meta->RecordSig; - Error = Eof = 0; - return Error; - } - - int ReadPages(iovec* vec, int nvec, int* pages) { - *pages = 0; - - if (Eof || Error) - return Error; - - ssize_t size = 0, delta = 0, total = 0; - iovec* pvec = vec; - int vsize = nvec; - - while (vsize && (size = Readv(FileManip, pvec, (int)Min(vsize, 16))) > 0) { - total += size; - if (delta) { - size += delta; - pvec->iov_len += delta; - pvec->iov_base = (char*)pvec->iov_base - delta; - delta = 0; - } - while (size) { - if ((size_t)size >= pvec->iov_len) { - size -= pvec->iov_len; - ++pvec; - --vsize; - } else { - delta = size; - pvec->iov_len -= size; - pvec->iov_base = (char*)pvec->iov_base + size; - size = 0; - } - } - } - if (delta) { - pvec->iov_len += delta; - pvec->iov_base = (char*)pvec->iov_base - delta; - } - if (size < 0) - return Error = errno ? errno : MBDB_READ_ERROR; - if (total % Pagesize) - return Error = MBDB_BAD_FILE_SIZE; - if (vsize) - Eof = 1; - *pages = total / Pagesize; // it would be better to assign it after the for-loops - for (; total; ++vec, total -= size) - for (size = 0; size < total && (size_t)size < vec->iov_len; size += Pagesize) - if (((TDatPage*)((char*)vec->iov_base + size))->PageSig != PAGESIG) - return Error = MBDB_BAD_PAGESIG; - return Error; - } - - int GotoPage(int page) { - if (Error) - return Error; - Eof = 0; - i64 offset = (i64)page * Pagesize + METASIZE; - if (offset != FileManip.Seek(offset, SEEK_SET)) - Error = MBDB_BAD_FILE_SIZE; - return Error; - } - - int Term() { - return FileManip.Close(); - } - - size_t Pagesize; - int Fd; - int Eof; - int Error; - int Pagenum; //!< number of pages in this file - ui32 Recordsig; -}; - -template <class TBaseReader> -class TMappedInputPageIterator: public TBaseReader { -public: - typedef TBaseReader TReader; - - TMappedInputPageIterator() { - Term(); - } - - ~TMappedInputPageIterator() { - Term(); - } - - TDatPage* Current() { - return CurPage; - } - - inline size_t GetPageSize() const { - return TReader::GetPageSize(); - } - - inline int GetPageNum() const { - return PageNum; - } - - inline int IsEof() const { - return Eof; - } - - inline int IsFrozen() const { - return 0; - } - - TDatPage* Next() { - i64 pos = (i64)(++PageNum) * GetPageSize() + METASIZE; - if (pos < 0 || pos >= (i64)TReader::GetSize()) { - Eof = 1; - return CurPage = nullptr; - } - return CurPage = (TDatPage*)((char*)TReader::GetData() + pos); - } - -protected: - int Init(size_t /*pages*/, int /*pagesOrBytes*/) { - Term(); - Eof = 0; - return 0; - } - - int Term() { - PageNum = -1; - Eof = 1; - CurPage = nullptr; - return 0; - } - - TDatPage* GotoPage(int pageno) { - PageNum = pageno - 1; - Eof = 0; - return Next(); - } - - int PageNum, Eof, Pages, Pagenum; - TDatPage* CurPage; -}; - -using TInputPageFile = TInputPageFileImpl<TInputFileManip>; - -template <class TVal, - typename TBaseRecIter = TInputRecordIterator<TVal, TInputPageIterator<TInputPageFile>>> -class TInDatFileImpl: public TBaseRecIter { -public: - typedef TBaseRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - typedef typename TRecIter::TPageIter::TReader TReader; - using TRecIter::GotoPage; - - int Open(const char* fname, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr, bool direct = false) { - int ret = TReader::Init(fname, TVal::RecordSig, gotRecordSig, direct); - return ret ? ret : Open2(pages, pagesOrBytes); - } - - int Open(const TFile& file, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { - int ret = TReader::Init(file, TVal::RecordSig, gotRecordSig); - return ret ? ret : Open2(pages, pagesOrBytes); - } - - int Open(TAutoPtr<IInputStream> input, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { - int ret = TReader::Init(input, TVal::RecordSig, gotRecordSig); - return ret ? ret : Open2(pages, pagesOrBytes); - } - - int Open(const TFile& file, const TDatMetaPage* meta, size_t pages = 1, int pagesOrBytes = 1) { - int ret = TReader::Init(file, meta, TVal::RecordSig); - return ret ? ret : Open2(pages, pagesOrBytes); - } - - int Close() { - int ret1 = TRecIter::Term(); - int ret2 = TPageIter::Term(); - int ret3 = TReader::Term(); - return ret1 ? ret1 : ret2 ? ret2 : ret3; - } - - const TVal* GotoLastPage() { - return TReader::GetLastPage() <= 0 ? nullptr : TRecIter::GotoPage(TReader::GetLastPage() - 1); - } - -private: - int Open2(size_t pages, int pagesOrBytes) { - int ret = TPageIter::Init(pages, pagesOrBytes); - if (!ret) - ret = TRecIter::Init(); - if (ret) - Close(); - return ret; - } -}; - -template <class TVal> -class TInIndexFile: protected TInDatFileImpl<TVal> { - typedef TInDatFileImpl<TVal> TDatFile; - typedef typename TDatFile::TRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - typedef typename TExtInfoType<TVal>::TResult TExtInfo; - -public: - using TDatFile::IsOpen; - - TInIndexFile() - : Index0(nullptr) - { - } - - int Open(const char* fname, size_t pages = 2, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { - int ret = TDatFile::Open(fname, pages, pagesOrBytes, gotRecordSig); - if (ret) - return ret; - if (!(Index0 = (TDatPage*)malloc(TPageIter::GetPageSize()))) { - TDatFile::Close(); - return MBDB_NO_MEMORY; - } - if (!TExtInfoType<TVal>::Exists && SizeOf((TVal*)nullptr)) - RecsOnPage = (TPageIter::GetPageSize() - sizeof(TDatPage)) / DatCeil(SizeOf((TVal*)nullptr)); - TDatFile::Next(); - memcpy(Index0, TPageIter::Current(), TPageIter::GetPageSize()); - return 0; - } - - int Close() { - free(Index0); - Index0 = nullptr; - return TDatFile::Close(); - } - - inline int GetError() const { - return TDatFile::GetError(); - } - - int FindKey(const TVal* akey, const TExtInfo* extInfo = nullptr) { - assert(IsOpen()); - if (TExtInfoType<TVal>::Exists || !SizeOf((TVal*)nullptr)) - return FindVszKey(akey, extInfo); - int num = FindKeyOnPage(Index0, akey); - TDatPage* page = TPageIter::GotoPage(num + 1); - if (!page) - return 0; - num = FindKeyOnPage(page, akey); - num += (TPageIter::GetPageNum() - 1) * RecsOnPage; - return num; - } - - int FindVszKey(const TVal* akey, const TExtInfo* extInfo = NULL) { - int num = FindVszKeyOnPage(Index0, akey, extInfo); - int num_add = 0; - for (int p = 0; p < num; p++) { - TDatPage* page = TPageIter::GotoPage(p + 1); - if (!page) - return 0; - num_add += page->RecNum; - } - TDatPage* page = TPageIter::GotoPage(num + 1); - if (!page) - return 0; - num = FindVszKeyOnPage(page, akey, extInfo); - num += num_add; - return num; - } - -protected: - int FindKeyOnPage(TDatPage* page, const TVal* key) { - int left = 0; - int right = page->RecNum - 1; - int recsize = DatCeil(SizeOf((TVal*)nullptr)); - while (left < right) { - int middle = (left + right) >> 1; - if (*((TVal*)((char*)page + sizeof(TDatPage) + middle * recsize)) < *key) - left = middle + 1; - else - right = middle; - } - //borders check (left and right) - return (left == 0 || *((TVal*)((char*)page + sizeof(TDatPage) + left * recsize)) < *key) ? left : left - 1; - } - - // will deserialize rawExtinfoA to extInfoA only if necessery - inline bool KeyLess_(const TVal* a, const TVal* b, - TExtInfo* extInfoA, const TExtInfo* extInfoB, - const ui8* rawExtInfoA, size_t rawLen) { - if (*a < *b) { - return true; - } else if (!extInfoB || *b < *a) { - return false; - } else { - // *a == *b && extInfoB - Y_PROTOBUF_SUPPRESS_NODISCARD extInfoA->ParseFromArray(rawExtInfoA, rawLen); - return (*extInfoA < *extInfoB); - } - } - - int FindVszKeyOnPage(TDatPage* page, const TVal* key, const TExtInfo* extInfo) { - TVal* cur = (TVal*)((char*)page + sizeof(TDatPage)); - ui32 recnum = page->RecNum; - if (!TExtInfoType<TVal>::Exists) { - for (; recnum > 0 && *cur < *key; --recnum) - cur = (TVal*)((char*)cur + DatCeil(SizeOf(cur))); - } else { - size_t ll; - size_t l; - size_t sz = NMicroBDB::SizeOfExt(cur, &ll, &l); - TExtInfo ei; - for (; recnum > 0 && KeyLess_(cur, key, &ei, extInfo, (ui8*)cur + sz + ll, l); --recnum) { - cur = (TVal*)((ui8*)cur + DatCeil(sz + ll + l)); - sz = NMicroBDB::SizeOfExt(cur, &ll, &l); - } - } - - int idx = page->RecNum - recnum - 1; - return (idx >= 0) ? idx : 0; - } - - TDatPage* Index0; - int RecsOnPage; -}; - -template <class TVal, class TKey, class TPageIterator = TInputPageIterator<TInputPageFile>> -class TKeyFileMixin: public TInDatFileImpl<TVal, TInputRecordIterator<TVal, TPageIterator>> { -protected: - TInIndexFile<TKey> KeyFile; -}; - -template <class TVal, class TKey, class TBase = TKeyFileMixin<TVal, TKey>> -class TDirectInDatFile: public TBase { - typedef TBase TDatFile; - typedef typename TDatFile::TRecIter TRecIter; - typedef typename TDatFile::TPageIter TPageIter; - -public: - void Open(const char* path, size_t pages = 1, size_t keypages = 1, int pagesOrBytes = 1) { - int ret; - ui32 gotRecordSig = 0; - - ret = TDatFile::Open(path, pages, pagesOrBytes, &gotRecordSig); - if (ret) { - ythrow yexception() << ErrorMessage(ret, "Failed to open input file", path, TVal::RecordSig, gotRecordSig); - } - char KeyName[PATH_MAX + 1]; - if (DatNameToIdx(KeyName, path)) { - ythrow yexception() << ErrorMessage(MBDB_BAD_FILENAME, "Failed to open input file", path); - } - gotRecordSig = 0; - ret = KeyFile.Open(KeyName, keypages, 1, &gotRecordSig); - if (ret) { - ythrow yexception() << ErrorMessage(ret, "Failed to open input keyfile", KeyName, TKey::RecordSig, gotRecordSig); - } - } - - void Close() { - int ret; - - if (TDatFile::IsOpen() && (ret = TDatFile::GetError())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error before closing input file"); - if ((ret = TDatFile::Close())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error while closing input file"); - - if (KeyFile.IsOpen() && (ret = KeyFile.GetError())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error before closing input keyfile"); - if ((ret = KeyFile.Close())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error while closing input keyfile"); - } - - const TVal* FindRecord(const TKey* key, const typename TExtInfoType<TKey>::TResult* extInfo = nullptr) { - int page = KeyFile.FindKey(key, extInfo); - const TVal* val = TRecIter::GotoPage(page); - if (!TExtInfoType<TVal>::Exists || !extInfo) { - TKey k; - while (val) { - TMakeExtKey<TVal, TKey>::Make(&k, nullptr, val, nullptr); - if (!(k < *key)) - break; - val = TRecIter::Next(); - } - } else { - typename TExtInfoType<TVal>::TResult valExt; - TKey k; - typename TExtInfoType<TKey>::TResult kExt; - while (val) { - TRecIter::GetExtInfo(&valExt); - TMakeExtKey<TVal, TKey>::Make(&k, &kExt, val, &valExt); - if (*key < k || !(k < *key) && !(kExt < *extInfo)) // k > *key || k == *key && kExt >= *extInfo - break; - val = TRecIter::Next(); - } - } - return val; - } - - int FindPagesNo(const TKey* key, const typename TExtInfoType<TVal>::TResult* extInfo = NULL) { - return KeyFile.FindKey(key, extInfo); - } - -protected: - using TBase::KeyFile; -}; diff --git a/library/cpp/microbdb/microbdb.cpp b/library/cpp/microbdb/microbdb.cpp deleted file mode 100644 index c10dbdf126..0000000000 --- a/library/cpp/microbdb/microbdb.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "microbdb.h" diff --git a/library/cpp/microbdb/microbdb.h b/library/cpp/microbdb/microbdb.h deleted file mode 100644 index 7521887337..0000000000 --- a/library/cpp/microbdb/microbdb.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include <util/folder/dirut.h> - -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 4706) /*assignment within conditional expression*/ -#pragma warning(disable : 4267) /*conversion from 'size_t' to 'type', possible loss of data*/ -#endif - -#include "align.h" -#include "extinfo.h" -#include "header.h" -#include "reader.h" -#include "heap.h" -#include "file.h" -#include "sorter.h" -#include "input.h" -#include "output.h" -#include "sorterdef.h" - -inline int MakeSorterTempl(char path[/*FILENAME_MAX*/], const char* prefix) { - int ret = MakeTempDir(path, prefix); - if (!ret && strlcat(path, "%06d", FILENAME_MAX) > FILENAME_MAX - 100) - ret = EINVAL; - if (ret) - path[0] = 0; - return ret; -} - -inline int GetMeta(TFile& file, TDatMetaPage* meta) { - ui8 buf[METASIZE], *ptr = buf; - ssize_t size = sizeof(buf), ret; - while (size && (ret = file.Read(ptr, size)) > 0) { - size -= ret; - ptr += ret; - } - if (size) - return MBDB_BAD_FILE_SIZE; - ptr = buf; // gcc 4.4 warning fix - *meta = *(TDatMetaPage*)ptr; - return (meta->MetaSig == METASIG) ? 0 : MBDB_BAD_METAPAGE; -} - -template <class TRec> -inline bool IsDatFile(const char* fname) { - TDatMetaPage meta; - TFile f(fname, RdOnly); - return !GetMeta(f, &meta) && meta.RecordSig == TRec::RecordSig; -} - -#if defined(_MSC_VER) -#pragma warning(pop) -#endif diff --git a/library/cpp/microbdb/noextinfo.proto b/library/cpp/microbdb/noextinfo.proto deleted file mode 100644 index 6a78882e07..0000000000 --- a/library/cpp/microbdb/noextinfo.proto +++ /dev/null @@ -1,4 +0,0 @@ - -message TNoExtInfo { -} - diff --git a/library/cpp/microbdb/output.h b/library/cpp/microbdb/output.h deleted file mode 100644 index d0ecab2108..0000000000 --- a/library/cpp/microbdb/output.h +++ /dev/null @@ -1,1049 +0,0 @@ -#pragma once - -#include "header.h" -#include "file.h" - -#include <util/generic/buffer.h> -#include <util/memory/tempbuf.h> - -#include <sys/uio.h> - -template <class TFileManip> -inline ssize_t Writev(TFileManip& fileManip, const struct iovec* iov, int iovcnt) { - ssize_t written_count = 0; - for (int n = 0; n < iovcnt; n++) { - ssize_t last_write = fileManip.Write(iov[n].iov_base, iov[n].iov_len); - if (last_write < 0) - return -1; - written_count += last_write; - } - return written_count; -} - -//********************************************************************* -struct TFakeIndexer { - inline void NextPage(TDatPage*) noexcept { - } -}; - -struct TCallbackIndexer { - typedef void (*TCallback)(void* This, const TDatPage* page); - - TCallbackIndexer() { - Callback = nullptr; - } - - void SetCallback(void* t, TCallback c) { - This = t; - Callback = c; - } - - void NextPage(TDatPage* dat) { - Callback(This, dat); - } - - TCallback Callback; - void* This; -}; - -template <class TVal, typename TBasePageIter, typename TBaseIndexer = TFakeIndexer, typename TCompressor = TFakeCompression> -class TOutputRecordIterator; - -template <class TVal, typename TBasePageIter, typename TBaseIndexer> -class TOutputRecordIterator<TVal, TBasePageIter, TBaseIndexer, TFakeCompression> - : public TBasePageIter, public TBaseIndexer { -public: - enum EOffset { - WrongOffset = size_t(-1) - }; - - typedef TBasePageIter TPageIter; - typedef TBaseIndexer TIndexer; - - TOutputRecordIterator() { - Clear(); - } - - ~TOutputRecordIterator() { - Term(); - } - - inline const TVal* Current() const { - return Rec; - } - - const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) { - NMicroBDB::AssertValid(v); - size_t len = SizeOf(v); - if (!TExtInfoType<TVal>::Exists) - return (Reserve(len)) ? (TVal*)memcpy(Rec, v, len) : nullptr; - else if (extInfo) { - size_t extSize = extInfo->ByteSize(); - size_t extLenSize = len_long((i64)extSize); - if (!Reserve(len + extLenSize + extSize)) - return nullptr; - memcpy(Rec, v, len); - out_long((i64)extSize, (char*)Rec + len); - extInfo->SerializeWithCachedSizesToArray((ui8*)Rec + len + extLenSize); - return Rec; - } else { - size_t extLenSize = len_long((i64)0); - if (!Reserve(len + extLenSize)) - return nullptr; - memcpy(Rec, v, len); - out_long((i64)0, (char*)Rec + len); - return Rec; - } - } - - const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) { - NMicroBDB::AssertValid(v); - size_t sz = SizeOf(v); - if (!Reserve(sz + extLen)) - return nullptr; - memcpy(Rec, v, sz); - memcpy((ui8*)Rec + sz, extInfoRaw, extLen); - return Rec; - } - - // use values stored in microbdb readers/writers internal buffer only. - // method expects serialized extInfo after this record - const TVal* PushWithExtInfo(const TVal* v) { - NMicroBDB::AssertValid(v); - size_t extSize; - size_t extLenSize; - size_t sz = NMicroBDB::SizeOfExt(v, &extLenSize, &extSize); - sz += extLenSize + extSize; - if (!Reserve(sz)) - return nullptr; - memcpy(Rec, v, sz); - return Rec; - } - - TVal* Reserve(size_t len) { - if (CurLen + DatCeil(len) > TPageIter::GetPageSize()) { - if (sizeof(TDatPage) + DatCeil(len) > TPageIter::GetPageSize()) - return Rec = nullptr; - if (TPageIter::Current() && RecNum) { - TPageIter::Current()->RecNum = RecNum; - TPageIter::Current()->Format = MBDB_FORMAT_RAW; - memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); - TIndexer::NextPage(TPageIter::Current()); - RecNum = 0; - } - if (!TPageIter::Next()) { - CurLen = TPageIter::GetPageSize(); - return Rec = nullptr; - } - CurLen = sizeof(TDatPage); - } - LenForOffset = CurLen; - Rec = (TVal*)((char*)TPageIter::Current() + CurLen); - DatSet(Rec, len); - - CurLen += DatCeil(len); - - ++RecNum; - return Rec; - } - - void Flush() { - TPageIter::Current()->RecNum = RecNum; - TPageIter::Current()->Format = MBDB_FORMAT_RAW; - } - - size_t Offset() const { - return Rec ? TPageIter::Offset() + LenForOffset : WrongOffset; - } - - void ResetDat() { - CurLen = (char*)Rec - (char*)TPageIter::Current(); - size_t len; - if (!TExtInfoType<TVal>::Exists) { - len = SizeOf(Rec); - } else { - size_t ll; - size_t l; - len = NMicroBDB::SizeOfExt(Rec, &ll, &l); - len += ll + l; - } - CurLen += DatCeil(len); - } - -protected: - void Clear() { - Rec = nullptr; - RecNum = 0; - CurLen = 0; - LenForOffset = 0; - } - - int Init() { - Clear(); - CurLen = TPageIter::GetPageSize(); - return 0; - } - - int Term() { - if (TPageIter::Current()) { - TPageIter::Current()->RecNum = RecNum; - TPageIter::Current()->Format = MBDB_FORMAT_RAW; - memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); - RecNum = 0; - } - int ret = !TPageIter::Current() && RecNum; - Clear(); - return ret; - } - - int GotoPage(int pageno) { - if (TPageIter::Current()) { - TPageIter::Current()->RecNum = RecNum; - TPageIter::Current()->Format = MBDB_FORMAT_RAW; - memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); - } - int ret = TPageIter::GotoPage(pageno); - if (!ret) { - RecNum = 0; - CurLen = sizeof(TDatPage); - } - return ret; - } - - TVal* Rec; - int RecNum; - size_t CurLen; - size_t LenForOffset; -}; - -template <class TVal, typename TBasePageIter, typename TBaseIndexer, typename TAlgorithm> -class TOutputRecordIterator - : public TBasePageIter, - public TBaseIndexer, - private TAlgorithm { - class TPageBuffer { - public: - void Init(size_t page) { - Pos = 0; - RecNum = 0; - Size = Min(page / 2, size_t(64 << 10)); - Data.Reset(new ui8[Size]); - } - - void Clear() { - Pos = 0; - RecNum = 0; - } - - inline bool Empty() const { - return RecNum == 0; - } - - public: - size_t Size; - size_t Pos; - int RecNum; - TArrayHolder<ui8> Data; - }; - -public: - typedef TBasePageIter TPageIter; - typedef TBaseIndexer TIndexer; - - TOutputRecordIterator() - : Rec(nullptr) - , RecNum(0) - { - } - - ~TOutputRecordIterator() { - Term(); - } - - const TVal* Current() const { - return Rec; - } - - const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) { - NMicroBDB::AssertValid(v); - size_t len = SizeOf(v); - if (!TExtInfoType<TVal>::Exists) - return (Reserve(len)) ? (TVal*)memcpy((TVal*)Rec, v, len) : nullptr; - else if (extInfo) { - size_t extSize = extInfo->ByteSize(); - size_t extLenSize = len_long((i64)extSize); - if (!Reserve(len + extLenSize + extSize)) - return nullptr; - memcpy(Rec, v, len); - out_long((i64)extSize, (char*)Rec + len); - extInfo->SerializeWithCachedSizesToArray((ui8*)Rec + len + extLenSize); - return Rec; - } else { - size_t extLenSize = len_long((i64)0); - if (!Reserve(len + extLenSize)) - return nullptr; - memcpy(Rec, v, len); - out_long((i64)0, (char*)Rec + len); - return Rec; - } - } - - const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) { - NMicroBDB::AssertValid(v); - size_t sz = SizeOf(v); - if (!Reserve(sz + extLen)) - return NULL; - memcpy(Rec, v, sz); - memcpy((ui8*)Rec + sz, extInfoRaw, extLen); - return Rec; - } - - // use values stored in microbdb readers/writers internal buffer only. - // method expects serialized extInfo after this record - const TVal* PushWithExtInfo(const TVal* v) { - NMicroBDB::AssertValid(v); - size_t extSize; - size_t extLenSize; - size_t sz = NMicroBDB::SizeOfExt(v, &extLenSize, &extSize); - sz += extLenSize + extSize; - if (!Reserve(sz)) - return nullptr; - memcpy(Rec, v, sz); - return Rec; - } - - TVal* Reserve(const size_t len) { - const size_t aligned = DatCeil(len); - - if (!TPageIter::Current()) { // Allocate fist page - if (!TPageIter::Next()) { - CurLen = TPageIter::GetPageSize(); - return Rec = nullptr; - } - CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); - } - - if (Buffer.Pos + aligned > Buffer.Size) { - if (Buffer.Pos == 0) - return Rec = nullptr; - if (FlushBuffer()) - return Rec = nullptr; - if (Buffer.Pos + aligned + sizeof(TDatPage) + sizeof(TCompressedPage) > Buffer.Size) - return Rec = nullptr; - } - - Rec = (TVal*)((char*)Buffer.Data.Get() + Buffer.Pos); - DatSet(Rec, len); // len is correct because DatSet set align tail to zero - - Buffer.RecNum++; - Buffer.Pos += aligned; - ++RecNum; - return Rec; - } - - void Flush() { - if (!Buffer.Empty()) { - FlushBuffer(); - TPageIter::Current()->RecNum = RecNum; - TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED; - } - } - - size_t Offset() const { - // According to vadya@ there is no evil to return 0 all the time - return 0; - } - - void ResetDat() { - Buffer.Pos = (char*)Rec - (char*)Buffer.Data.Get(); - size_t len = SizeOf(Rec); - Buffer.Pos += DatCeil(len); - } - -protected: - void Clear() { - RecNum = 0; - Rec = nullptr; - Count = 0; - CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); - Buffer.Clear(); - } - - int Init() { - Clear(); - Buffer.Init(TPageIter::GetPageSize()); - TAlgorithm::Init(); - return 0; - } - - int Term() { - if (TPageIter::Current()) - Commit(); - int ret = !TPageIter::Current() && RecNum; - Clear(); - TAlgorithm::Term(); - return ret; - } - - int GotoPage(int pageno) { - if (TPageIter::Current()) - Commit(); - int ret = TPageIter::GotoPage(pageno); - if (!ret) - Reset(); - return ret; - } - -private: - void Commit() { - Flush(); - TPageIter::Current()->RecNum = RecNum; - TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED; - SetCompressedPageHeader(); - - memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); - RecNum = 0; - Count = 0; - } - - inline void SetCompressedPageHeader() { - TCompressedPage* const hdr = (TCompressedPage*)((ui8*)TPageIter::Current() + sizeof(TDatPage)); - - hdr->BlockCount = Count; - hdr->Algorithm = TAlgorithm::Code; - hdr->Version = 0; - hdr->Reserved = 0; - } - - inline void Reset() { - RecNum = 0; - CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); - Count = 0; - Buffer.Clear(); - } - - int FlushBuffer() { - TArrayHolder<ui8> data; - const ui8* const buf = Buffer.Data.Get(); - size_t first = 0; - - if (!TExtInfoType<TVal>::Exists) - first = DatCeil(SizeOf((TVal*)buf)); - else { - size_t ll; - size_t l; - first = NMicroBDB::SizeOfExt((const TVal*)buf, &ll, &l); - first = DatCeil(first + ll + l); - } - - size_t total = sizeof(NMicroBDB::TCompressedHeader) + first + ((Buffer.RecNum == 1) ? 0 : TAlgorithm::CompressBound(Buffer.Pos - first)); - size_t real = total; - - { - ui8* p = nullptr; - NMicroBDB::TCompressedHeader* hdr = nullptr; - - // 1. Choose data destination (temporary buffer or dat-page) - if (CurLen + total > TPageIter::GetPageSize()) { - data.Reset(new ui8[total]); - - hdr = (NMicroBDB::TCompressedHeader*)data.Get(); - p = data.Get() + sizeof(NMicroBDB::TCompressedHeader); - } else { - p = (ui8*)TPageIter::Current() + CurLen; - hdr = (NMicroBDB::TCompressedHeader*)p; - p += sizeof(NMicroBDB::TCompressedHeader); - } - - // 2. Compress data - - // Fill header and first record - hdr->Original = Buffer.Pos; - hdr->Compressed = 0; - hdr->Count = Buffer.RecNum; - hdr->Reserved = 0; - memcpy(p, Buffer.Data.Get(), first); - // Fill compressed part - if (Buffer.RecNum > 1) { - size_t size = TAlgorithm::CompressBound(Buffer.Pos - first); - - p += first; - TAlgorithm::Compress(p, size, buf + first, Buffer.Pos - first); - - hdr->Compressed = size; - - real = sizeof(NMicroBDB::TCompressedHeader) + first + size; - } - } - - Y_ASSERT(sizeof(TDatPage) + sizeof(TCompressedPage) + real <= TPageIter::GetPageSize()); - - // 3. Check page capacity - - if (CurLen + real > TPageIter::GetPageSize()) { - Y_ASSERT(data.Get() != nullptr); - - if (TPageIter::Current() && RecNum) { - RecNum = RecNum - Buffer.RecNum; - TPageIter::Current()->RecNum = RecNum; - TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED; - SetCompressedPageHeader(); - memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); - TIndexer::NextPage(TPageIter::Current()); - RecNum = Buffer.RecNum; - Count = 0; - } - if (!TPageIter::Next()) { - CurLen = TPageIter::GetPageSize(); - return MBDB_NO_MEMORY; - } - CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); - } - - // 4. Flush data and reset buffer state - - if (data.Get()) - memcpy((ui8*)TPageIter::Current() + CurLen, data.Get(), real); - CurLen += real; - ++Count; - Buffer.Clear(); - return 0; - } - -private: - size_t CurLen; - TPageBuffer Buffer; - TVal* Rec; - ui32 Count; //! < count of compressed blocks on page -public: - int RecNum; -}; - -template <typename TBaseWriter> -class TOutputPageIterator: public TBaseWriter { -public: - typedef TBaseWriter TWriter; - - TOutputPageIterator() - : Buf(nullptr) - { - Clear(); - } - - ~TOutputPageIterator() { - Term(); - } - - TDatPage* Current() { - return CurPage; - } - - size_t Offset() const { - //Cout << "PS = " << TWriter::GetPageSize() << "; PN = " << PageNum << "; MS = " << METASIZE << Endl; - return TWriter::GetPageSize() * PageNum + METASIZE; - } - - int Freeze() { - return (Frozen = (PageNum == -1) ? 0 : (int)PageNum); - } - - void Unfreeze() { - Frozen = -1; - } - - inline int IsFrozen() const { - return Frozen + 1; - } - - inline size_t GetPageSize() const { - return TWriter::GetPageSize(); - } - - inline int GetPageNum() const { - return (int)PageNum; - } - - TDatPage* Next() { - if (PageNum >= Maxpage && WriteBuf()) - return CurPage = nullptr; - CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize()); - memset(CurPage, 0, sizeof(TDatPage)); - return CurPage; - } - -protected: - int Init(size_t pages, int pagesOrBytes) { - Term(); - if (pagesOrBytes) - Bufpages = pages; - else - Bufpages = pages / GetPageSize(); - Bufpages = Max<size_t>(1, Bufpages); - Maxpage = Bufpages - 1; - // if (!(Buf = (char*)malloc(Bufpages * GetPageSize()))) - // return ENOMEM; - ABuf.Alloc(Bufpages * GetPageSize()); - Buf = ABuf.Begin(); - if (TWriter::Memo) - Freeze(); - return 0; - } - - int Term() { - Unfreeze(); - int ret = (PageNum < 0) ? 0 : WriteBuf(); - Clear(); - return ret; - } - - int GotoPage(int pageno) { - int ret = EAGAIN; - if (IsFrozen() || PageNum >= 0 && ((ret = WriteBuf())) || ((ret = TWriter::GotoPage(pageno)))) - return ret; - PageNum = pageno; - Maxpage = Bufpages - 1 + pageno; - CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize()); - memset(CurPage, 0, sizeof(TDatPage)); - return 0; - } - - void Clear() { - ABuf.Dealloc(); - Buf = nullptr; - Maxpage = PageNum = Frozen = -1; - Bufpages = 0; - CurPage = nullptr; - } - - int WriteBuf() { - int nvec; - iovec vec[2]; - ssize_t minpage = Maxpage - Bufpages + 1; - ssize_t maxpage = Frozen == -1 ? PageNum : Frozen - 1; - if (maxpage < minpage) - return EAGAIN; - minpage %= Bufpages; - maxpage %= Bufpages; - if (maxpage < minpage) { - vec[0].iov_base = Buf + GetPageSize() * minpage; - vec[0].iov_len = GetPageSize() * (Bufpages - minpage); - vec[1].iov_base = Buf; - vec[1].iov_len = GetPageSize() * (maxpage + 1); - nvec = 2; - } else { - vec[0].iov_base = Buf + GetPageSize() * minpage; - vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1); - nvec = 1; - } - if (TWriter::WritePages(vec, nvec)) - return EIO; - Maxpage += (maxpage < minpage) ? (Bufpages - minpage + maxpage + 1) : (maxpage - minpage + 1); - return 0; - } - - ssize_t Maxpage; - ssize_t Bufpages; - ssize_t PageNum; - int Frozen; - TDatPage* CurPage; - char* Buf; - TMappedAllocation ABuf; -}; - -template <class TFileManip> -class TOutputPageFileImpl: private TNonCopyable { -public: - TOutputPageFileImpl() - : Pagesize(0) - , Eof(1) - , Error(0) - , Memo(0) - , Recordsig(0) - { - } - - ~TOutputPageFileImpl() { - Term(); - } - - inline int IsEof() const { - return Eof; - } - - inline int GetError() const { - return Error; - } - - inline bool IsOpen() const { - return FileManip.IsOpen(); - } - - inline size_t GetPageSize() const { - return Pagesize; - } - - inline ui32 GetRecordSig() const { - return Recordsig; - } - - int Init(const char* fname, size_t pagesize, ui32 recsig, bool direct = false) { - Memo = 0; - if (FileManip.IsOpen()) - return MBDB_ALREADY_INITIALIZED; - - if (!fname) { - Eof = Error = 0; - Pagesize = pagesize; - Recordsig = recsig; - Memo = 1; - return 0; - } - - Error = FileManip.Open(fname, WrOnly | CreateAlways | ARW | AWOther | (direct ? DirectAligned : EOpenMode())); - if (Error) - return Error; - Error = Init(TFile(), pagesize, recsig); - if (Error) { - FileManip.Close(); - unlink(fname); - } - return Error; - } - - int Init(TAutoPtr<IOutputStream> output, size_t pagesize, ui32 recsig) { - Memo = 0; - if (FileManip.IsOpen()) { - return MBDB_ALREADY_INITIALIZED; - } - - if (!output) { - Eof = Error = 0; - Pagesize = pagesize; - Recordsig = recsig; - Memo = 1; - return 0; - } - - Error = FileManip.Open(output); - if (Error) - return Error; - Error = Init(TFile(), pagesize, recsig); - if (Error) { - FileManip.Close(); - } - return Error; - } - - int Init(const TFile& file, size_t pagesize, ui32 recsig) { - Memo = 0; - if (!file.IsOpen() && !FileManip.IsOpen()) - return MBDB_NOT_INITIALIZED; - if (file.IsOpen() && FileManip.IsOpen()) - return MBDB_ALREADY_INITIALIZED; - if (file.IsOpen()) { - Error = FileManip.Init(file); - if (Error) - return Error; - } - - Eof = 1; - TTempBuf buf(METASIZE + FS_BLOCK_SIZE); - const char* ptr = (buf.Data() + FS_BLOCK_SIZE - ((ui64)buf.Data() & (FS_BLOCK_SIZE - 1))); - TDatMetaPage* meta = (TDatMetaPage*)ptr; - - memset(buf.Data(), 0, buf.Size()); - meta->MetaSig = METASIG; - meta->PageSize = Pagesize = pagesize; - meta->RecordSig = Recordsig = recsig; - - ssize_t size = METASIZE, ret = 0; - while (size && (ret = FileManip.Write(ptr, (unsigned)size)) > 0) { - size -= ret; - ptr += ret; - } - if (size || ret <= 0) { - Term(); - return Error = errno ? errno : MBDB_WRITE_ERROR; - } - - Error = Eof = 0; - return Error; - } - -protected: - int WritePages(iovec* vec, int nvec) { - if (Error || Memo) - return Error; - - ssize_t size, delta; - iovec* pvec; - int vsize; - - for (vsize = 0, pvec = vec; vsize < nvec; vsize++, pvec++) - for (size = 0; (size_t)size < pvec->iov_len; size += Pagesize) - ((TDatPage*)((char*)pvec->iov_base + size))->PageSig = PAGESIG; - - delta = size = 0; - pvec = vec; - vsize = nvec; - while (vsize && (size = Writev(FileManip, pvec, (int)Min(vsize, 16))) > 0) { - if (delta) { - size += delta; - pvec->iov_len += delta; - pvec->iov_base = (char*)pvec->iov_base - delta; - delta = 0; - } - while (size) { - if ((size_t)size >= pvec->iov_len) { - size -= pvec->iov_len; - ++pvec; - --vsize; - } else { - delta = size; - pvec->iov_len -= size; - pvec->iov_base = (char*)pvec->iov_base + size; - size = 0; - } - } - } - if (delta) { - pvec->iov_len += delta; - pvec->iov_base = (char*)pvec->iov_base - delta; - } - return Error = (!size && !vsize) ? 0 : errno ? errno : MBDB_WRITE_ERROR; - } - - i64 Tell() { - return FileManip.RealSeek(0, SEEK_CUR); - } - - int GotoPage(int pageno) { - if (Error || Memo) - return Error; - Eof = 0; - i64 offset = (i64)pageno * Pagesize + METASIZE; - if (offset != FileManip.Seek(offset, SEEK_SET)) - Error = MBDB_BAD_FILE_SIZE; - return Error; - } - - int Term() { - int ret = FileManip.Close(); - Eof = 1; - Memo = 0; - if (!Error) - Error = ret; - return Error; - } - - size_t Pagesize; - int Eof; - int Error; - int Memo; - ui32 Recordsig; - -private: - TFileManip FileManip; -}; - -using TOutputPageFile = TOutputPageFileImpl<TOutputFileManip>; - -template <class TVal, - typename TBaseRecIter = TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>>> -class TOutDatFileImpl: public TBaseRecIter { -public: - typedef TBaseRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - typedef typename TRecIter::TPageIter::TWriter TWriter; - - int Open(const char* fname, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1, bool direct = false) { - int ret = TWriter::Init(fname, pagesize, TVal::RecordSig, direct); - return ret ? ret : Open2(pages, pagesOrBytes); - } - - int Open(const TFile& file, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1) { - int ret = TWriter::Init(file, pagesize, TVal::RecordSig); - return ret ? ret : Open2(pages, pagesOrBytes); - } - - int Open(TAutoPtr<IOutputStream> output, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1) { - int ret = TWriter::Init(output, pagesize, TVal::RecordSig); - return ret ? ret : Open2(pages, pagesOrBytes); - } - - int Close() { - int ret1 = TRecIter::Term(); - int ret2 = TPageIter::Term(); - int ret3 = TWriter::Term(); - return ret1 ? ret1 : ret2 ? ret2 : ret3; - } - -private: - int Open2(size_t pages, int pagesOrBytes) { - int ret = TPageIter::Init(pages, pagesOrBytes); - if (!ret) - ret = TRecIter::Init(); - if (ret) - Close(); - return ret; - } -}; - -template <class TVal> -class TOutIndexFile: public TOutDatFileImpl< - TVal, - TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer, TFakeCompression>> { - typedef TOutDatFileImpl< - TVal, - TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer, TFakeCompression>> - TDatFile; - typedef TOutIndexFile<TVal> TMyType; - typedef typename TDatFile::TRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - typedef typename TRecIter::TIndexer TIndexer; - -public: - TOutIndexFile() { - TIndexer::SetCallback(this, DispatchCallback); - } - - int Open(const char* fname, size_t pagesize, size_t pages, int pagesOrBytes = 1) { - int ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes); - if (ret) - return ret; - if ((ret = TRecIter::GotoPage(1))) { - TDatFile::Close(); - return ret; - } - Index0.Clear(); - return ret; - } - - int Close() { - TPageIter::Unfreeze(); - if (TRecIter::RecNum) { - TRecIter::Flush(); - NextPage(TPageIter::Current()); - } - int ret = 0; - if (Index0.Size() && !(ret = TRecIter::GotoPage(0))) { - const char* ptr = Index0.Begin(); - size_t recSize; - while (ptr < Index0.End()) { - Y_ASSERT((size_t)(Index0.End() - ptr) >= sizeof(size_t)); - memcpy(&recSize, ptr, sizeof(size_t)); - ptr += sizeof(size_t); - Y_ASSERT((size_t)(Index0.End() - ptr) >= recSize); - ui8* buf = (ui8*)TRecIter::Reserve(recSize); - if (!buf) { - ret = MBDB_PAGE_OVERFLOW; - break; - } - memcpy(buf, ptr, recSize); - TRecIter::ResetDat(); - ptr += recSize; - } - Index0.Clear(); - ret = (TPageIter::GetPageNum() != 0) ? MBDB_PAGE_OVERFLOW : TPageIter::GetError(); - } - int ret1 = TDatFile::Close(); - return ret ? ret : ret1; - } - -protected: - TBuffer Index0; - - void NextPage(const TDatPage* page) { - const TVal* first = (const TVal*)NMicroBDB::GetFirstRecord(page); - size_t sz; - if (!TExtInfoType<TVal>::Exists) { - sz = SizeOf(first); - } else { - size_t ll; - size_t l; - sz = NMicroBDB::SizeOfExt(first, &ll, &l); - sz += ll + l; - } - Index0.Append((const char*)&sz, sizeof(size_t)); - Index0.Append((const char*)first, sz); - } - - static void DispatchCallback(void* This, const TDatPage* page) { - ((TMyType*)This)->NextPage(page); - } -}; - -template <class TVal, class TKey, typename TCompressor = TFakeCompression, class TPageFile = TOutputPageFile> -class TOutDirectFileImpl: public TOutDatFileImpl< - TVal, - TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer, TCompressor>> { - typedef TOutDatFileImpl< - TVal, - TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer, TCompressor>> - TDatFile; - typedef TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> TMyType; - typedef typename TDatFile::TRecIter TRecIter; - typedef typename TRecIter::TPageIter TPageIter; - typedef typename TRecIter::TIndexer TIndexer; - typedef TOutIndexFile<TKey> TKeyFile; - -public: - TOutDirectFileImpl() { - TIndexer::SetCallback(this, DispatchCallback); - } - - int Open(const char* fname, size_t pagesize, int pages = 1, size_t ipagesize = 0, size_t ipages = 1, int pagesOrBytes = 1) { - char iname[FILENAME_MAX]; - int ret; - if (ipagesize == 0) - ipagesize = pagesize; - ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes); - ret = ret ? ret : DatNameToIdx(iname, fname); - ret = ret ? ret : KeyFile.Open(iname, ipagesize, ipages, pagesOrBytes); - if (ret) - TDatFile::Close(); - return ret; - } - - int Close() { - if (TRecIter::RecNum) { - TRecIter::Flush(); - NextPage(TPageIter::Current()); - } - int ret = KeyFile.Close(); - int ret1 = TDatFile::Close(); - return ret1 ? ret1 : ret; - } - - int GetError() const { - return TDatFile::GetError() ? TDatFile::GetError() : KeyFile.GetError(); - } - -protected: - TKeyFile KeyFile; - - void NextPage(const TDatPage* page) { - typedef TMakeExtKey<TVal, TKey> TMakeExtKey; - - TVal* val = (TVal*)NMicroBDB::GetFirstRecord(page); - TKey key; - if (!TMakeExtKey::Exists) { - TMakeExtKey::Make(&key, nullptr, val, nullptr); - KeyFile.Push(&key); - } else { - size_t ll; - size_t l; - size_t sz = NMicroBDB::SizeOfExt(val, &ll, &l); - typename TExtInfoType<TVal>::TResult valExt; - if (TExtInfoType<TVal>::Exists) - Y_PROTOBUF_SUPPRESS_NODISCARD valExt.ParseFromArray((ui8*)val + sz + ll, l); - typename TExtInfoType<TKey>::TResult keyExt; - TMakeExtKey::Make(&key, &keyExt, val, &valExt); - KeyFile.Push(&key, &keyExt); - } - } - - static void DispatchCallback(void* This, const TDatPage* page) { - ((TMyType*)This)->NextPage(page); - } -}; diff --git a/library/cpp/microbdb/powersorter.h b/library/cpp/microbdb/powersorter.h deleted file mode 100644 index c40de9c23f..0000000000 --- a/library/cpp/microbdb/powersorter.h +++ /dev/null @@ -1,667 +0,0 @@ -#pragma once - -#include "safeopen.h" - -#include <util/generic/vector.h> -#include <util/generic/deque.h> -#include <util/system/mutex.h> -#include <util/system/condvar.h> -#include <util/thread/pool.h> - -template < - class TRecord, - template <typename T> class TCompare, - class TSieve, - class TMemoFile = TOutDatFile<TRecord>> -class TDatSorterBuf { -public: - typedef TRecord TRec; - typedef TVector<TRec*> TVectorType; - typedef TMemoFile TMemo; - typedef TCompare<TRecord> TComp; - -public: - TDatSorterBuf(size_t memory, size_t pageSize) - : Memo("memo", pageSize, memory, 0) - , Cur() - { - Memo.Open(nullptr); - Memo.Freeze(); - } - - ~TDatSorterBuf() { - Vector.clear(); - Memo.Close(); - } - - const TRec* Push(const TRec* v) { - const TRec* u = Memo.Push(v); - if (u) - Vector.push_back((TRec*)u); - return u; - } - - const TRec* Next() { - if (Ptr == Vector.end()) { - if (Cur) - TSieve::Sieve(Cur, Cur); - Cur = nullptr; - } else { - Cur = *Ptr++; - if (!TIsSieveFake<TSieve>::Result) - while (Ptr != Vector.end() && TSieve::Sieve(Cur, *Ptr)) - ++Ptr; - } - return Cur; - } - - const TRec* Current() { - return Cur; - } - - size_t Size() { - return Vector.size(); - } - - void Sort() { - Ptr = Vector.begin(); - Cur = nullptr; - - MBDB_SORT_FUN(Vector.begin(), Vector.end(), TComp()); - } - - void Clear() { - Vector.clear(); - Memo.Freeze(); - Ptr = Vector.begin(); - Cur = nullptr; - } - -private: - TVectorType Vector; - TMemo Memo; - - typename TVectorType::iterator - Ptr; - TRec* Cur; -}; - -template < - class TRecord, - class TInput, - template <typename T> class TCompare, - class TSieve> -class TDatMerger { -public: - typedef TRecord TRec; - typedef TCompare<TRecord> TComp; - typedef TSimpleSharedPtr<TInput> TInputPtr; - typedef TVector<TInputPtr> TInputVector; - -public: - ~TDatMerger() { - Close(); - } - - void Init(const TInputVector& inputs) { - Inputs = inputs; - TVector<TInput*> v; - for (int i = 0; i < Inputs.ysize(); ++i) - v.push_back(Inputs[i].Get()); - HeapIter.Init(&v[0], v.size()); - if (!TIsSieveFake<TSieve>::Result) - PNext = HeapIter.Next(); - } - - const TRec* Next() { - if (TIsSieveFake<TSieve>::Result) { - return HeapIter.Next(); - } - - if (!PNext) { - if (PCur) { - TSieve::Sieve(PCur, PCur); - PCur = nullptr; - } - return nullptr; - } - - PCur = &Cur; - memcpy(PCur, PNext, SizeOf((const TRec*)PNext)); - - do { - PNext = HeapIter.Next(); - } while (PNext && TSieve::Sieve(PCur, PNext)); - - return PCur; - } - - const TRec* Current() { - return (TIsSieveFake<TSieve>::Result ? HeapIter.Current() : PCur); - } - - void Close() { - Inputs.clear(); - HeapIter.Term(); - } - -private: - TInputVector Inputs; - THeapIter<TRec, TInput, TComp> HeapIter; - TRec Cur; - TRec* PCur = nullptr; - const TRec* PNext = nullptr; -}; - -class TPortionManager { -public: - void Open(const char* tempDir) { - TGuard<TMutex> guard(Mutex); - TempDir = tempDir; - } - - TString Next() { - TGuard<TMutex> guard(Mutex); - if (Portions == 0) - DoOpen(); - TString fname = GeneratePortionFilename(Portions++); - return fname; - } - - void Close() { - TGuard<TMutex> guard(Mutex); - Portions = 0; - } - -private: - void DoOpen() { - if (MakeSorterTempl(PortionFilenameTempl, TempDir.data())) { - PortionFilenameTempl[0] = 0; - ythrow yexception() << "portion-manager: bad tempdir \"" << TempDir.data() << "\": " << LastSystemErrorText(); - } - } - - TString GeneratePortionFilename(int i) { - char str[FILENAME_MAX]; - snprintf(str, sizeof(str), PortionFilenameTempl, i); - return TString(str); - } - -private: - TMutex Mutex; - - TString TempDir; - char PortionFilenameTempl[FILENAME_MAX] = {}; - int Portions = 0; -}; - -// A merger powered by threads -template < - class TRecord, - template <typename T> class TCompare, - class TSieve, - class TInput = TInDatFile<TRecord>, - class TOutput = TOutDatFile<TRecord>> -class TPowerMerger { -public: - typedef TRecord TRec; - typedef TDatMerger<TRecord, TInput, TCompare, TSieve> TMerger; - typedef TSimpleSharedPtr<TMerger> TMergerPtr; - typedef TPowerMerger<TRecord, TCompare, TSieve, TInput, TOutput> TFileMerger; - - struct TMergePortionTask: public IObjectInQueue { - TFileMerger* FileMerger; - int Begin; - int End; - TString OutFname; - - TMergePortionTask(TFileMerger* fileMerger, int begin, int end, const TString& outFname) - : FileMerger(fileMerger) - , Begin(begin) - , End(end) - , OutFname(outFname) - { - } - - void Process(void*) override { - THolder<TMergePortionTask> This(this); - //fprintf(stderr, "MergePortion: (%i, %i, %s)\n", Begin, End, ~OutFname); - FileMerger->MergePortion(Begin, End, OutFname); - } - }; - -public: - TPowerMerger(const TSimpleSharedPtr<TThreadPool>& mtpQueue, const TSimpleSharedPtr<TPortionManager>& portMan, - int memory, int pageSize, bool autoUnlink) - : MtpQueue(mtpQueue) - , PortionManager(portMan) - , Memory(memory) - , PageSize(pageSize) - , AutoUnlink(autoUnlink) - { - } - - TPowerMerger(const TSimpleSharedPtr<TThreadPool>& mtpQueue, const char* tempDir, - int memory, int pageSize, bool autoUnlink) - : MtpQueue(mtpQueue) - , PortionManager(new TPortionManager) - , Memory(memory) - , PageSize(pageSize) - , AutoUnlink(autoUnlink) - { - PortionManager->Open(tempDir); - } - - ~TPowerMerger() { - Close(); - } - - void SetMtpQueue(const TSimpleSharedPtr<TThreadPool>& mtpQueue) { - MtpQueue = mtpQueue; - } - - void MergePortion(int begin, int end, const TString& outFname) { - TMerger merger; - InitMerger(merger, begin, end); - - TOutput out("mergeportion-tmpout", PageSize, BufSize, 0); - out.Open(outFname.data()); - const TRec* rec; - while ((rec = merger.Next())) - out.Push(rec); - out.Close(); - - merger.Close(); - - { - TGuard<TMutex> guard(Mutex); - UnlinkFiles(begin, end); - Files.push_back(outFname); - --Tasks; - TaskFinishedCond.Signal(); - } - } - - void Add(const TString& fname) { - TGuard<TMutex> guard(Mutex); - // fprintf(stderr, "TPowerMerger::Add: %s\n", ~fname); - Files.push_back(fname); - if (InitialFilesEnd > 0) - ythrow yexception() << "TPowerMerger::Add: no more files allowed"; - } - - void Merge(int maxPortions) { - TGuard<TMutex> guard(Mutex); - InitialFilesEnd = Files.ysize(); - if (!InitialFilesEnd) - ythrow yexception() << "TPowerMerger::Merge: no files added"; - Optimize(maxPortions); - MergeMT(); - InitMerger(Merger, CPortions, Files.ysize()); - } - - void Close() { - TGuard<TMutex> guard(Mutex); - Merger.Close(); - UnlinkFiles(CPortions, Files.ysize()); - InitialFilesEnd = CPortions = 0; - Files.clear(); - } - - const TRec* Next() { - return Merger.Next(); - } - - const TRec* Current() { - return Merger.Current(); - } - - int FileCount() const { - TGuard<TMutex> guard(Mutex); - return Files.ysize(); - } - -private: - void InitMerger(TMerger& merger, int begin, int end) { - TGuard<TMutex> guard(Mutex); - TVector<TSimpleSharedPtr<TInput>> inputs; - for (int i = begin; i < end; ++i) { - inputs.push_back(new TInput("mergeportion-tmpin", BufSize, 0)); - inputs.back()->Open(Files[i]); - // fprintf(stderr, "InitMerger: %i, %s\n", i, ~Files[i]); - } - merger.Init(inputs); - } - - void UnlinkFiles(int begin, int end) { - TGuard<TMutex> guard(Mutex); - for (int i = begin; i < end; ++i) { - if (i >= InitialFilesEnd || AutoUnlink) - unlink(Files[i].c_str()); - } - } - - void Optimize(int maxPortions, size_t maxBufSize = 4u << 20) { - TGuard<TMutex> guard(Mutex); - maxPortions = std::min(maxPortions, Memory / PageSize - 1); - maxBufSize = std::max((size_t)PageSize, maxBufSize); - - if (maxPortions <= 2) { - FPortions = MPortions = 2; - BufSize = PageSize; - return; - } - - int Portions = Files.ysize(); - if (maxPortions >= Portions) { - FPortions = MPortions = Portions; - } else if (((Portions + maxPortions - 1) / maxPortions) <= maxPortions) { - while (((Portions + maxPortions - 1) / maxPortions) <= maxPortions) - --maxPortions; - MPortions = ++maxPortions; - int total = ((Portions + MPortions - 1) / MPortions) + Portions; - FPortions = (total % MPortions) ? (total % MPortions) : (int)MPortions; - } else - FPortions = MPortions = maxPortions; - - BufSize = std::min((size_t)(Memory / (MPortions + 1)), maxBufSize); - // fprintf(stderr, "Optimize: Portions=%i; MPortions=%i; FPortions=%i; Memory=%i; BufSize=%i\n", - // (int)Portions, (int)MPortions, (int)FPortions, (int)Memory, (int)BufSize); - } - - void MergeMT() { - TGuard<TMutex> guard(Mutex); - do { - int n; - while ((n = Files.ysize() - CPortions) > MPortions) { - int m = std::min((CPortions == 0 ? (int)FPortions : (int)MPortions), n); - TString fname = PortionManager->Next(); - if (!MtpQueue->Add(new TMergePortionTask(this, CPortions, CPortions + m, fname))) - ythrow yexception() << "TPowerMerger::MergeMT: failed to add task"; - CPortions += m; - ++Tasks; - } - if (Tasks > 0) - TaskFinishedCond.Wait(Mutex); - } while (Tasks > 0); - } - -private: - TMutex Mutex; - TCondVar TaskFinishedCond; - - TMerger Merger; - TSimpleSharedPtr<TThreadPool> MtpQueue; - TSimpleSharedPtr<TPortionManager> PortionManager; - TVector<TString> Files; - int Tasks = 0; - int InitialFilesEnd = 0; - int CPortions = 0; - int MPortions = 0; - int FPortions = 0; - int Memory = 0; - int PageSize = 0; - int BufSize = 0; - bool AutoUnlink = false; -}; - -// A sorter powered by threads -template < - class TRecord, - template <typename T> class TCompare, - class TSieve = TFakeSieve<TRecord>, - class TTmpInput = TInDatFile<TRecord>, - class TTmpOutput = TOutDatFile<TRecord>> -class TPowerSorter { -public: - typedef TPowerSorter<TRecord, TCompare, TSieve, TTmpInput, TTmpOutput> TSorter; - typedef TRecord TRec; - typedef TTmpOutput TTmpOut; - typedef TTmpInput TTmpIn; - typedef TDatSorterBuf<TRecord, TCompare, TSieve> TSorterBuf; - typedef TCompare<TRecord> TComp; - typedef TPowerMerger<TRecord, TCompare, TSieve, TTmpInput, TTmpOutput> TFileMerger; - - struct TSortPortionTask: public IObjectInQueue { - TSorter* Sorter; - TSorterBuf* SorterBuf; - int Portion; - - TSortPortionTask(TSorter* sorter, TSorterBuf* sorterBuf, int portion) - : Sorter(sorter) - , SorterBuf(sorterBuf) - , Portion(portion) - { - } - - void Process(void*) override { - TAutoPtr<TSortPortionTask> This(this); - // fprintf(stderr, "SortPortion: %i\n", Portion); - Sorter->SortPortion(SorterBuf); - } - }; - - class TSorterBufQueue { - private: - TMutex Mutex; - TCondVar Cond; - TVector<TSimpleSharedPtr<TSorterBuf>> V; - TDeque<TSorterBuf*> Q; - - int Memory, PageSize, MaxSorterBufs; - - public: - TSorterBufQueue(int memory, int pageSize, int maxSorterBufs) - : Memory(memory) - , PageSize(pageSize) - , MaxSorterBufs(maxSorterBufs) - { - } - - void Push(TSorterBuf* sb) { - TGuard<TMutex> guard(Mutex); - sb->Clear(); - Q.push_back(sb); - Cond.Signal(); - } - - TSorterBuf* Pop() { - TGuard<TMutex> guard(Mutex); - if (!Q.size() && V.ysize() < MaxSorterBufs) { - V.push_back(new TSorterBuf(Memory / MaxSorterBufs, PageSize)); - return V.back().Get(); - } else { - while (!Q.size()) - Cond.Wait(Mutex); - TSorterBuf* t = Q.front(); - Q.pop_front(); - return t; - } - } - - void Clear() { - TGuard<TMutex> guard(Mutex); - Q.clear(); - V.clear(); - } - - void WaitAll() { - TGuard<TMutex> guard(Mutex); - while (Q.size() < V.size()) { - Cond.Wait(Mutex); - } - } - - int GetMaxSorterBufs() const { - return MaxSorterBufs; - } - }; - -public: - TPowerSorter(const TSimpleSharedPtr<TThreadPool>& mtpQueue, size_t maxSorterBufs, - const char* name, size_t memory, size_t pageSize, size_t bufSize) - : MaxSorterBufs(maxSorterBufs) - , Name(name) - , Memory(memory) - , PageSize(pageSize) - , BufSize(bufSize) - , MtpQueue(mtpQueue) - , PortionManager(new TPortionManager) - , SBQueue(Memory, PageSize, MaxSorterBufs) - , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true) - { - } - - TPowerSorter(size_t maxSorterBufs, - const char* name, size_t memory, size_t pageSize, size_t bufSize) - : MaxSorterBufs(maxSorterBufs) - , Name(name) - , Memory(memory) - , PageSize(pageSize) - , BufSize(bufSize) - , PortionManager(new TPortionManager) - , SBQueue(Memory, PageSize, maxSorterBufs) - , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true) - { - } - - TPowerSorter(const char* name, size_t memory, size_t pageSize, size_t bufSize) - : MaxSorterBufs(5) - , Name(name) - , Memory(memory) - , PageSize(pageSize) - , BufSize(bufSize) - , PortionManager(new TPortionManager) - , SBQueue(Memory, PageSize, MaxSorterBufs) - , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true) - { - } - - ~TPowerSorter() { - Close(); - } - - void Open(const char* tempDir) { - Close(); - CurSB = SBQueue.Pop(); - PortionManager->Open(tempDir); - } - - void Reopen(const char* fname) { - Open(fname); - } - - void Close() { - CurSB = nullptr; - SBQueue.Clear(); - PortionCount = 0; - FileMerger.Close(); - PortionManager->Close(); - } - - const TRec* Push(const TRec* v) { - CheckOpen("Push"); - const TRec* u = CurSB->Push(v); - if (!u) { - NextPortion(); - u = CurSB->Push(v); - } - return u; - } - - void Sort(int maxPortions = 1000) { - CheckOpen("Sort"); - if (!PortionCount) { - CurSB->Sort(); - } else { - NextPortion(); - SBQueue.Push(CurSB); - CurSB = nullptr; - SBQueue.WaitAll(); - SBQueue.Clear(); - FileMerger.Merge(maxPortions); - } - } - - const TRec* Next() { - return PortionCount ? FileMerger.Next() : CurSB->Next(); - } - - const TRec* Current() { - return PortionCount ? FileMerger.Current() : CurSB->Current(); - } - - int GetBufSize() const { - return BufSize; - } - - int GetPageSize() const { - return PageSize; - } - - const char* GetName() const { - return Name.data(); - } - -private: - void CheckOpen(const char* m) { - if (!CurSB) - ythrow yexception() << "TPowerSorter::" << m << ": the sorter is not open"; - } - - void NextPortion() { - if (!CurSB->Size()) - return; - ++PortionCount; - if (MaxSorterBufs <= 1) { - SortPortion(CurSB); - } else { - if (!MtpQueue.Get()) { - MtpQueue.Reset(new TThreadPool); - MtpQueue->Start(MaxSorterBufs - 1); - FileMerger.SetMtpQueue(MtpQueue); - } - if (!MtpQueue->Add(new TSortPortionTask(this, CurSB, PortionCount))) - ythrow yexception() << "TPowerSorter::NextPortion: failed to add task"; - } - CurSB = SBQueue.Pop(); - } - - void SortPortion(TSorterBuf* sorterBuf) { - TString portionFilename = PortionManager->Next(); - try { - sorterBuf->Sort(); - - // fprintf(stderr, "TPowerSorter::SortPortion: -> %s\n", ~portionFilename); - TTmpOut out("powersorter-portion", PageSize, BufSize, 0); - out.Open(portionFilename.data()); - - while (sorterBuf->Next()) - out.Push(sorterBuf->Current()); - - out.Close(); - FileMerger.Add(portionFilename); - SBQueue.Push(sorterBuf); - } catch (const yexception& e) { - unlink(portionFilename.data()); - ythrow yexception() << "SortPortion: " << e.what(); - } - } - -private: - int MaxSorterBufs = 0; - TString Name; - int Memory = 0; - int PageSize = 0; - int BufSize = 0; - - TMutex Mutex; - TSimpleSharedPtr<TThreadPool> MtpQueue; - TSimpleSharedPtr<TPortionManager> PortionManager; - - TSorterBufQueue SBQueue; - TSorterBuf* CurSB = nullptr; - int PortionCount = 0; - - TFileMerger FileMerger; -}; diff --git a/library/cpp/microbdb/reader.h b/library/cpp/microbdb/reader.h deleted file mode 100644 index 694a2f1766..0000000000 --- a/library/cpp/microbdb/reader.h +++ /dev/null @@ -1,354 +0,0 @@ -#pragma once - -#include "align.h" -#include "header.h" -#include "extinfo.h" - -#include <contrib/libs/zlib/zlib.h> -#include <contrib/libs/fastlz/fastlz.h> -#include <contrib/libs/snappy/snappy.h> - -#include <util/generic/vector.h> -#include <util/memory/tempbuf.h> - -namespace NMicroBDB { - static const size_t DEFAULT_BUFFER_SIZE = (64 << 10); - - //! - template <class TVal> - class IBasePageReader { - public: - virtual size_t GetRecSize() const = 0; - virtual size_t GetExtSize() const = 0; - virtual bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const = 0; - virtual const ui8* GetExtInfoRaw(size_t* len) const = 0; - virtual const TVal* Next() = 0; - virtual void Reset() = 0; - //! set clearing flag, so temporary buffers will be cleared - //! in next call of Next() - virtual void SetClearFlag() { - } - - virtual ~IBasePageReader() { - } - }; - - template <class TVal, typename TPageIter> - class TRawPageReader: public IBasePageReader<TVal> { - public: - TRawPageReader(TPageIter* const iter) - : PageIter(iter) - { - Reset(); - } - - bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const override { - Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); - if (!Rec) - return false; - ui8* raw = (ui8*)Rec + RecSize + ExtLenSize; - return extInfo->ParseFromArray(raw, ExtSize); - } - - size_t GetRecSize() const override { - return RecSize + ExtLenSize; - } - - size_t GetExtSize() const override { - return ExtSize; - } - - const ui8* GetExtInfoRaw(size_t* len) const override { - Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); - if (!Rec) { - *len = 0; - return nullptr; - } - *len = ExtLenSize + ExtSize; - return (ui8*)Rec + RecSize; - } - - const TVal* Next() override { - if (!Rec) - Rec = (TVal*)((char*)PageIter->Current() + sizeof(TDatPage)); - else - Rec = (TVal*)((char*)Rec + DatCeil(RecSize + ExtLenSize + ExtSize)); - if (!TExtInfoType<TVal>::Exists) - RecSize = SizeOf(Rec); - else - RecSize = SizeOfExt(Rec, &ExtLenSize, &ExtSize); - return Rec; - } - - void Reset() override { - Rec = nullptr; - RecSize = 0; - ExtLenSize = 0; - ExtSize = 0; - } - - private: - const TVal* Rec; - size_t RecSize; - size_t ExtLenSize; - size_t ExtSize; - TPageIter* const PageIter; - }; - - template <class TVal, typename TPageIter> - class TCompressedReader: public IBasePageReader<TVal> { - inline size_t GetFirstRecordSize(const TVal* const in) const { - if (!TExtInfoType<TVal>::Exists) { - return DatCeil(SizeOf(in)); - } else { - size_t ll; - size_t l; - size_t ret = SizeOfExt(in, &ll, &l); - - return DatCeil(ret + ll + l); - } - } - - void DecompressBlock() { - if (PageIter->IsFrozen() && Buffer.Get()) - Blocks.push_back(Buffer.Release()); - - const TCompressedHeader* hdr = (const TCompressedHeader*)(Page); - - Page += sizeof(TCompressedHeader); - - const size_t first = GetFirstRecordSize((const TVal*)Page); - - if (!Buffer.Get() || Buffer->Size() < hdr->Original) - Buffer.Reset(new TTempBuf(Max<size_t>(hdr->Original, DEFAULT_BUFFER_SIZE))); - - memcpy(Buffer->Data(), Page, first); - Page += first; - - if (hdr->Count > 1) { - switch (Algo) { - case MBDB_COMPRESSION_ZLIB: { - uLongf dst = hdr->Original - first; - - int ret = uncompress((Bytef*)Buffer->Data() + first, &dst, Page, hdr->Compressed); - - if (ret != Z_OK) - ythrow yexception() << "error then uncompress " << ret; - } break; - case MBDB_COMPRESSION_FASTLZ: { - int dst = hdr->Original - first; - int ret = yfastlz_decompress(Page, hdr->Compressed, Buffer->Data() + first, dst); - - if (!ret) - ythrow yexception() << "error then uncompress"; - } break; - case MBDB_COMPRESSION_SNAPPY: { - if (!snappy::RawUncompress((const char*)Page, hdr->Compressed, Buffer->Data() + first)) - ythrow yexception() << "error then uncompress"; - } break; - } - } - - Rec = nullptr; - RecNum = hdr->Count; - Page += hdr->Compressed; - } - - void ClearBuffer() { - for (size_t i = 0; i < Blocks.size(); ++i) - delete Blocks[i]; - Blocks.clear(); - ClearFlag = false; - } - - public: - TCompressedReader(TPageIter* const iter) - : Rec(nullptr) - , RecSize(0) - , ExtLenSize(0) - , ExtSize(0) - , Page(nullptr) - , PageIter(iter) - , RecNum(0) - , BlockNum(0) - , ClearFlag(false) - { - } - - ~TCompressedReader() override { - ClearBuffer(); - } - - size_t GetRecSize() const override { - return RecSize + ExtLenSize; - } - - size_t GetExtSize() const override { - return ExtSize; - } - - bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const override { - Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); - if (!Rec) - return false; - ui8* raw = (ui8*)Rec + RecSize + ExtLenSize; - return extInfo->ParseFromArray(raw, ExtSize); - } - - const ui8* GetExtInfoRaw(size_t* len) const override { - Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); - if (!Rec) { - *len = 0; - return nullptr; - } - *len = ExtLenSize + ExtSize; - return (ui8*)Rec + RecSize; - } - - const TVal* Next() override { - Y_ASSERT(RecNum >= 0); - - if (ClearFlag) - ClearBuffer(); - - if (!Page) { - if (!PageIter->Current()) - return nullptr; - - Page = (ui8*)PageIter->Current() + sizeof(TDatPage); - - BlockNum = ((TCompressedPage*)Page)->BlockCount - 1; - Algo = (ECompressionAlgorithm)((TCompressedPage*)Page)->Algorithm; - Page += sizeof(TCompressedPage); - - DecompressBlock(); - } - - if (!RecNum) { - if (BlockNum <= 0) - return nullptr; - else { - --BlockNum; - DecompressBlock(); - } - } - - --RecNum; - if (!Rec) - Rec = (const TVal*)Buffer->Data(); - else - Rec = (const TVal*)((char*)Rec + DatCeil(RecSize + ExtLenSize + ExtSize)); - - if (!TExtInfoType<TVal>::Exists) - RecSize = SizeOf(Rec); - else - RecSize = SizeOfExt(Rec, &ExtLenSize, &ExtSize); - - return Rec; - } - - void Reset() override { - Page = nullptr; - BlockNum = 0; - Rec = nullptr; - RecSize = 0; - ExtLenSize = 0; - ExtSize = 0; - RecNum = 0; - } - - void SetClearFlag() override { - ClearFlag = true; - } - - public: - THolder<TTempBuf> Buffer; - TVector<TTempBuf*> Blocks; - const TVal* Rec; - size_t RecSize; - size_t ExtLenSize; - size_t ExtSize; - const ui8* Page; - TPageIter* const PageIter; - int RecNum; //!< count of recs in current block - int BlockNum; - ECompressionAlgorithm Algo; - bool ClearFlag; - }; - - class TZLibCompressionImpl { - public: - static const ECompressionAlgorithm Code = MBDB_COMPRESSION_ZLIB; - - inline void Init() { - // - - } - - inline void Term() { - // - - } - - inline size_t CompressBound(size_t size) const noexcept { - return ::compressBound(size); - } - - inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) { - uLongf size = outSize; - - if (compress((Bytef*)out, &size, (const Bytef*)in, inSize) != Z_OK) - ythrow yexception() << "not compressed"; - outSize = size; - } - }; - - class TFastlzCompressionImpl { - public: - static const ECompressionAlgorithm Code = MBDB_COMPRESSION_FASTLZ; - - inline void Init() { - // - - } - - inline void Term() { - // - - } - - inline size_t CompressBound(size_t size) const noexcept { - size_t rval = size_t(size * 1.07); - return rval < 66 ? 66 : rval; - } - - inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) { - outSize = yfastlz_compress_level(2, in, inSize, out); - if (!outSize) - ythrow yexception() << "not compressed"; - } - }; - - class TSnappyCompressionImpl { - public: - static const ECompressionAlgorithm Code = MBDB_COMPRESSION_SNAPPY; - - inline void Init() { - // - - } - - inline void Term() { - // - - } - - inline size_t CompressBound(size_t size) const noexcept { - return snappy::MaxCompressedLength(size); - } - - inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) { - snappy::RawCompress((const char*)in, inSize, (char*)out, &outSize); - } - }; - -} - -using TFakeCompression = void; -using TZLibCompression = NMicroBDB::TZLibCompressionImpl; -using TFastlzCompression = NMicroBDB::TFastlzCompressionImpl; -using TSnappyCompression = NMicroBDB::TSnappyCompressionImpl; diff --git a/library/cpp/microbdb/safeopen.h b/library/cpp/microbdb/safeopen.h deleted file mode 100644 index c328ffd575..0000000000 --- a/library/cpp/microbdb/safeopen.h +++ /dev/null @@ -1,792 +0,0 @@ -#pragma once - -// util -#include <util/generic/yexception.h> -#include <util/generic/vector.h> -#include <util/string/util.h> -#include <util/system/mutex.h> -#include <thread> - -#include "microbdb.h" - -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 4706) /*assignment within conditional expression*/ -#pragma warning(disable : 4267) /*conversion from 'size_t' to 'type', possible loss of data*/ -#endif - -template <typename TVal, typename TPageFile = TInputPageFile, typename TIterator = TInputPageIterator<TPageFile>> -class TInDatFile: protected TInDatFileImpl<TVal, TInputRecordIterator<TVal, TIterator>> { -public: - typedef TVal TRec; - typedef TInDatFileImpl<TVal, TInputRecordIterator<TVal, TIterator>> TBase; - - TInDatFile(const TString& name, size_t pages, int pagesOrBytes = 1) - : Name(name) - , Pages(pages) - , PagesOrBytes(pagesOrBytes) - { - } - - ~TInDatFile() { - Close(); - } - - void Open(const TString& fname, bool direct = false) { - ui32 gotRecordSig = 0; - int ret = TBase::Open(fname.data(), Pages, PagesOrBytes, &gotRecordSig, direct); - if (ret) { - // XXX: print record type name, not type sig - ythrow yexception() << ErrorMessage(ret, "Failed to open input file", fname, TVal::RecordSig, gotRecordSig); - } - Name = fname; - } - - void OpenStream(TAutoPtr<IInputStream> input) { - ui32 gotRecordSig = 0; - int ret = TBase::Open(input, Pages, PagesOrBytes, &gotRecordSig); - if (ret) { - // XXX: print record type name, not type sig - ythrow yexception() << ErrorMessage(ret, "Failed to open input file", Name, TVal::RecordSig, gotRecordSig); - } - } - - void Close() { - int ret; - if (IsOpen() && (ret = TBase::GetError())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error before closing input file", Name); - if ((ret = TBase::Close())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error while closing input file", Name); - } - - const char* GetName() const { - return Name.data(); - } - - using TBase::Current; - using TBase::Freeze; - using TBase::GetError; - using TBase::GetExtInfo; - using TBase::GetExtInfoRaw; - using TBase::GetExtSize; - using TBase::GetLastPage; - using TBase::GetPageNum; - using TBase::GetPageSize; - using TBase::GetRecSize; - using TBase::GotoLastPage; - using TBase::GotoPage; - using TBase::IsEof; - using TBase::IsOpen; - using TBase::Next; - using TBase::Skip; - using TBase::Unfreeze; - -protected: - TString Name; - size_t Pages; - int PagesOrBytes; -}; - -template <typename TVal> -class TMappedInDatFile: protected TInDatFileImpl<TVal, TInputRecordIterator<TVal, TMappedInputPageIterator<TMappedInputPageFile>>> { -public: - typedef TVal TRec; - typedef TInDatFileImpl<TVal, TInputRecordIterator<TVal, TMappedInputPageIterator<TMappedInputPageFile>>> TBase; - - TMappedInDatFile(const TString& name, size_t /* pages */, int /* pagesOrBytes */) - : Name(name) - { - } - - ~TMappedInDatFile() { - Close(); - } - - void Open(const TString& fname) { - int ret = TBase::Open(fname.data()); - if (ret) - ythrow yexception() << ErrorMessage(ret, "Failed to open mapped file", fname, TVal::RecordSig); - Name = fname; - } - - void Close() { - int ret; - if (IsOpen() && (ret = TBase::GetError())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error before closing mapped file", Name); - if ((ret = TBase::Close())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error while closing mapped file", Name); - } - - const char* GetName() const { - return Name.data(); - } - - using TBase::Current; - using TBase::GetError; - using TBase::GetExtInfo; - using TBase::GetExtInfoRaw; - using TBase::GetLastPage; - using TBase::GetPageNum; - using TBase::GetPageSize; - using TBase::GotoLastPage; - using TBase::GotoPage; - using TBase::IsEof; - using TBase::IsOpen; - using TBase::Next; - using TBase::Skip; - -protected: - TString Name; -}; - -template <typename TVal, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile> -class TOutDatFile: protected TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TFakeIndexer, TCompressor>> { -public: - typedef TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TFakeIndexer, TCompressor>> TBase; - - TOutDatFile(const TString& name, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : Name(name) - , PageSize(pagesize) - , Pages(pages) - , PagesOrBytes(pagesOrBytes) - { - } - - ~TOutDatFile() { - Close(); - } - - void Open(const char* fname, bool direct = false) { - int ret = TBase::Open(fname, PageSize, Pages, PagesOrBytes, direct); - if (ret) - ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname); - Name = fname; - } - - void Open(const TString& fname) { - Open(fname.data()); - } - - void OpenStream(TAutoPtr<IOutputStream> output) { - int ret = TBase::Open(output, PageSize, Pages, PagesOrBytes); - if (ret) - ythrow yexception() << ErrorMessage(ret, "Failed to open output stream", Name); - } - - void Close() { - int ret; - if ((ret = TBase::GetError())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name); - if ((ret = TBase::Close())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name); - } - - const char* GetName() const { - return Name.data(); - } - - using TBase::Freeze; - using TBase::GetError; - using TBase::GetPageSize; - using TBase::IsEof; - using TBase::IsOpen; - using TBase::Offset; - using TBase::Push; - using TBase::PushWithExtInfo; - using TBase::Reserve; - using TBase::Unfreeze; - -protected: - TString Name; - size_t PageSize, Pages; - int PagesOrBytes; -}; - -template <typename TVal, typename TCompressor, typename TPageFile> -class TOutDatFileArray; - -template <typename TVal, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile> -class TOutDatFileArray { - typedef TOutDatFile<TVal, TCompressor, TPageFile> TFileType; - -public: - TOutDatFileArray(const TString& name, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : Name(name) - , PageSize(pagesize) - , Pages(pages) - , PagesOrBytes(pagesOrBytes) - , NumFiles(0) - , Files(nullptr) - { - } - - ~TOutDatFileArray() { - for (int i = 0; i < NumFiles; ++i) { - Files[i].Close(); - Files[i].~TFileType(); - } - free(Files); - Files = nullptr; - NumFiles = 0; - } - - TFileType& operator[](size_t pos) { - return Files[pos]; - } - - void Open(int n, const TString& fname) { - char temp[FILENAME_MAX]; - - Name = fname; - NumFiles = CreateDatObjects(n, fname); - - int i; - try { - for (i = 0; i < NumFiles; ++i) { - sprintf(temp, fname.data(), i); - Files[i].Open(temp); - } - } catch (...) { - while (--i >= 0) - Files[i].Close(); - throw; - } - } - - template <typename TNameBuilder> - void OpenWithCallback(int n, const TNameBuilder& builder) { - NumFiles = CreateDatObjects(n, Name); - - for (int i = 0; i < NumFiles; ++i) - Files[i].Open(builder.GetName(i).data()); - } - - void Close() { - for (int i = 0; i < NumFiles; ++i) - Files[i].Close(); - } - - void CloseMT(ui32 threads) { - int current = 0; - TMutex mutex; - TVector<std::thread> thrs; - thrs.reserve(threads); - for (ui32 i = 0; i < threads; i++) { - thrs.emplace_back([this, ¤t, &mutex]() { - while (true) { - mutex.Acquire(); - int cur = current++; - mutex.Release(); - if (cur >= NumFiles) - break; - Files[cur].Close(); - } - }); - } - for (auto& thread : thrs) { - thread.join(); - } - } - - const char* GetName() const { - return Name.data(); - } - -protected: - int CreateDatObjects(int n, const TString& fname) { - if (!(Files = (TFileType*)malloc(n * sizeof(TFileType)))) - ythrow yexception() << "can't alloc \"" << fname << "\" file array: " << LastSystemErrorText(); - int num = 0; - char temp[FILENAME_MAX]; - for (int i = 0; i < n; ++i, ++num) { - sprintf(temp, "%s[%d]", fname.data(), i); - new (Files + i) TFileType(temp, PageSize, Pages, PagesOrBytes); - } - return num; - } - - TString Name; - size_t PageSize, Pages; - int PagesOrBytes, NumFiles; - TFileType* Files; -}; - -template <typename TVal, typename TKey, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile> -class TOutDirectFile: protected TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> { - typedef TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> TBase; - -public: - TOutDirectFile(const TString& name, size_t pagesize, size_t pages, size_t ipagesize, size_t ipages, int pagesOrBytes) - : Name(name) - , PageSize(pagesize) - , Pages(pages) - , IdxPageSize(ipagesize) - , IdxPages(ipages) - , PagesOrBytes(pagesOrBytes) - { - } - - ~TOutDirectFile() { - Close(); - } - - void Open(const TString& fname) { - int ret = TBase::Open(fname.data(), PageSize, Pages, IdxPageSize, IdxPages, PagesOrBytes); - if (ret) - ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname); - Name = fname; - } - - void Close() { - int ret; - if ((ret = TBase::GetError())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name); - if ((ret = TBase::Close())) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name); - } - - const char* GetName() const { - return Name.data(); - } - - using TBase::Freeze; - using TBase::Push; - using TBase::PushWithExtInfo; - using TBase::Reserve; - using TBase::Unfreeze; - -protected: - TString Name; - size_t PageSize, Pages, IdxPageSize, IdxPages; - int PagesOrBytes; -}; - -template < - typename TVal, - template <typename T> class TComparer, - typename TCompress = TFakeCompression, - typename TSieve = TFakeSieve<TVal>, - typename TPageFile = TOutputPageFile, - typename TFileTypes = TDefInterFileTypes> -class TDatSorter: protected TDatSorterImpl<TVal, TComparer<TVal>, TCompress, TSieve, TPageFile, TFileTypes> { - typedef TDatSorterImpl<TVal, TComparer<TVal>, TCompress, TSieve, TPageFile, TFileTypes> TBase; - -public: - typedef TVal TRec; - -public: - TDatSorter(const TString& name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : Name(name) - , Memory(memory) - , PageSize(pagesize) - , Pages(pages) - , PagesOrBytes(pagesOrBytes) - { - Templ[0] = 0; - } - - ~TDatSorter() { - Close(); - Templ[0] = 0; - } - - void Open(const TString& dirName) { - int ret; - if (ret = MakeSorterTempl(Templ, dirName.data())) { - Templ[0] = 0; - ythrow yexception() << ErrorMessage(ret, Name + " sorter: bad tempdir", dirName); - } - if ((ret = TBase::Open(Templ, PageSize, Pages, PagesOrBytes))) - ythrow yexception() << ErrorMessage(ret, Name + " sorter: open error, temp dir", Templ); - } - - void Sort(bool direct = false) { - int ret = TBase::Sort(Memory, 1000, direct); - if (ret) - ythrow yexception() << ErrorMessage(ret, Name + " sorter: sort error, temp dir", Templ, TVal::RecordSig); - } - - void SortToFile(const TString& name) { - int ret = TBase::SortToFile(name.data(), Memory); - if (ret) - ythrow yexception() << ErrorMessage(ret, Name + "sorter: error in SortToFile", name, TVal::RecordSig); - } - - void SortToStream(TAutoPtr<IOutputStream> output) { - int ret = TBase::SortToStream(output, Memory); - if (ret) - ythrow yexception() << ErrorMessage(ret, Name + "sorter: error in SortToStream", "", TVal::RecordSig); - } - - void Close() { - int ret1 = TBase::GetError(); - int ret2 = TBase::Close(); - if (Templ[0]) { - *strrchr(Templ, GetDirectorySeparator()) = 0; - RemoveDirWithContents(Templ); - Templ[0] = 0; - } - if (ret1) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret1, Name + "sorter: error before closing"); - if (ret2) - if (!std::uncaught_exception()) - ythrow yexception() << ErrorMessage(ret2, Name + "sorter: error while closing"); - } - - int Sort(size_t memory, int maxportions, bool direct = false) { - return TBase::Sort(memory, maxportions, direct); - } - - const char* GetName() const { - return Name.data(); - } - - using TBase::GetPageSize; - using TBase::GetPages; - using TBase::Next; - using TBase::NextPortion; - using TBase::Push; - using TBase::PushWithExtInfo; - using TBase::UseSegmentSorter; - -protected: - TString Name; - size_t Memory, PageSize, Pages; - int PagesOrBytes; - char Templ[FILENAME_MAX]; -}; - -template <typename TSorter> -class TSorterArray { -public: - typedef TSorter TDatSorter; - -public: - TSorterArray(const TString& name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : Name(name) - , Memory(memory) - , PageSize(pagesize) - , Pages(pages) - , PagesOrBytes(pagesOrBytes) - , NumSorters(0) - , Sorters(nullptr) - { - } - - ~TSorterArray() { - for (int i = 0; i < NumSorters; ++i) { - Sorters[i].Close(); - Sorters[i].~TSorter(); - } - free(Sorters); - Sorters = nullptr; - NumSorters = 0; - } - - TSorter& operator[](size_t pos) { - return Sorters[pos]; - } - - void Open(int n, const TString& fname, size_t memory = 0) { - if (!(Sorters = (TSorter*)malloc(n * sizeof(TSorter)))) - ythrow yexception() << "can't alloc \"" << fname << "\" sorter array: " << LastSystemErrorText(); - NumSorters = n; - char temp[FILENAME_MAX]; - if (memory) - Memory = memory; - for (int i = 0; i < NumSorters; ++i) { - sprintf(temp, "%s[%d]", Name.data(), i); - new (Sorters + i) TSorter(temp, Memory, PageSize, Pages, PagesOrBytes); - } - for (int i = 0; i < NumSorters; ++i) - Sorters[i].Open(fname); - } - - void Close() { - for (int i = 0; i < NumSorters; ++i) - Sorters[i].Close(); - } - - const char* GetName() const { - return Name.data(); - } - -protected: - TString Name; - size_t Memory, PageSize, Pages; - int PagesOrBytes, NumSorters; - TSorter* Sorters; -}; - -template <typename TVal, template <typename T> class TCompare, typename TSieve = TFakeSieve<TVal>> -class TDatSorterArray: public TSorterArray<TDatSorter<TVal, TCompare, TSieve>> { -public: - TDatSorterArray(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : TSorterArray<TDatSorter<TVal, TCompare, TSieve>>(name, memory, pagesize, pages, pagesOrBytes) - { - } -}; - -template <typename TVal, template <typename T> class TCompare, typename TCompress = TFakeCompression, - typename TSieve = TFakeSieve<TVal>, typename TPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes> -class TDatSorterMemo: public TDatSorter<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes> { - typedef TDatSorter<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes> TSorter; - -public: - TOutDatFile<TVal> Memo; - TString Home; - bool OpenReq; - bool Opened; - bool UseDirectWrite; - -public: - TDatSorterMemo(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : TSorter(name, memory, pagesize, pages, pagesOrBytes) - , Memo(name, pagesize, memory, 0) - { - OpenReq = false; - Opened = false; - UseDirectWrite = false; - } - - void Open(const TString& home) { - OpenReq = true; - // TSorter::Open(home); - Home = home; - Memo.Open(nullptr); - Memo.Freeze(); - } - - void Reopen(const char* home) { - Close(); - Open(home); - } - - void Open() { - if (!OpenReq) { - OpenReq = true; - Memo.Open(nullptr); - Memo.Freeze(); - } - } - - void OpenIfNeeded() { - if (OpenReq && !Opened) { - if (!Home) - ythrow yexception() << "Temp directory not specified, call Open(char*) first : " << TSorter::Name; - TSorter::Open(Home); - Opened = true; - } - } - - TVal* Reserve(size_t len) { - if (TExtInfoType<TVal>::Exists) - return ReserveWithExt(len, 0); - - TVal* u = Memo.Reserve(len); - if (!u) { - OpenIfNeeded(); - TSorter::NextPortion(UseDirectWrite); - Memo.Freeze(); - u = Memo.Reserve(len); - } - TSorter::PushWithExtInfo(u); - return u; - } - - TVal* ReserveWithExt(size_t len, size_t extSize) { - size_t fullLen = len + len_long((i64)extSize) + extSize; - TVal* u = Memo.Reserve(fullLen); - if (!u) { - OpenIfNeeded(); - TSorter::NextPortion(UseDirectWrite); - Memo.Freeze(); - u = Memo.Reserve(fullLen); - if (!u) { - if (fullLen > Memo.GetPageSize()) { - ythrow yexception() << "Size of element and " << len << " size of extInfo " << extSize - << " is larger than page size " << Memo.GetPageSize(); - } - ythrow yexception() << "going to insert a null pointer. Bad."; - } - } - out_long((i64)extSize, (char*)u + len); - TSorter::PushWithExtInfo(u); - return u; - } - - char* GetReservedExt(TVal* rec, size_t len, size_t extSize) { - return (char*)rec + len + len_long((i64)extSize); - } - - const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) { - const TVal* u = Memo.Push(v, extInfo); - if (!u) { - OpenIfNeeded(); - TSorter::NextPortion(UseDirectWrite); - Memo.Freeze(); - u = Memo.Push(v, extInfo); - if (!u) { - if (SizeOf(v) > Memo.GetPageSize()) { - ythrow yexception() << "Size of element " << SizeOf(v) - << " is larger than page size " << Memo.GetPageSize(); - } - ythrow yexception() << "going to insert a null pointer. Bad."; - } - } - TSorter::PushWithExtInfo(u); - return u; - } - - const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) { - const TVal* u = Memo.Push(v, extInfoRaw, extLen); - if (!u) { - OpenIfNeeded(); - TSorter::NextPortion(UseDirectWrite); - Memo.Freeze(); - u = Memo.Push(v, extInfoRaw, extLen); - if (!u) { - if (SizeOf(v) > Memo.GetPageSize()) { - ythrow yexception() << "Size of element " << SizeOf(v) - << " is larger than page size " << Memo.GetPageSize(); - } - ythrow yexception() << "going to insert a null pointer. Bad.."; - } - } - TSorter::PushWithExtInfo(u); - return u; - } - - const TVal* PushWithExtInfo(const TVal* v) { - const TVal* u = Memo.PushWithExtInfo(v); - if (!u) { - OpenIfNeeded(); - TSorter::NextPortion(UseDirectWrite); - Memo.Freeze(); - u = Memo.PushWithExtInfo(v); - if (!u) { - if (SizeOf(v) > Memo.GetPageSize()) { - ythrow yexception() << "Size of element " << SizeOf(v) - << " is larger than page size " << Memo.GetPageSize(); - } - ythrow yexception() << "going to insert a null pointer. Bad..."; - } - } - TSorter::PushWithExtInfo(u); - return u; - } - - void Sort(bool direct = false) { - if (Opened) { - TSorter::NextPortion(UseDirectWrite); - Memo.Close(); - OpenReq = false; - TSorter::Sort(direct); - } else { - TSorter::SortPortion(); - } - } - - const TVal* Next() { - return Opened ? TSorter::Next() : TSorter::Nextp(); - } - - bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { - return NMicroBDB::GetExtInfo(Current(), extInfo); - } - - const ui8* GetExtInfoRaw(size_t* len) const { - return NMicroBDB::GetExtInfoRaw(Current(), len); - } - - const TVal* Current() const { - return Opened ? TSorter::Current() : TSorter::Currentp(); - } - - int NextPortion() { - OpenIfNeeded(); - return TSorter::NextPortion(UseDirectWrite); - } - - void SortToFile(const char* name) { - OpenIfNeeded(); - TSorter::NextPortion(UseDirectWrite); - Memo.Close(); - OpenReq = false; - TSorter::SortToFile(name); - } - - void SortToStream(TAutoPtr<IOutputStream> output) { - OpenIfNeeded(); - TSorter::NextPortion(UseDirectWrite); - Memo.Close(); - OpenReq = false; - TSorter::SortToStream(output); - } - - template <typename TKey, typename TOutCompress> - void SortToDirectFile(const char* name, size_t ipagesize, size_t ipages) { - Sort(); - TOutDirectFile<TVal, TKey, TOutCompress> out(TSorter::Name, TSorter::PageSize, TSorter::Pages, ipagesize, ipages, TSorter::PagesOrBytes); - out.Open(name); - while (const TVal* rec = Next()) - out.PushWithExtInfo(rec); - out.Close(); - } - - template <typename TKey> - void SortToDirectFile(const char* name, size_t ipagesize, size_t ipages) { - SortToDirectFile<TKey, TCompress>(name, ipagesize, ipages); - } - - void CloseSorter() { - if (Opened) - TSorter::Close(); - else - TSorter::Closep(); - Memo.Freeze(); - Opened = false; - } - - void Close() { - if (Opened) - TSorter::Close(); - else - TSorter::Closep(); - Memo.Close(); - OpenReq = false; - Opened = false; - } - - int SavePortions(const char* mask) { - return TSorter::SavePortions(mask, UseDirectWrite); - } - -public: - using TSorter::RestorePortions; -}; - -template <typename TVal, template <typename T> class TCompare, typename TCompress = TFakeCompression, - typename TSieve = TFakeSieve<TVal>, class TPageFile = TOutputPageFile, class TFileTypes = TDefInterFileTypes> -class TDatSorterMemoArray: public TSorterArray<TDatSorterMemo<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes>> { -public: - typedef TSorterArray<TDatSorterMemo<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes>> TBase; - - TDatSorterMemoArray(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) - : TBase(name, memory, pagesize, pages, pagesOrBytes) - { - } -}; - -#if defined(_MSC_VER) -#pragma warning(pop) -#endif diff --git a/library/cpp/microbdb/sorter.h b/library/cpp/microbdb/sorter.h deleted file mode 100644 index b2e7390377..0000000000 --- a/library/cpp/microbdb/sorter.h +++ /dev/null @@ -1,677 +0,0 @@ -#pragma once - -#include <util/ysaveload.h> -#include <util/generic/algorithm.h> -#include <contrib/libs/libc_compat/include/link/link.h> - -#include "header.h" -#include "heap.h" -#include "extinfo.h" -#include "input.h" -#include "output.h" - -#ifdef TEST_MERGE -#define MBDB_SORT_FUN ::StableSort -#else -#define MBDB_SORT_FUN ::Sort -#endif - -template <class TVal, class TCompare, typename TCompress, typename TSieve, typename TOutPageFile, typename TFileTypes> -class TDatSorterImpl; - -template <class TVal> -struct TFakeSieve { - static inline int Sieve(TVal*, const TVal*) noexcept { - return 0; - } -}; - -template <class TSieve> -struct TIsSieveFake { - static const bool Result = false; -}; - -template <class T> -struct TIsSieveFake<TFakeSieve<T>> { - static const bool Result = true; -}; - -class TDefInterFileTypes { -public: - typedef TOutputPageFile TOutPageFile; - typedef TInputPageFile TInPageFile; -}; - -//class TCompressedInterFileTypes; - -template <class TVal, class TCompare, typename TCompress, typename TSieve, typename TOutPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes> -class TDatSorterImplBase: protected THeapIter<TVal, TInDatFileImpl<TVal, TInputRecordIterator<TVal, TInputPageIterator<typename TFileTypes::TInPageFile>>>, TCompare> { - typedef TOutputRecordIterator<TVal, TOutputPageIterator<typename TFileTypes::TOutPageFile>, TFakeIndexer, TCompress> TTmpRecIter; - typedef TInputRecordIterator<TVal, TInputPageIterator<typename TFileTypes::TInPageFile>> TInTmpRecIter; - -public: - typedef TOutDatFileImpl<TVal, TTmpRecIter> TTmpOut; - typedef TInDatFileImpl<TVal, TInTmpRecIter> TTmpIn; - - typedef TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TOutPageFile>, TFakeIndexer, TCompress>> TOut; - typedef THeapIter<TVal, TTmpIn, TCompare> TMyHeap; - typedef TVector<const TVal*> TMyVector; - typedef typename TMyVector::iterator TMyIterator; - - class IPortionSorter { - public: - virtual ~IPortionSorter() { - } - - virtual void Sort(TMyVector&, TTmpOut*) = 0; - }; - - class TDefaultSorter: public IPortionSorter { - public: - void Sort(TMyVector& vector, TTmpOut* out) override { - MBDB_SORT_FUN(vector.begin(), vector.end(), TCompare()); - - const typename TMyVector::const_iterator - end = (TIsSieveFake<TSieve>::Result) ? vector.end() : TDatSorterImplBase::SieveRange(vector.begin(), vector.end()); - - for (typename TMyVector::const_iterator it = vector.begin(); it != end; ++it) { - out->PushWithExtInfo(*it); - } - } - }; - - class TSegmentedSorter: public IPortionSorter { - class TAdaptor { - typedef typename TMyVector::const_iterator TConstIterator; - - public: - TAdaptor(TConstIterator b, TConstIterator e) - : Curr_(b) - , End_(e) - { - --Curr_; - } - - inline const TVal* Current() const { - return *Curr_; - } - - inline const TVal* Next() { - ++Curr_; - - if (Curr_ == End_) { - return nullptr; - } - - return *Curr_; - } - - private: - TConstIterator Curr_; - TConstIterator End_; - }; - - typedef THeapIter<TVal, TAdaptor, TCompare> TPortionsHeap; - - public: - void Sort(TMyVector& vector, TTmpOut* out) override { - TVector<TAdaptor> bounds; - typename TMyVector::iterator - it = vector.begin(); - const size_t portions = Max<size_t>(1, (vector.size() * sizeof(TVal)) / (4 << 20)); - const size_t step = vector.size() / portions; - - // Sort segments - while (it != vector.end()) { - const typename TMyVector::iterator - end = Min(it + step, vector.end()); - - MBDB_SORT_FUN(it, end, TCompare()); - - bounds.push_back(TAdaptor(it, end)); - - it = end; - } - - // - // Merge result - // - - TPortionsHeap heap(bounds); - - if (TIsSieveFake<TSieve>::Result) { - while (const TVal* val = heap.Next()) { - out->PushWithExtInfo(val); - } - } else { - const TVal* val = heap.Next(); - const TVal* prev = out->PushWithExtInfo(val); - - for (val = heap.Next(); val && prev; val = heap.Next()) { - if (TSieve::Sieve((TVal*)prev, val)) { - continue; - } - - prev = out->PushWithExtInfo(val); - } - - if (prev) { - TSieve::Sieve((TVal*)prev, prev); - } - } - } - }; - -public: - TDatSorterImplBase() - : Sorter(new TDefaultSorter) - { - InFiles = nullptr; - TempBuf = nullptr; - Ptr = Vector.end(); - Cur = nullptr; - Portions = CPortions = Error = 0; - } - - ~TDatSorterImplBase() { - Close(); - } - - int Open(const char* templ, size_t pagesize, size_t pages, int pagesOrBytes = 1) { - Portions = CPortions = Error = 0; - TempBuf = strdup(templ); - Pagesize = pagesize; - if (pagesOrBytes) - Pages = pages; - else - Pages = pages / pagesize; - Pages = Max(1, Pages); - return 0; - } - - void Push(const TVal* v) { - // Serialized extInfo must follow a record being pushed, therefore, to avoid - // unintentional misusage (as if when you are adding TExtInfo in your record - // type: you may forget to check your sorting routines and get a segfault as - // a result). - // PushWithExtInfo(v) should be called on records with extInfo. - static_assert(!TExtInfoType<TVal>::Exists, "expect !TExtInfoType<TVal>::Exists"); - - Vector.push_back(v); - } - - void PushWithExtInfo(const TVal* v) { - Vector.push_back(v); - } - - int SortPortion() { - Ptr = Vector.end(); - Cur = nullptr; - if (!Vector.size() || Error) - return Error; - - MBDB_SORT_FUN(Vector.begin(), Vector.end(), TCompare()); - - if (!TIsSieveFake<TSieve>::Result) { - const typename TMyVector::iterator - end = SieveRange(Vector.begin(), Vector.end()); - - Vector.resize(end - Vector.begin()); - } - - Ptr = Vector.begin(); - Cur = nullptr; - return 0; - } - - const TVal* Nextp() { - Cur = Ptr == Vector.end() ? nullptr : *Ptr++; - return Cur; - } - - const TVal* Currentp() const { - return Cur; - } - - void Closep() { - Vector.clear(); - Ptr = Vector.end(); - Cur = nullptr; - } - - int NextPortion(bool direct = false) { - if (!Vector.size() || Error) - return Error; - - TTmpOut out; - int ret, ret1; - char fname[FILENAME_MAX]; - - snprintf(fname, sizeof(fname), TempBuf, Portions++); - if ((ret = out.Open(fname, Pagesize, Pages, 1, direct))) - return Error = ret; - - Sorter->Sort(Vector, &out); - - Vector.erase(Vector.begin(), Vector.end()); - ret = out.GetError(); - ret1 = out.Close(); - Error = Error ? Error : ret ? ret : ret1; - if (Error) - unlink(fname); - return Error; - } - - int SavePortions(const char* mask, bool direct = false) { - char srcname[PATH_MAX], dstname[PATH_MAX]; - if (Vector.size()) - NextPortion(direct); - for (int i = 0; i < Portions; i++) { - char num[10]; - sprintf(num, "%i", i); - snprintf(srcname, sizeof(srcname), TempBuf, i); - snprintf(dstname, sizeof(dstname), mask, num); - int res = rename(srcname, dstname); - if (res) - return res; - } - snprintf(dstname, sizeof(dstname), mask, "count"); - TOFStream fcount(dstname); - Save(&fcount, Portions); - fcount.Finish(); - return 0; - } - - int RestorePortions(const char* mask) { - char srcname[PATH_MAX], dstname[PATH_MAX]; - snprintf(srcname, sizeof(srcname), mask, "count"); - TIFStream fcount(srcname); - Load(&fcount, Portions); - for (int i = 0; i < Portions; i++) { - char num[10]; - sprintf(num, "%i", i); - snprintf(dstname, sizeof(dstname), TempBuf, i); - snprintf(srcname, sizeof(srcname), mask, num); - unlink(dstname); - int res = link(srcname, dstname); - if (res) - return res; - } - return 0; - } - - int RestorePortions(const char* mask, ui32 count) { - char srcname[PATH_MAX], dstname[PATH_MAX]; - ui32 portions; - TVector<ui32> counts; - for (ui32 j = 0; j < count; j++) { - snprintf(srcname, sizeof(srcname), mask, j, "count"); - TIFStream fcount(srcname); - Load(&fcount, portions); - counts.push_back(portions); - Portions += portions; - } - ui32 p = 0; - for (ui32 j = 0; j < count; j++) { - int cnt = counts[j]; - for (int i = 0; i < cnt; i++, p++) { - char num[10]; - sprintf(num, "%i", i); - snprintf(dstname, sizeof(dstname), TempBuf, p); - snprintf(srcname, sizeof(srcname), mask, j, num); - unlink(dstname); - int res = link(srcname, dstname); - if (res) { - fprintf(stderr, "Can not link %s to %s\n", srcname, dstname); - return res; - } - } - } - return 0; - } - - int Sort(size_t memory, int maxportions = 1000, bool direct = false) { - int ret, end, beg, i; - char fname[FILENAME_MAX]; - - if (Vector.size()) - NextPortion(); - - if (Error) - return Error; - if (!Portions) { - TMyHeap::Init(&DummyFile, 1); // closed file - HPages = 1; - return 0; - } - - Optimize(memory, maxportions); - if (!(InFiles = new TTmpIn[MPortions])) - return MBDB_NO_MEMORY; - - for (beg = 0; beg < Portions && !Error; beg = end) { - end = (int)Min(beg + FPortions, Portions); - for (i = beg; i < end && !Error; i++) { - snprintf(fname, sizeof(fname), TempBuf, i); - if ((ret = InFiles[i - beg].Open(fname, HPages, 1, nullptr, direct))) - Error = Error ? Error : ret; - } - if (Error) - return Error; - TMyHeap::Init(InFiles, end - beg); - if (end != Portions) { - TTmpOut out; - const TVal* v; - snprintf(fname, sizeof(fname), TempBuf, Portions++); - if ((ret = out.Open(fname, Pagesize, HPages))) - return Error = Error ? Error : ret; - while ((v = TMyHeap::Next())) - out.PushWithExtInfo(v); - ret = out.GetError(); - Error = Error ? Error : ret; - ret = out.Close(); - Error = Error ? Error : ret; - for (i = beg; i < end; i++) { - ret = InFiles[i - beg].Close(); - Error = Error ? Error : ret; - snprintf(fname, sizeof(fname), TempBuf, CPortions++); - unlink(fname); - } - } - FPortions = MPortions; - } - return Error; - } - - int Close() { - char fname[FILENAME_MAX]; - delete[] InFiles; - InFiles = nullptr; - Closep(); - for (int i = CPortions; i < Portions; i++) { - snprintf(fname, sizeof(fname), TempBuf, i); - unlink(fname); - } - CPortions = Portions = 0; - free(TempBuf); - TempBuf = nullptr; - return Error; - } - - void UseSegmentSorter() { - Sorter.Reset(new TSegmentedSorter); - } - - inline int GetError() const { - return Error; - } - - inline int GetPages() const { - return Pages; - } - - inline int GetPageSize() const { - return Pagesize; - } - -private: - static TMyIterator SieveRange(const TMyIterator begin, const TMyIterator end) { - TMyIterator it = begin; - TMyIterator prev = begin; - - for (++it; it != end; ++it) { - if (TSieve::Sieve((TVal*)*prev, *it)) { - continue; - } - - ++prev; - - if (it != prev) { - *prev = *it; - } - } - - TSieve::Sieve((TVal*)*prev, *prev); - - return ++prev; - } - -protected: - void Optimize(size_t memory, int maxportions, size_t fbufmax = 256u << 20) { - maxportions = (int)Min((size_t)maxportions, memory / Pagesize) - 1; - size_t maxpages = Max((size_t)1u, fbufmax / Pagesize); - - if (maxportions <= 2) { - FPortions = MPortions = 2; - HPages = 1; - return; - } - if (maxportions >= Portions) { - FPortions = MPortions = Portions; - HPages = (int)Min(memory / ((Portions + 1) * Pagesize), maxpages); - return; - } - if (((Portions + maxportions - 1) / maxportions) <= maxportions) { - while (((Portions + maxportions - 1) / maxportions) <= maxportions) - --maxportions; - MPortions = ++maxportions; - int total = ((Portions + maxportions - 1) / maxportions) + Portions; - FPortions = (total % maxportions) ? (total % maxportions) : MPortions; - HPages = (int)Min(memory / ((MPortions + 1) * Pagesize), maxpages); - return; - } - FPortions = MPortions = maxportions; - HPages = (int)Min(memory / ((MPortions + 1) * Pagesize), maxpages); - } - - TMyVector Vector; - typename TMyVector::iterator Ptr; - const TVal* Cur; - TTmpIn *InFiles, DummyFile; - char* TempBuf; - int Portions, CPortions, Pagesize, Pages, Error; - int FPortions, MPortions, HPages; - THolder<IPortionSorter> Sorter; -}; - -template <class TVal, class TCompare, typename TCompress> -class TDatSorterImpl<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> - : public TDatSorterImplBase<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> { - typedef TDatSorterImplBase<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> TBase; - -public: - int SortToFile(const char* name, size_t memory, int maxportions = 1000) { - int ret = TBase::Sort(memory, maxportions); - if (ret) - return ret; - typename TBase::TOut out; - if ((ret = out.Open(name, TBase::Pagesize, TBase::HPages))) - return ret; - const TVal* rec; - while ((rec = Next())) - out.PushWithExtInfo(rec); - if ((ret = out.GetError())) - return ret; - if ((ret = out.Close())) - return ret; - if ((ret = TBase::Close())) - return ret; - return 0; - } - - int SortToStream(TAutoPtr<IOutputStream> output, size_t memory, int maxportions = 1000) { - int ret = TBase::Sort(memory, maxportions); - if (ret) - return ret; - typename TBase::TOut out; - if ((ret = out.Open(output, TBase::Pagesize, TBase::HPages))) - return ret; - const TVal* rec; - while ((rec = Next())) - out.PushWithExtInfo(rec); - if ((ret = out.GetError())) - return ret; - if ((ret = out.Close())) - return ret; - if ((ret = TBase::Close())) - return ret; - return 0; - } - - const TVal* Next() { - return TBase::TMyHeap::Next(); - } - - const TVal* Current() const { - return TBase::TMyHeap::Current(); - } - - bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { - return TBase::TMyHeap::GetExtInfo(extInfo); - } - - const ui8* GetExtInfoRaw(size_t* len) const { - return TBase::TMyHeap::GetExtInfoRaw(len); - } -}; - -template <class TVal, class TCompare, typename TCompress, typename TSieve, - typename TOutPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes> -class TDatSorterImpl: public TDatSorterImplBase<TVal, TCompare, TCompress, TSieve, TOutPageFile, TFileTypes> { - typedef TDatSorterImplBase<TVal, TCompare, TCompress, TSieve, TOutPageFile, TFileTypes> TBase; - -public: - TDatSorterImpl() - : Cur(nullptr) - , Prev(nullptr) - { - } - - int SortToFile(const char* name, size_t memory, int maxportions = 1000) { - int ret = Sort(memory, maxportions); - if (ret) - return ret; - typename TBase::TOut out; - if ((ret = out.Open(name, TBase::Pagesize, TBase::HPages))) - return ret; - const TVal* rec; - while ((rec = Next())) - out.PushWithExtInfo(rec); - if ((ret = out.GetError())) - return ret; - if ((ret = out.Close())) - return ret; - if ((ret = TBase::Close())) - return ret; - return 0; - } - - int SortToStream(TAutoPtr<IOutputStream> output, size_t memory, int maxportions = 1000) { - int ret = Sort(memory, maxportions); - if (ret) - return ret; - typename TBase::TOut out; - if ((ret = out.Open(output, TBase::Pagesize, TBase::HPages))) - return ret; - const TVal* rec; - while ((rec = Next())) - out.PushWithExtInfo(rec); - if ((ret = out.GetError())) - return ret; - if ((ret = out.Close())) - return ret; - if ((ret = TBase::Close())) - return ret; - return 0; - } - - int Open(const char* templ, size_t pagesize, size_t pages, int pagesOrBytes = 1) { - int res = TBase::Open(templ, pagesize, pages, pagesOrBytes); - Prev = nullptr; - Cur = nullptr; - return res; - } - - int Sort(size_t memory, int maxportions = 1000, bool direct = false) { - int res = TBase::Sort(memory, maxportions, direct); - if (!res) { - const TVal* rec = TBase::TMyHeap::Next(); - if (rec) { - size_t els, es; - size_t sz = NMicroBDB::SizeOfExt(rec, &els, &es); - sz += els + es; - if (!TExtInfoType<TVal>::Exists) - Cur = (TVal*)malloc(sizeof(TVal)); - else - Cur = (TVal*)malloc(TBase::Pagesize); - memcpy(Cur, rec, sz); - } - } - return res; - } - - // Prev = last returned - // Cur = current accumlating with TSieve - - const TVal* Next() { - if (!Cur) { - if (Prev) { - free(Prev); - Prev = nullptr; - } - return nullptr; - } - const TVal* rec; - - if (TIsSieveFake<TSieve>::Result) - rec = TBase::TMyHeap::Next(); - else { - do { - rec = TBase::TMyHeap::Next(); - } while (rec && TSieve::Sieve((TVal*)Cur, rec)); - } - - if (!Prev) { - if (!TExtInfoType<TVal>::Exists) - Prev = (TVal*)malloc(sizeof(TVal)); - else - Prev = (TVal*)malloc(TBase::Pagesize); - } - size_t els, es; - size_t sz = NMicroBDB::SizeOfExt(Cur, &els, &es); - sz += els + es; - memcpy(Prev, Cur, sz); - - if (rec) { - sz = NMicroBDB::SizeOfExt(rec, &els, &es); - sz += els + es; - memcpy(Cur, rec, sz); - } else { - TSieve::Sieve((TVal*)Cur, Cur); - free(Cur); - Cur = nullptr; - } - return Prev; - } - - const TVal* Current() const { - return Prev; - } - - int Close() { - int res = TBase::Close(); - if (Prev) { - free(Prev); - Prev = nullptr; - } - if (Cur) { - free(Cur); - Cur = nullptr; - } - return res; - } - -protected: - TVal* Cur; - TVal* Prev; -}; diff --git a/library/cpp/microbdb/sorterdef.h b/library/cpp/microbdb/sorterdef.h deleted file mode 100644 index 8834b5fff8..0000000000 --- a/library/cpp/microbdb/sorterdef.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#define MAKESORTERTMPL(TRecord, MemberFunc) \ - template <typename T> \ - struct MemberFunc; \ - template <> \ - struct MemberFunc<TRecord> { \ - bool operator()(const TRecord* l, const TRecord* r) { \ - return TRecord ::MemberFunc(l, r) < 0; \ - } \ - int operator()(const TRecord* l, const TRecord* r, int) { \ - return TRecord ::MemberFunc(l, r); \ - } \ - } - -template <typename T> -static inline int compare(const T& a, const T& b) { - return (a < b) ? -1 : (a > b); -} diff --git a/library/cpp/microbdb/utility.h b/library/cpp/microbdb/utility.h deleted file mode 100644 index 5c86061bca..0000000000 --- a/library/cpp/microbdb/utility.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once - -#include "microbdb.h" - -template <class TRecord, template <class T> class TCompare> -int SortData(const TFile& ifile, const TFile& ofile, const TDatMetaPage* meta, size_t memory, const char* tmpDir = nullptr) { - char templ[FILENAME_MAX]; - TInDatFileImpl<TRecord> datin; - TOutDatFileImpl<TRecord> datout; - TDatSorterImpl<TRecord, TCompare<TRecord>, TFakeCompression, TFakeSieve<TRecord>> sorter; - const TRecord* u; - int ret; - - const size_t minMemory = (2u << 20); - memory = Max(memory, minMemory + minMemory / 2); - if (datin.Open(ifile, meta, memory - minMemory, 0)) - err(1, "can't read input file"); - - size_t outpages = Max((size_t)2u, minMemory / datin.GetPageSize()); - memory -= outpages * datin.GetPageSize(); - - if (ret = MakeSorterTempl(templ, tmpDir)) - err(1, "can't create tempdir in \"%s\"; error: %d\n", templ, ret); - - if (sorter.Open(templ, datin.GetPageSize(), outpages)) { - *strrchr(templ, LOCSLASH_C) = 0; - RemoveDirWithContents(templ); - err(1, "can't open sorter"); - } - - while (1) { - datin.Freeze(); - while ((u = datin.Next())) - sorter.PushWithExtInfo(u); - sorter.NextPortion(); - if (datin.GetError() || datin.IsEof()) - break; - } - - if (datin.GetError()) { - *strrchr(templ, LOCSLASH_C) = 0; - RemoveDirWithContents(templ); - err(1, "in data file error %d", datin.GetError()); - } - if (datin.Close()) { - *strrchr(templ, LOCSLASH_C) = 0; - RemoveDirWithContents(templ); - err(1, "can't close in data file"); - } - - sorter.Sort(memory); - - if (datout.Open(ofile, datin.GetPageSize(), outpages)) { - *strrchr(templ, LOCSLASH_C) = 0; - RemoveDirWithContents(templ); - err(1, "can't write out file"); - } - - while ((u = sorter.Next())) - datout.PushWithExtInfo(u); - - if (sorter.GetError()) - err(1, "sorter error %d", sorter.GetError()); - if (sorter.Close()) - err(1, "can't close sorter"); - - *strrchr(templ, LOCSLASH_C) = 0; - RemoveDirWithContents(templ); - - if (datout.GetError()) - err(1, "out data file error %d", datout.GetError()); - if (datout.Close()) - err(1, "can't close out data file"); - return 0; -} diff --git a/library/cpp/microbdb/wrappers.h b/library/cpp/microbdb/wrappers.h deleted file mode 100644 index 38eb8edebc..0000000000 --- a/library/cpp/microbdb/wrappers.h +++ /dev/null @@ -1,637 +0,0 @@ -#pragma once - -#include "microbdb.h" - -#define MAKEFILTERTMPL(TRecord, MemberFunc, NS) \ - template <typename T> \ - struct MemberFunc; \ - template <> \ - struct MemberFunc<TRecord> { \ - bool operator()(const TRecord* r) { \ - return NS::MemberFunc(r); \ - } \ - } - -#define MAKEJOINTMPL(TRecordA, TRecordB, MemberFunc, NS, TMergeType) \ - template <typename A, typename B> \ - struct MemberFunc; \ - template <> \ - struct MemberFunc<TRecordA, TRecordB> { \ - int operator()(const TRecordA* l, const TRecordB* r) { \ - return NS::MemberFunc(l, r); \ - } \ - }; \ - typedef TMergeRec<TRecordA, TRecordB> TMergeType - -#define MAKEJOINTMPL2(TRecordA, TRecordB, MemberFunc, StructName, TMergeType) \ - template <typename A, typename B> \ - struct StructName; \ - template <> \ - struct StructName<TRecordA, TRecordB> { \ - int operator()(const TRecordA* l, const TRecordB* r) { \ - return MemberFunc(l, r); \ - } \ - }; \ - typedef TMergeRec<TRecordA, TRecordB> TMergeType - -#define MAKEJOINTMPLLEFT(TRecordA, TRecordB, MemberFunc, NS, TMergeType) \ - template <typename A, typename B> \ - struct MemberFunc; \ - template <> \ - struct MemberFunc<TRecordA, TRecordB> { \ - int operator()(const TRecordA* l, const TRecordB* r) { \ - return NS::MemberFunc(l->RecA, r); \ - } \ - }; \ - typedef TMergeRec<TRecordA, TRecordB> TMergeType - -template <class TRec> -class IDatNextSource { -public: - virtual const TRec* Next() = 0; - virtual void Work() { - } -}; - -template <class TRec> -class IDatNextReceiver { -public: - IDatNextReceiver(IDatNextSource<TRec>& source) - : Source(source) - { - } - - virtual void Work() { - Source.Work(); - } - -protected: - IDatNextSource<TRec>& Source; -}; - -template <class TInRec, class TOutRec> -class IDatNextChannel: public IDatNextReceiver<TInRec>, public IDatNextSource<TOutRec> { -public: - IDatNextChannel(IDatNextSource<TInRec>& source) - : IDatNextReceiver<TInRec>(source) - { - } - - virtual void Work() { - IDatNextReceiver<TInRec>::Work(); - } -}; - -class IDatWorker { -public: - virtual void Work() = 0; -}; - -template <class TRec> -class IDatPushReceiver { -public: - virtual void Push(const TRec* rec) = 0; - virtual void Work() = 0; -}; - -template <class TRec> -class IDatPushSource { -public: - IDatPushSource(IDatPushReceiver<TRec>& receiver) - : Receiver(receiver) - { - } - - virtual void Work() { - Receiver.Work(); - } - -protected: - IDatPushReceiver<TRec>& Receiver; -}; - -template <class TInRec, class TOutRec> -class IDatPushChannel: public IDatPushReceiver<TInRec>, public IDatPushSource<TOutRec> { -public: - IDatPushChannel(IDatPushReceiver<TOutRec>& receiver) - : IDatPushSource<TOutRec>(receiver) - { - } - - virtual void Work() { - IDatPushSource<TOutRec>::Work(); - } -}; - -template <class TRec> -class IDatNextToPush: public IDatNextReceiver<TRec>, public IDatPushSource<TRec> { - typedef IDatNextReceiver<TRec> TNextReceiver; - typedef IDatPushSource<TRec> TPushSource; - -public: - IDatNextToPush(IDatNextSource<TRec>& source, IDatPushReceiver<TRec>& receiver) - : TNextReceiver(source) - , TPushSource(receiver) - { - } - - virtual void Work() { - const TRec* rec; - while (rec = TNextReceiver::Source.Next()) - TPushSource::Receiver.Push(rec); - TPushSource::Work(); - TNextReceiver::Work(); - } -}; - -template <class TRec> -class TDatNextPNSplitter: public IDatNextReceiver<TRec>, public IDatNextSource<TRec>, public IDatPushSource<TRec> { -public: - TDatNextPNSplitter(IDatNextSource<TRec>& source, IDatPushReceiver<TRec>& receiver) - : IDatNextReceiver<TRec>(source) - , IDatNextSource<TRec>() - , IDatPushSource<TRec>(receiver) - { - } - - const TRec* Next() { - const TRec* rec = IDatNextReceiver<TRec>::Source.Next(); - if (rec) { - IDatPushSource<TRec>::Receiver.Push(rec); - return rec; - } else { - return 0; - } - } - - virtual void Work() { - IDatNextReceiver<TRec>::Work(); - IDatPushSource<TRec>::Work(); - } -}; - -template <class TRec, class TOutRecA = TRec, class TOutRecB = TRec> -class TDatPushPPSplitter: public IDatPushReceiver<TRec>, public IDatPushSource<TOutRecA>, public IDatPushSource<TOutRecB> { -public: - TDatPushPPSplitter(IDatPushReceiver<TOutRecA>& receiverA, IDatPushReceiver<TOutRecB>& receiverB) - : IDatPushSource<TOutRecA>(receiverA) - , IDatPushSource<TOutRecB>(receiverB) - { - } - - void Push(const TRec* rec) { - IDatPushSource<TOutRecA>::Receiver.Push(rec); - IDatPushSource<TOutRecB>::Receiver.Push(rec); - } - - void Work() { - IDatPushSource<TOutRecA>::Work(); - IDatPushSource<TOutRecB>::Work(); - } -}; - -template <class TRec> -class TFastInDatFile: public TInDatFile<TRec>, public IDatNextSource<TRec> { -public: - typedef TInDatFile<TRec> Base; - - TFastInDatFile(const char* name, bool open = true, size_t pages = dbcfg::fbufsize, int pagesOrBytes = 0) - : TInDatFile<TRec>(name, pages, pagesOrBytes) - , FileName(name) - { - if (open) - Base::Open(name); - } - - void Open() { - Base::Open(FileName); - } - - template <class TPassRec> - bool PassToUid(const TRec* inrec, const TPassRec* torec) { - inrec = Base::Current(); - while (inrec && CompareUids(inrec, torec) < 0) - inrec = Base::Next(); - return (inrec && CompareUids(inrec, torec) == 0); - } - - void Work() { - Base::Close(); - } - - const TRec* Next() { - return Base::Next(); - } - -private: - TString FileName; -}; - -template <class TRec> -class TPushOutDatFile: public TOutDatFile<TRec>, public IDatPushReceiver<TRec> { -public: - typedef TOutDatFile<TRec> Base; - - TPushOutDatFile(const char* name, bool open = true) - : Base(name, dbcfg::pg_docuid, dbcfg::fbufsize, 0) - , FileName(name) - { - if (open) - Base::Open(name); - } - - void Open() { - Base::Open(~FileName); - } - - void Push(const TRec* rec) { - Base::Push(rec); - } - - void Work() { - Base::Close(); - } - -private: - TString FileName; -}; - -template <class TRec> -class TNextOutDatFile: public IDatNextToPush<TRec> { -public: - typedef IDatNextToPush<TRec> TBase; - - TNextOutDatFile(const char* name, IDatNextSource<TRec>& source, bool open = true) - : TBase(source, File) - , File(name, open) - { - } - - void Open() { - File.Open(); - } - -private: - TPushOutDatFile<TRec> File; -}; - -template <class TVal, template <typename T> class TCompare> -class TNextDatSorterMemo: public TDatSorterMemo<TVal, TCompare>, public IDatNextChannel<TVal, TVal> { - typedef TDatSorterMemo<TVal, TCompare> TImpl; - -public: - TNextDatSorterMemo(IDatNextSource<TVal>& source, const char* dir = dbcfg::fname_temp, const char* name = "yet another sorter", size_t memory = dbcfg::small_sorter_size, size_t pagesize = dbcfg::pg_docuid, size_t pages = dbcfg::fbufsize, int pagesOrBytes = 0) - : TImpl(name, memory, pagesize, pages, pagesOrBytes) - , IDatNextChannel<TVal, TVal>(source) - , Sorted(false) - { - TImpl::Open(dir); - } - - void Sort() { - const TVal* rec; - while (rec = IDatNextChannel<TVal, TVal>::Source.Next()) { - TImpl::Push(rec); - } - TImpl::Sort(); - Sorted = true; - } - - const TVal* Next() { - if (!Sorted) - Sort(); - return TImpl::Next(); - } - -private: - bool Sorted; - TString Dir; -}; - -template <class TInRec, class TOutRec> -class TDatConverter: public IDatNextChannel<TInRec, TOutRec> { -public: - TDatConverter(IDatNextSource<TInRec>& source) - : IDatNextChannel<TInRec, TOutRec>(source) - { - } - - virtual void Convert(const TInRec& inrec, TOutRec& outrec) { - outrec(inrec); - } - - const TOutRec* Next() { - const TInRec* rec = IDatNextChannel<TInRec, TOutRec>::Source.Next(); - if (!rec) - return 0; - Convert(*rec, CurrentRec); - return &CurrentRec; - } - -private: - TOutRec CurrentRec; -}; - -template <class TRecA, class TRecB> -class TMergeRec { -public: - const TRecA* RecA; - const TRecB* RecB; -}; - -enum NMergeTypes { - MT_JOIN = 0, - MT_ADD = 1, - MT_OVERWRITE = 2, - MT_TYPENUM -}; - -template <class TRecA, class TRecB, template <typename TA, typename TB> class TCompare> -class TNextDatMerger: public IDatNextReceiver<TRecA>, public IDatNextReceiver<TRecB>, public IDatNextSource<TMergeRec<TRecA, TRecB>> { -public: - TNextDatMerger(IDatNextSource<TRecA>& sourceA, IDatNextSource<TRecB>& sourceB, ui8 mergeType) - : IDatNextReceiver<TRecA>(sourceA) - , IDatNextReceiver<TRecB>(sourceB) - , MergeType(mergeType) - , MoveA(false) - , MoveB(false) - , NotInit(true) - { - } - - const TMergeRec<TRecA, TRecB>* Next() { - if (MoveA || NotInit) - SourceARec = IDatNextReceiver<TRecA>::Source.Next(); - if (MoveB || NotInit) - SourceBRec = IDatNextReceiver<TRecB>::Source.Next(); - NotInit = false; - - // Cout << "Next " << SourceARec->HostId << "\t" << SourceBRec->HostId << "\t" << TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) << "\t" << ::compare(SourceARec->HostId, SourceBRec->HostId) << "\t" << ::compare(1, 2) << "\t" << ::compare(2,1) << Endl; - if (MergeType == MT_ADD && SourceARec && (!SourceBRec || TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0)) { - MergeRec.RecA = SourceARec; - MergeRec.RecB = 0; - MoveA = true; - MoveB = false; - return &MergeRec; - } - - if (MergeType == MT_ADD && SourceBRec && (!SourceARec || TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0)) { - MergeRec.RecA = 0; - MergeRec.RecB = SourceBRec; - MoveA = false; - MoveB = true; - return &MergeRec; - } - - if (MergeType == MT_ADD && SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) == 0) { - MergeRec.RecA = SourceARec; - MergeRec.RecB = SourceBRec; - MoveA = true; - MoveB = true; - return &MergeRec; - } - - while (MergeType == MT_JOIN && SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) != 0) { - while (SourceARec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0) { - SourceARec = IDatNextReceiver<TRecA>::Source.Next(); - } - while (SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) > 0) { - SourceBRec = IDatNextReceiver<TRecB>::Source.Next(); - } - } - - if (MergeType == MT_JOIN && SourceARec && SourceBRec) { - MergeRec.RecA = SourceARec; - MergeRec.RecB = SourceBRec; - MoveA = true; - MoveB = true; - return &MergeRec; - } - - MergeRec.RecA = 0; - MergeRec.RecB = 0; - return 0; - } - - void Work() { - IDatNextReceiver<TRecA>::Source.Work(); - IDatNextReceiver<TRecB>::Source.Work(); - } - -private: - TMergeRec<TRecA, TRecB> MergeRec; - const TRecA* SourceARec; - const TRecB* SourceBRec; - ui8 MergeType; - bool MoveA; - bool MoveB; - bool NotInit; -}; - -/*template<class TRec, class TSource, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > -class TPushDatMerger { -public: - TPushDatMerger(TSource& source, TReceiver& receiver, ui8 mergeType) - : Source(source) - , Receiver(receiver) - , MergeType(mergeType) - { - } - - virtual void Init() { - SourceRec = Source.Next(); - } - - virtual void Push(const TRec* rec) { - while (SourceRec && TCompare<TRec>()(SourceRec, rec, 0) < 0) { - if (MergeType == MT_OVERWRITE || MergeType == MT_ADD) - Receiver.Push(SourceRec); - SourceRec = Source.Next(); - } - - bool intersected = false; - while (SourceRec && TCompare<TRec>()(SourceRec, rec, 0) == 0) { - intersected = true; - if (MergeType == MT_ADD) - Receiver.Push(SourceRec); - SourceRec = Source.Next(); - } - - if (intersected && MergeType == MT_JOIN) - Receiver.Push(rec); - - if (MergeType == MT_OVERWRITE || MergeType == MT_ADD) - Receiver.Push(rec); - } - - virtual void Term() { - if (MergeType == MT_OVERWRITE || MergeType == MT_ADD) { - while (SourceRec) { - Receiver.Push(SourceRec); - SourceRec = Source.Next(); - } - } - } - -private: - TSource& Source; - const TRec* SourceRec; - TReceiver& Receiver; - ui8 MergeType; -};*/ - -/*template <class TRec, class TSourceA, class TSourceB, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > -class TNextDatMerger: public TPushDatMerger<TRec, TSourceA, TCompare, TReceiver> { - typedef TPushDatMerger<TRec, TSourceA, TCompare, TReceiver> TImpl; -public: - TNextDatMerger(TSourceA& sourceA, TSourceB& sourceB, TReceiver& receiver, ui8 mergeType) - : TImpl(sourceA, receiver, mergeType) - , SourceB(sourceB) - { - } - - virtual void Work() { - TImpl::Init(); - while (SourceBRec = SourceB.Next()) { - TImpl::Push(SourceBRec); - } - TImpl::Term(); - } -private: - TSourceB& SourceB; - const TRec* SourceBRec; -};*/ - -/*template <class TRec, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > -class TFilePushDatMerger: public TPushDatMerger<TRec, TFastInDatFile<TRec>, TCompare, TReceiver> { - typedef TPushDatMerger<TRec, TFastInDatFile<TRec>, TCompare, TReceiver> TImpl; -public: - TFilePushDatMerger(const char* name, TReceiver& receiver, ui8 mergeType) - : TImpl(SourceFile, receiver, mergeType) - , SourceFile(name) - { - } - - virtual void Push(const TRec* rec) { - TImpl::Push(rec); - } - - virtual void Term() { - TImpl::Term(); - } -private: - TFastInDatFile<TRec> SourceFile; -};*/ - -/*template <class TRec, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > -class TFileNextDatMerger: public TNextDatMerger<TRec, TFastInDatFile<TRec>, TFastInDatFile<TRec>, TCompare, TReceiver> { - typedef TNextDatMerger<TRec, TFastInDatFile<TRec>, TFastInDatFile<TRec>, TCompare, TReceiver> TImpl; -public: - TFileNextDatMerger(const char* sourceAname, const char* sourceBname, TReceiver& receiver, ui8 mergeType) - : TImpl(FileA, FileB, receiver, mergeType) - , FileA(sourceAname) - , FileB(sourceBname) - { - } - - virtual void Work() { - TImpl::Work(); - } -private: - TFastInDatFile<TRec> FileA; - TFastInDatFile<TRec> FileB; -};*/ - -template <class TRec, template <typename T> class TPredicate> -class TDatNextFilter: public IDatNextChannel<TRec, TRec> { -public: - TDatNextFilter(IDatNextSource<TRec>& source) - : IDatNextChannel<TRec, TRec>(source) - { - } - - virtual const TRec* Next() { - const TRec* rec; - while ((rec = IDatNextChannel<TRec, TRec>::Source.Next()) != 0 && !Check(rec)) { - } - if (!rec) - return 0; - return rec; - } - -protected: - virtual bool Check(const TRec* rec) { - return TPredicate<TRec>()(rec); - } -}; - -template <class TRec, template <typename T> class TPredicate> -class TDatPushFilter: public IDatPushChannel<TRec, TRec> { -public: - TDatPushFilter(IDatPushReceiver<TRec>& receiver) - : IDatPushChannel<TRec, TRec>(receiver) - { - } - - virtual void Push(const TRec* rec) { - if (Check(rec)) - IDatPushChannel<TRec, TRec>::Receiver.Push(rec); - } - -private: - virtual bool Check(const TRec* rec) { - return TPredicate<TRec>()(rec); - } -}; - -template <class TInRec, class TOutRec, template <typename T> class TCompare> -class TDatGrouper: public IDatNextChannel<TInRec, TOutRec> { -public: - TDatGrouper(IDatNextSource<TInRec>& source) - : IDatNextChannel<TInRec, TOutRec>(source) - , Begin(true) - , Finish(false) - , HasOutput(false) - { - } - - const TOutRec* Next() { - while (CurrentRec = IDatNextChannel<TInRec, TOutRec>::Source.Next()) { - int cmp = 0; - if (Begin) { - Begin = false; - OnStart(); - } else if ((cmp = TCompare<TInRec>()(CurrentRec, LastRec, 0)) != 0) { - OnFinish(); - OnStart(); - } - OnRecord(); - LastRec = CurrentRec; - if (HasOutput) { - HasOutput = false; - return &OutRec; - } - } - if (!Finish) - OnFinish(); - Finish = true; - if (HasOutput) { - HasOutput = false; - return &OutRec; - } - return 0; - } - -protected: - virtual void OnStart() = 0; - virtual void OnRecord() = 0; - virtual void OnFinish() = 0; - - const TInRec* CurrentRec; - const TInRec* LastRec; - TOutRec OutRec; - - bool Begin; - bool Finish; - bool HasOutput; -}; diff --git a/library/cpp/microbdb/ya.make b/library/cpp/microbdb/ya.make deleted file mode 100644 index 3e553f8535..0000000000 --- a/library/cpp/microbdb/ya.make +++ /dev/null @@ -1,36 +0,0 @@ -LIBRARY() - -SRCS( - align.h - compressed.h - extinfo.h - file.cpp - hashes.h - header.h - header.cpp - heap.h - input.h - microbdb.cpp - noextinfo.proto - output.h - powersorter.h - reader.h - safeopen.h - sorter.h - sorterdef.h - utility.h - wrappers.h -) - -PEERDIR( - contrib/libs/fastlz - contrib/libs/libc_compat - contrib/libs/protobuf - contrib/libs/snappy - contrib/libs/zlib - library/cpp/deprecated/fgood - library/cpp/on_disk/st_hash - library/cpp/packedtypes -) - -END() diff --git a/library/cpp/on_disk/CMakeLists.txt b/library/cpp/on_disk/CMakeLists.txt index ade3b33c9a..4202947169 100644 --- a/library/cpp/on_disk/CMakeLists.txt +++ b/library/cpp/on_disk/CMakeLists.txt @@ -7,4 +7,3 @@ add_subdirectory(chunks) -add_subdirectory(st_hash) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index ad332fef62..0000000000 --- a/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-on_disk-st_hash) -target_link_libraries(cpp-on_disk-st_hash PUBLIC - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-on_disk-st_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp -) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt b/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 737875ca6c..0000000000 --- a/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-on_disk-st_hash) -target_link_libraries(cpp-on_disk-st_hash PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-on_disk-st_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp -) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 737875ca6c..0000000000 --- a/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-on_disk-st_hash) -target_link_libraries(cpp-on_disk-st_hash PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-on_disk-st_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp -) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.txt b/library/cpp/on_disk/st_hash/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/on_disk/st_hash/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt deleted file mode 100644 index ad332fef62..0000000000 --- a/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,18 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-on_disk-st_hash) -target_link_libraries(cpp-on_disk-st_hash PUBLIC - contrib-libs-cxxsupp - yutil - cpp-deprecated-mapped_file -) -target_sources(cpp-on_disk-st_hash PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp -) diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp deleted file mode 100644 index ef5af4d432..0000000000 --- a/library/cpp/on_disk/st_hash/fake.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "save_stl.h" -#include "static_hash.h" -#include "static_hash_map.h" -#include "sthash_iterators.h" diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h deleted file mode 100644 index 00f8f0e20d..0000000000 --- a/library/cpp/on_disk/st_hash/save_stl.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include <util/generic/hash.h> -#include <util/system/yassert.h> -#include <util/stream/output.h> - -// this structure might be replaced with sthashtable class -template <class HF, class Eq, class size_type> -struct sthashtable_nvm_sv { - sthashtable_nvm_sv() { - if (sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type)) { - memset(this, 0, sizeof(sthashtable_nvm_sv)); - } - } - - sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd) - : sthashtable_nvm_sv() - { - hf = phf; - eq = peq; - num_buckets = pnb; - num_elements = pne; - data_end_off = pnd; - } - - HF hf; - Eq eq; - size_type num_buckets; - size_type num_elements; - size_type data_end_off; -}; - -/** - * Some hack to save both THashMap and sthash. - * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method. - */ -template <class V, class K, class HF, class Ex, class Eq, class A> -template <class KeySaver> -inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const { - Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count()); - typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type; - sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0}; - // to do: m.b. use just the size of corresponding object? - typename KeySaver::TSizeType cur_off = sizeof(sv_type) + - (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType); - sv.data_end_off = cur_off; - const_iterator n; - for (n = begin(); n != end(); ++n) { - sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n)); - } - typename KeySaver::TSizeType* sb = stHash ? (typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr; - if (stHash) - sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]); - //saver.Align(sizeof(char*)); - stream->Write(&sv, sizeof(sv)); - - size_type i; - //save vector - for (i = 0; i < buckets.size(); ++i) { - node* cur = buckets[i]; - stream->Write(&cur_off, sizeof(cur_off)); - if (cur) { - while (!((uintptr_t)cur & 1)) { - cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val)); - cur = cur->next; - } - } - if (stHash) - cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]); - } - stream->Write(&cur_off, sizeof(cur_off)); // end mark - for (i = 0; i < buckets.size(); ++i) { - node* cur = buckets[i]; - if (cur) { - while (!((uintptr_t)cur & 1)) { - ks.SaveRecord(stream, cur->val); - cur = cur->next; - } - } - if (stHash) - stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]); - } - return 0; -} diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h deleted file mode 100644 index ca7a6ccd36..0000000000 --- a/library/cpp/on_disk/st_hash/static_hash.h +++ /dev/null @@ -1,420 +0,0 @@ -#pragma once - -#include "save_stl.h" -#include "sthash_iterators.h" - -#include <util/generic/hash.h> -#include <util/generic/vector.h> -#include <util/generic/buffer.h> -#include <util/generic/cast.h> -#include <util/generic/yexception.h> // for save/load only -#include <util/stream/file.h> -#include <util/stream/buffer.h> -#include <utility> - -#include <memory> -#include <algorithm> -#include <functional> - -#include <cstdlib> -#include <cstddef> - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible' -#endif - -template <class HashType, class KeySaver> -inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) { - KeySaver ks; - if (hash.save_for_st(stream, ks)) - ythrow yexception() << "Could not save hash to stream"; -} - -template <class HashType> -inline void SaveHashToStream(HashType& hash, IOutputStream* stream) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - return SaveHashToStreamEx<HashType, KeySaver>(hash, stream); -} - -template <class HashType, class KeySaver> -inline void SaveHashToFileEx(HashType& hash, const char* fileName) { - TFileOutput output(fileName); - SaveHashToStreamEx<HashType, KeySaver>(hash, &output); -} - -template <class HashType> -inline void SaveHashToFile(HashType& hash, const char* fileName) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType> -inline void SaveHashSetToFile(HashType& hash, const char* fileName) { - typedef TSthashSetWriter<typename HashType::key_type, ui64> KeySaver; - return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType> -inline void SaveHashToFile32(HashType& hash, const char* fileName) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; - return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType, class KeySaver> -inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) { - TBufferOutput stream(buffer); - KeySaver ks; - if (hash.save_for_st(&stream, ks, stHash)) - ythrow yexception() << "Could not save hash to memory"; -} - -template <class HashType> -inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); -} - -/** - * Some hack to save both THashMap and sthash. - * THashMap and sthash must have same bucket_count(). - */ -template <class HashType, class StHashType> -inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - typedef sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* SH; - - SH sh = reinterpret_cast<SH>(stHash); - SaveHashToBufferEx<HashType, KeySaver>(hash, buffer, sh); -} - -template <class HashType> -inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; - SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); -} - -template <class Iter, typename size_type_f = ui64> -class sthashtable { -public: - typedef typename Iter::TKeyType key_type; - typedef typename Iter::TValueType value_type; - typedef typename Iter::THasherType hasher; - typedef typename Iter::TKeyEqualType key_equal; - - typedef size_type_f size_type; - typedef ptrdiff_t difference_type; - typedef const value_type* const_pointer; - typedef const value_type& const_reference; - - typedef Iter const_iterator; - - const hasher hash_funct() const { - return hash; - } - const key_equal key_eq() const { - return equals; - } - -private: - const hasher hash; - const key_equal equals; - -private: - const_iterator iter_at_bucket(size_type bucket) const { - return (const_iterator)(((char*)this + buckets()[bucket])); - } - - const_iterator iter_at_bucket_or_end(size_type bucket) const { - if (bucket < num_buckets) - return (const_iterator)(((char*)this + buckets()[bucket])); - else - return end(); - } - - const size_type num_buckets; - const size_type num_elements; - const size_type data_end_off; - -protected: //shut up gcc warning - // we can't construct/destroy this object at all! - sthashtable(); - sthashtable(const sthashtable& ht); - ~sthashtable(); - -public: - // const size_type *buckets; - const size_type* buckets() const { - return (size_type*)((char*)this + sizeof(*this)); - } - const size_type buckets(size_type n) const { - return buckets()[n]; - } - - size_type size() const { - return num_elements; - } - size_type max_size() const { - return size_type(-1); - } - bool empty() const { - return size() == 0; - } - - const_iterator begin() const { - return num_buckets ? iter_at_bucket(0) : end(); - } - - const_iterator end() const { - return (const_iterator)(((char*)this + data_end_off)); - } - -public: - size_type size_in_bytes() const { - return data_end_off; - } - - size_type bucket_count() const { - return num_buckets; - } - - size_type elems_in_bucket(size_type bucket) const { - size_type result = 0; - const_iterator first = iter_at_bucket(bucket); - const_iterator last = iter_at_bucket_or_end(bucket + 1); - - for (; first != last; ++first) - ++result; - return result; - } - - template <class TheKey> - const_iterator find(const TheKey& key) const { - size_type n = bkt_num_key(key); - const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1)); - for (; - first != last && !first.KeyEquals(equals, key); - ++first) { - } - if (first != last) - return first; - return end(); - } - - size_type count(const key_type& key) const { - const size_type n = bkt_num_key(key); - size_type result = 0; - const_iterator first = iter_at_bucket(n); - const_iterator last = iter_at_bucket_or_end(n + 1); - - for (; first != last; ++first) - if (first.KeyEquals(equals, key)) - ++result; - return result; - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const; - -private: - template <class TheKey> - size_type bkt_num_key(const TheKey& key) const { - return hash(key) % num_buckets; - } -}; - -template <class I, class size_type_f> -std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) const { - typedef std::pair<const_iterator, const_iterator> pii; - const size_type n = bkt_num_key(key); - const_iterator first = iter_at_bucket(n); - const_iterator last = iter_at_bucket_or_end(n + 1); - - for (; first != last; ++first) { - if (first.KeyEquals(equals, key)) { - const_iterator cur = first; - ++cur; - for (; cur != last; ++cur) - if (!cur.KeyEquals(equals, key)) - return pii(const_iterator(first), - const_iterator(cur)); - return pii(const_iterator(first), - const_iterator(last)); - } - } - return pii(end(), end()); -} - -/* end __SGI_STL_HASHTABLE_H */ - -template <class Key, class T, class HashFcn /*= hash<Key>*/, - class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash { -private: - typedef sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f> ht; - ht rep; - -public: - typedef typename ht::key_type key_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef T mapped_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::const_reference const_reference; - - typedef typename ht::const_iterator const_iterator; - - const hasher hash_funct() const { - return rep.hash_funct(); - } - const key_equal key_eq() const { - return rep.key_eq(); - } - -public: - size_type size() const { - return rep.size(); - } - size_type max_size() const { - return rep.max_size(); - } - bool empty() const { - return rep.empty(); - } - - const_iterator begin() const { - return rep.begin(); - } - const_iterator end() const { - return rep.end(); - } - -public: - template <class TheKey> - const_iterator find(const TheKey& key) const { - return rep.find(key); - } - template <class TheKey> - bool has(const TheKey& key) const { - return rep.find(key) != rep.end(); - } - - size_type count(const key_type& key) const { - return rep.count(key); - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - size_type size_in_bytes() const { - return rep.size_in_bytes(); - } - - size_type bucket_count() const { - return rep.bucket_count(); - } - size_type max_bucket_count() const { - return rep.max_bucket_count(); - } - size_type elems_in_bucket(size_type n) const { - return rep.elems_in_bucket(n); - } - - const size_type* buckets() const { - return rep.buckets(); - } - const size_type buckets(size_type n) const { - return rep.buckets()[n]; - } -}; - -template <class Key, class HashFcn, - class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> { - typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base; - -public: - using Base::const_iterator; - using Base::hasher; - using Base::key_equal; - using Base::key_type; - using Base::size_type; - using Base::value_type; -}; - -template <class Key, class T, class HashFcn /*= hash<Key>*/, - class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash_mm { -private: - typedef sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f> ht; - ht rep; - -public: - typedef typename ht::key_type key_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef T mapped_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::const_reference const_reference; - - typedef typename ht::const_iterator const_iterator; - - const hasher hash_funct() const { - return rep.hash_funct(); - } - const key_equal key_eq() const { - return rep.key_eq(); - } - -public: - size_type size() const { - return rep.size(); - } - size_type max_size() const { - return rep.max_size(); - } - bool empty() const { - return rep.empty(); - } - - const_iterator begin() const { - return rep.begin(); - } - const_iterator end() const { - return rep.end(); - } - - const_iterator find(const key_type& key) const { - return rep.find(key); - } - - size_type count(const key_type& key) const { - return rep.count(key); - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - size_type bucket_count() const { - return rep.bucket_count(); - } - size_type max_bucket_count() const { - return rep.max_bucket_count(); - } - size_type elems_in_bucket(size_type n) const { - return rep.elems_in_bucket(n); - } -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h deleted file mode 100644 index 5dc50abd39..0000000000 --- a/library/cpp/on_disk/st_hash/static_hash_map.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once - -#include "static_hash.h" - -#include <library/cpp/deprecated/mapped_file/mapped_file.h> - -#include <util/system/filemap.h> - -template <class SH> -struct sthash_mapped_c { - typedef SH H; - typedef typename H::const_iterator const_iterator; - TMappedFile M; - H* hsh; - sthash_mapped_c() - : M() - , hsh(nullptr) - { - } - sthash_mapped_c(const char* fname, bool precharge) - : M() - , hsh(nullptr) - { - Open(fname, precharge); - } - void Open(const char* fname, bool precharge) { - M.init(fname); - if (precharge) - M.precharge(); - hsh = (H*)M.getData(); - if (M.getSize() < sizeof(H) || (ssize_t)M.getSize() != hsh->end().Data - (char*)hsh) - ythrow yexception() << "Could not map hash: " << fname << " is damaged"; - } - H* operator->() { - return hsh; - } - const H* operator->() const { - return hsh; - } - H* GetSthash() { - return hsh; - } - const H* GetSthash() const { - return hsh; - } -}; - -template <class Key, class T, class Hash> -struct sthash_mapped: public sthash_mapped_c<sthash<Key, T, Hash>> { - typedef sthash<Key, T, Hash> H; - sthash_mapped(const char* fname, bool precharge) - : sthash_mapped_c<H>(fname, precharge) - { - } - sthash_mapped() - : sthash_mapped_c<H>() - { - } -}; diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h deleted file mode 100644 index 6a9ebdd6c3..0000000000 --- a/library/cpp/on_disk/st_hash/sthash_iterators.h +++ /dev/null @@ -1,334 +0,0 @@ -#pragma once - -#include "save_stl.h" - -#include <util/system/align.h> - -/** - This file provides functionality for saving some relatively simple THashMap object - to disk in a form that can be mapped read-only (via mmap) at any address. - That saved object is accessed via pointer to sthash object (that must have - the same parameters as original THashMap object) - - If either key or value are variable-sized (i.e. contain pointers), user must - write his own instantiation of TSthashIterator (read iterator for sthash) and - TSthashWriter (write iterator for THashMap). - An example for <const char *, B> pair is in here. -**/ - -// TEmptyValue and SizeOfEx are helpers for sthash_set -struct TEmptyValue { - TEmptyValue() = default; -}; - -template <class T> -inline size_t SizeOfEx() { - return sizeof(T); -} - -template <> -inline size_t SizeOfEx<TEmptyValue>() { - return 0; -} -template <> -inline size_t SizeOfEx<const TEmptyValue>() { - return 0; -} - -template <class TKey, class TValue, class HashFcn, class EqualKey> -struct TSthashIterator { - // Implementation for simple types - typedef const TKey TKeyType; - typedef const TValue TValueType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - explicit TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - TKey& Key() const { - return *(TKey*)Data; - } - TValue& Value() { - return *(TValue*)(Data + sizeof(TKey)); - } - const TValue& Value() const { - return *(const TValue*)(Data + sizeof(TKey)); - } - - template <class AnotherKeyType> - bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const { - return eq(*(TKey*)Data, key); - } - - size_t GetLength() const { - return sizeof(TKey) + SizeOfEx<TValue>(); - } -}; - -template <class Key, class Value, typename size_type_o = ui64> -struct TSthashWriter { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const Key, const Value>&) const { - return sizeof(Key) + SizeOfEx<Value>(); - } - int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const { - stream->Write(&record.first, sizeof(Key)); - stream->Write(&record.second, SizeOfEx<Value>()); - return 0; - } -}; - -// Remember that this simplified implementation makes a copy of `key' in std::make_pair. -// It can also waste some memory on undesired alignment. -template <class Key, typename size_type_o = ui64> -struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> { - typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter; - size_t GetRecordSize(const Key& key) const { - return MapWriter::GetRecordSize(std::make_pair(key, TEmptyValue())); - } - int SaveRecord(IOutputStream* stream, const Key& key) const { - return MapWriter::SaveRecord(stream, std::make_pair(key, TEmptyValue())); - } -}; - -// we can't save something with pointers without additional tricks - -template <class A, class B, class HashFcn, class EqualKey> -struct TSthashIterator<A*, B, HashFcn, EqualKey> {}; - -template <class A, class B, class HashFcn, class EqualKey> -struct TSthashIterator<A, B*, HashFcn, EqualKey> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A*, B*, size_type_o> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A*, B, size_type_o> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A, B*, size_type_o> {}; - -template <class T> -inline size_t AlignForChrKey() { - return 4; // TODO: change this (requeres rebuilt of a few existing files) -} - -template <> -inline size_t AlignForChrKey<TEmptyValue>() { - return 1; -} - -template <> -inline size_t AlignForChrKey<const TEmptyValue>() { - return AlignForChrKey<TEmptyValue>(); -} - -// !! note that for char*, physical placement of key and value is swapped -template <class TValue, class HashFcn, class EqualKey> -struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> { - typedef const TValue TValueType; - typedef const char* TKeyType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - const char* Key() const { - return Data + SizeOfEx<TValue>(); - } - TValue& Value() { - return *(TValue*)Data; - } - const TValue& Value() const { - return *(const TValue*)Data; - } - - template <class K> - bool KeyEquals(const EqualKey& eq, const K& k) const { - return eq(Data + SizeOfEx<TValue>(), k); - } - - size_t GetLength() const { - size_t length = strlen(Data + SizeOfEx<TValue>()) + 1 + SizeOfEx<TValue>(); - length = AlignUp(length, AlignForChrKey<TValue>()); - return length; - } -}; - -template <class Value, typename size_type_o> -struct TSthashWriter<const char*, Value, size_type_o> { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const char*, const Value>& record) const { - size_t length = strlen(record.first) + 1 + SizeOfEx<Value>(); - length = AlignUp(length, AlignForChrKey<Value>()); - return length; - } - int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const { - const char* alignBuffer = "qqqq"; - stream->Write(&record.second, SizeOfEx<Value>()); - size_t length = strlen(record.first) + 1; - stream->Write(record.first, length); - length = AlignUpSpace(length, AlignForChrKey<Value>()); - if (length) - stream->Write(alignBuffer, length); - return 0; - } -}; - -template <class TKey, class HashFcn, class EqualKey> -struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> { - typedef const TKey TKeyType; - typedef const char* TValueType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - TKey& Key() { - return *(TKey*)Data; - } - const char* Value() const { - return Data + sizeof(TKey); - } - - template <class K> - bool KeyEquals(const EqualKey& eq, const K& k) const { - return eq(*(TKey*)Data, k); - } - - size_t GetLength() const { - size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey); - length = AlignUp(length, (size_t)4); - return length; - } -}; - -template <class Key, typename size_type_o> -struct TSthashWriter<Key, const char*, size_type_o> { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const Key, const char*>& record) const { - size_t length = strlen(record.second) + 1 + sizeof(Key); - length = AlignUp(length, (size_t)4); - return length; - } - int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const { - const char* alignBuffer = "qqqq"; - stream->Write(&record.first, sizeof(Key)); - size_t length = strlen(record.second) + 1; - stream->Write(record.second, length); - length = AlignUpSpace(length, (size_t)4); - if (length) - stream->Write(alignBuffer, length); - return 0; - } -}; - -template <class HashFcn, class EqualKey> -struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> { - typedef const char* TKeyType; - typedef const char* TValueType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - const char* Key() const { - return Data; - } - const char* Value() const { - return Data + strlen(Data) + 1; - } - - template <class K> - bool KeyEquals(const EqualKey& eq, const K& k) const { - return eq(Data, k); - } - - size_t GetLength() const { - size_t length = strlen(Data) + 1; - length += strlen(Data + length) + 1; - return length; - } -}; - -template <typename size_type_o> -struct TSthashWriter<const char*, const char*, size_type_o> { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const char*, const char*>& record) const { - size_t size = strlen(record.first) + strlen(record.second) + 2; - return size; - } - int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const { - stream->Write(record.first, strlen(record.first) + 1); - stream->Write(record.second, strlen(record.second) + 1); - return 0; - } -}; diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make deleted file mode 100644 index 8c6d05711c..0000000000 --- a/library/cpp/on_disk/st_hash/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - fake.cpp - save_stl.h - static_hash.h - static_hash_map.h - sthash_iterators.h -) - -PEERDIR( - library/cpp/deprecated/mapped_file -) - -END() diff --git a/library/cpp/regex/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/CMakeLists.darwin-x86_64.txt index 877d40538b..6e2a4fabcd 100644 --- a/library/cpp/regex/CMakeLists.darwin-x86_64.txt +++ b/library/cpp/regex/CMakeLists.darwin-x86_64.txt @@ -6,7 +6,6 @@ # original buildsystem will not be accepted. -add_subdirectory(glob) add_subdirectory(hyperscan) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.linux-aarch64.txt b/library/cpp/regex/CMakeLists.linux-aarch64.txt index 84c257a819..279390306b 100644 --- a/library/cpp/regex/CMakeLists.linux-aarch64.txt +++ b/library/cpp/regex/CMakeLists.linux-aarch64.txt @@ -6,6 +6,5 @@ # original buildsystem will not be accepted. -add_subdirectory(glob) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.linux-x86_64.txt b/library/cpp/regex/CMakeLists.linux-x86_64.txt index 877d40538b..6e2a4fabcd 100644 --- a/library/cpp/regex/CMakeLists.linux-x86_64.txt +++ b/library/cpp/regex/CMakeLists.linux-x86_64.txt @@ -6,7 +6,6 @@ # original buildsystem will not be accepted. -add_subdirectory(glob) add_subdirectory(hyperscan) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.windows-x86_64.txt b/library/cpp/regex/CMakeLists.windows-x86_64.txt index 877d40538b..6e2a4fabcd 100644 --- a/library/cpp/regex/CMakeLists.windows-x86_64.txt +++ b/library/cpp/regex/CMakeLists.windows-x86_64.txt @@ -6,7 +6,6 @@ # original buildsystem will not be accepted. -add_subdirectory(glob) add_subdirectory(hyperscan) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index ca8383e355..0000000000 --- a/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-regex-glob) -target_link_libraries(cpp-regex-glob PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-charset -) -target_sources(cpp-regex-glob PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp -) diff --git a/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt b/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 3953937c6d..0000000000 --- a/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-regex-glob) -target_link_libraries(cpp-regex-glob PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-charset -) -target_sources(cpp-regex-glob PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp -) diff --git a/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt b/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 3953937c6d..0000000000 --- a/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-regex-glob) -target_link_libraries(cpp-regex-glob PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-charset -) -target_sources(cpp-regex-glob PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp -) diff --git a/library/cpp/regex/glob/CMakeLists.txt b/library/cpp/regex/glob/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/regex/glob/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt b/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt deleted file mode 100644 index ca8383e355..0000000000 --- a/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-regex-glob) -target_link_libraries(cpp-regex-glob PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-charset -) -target_sources(cpp-regex-glob PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp -) diff --git a/library/cpp/regex/glob/glob.cpp b/library/cpp/regex/glob/glob.cpp deleted file mode 100644 index 9da058122a..0000000000 --- a/library/cpp/regex/glob/glob.cpp +++ /dev/null @@ -1,921 +0,0 @@ -#define FROM_IMPLEMENTATION -#include "glob_compat.h" - -#if defined(USE_INTERNAL_GLOB) -/* - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Guido van Rossum. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <library/cpp/charset/ci_string.h> -#include <util/system/compat.h> -#include <util/folder/dirut.h> - -/* - * glob(3) -- a superset of the one defined in POSIX 1003.2. - * - * The [!...] convention to negate a range is supported (SysV, Posix, ksh). - * - * Optional extra services, controlled by flags not defined by POSIX: - * - * GLOB_QUOTE: - * Escaping convention: \ inhibits any special meaning the following - * character might have (except \ at end of string is retained). - * GLOB_MAGCHAR: - * Set in gl_flags if pattern contained a globbing character. - * GLOB_NOMAGIC: - * Same as GLOB_NOCHECK, but it will only append pattern if it did - * not contain any magic characters. [Used in csh style globbing] - * GLOB_ALTDIRFUNC: - * Use alternately specified directory access functions. - * GLOB_TILDE: - * expand ~user/foo to the /home/dir/of/user/foo - * GLOB_BRACE: - * expand {1,2}{a,b} to 1a 1b 2a 2b - * gl_matchc: - * Number of matches in the current invocation of glob. - */ - -/* - * Some notes on multibyte character support: - * 1. Patterns with illegal byte sequences match nothing - even if - * GLOB_NOCHECK is specified. - * 2. Illegal byte sequences in filenames are handled by treating them as - * single-byte characters with a value of the first byte of the sequence - * cast to wchar_t. - * 3. State-dependent encodings are not currently supported. - */ - -//#include <sys/param.h> -#include <sys/stat.h> - -#include <ctype.h> -//#include <dirent.h> -#include <errno.h> -#include <limits.h> -//#include <pwd.h> -//#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#if defined(_unix_) -#include <unistd.h> -#endif -#include <wchar.h> - -#if !defined(_unix_) -// silly replacement for compilation -using uint_fast64_t = ui64; -using u_int = unsigned int; -using u_char = unsigned char; -#define ARG_MAX 256 -#define S_ISDIR(x) ((x) & _S_IFDIR) -#define S_ISLNK(x) 0 -#define lstat stat -inline bool issetugid() { return false; } -inline char *getlogin() { return 0; } -inline int getuid() { return 0; } -struct passwd { - char *pw_dir; -}; -inline passwd *getpwuid(int) { return 0; } -inline passwd *getpwnam(char *) { return 0; } -#endif - -#define __collate_load_error 1 -inline int __collate_range_cmp(int, int) { return 0; } -#undef COMMA // was defined in stroka.h -// end silly replacement - -//#include "collate.h" - -#define DOLLAR '$' -#define DOT '.' -#define EOS '\0' -#define LBRACKET '[' -#define NOT '!' -#define QUESTION '?' -#define QUOTE '\\' -#define RANGE '-' -#define RBRACKET ']' -#define SEP '/' -#define STAR '*' -#define TILDE '~' -#define UNDERSCORE '_' -#define LBRACE '{' -#define RBRACE '}' -#define SLASH '/' -#define COMMA ',' - -#ifndef DEBUG - -#define M_QUOTE 0x8000000000ULL -#define M_PROTECT 0x4000000000ULL -#define M_MASK 0xffffffffffULL -#define M_CHAR 0x00ffffffffULL - -using Char = uint_fast64_t; - -#else - -#define M_QUOTE 0x80 -#define M_PROTECT 0x40 -#define M_MASK 0xff -#define M_CHAR 0x7f - -using Char = char; - -#endif - - -#define CHAR(c) ((Char)((c)&M_CHAR)) -#define META(c) ((Char)((c)|M_QUOTE)) -#define M_ALL META('*') -#define M_END META(']') -#define M_NOT META('!') -#define M_ONE META('?') -#define M_RNG META('-') -#define M_SET META('[') -#define ismeta(c) (((c)&M_QUOTE) != 0) - - -static int compare(const void *, const void *); -static int g_Ctoc(const Char *, char *, u_int); -static int g_lstat(Char *, struct stat *, glob_t *); -static DIR *g_opendir(Char *, glob_t *); -static Char *g_strchr(Char *, wchar_t); -#ifdef notdef -static Char *g_strcat(Char *, const Char *); -#endif -static int glob0(const Char *, glob_t *, int *); -static int glob1(Char *, glob_t *, int *); -static int glob2(Char *, Char *, Char *, Char *, glob_t *, int *); -static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, int *); -static int globextend(const Char *, glob_t *, int *); -static const Char * - globtilde(const Char *, Char *, size_t, glob_t *); -static int globexp1(const Char *, glob_t *, int *); -static int globexp2(const Char *, const Char *, glob_t *, int *, int *); -static int match(Char *, Char *, Char *); -#ifdef DEBUG -static void qprintf(const char *, Char *); -#endif - -int -glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob) -{ - const u_char *patnext; - int limit; - Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; - mbstate_t mbs; - wchar_t wc; - size_t clen; - - patnext = (u_char *) pattern; - if (!(flags & GLOB_APPEND)) { - pglob->gl_pathc = 0; - pglob->gl_pathv = NULL; - if (!(flags & GLOB_DOOFFS)) - pglob->gl_offs = 0; - } - if (flags & GLOB_LIMIT) { - limit = pglob->gl_matchc; - if (limit == 0) - limit = ARG_MAX; - } else - limit = 0; - pglob->gl_flags = flags & ~GLOB_MAGCHAR; - pglob->gl_errfunc = errfunc; - pglob->gl_matchc = 0; - - bufnext = patbuf; - bufend = bufnext + MAXPATHLEN - 1; - if (flags & GLOB_NOESCAPE) { - memset(&mbs, 0, sizeof(mbs)); - while (bufend - bufnext >= MB_CUR_MAX) { - clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs); - if (clen == (size_t)-1 || clen == (size_t)-2) - return (GLOB_NOMATCH); - else if (clen == 0) - break; - *bufnext++ = wc; - patnext += clen; - } - } else { - /* Protect the quoted characters. */ - memset(&mbs, 0, sizeof(mbs)); - while (bufend - bufnext >= MB_CUR_MAX) { - if (*patnext == QUOTE) { - if (*++patnext == EOS) { - *bufnext++ = QUOTE | M_PROTECT; - continue; - } - prot = M_PROTECT; - } else - prot = 0; - clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs); - if (clen == (size_t)-1 || clen == (size_t)-2) - return (GLOB_NOMATCH); - else if (clen == 0) - break; - *bufnext++ = wc | prot; - patnext += clen; - } - } - *bufnext = EOS; - - if (flags & GLOB_BRACE) - return globexp1(patbuf, pglob, &limit); - else - return glob0(patbuf, pglob, &limit); -} - -/* - * Expand recursively a glob {} pattern. When there is no more expansion - * invoke the standard globbing routine to glob the rest of the magic - * characters - */ -static int -globexp1(const Char *pattern, glob_t *pglob, int *limit) -{ - const Char* ptr = pattern; - int rv; - - /* Protect a single {}, for find(1), like csh */ - if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) - return glob0(pattern, pglob, limit); - - while ((ptr = (const Char *) g_strchr((Char *) ptr, LBRACE)) != NULL) - if (!globexp2(ptr, pattern, pglob, &rv, limit)) - return rv; - - return glob0(pattern, pglob, limit); -} - - -/* - * Recursive brace globbing helper. Tries to expand a single brace. - * If it succeeds then it invokes globexp1 with the new pattern. - * If it fails then it tries to glob the rest of the pattern and returns. - */ -static int -globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, int *limit) -{ - int i; - Char *lm, *ls; - const Char *pe, *pm, *pm1, *pl; - Char patbuf[MAXPATHLEN]; - - /* copy part up to the brace */ - for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) - continue; - *lm = EOS; - ls = lm; - - /* Find the balanced brace */ - for (i = 0, pe = ++ptr; *pe; pe++) - if (*pe == LBRACKET) { - /* Ignore everything between [] */ - for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) - continue; - if (*pe == EOS) { - /* - * We could not find a matching RBRACKET. - * Ignore and just look for RBRACE - */ - pe = pm; - } - } - else if (*pe == LBRACE) - i++; - else if (*pe == RBRACE) { - if (i == 0) - break; - i--; - } - - /* Non matching braces; just glob the pattern */ - if (i != 0 || *pe == EOS) { - *rv = glob0(patbuf, pglob, limit); - return 0; - } - - for (i = 0, pl = pm = ptr; pm <= pe; pm++) - switch (*pm) { - case LBRACKET: - /* Ignore everything between [] */ - for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++) - continue; - if (*pm == EOS) { - /* - * We could not find a matching RBRACKET. - * Ignore and just look for RBRACE - */ - pm = pm1; - } - break; - - case LBRACE: - i++; - break; - - case RBRACE: - if (i) { - i--; - break; - } - [[fallthrough]]; - case COMMA: - if (i && *pm == COMMA) - break; - else { - /* Append the current string */ - for (lm = ls; (pl < pm); *lm++ = *pl++) - continue; - /* - * Append the rest of the pattern after the - * closing brace - */ - for (pl = pe + 1; (*lm++ = *pl++) != EOS;) - continue; - - /* Expand the current pattern */ -#ifdef DEBUG - qprintf("globexp2:", patbuf); -#endif - *rv = globexp1(patbuf, pglob, limit); - - /* move after the comma, to the next string */ - pl = pm + 1; - } - break; - - default: - break; - } - *rv = 0; - return 0; -} - - - -/* - * expand tilde from the passwd file. - */ -static const Char * -globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) -{ - struct passwd *pwd; - char *h; - const Char *p; - Char *b, *eb; - - if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) - return pattern; - - /* - * Copy up to the end of the string or / - */ - eb = &patbuf[patbuf_len - 1]; - for (p = pattern + 1, h = (char *) patbuf; - h < (char *)eb && *p && *p != SLASH; *h++ = (char)*p++) - continue; - - *h = EOS; - - if (((char *) patbuf)[0] == EOS) { - /* - * handle a plain ~ or ~/ by expanding $HOME first (iff - * we're not running setuid or setgid) and then trying - * the password file - */ - if (issetugid() != 0 || - (h = ::getenv("HOME")) == NULL) { - if (((h = getlogin()) != NULL && - (pwd = getpwnam(h)) != NULL) || - (pwd = getpwuid(getuid())) != NULL) - h = pwd->pw_dir; - else - return pattern; - } - } - else { - /* - * Expand a ~user - */ - if ((pwd = getpwnam((char*) patbuf)) == NULL) - return pattern; - else - h = pwd->pw_dir; - } - - /* Copy the home directory */ - for (b = patbuf; b < eb && *h; *b++ = *h++) - continue; - - /* Append the rest of the pattern */ - while (b < eb && (*b++ = *p++) != EOS) - continue; - *b = EOS; - - return patbuf; -} - - -/* - * The main glob() routine: compiles the pattern (optionally processing - * quotes), calls glob1() to do the real pattern matching, and finally - * sorts the list (unless unsorted operation is requested). Returns 0 - * if things went well, nonzero if errors occurred. - */ -static int -glob0(const Char *pattern, glob_t *pglob, int *limit) -{ - const Char *qpatnext; - int c, err, oldpathc; - Char *bufnext, patbuf[MAXPATHLEN]; - - qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); - oldpathc = pglob->gl_pathc; - bufnext = patbuf; - - /* We don't need to check for buffer overflow any more. */ - while ((c = (char)*qpatnext++) != EOS) { - switch (c) { - case LBRACKET: - c = (char)*qpatnext; - if (c == NOT) - ++qpatnext; - if (*qpatnext == EOS || - g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) { - *bufnext++ = LBRACKET; - if (c == NOT) - --qpatnext; - break; - } - *bufnext++ = M_SET; - if (c == NOT) - *bufnext++ = M_NOT; - c = (char)*qpatnext++; - do { - *bufnext++ = CHAR(c); - if (*qpatnext == RANGE && - (c = (char)qpatnext[1]) != RBRACKET) { - *bufnext++ = M_RNG; - *bufnext++ = CHAR(c); - qpatnext += 2; - } - } while ((c = (char)*qpatnext++) != RBRACKET); - pglob->gl_flags |= GLOB_MAGCHAR; - *bufnext++ = M_END; - break; - case QUESTION: - pglob->gl_flags |= GLOB_MAGCHAR; - *bufnext++ = M_ONE; - break; - case STAR: - pglob->gl_flags |= GLOB_MAGCHAR; - /* collapse adjacent stars to one, - * to avoid exponential behavior - */ - if (bufnext == patbuf || bufnext[-1] != M_ALL) - *bufnext++ = M_ALL; - break; - default: - *bufnext++ = CHAR(c); - break; - } - } - *bufnext = EOS; -#ifdef DEBUG - qprintf("glob0:", patbuf); -#endif - - if ((err = glob1(patbuf, pglob, limit)) != 0) - return(err); - - /* - * If there was no match we are going to append the pattern - * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified - * and the pattern did not contain any magic characters - * GLOB_NOMAGIC is there just for compatibility with csh. - */ - if (pglob->gl_pathc == oldpathc) { - if (((pglob->gl_flags & GLOB_NOCHECK) || - ((pglob->gl_flags & GLOB_NOMAGIC) && - !(pglob->gl_flags & GLOB_MAGCHAR)))) - return(globextend(pattern, pglob, limit)); - else - return(GLOB_NOMATCH); - } - if (!(pglob->gl_flags & GLOB_NOSORT)) - qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, - pglob->gl_pathc - oldpathc, sizeof(char *), compare); - return(0); -} - -static int -compare(const void *p, const void *q) -{ - return(strcmp(*(char **)p, *(char **)q)); -} - -static int -glob1(Char *pattern, glob_t *pglob, int *limit) -{ - Char pathbuf[MAXPATHLEN]; - - /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ - if (*pattern == EOS) - return(0); - return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1, - pattern, pglob, limit)); -} - -/* - * The functions glob2 and glob3 are mutually recursive; there is one level - * of recursion for each segment in the pattern that contains one or more - * meta characters. - */ -static int -glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, glob_t *pglob, int *limit) -{ - struct stat sb; - Char *p, *q; - int anymeta; - - /* - * Loop over pattern segments until end of pattern or until - * segment with meta character found. - */ - for (anymeta = 0;;) { - if (*pattern == EOS) { /* End of pattern? */ - *pathend = EOS; - if (g_lstat(pathbuf, &sb, pglob)) - return(0); - - if (((pglob->gl_flags & GLOB_MARK) && - pathend[-1] != SEP) && (S_ISDIR(sb.st_mode))) { - if (pathend + 1 > pathend_last) - return (GLOB_ABORTED); - *pathend++ = SEP; - *pathend = EOS; - } - ++pglob->gl_matchc; - return(globextend(pathbuf, pglob, limit)); - } - - /* Find end of next segment, copy tentatively to pathend. */ - q = pathend; - p = pattern; - while (*p != EOS && *p != SEP) { - if (ismeta(*p)) - anymeta = 1; - if (q + 1 > pathend_last) - return (GLOB_ABORTED); - *q++ = *p++; - } - - if (!anymeta) { /* No expansion, do next segment. */ - pathend = q; - pattern = p; - while (*pattern == SEP) { - if (pathend + 1 > pathend_last) - return (GLOB_ABORTED); - *pathend++ = *pattern++; - } - } else /* Need expansion, recurse. */ - return(glob3(pathbuf, pathend, pathend_last, pattern, p, - pglob, limit)); - } - /* NOTREACHED */ -} - -static int -glob3(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, Char *restpattern, glob_t *pglob, int *limit) -{ - struct dirent *dp; - DIR *dirp; - int err; - char buf[MAXPATHLEN]; - - /* - * The readdirfunc declaration can't be prototyped, because it is - * assigned, below, to two functions which are prototyped in glob.h - * and dirent.h as taking pointers to differently typed opaque - * structures. - */ - typedef struct dirent *(*readdirfunc_t)(void*); - readdirfunc_t readdirfunc; - - if (pathend > pathend_last) - return (GLOB_ABORTED); - *pathend = EOS; - errno = 0; - - if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { - /* TODO: don't call for ENOENT or ENOTDIR? */ - if (pglob->gl_errfunc) { - if (g_Ctoc(pathbuf, buf, sizeof(buf))) - return (GLOB_ABORTED); - if (pglob->gl_errfunc(buf, errno) || - pglob->gl_flags & GLOB_ERR) - return (GLOB_ABORTED); - } - return(0); - } - - err = 0; - - /* Search directory for matching names. */ - if (pglob->gl_flags & GLOB_ALTDIRFUNC) - readdirfunc = pglob->gl_readdir; - else - readdirfunc = (readdirfunc_t)readdir; - while ((dp = (*readdirfunc)(dirp))) { - u_char *sc; - Char *dc; - wchar_t wc; - size_t clen; - mbstate_t mbs; - - /* Initial DOT must be matched literally. */ - if (dp->d_name[0] == DOT && *pattern != DOT) - continue; - memset(&mbs, 0, sizeof(mbs)); - dc = pathend; - sc = (u_char *) dp->d_name; - while (dc < pathend_last) { - clen = mbrtowc(&wc, (const char*)sc, MB_LEN_MAX, &mbs); - if (clen == (size_t)-1 || clen == (size_t)-2) { - wc = *sc; - clen = 1; - memset(&mbs, 0, sizeof(mbs)); - } - if ((*dc++ = wc) == EOS) - break; - sc += clen; - } - if (!match(pathend, pattern, restpattern)) { - *pathend = EOS; - continue; - } - err = glob2(pathbuf, --dc, pathend_last, restpattern, - pglob, limit); - if (err) - break; - } - - if (pglob->gl_flags & GLOB_ALTDIRFUNC) - (*pglob->gl_closedir)(dirp); - else - closedir(dirp); - return(err); -} - - -/* - * Extend the gl_pathv member of a glob_t structure to accomodate a new item, - * add the new item, and update gl_pathc. - * - * This assumes the BSD realloc, which only copies the block when its size - * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic - * behavior. - * - * Return 0 if new item added, error code if memory couldn't be allocated. - * - * Invariant of the glob_t structure: - * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and - * gl_pathv points to (gl_offs + gl_pathc + 1) items. - */ -static int -globextend(const Char *path, glob_t *pglob, int *limit) -{ - char **pathv; - int i; - size_t newsize, len; - char *copy; - const Char *p; - - if (*limit && pglob->gl_pathc > *limit) { - errno = 0; - return (GLOB_NOSPACE); - } - - newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); - pathv = pglob->gl_pathv ? - (char**)realloc((char *)pglob->gl_pathv, newsize) : - (char**)malloc(newsize); - if (pathv == NULL) { - if (pglob->gl_pathv) { - free(pglob->gl_pathv); - pglob->gl_pathv = NULL; - } - return(GLOB_NOSPACE); - } - - if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { - /* first time around -- clear initial gl_offs items */ - pathv += pglob->gl_offs; - for (i = pglob->gl_offs; --i >= 0; ) - *--pathv = NULL; - } - pglob->gl_pathv = pathv; - - for (p = path; *p++;) - continue; - len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */ - if ((copy = (char*)malloc(len)) != NULL) { - if (g_Ctoc(path, copy, (u_int)len)) { - free(copy); - return (GLOB_NOSPACE); - } - pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; - } - pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; - return(copy == NULL ? GLOB_NOSPACE : 0); -} - -/* - * pattern matching function for filenames. Each occurrence of the * - * pattern causes a recursion level. - */ -static int -match(Char *name, Char *pat, Char *patend) -{ - int ok, negate_range; - Char c, k; - - while (pat < patend) { - c = *pat++; - switch (c & M_MASK) { - case M_ALL: - if (pat == patend) - return(1); - do - if (match(name, pat, patend)) - return(1); - while (*name++ != EOS); - return(0); - case M_ONE: - if (*name++ == EOS) - return(0); - break; - case M_SET: - ok = 0; - if ((k = *name++) == EOS) - return(0); - if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS) - ++pat; - while (((c = *pat++) & M_MASK) != M_END) - if ((*pat & M_MASK) == M_RNG) { - if (__collate_load_error ? - CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) : - __collate_range_cmp((int)CHAR(c), (int)CHAR(k)) <= 0 - && __collate_range_cmp((int)CHAR(k), (int)CHAR(pat[1])) <= 0 - ) - ok = 1; - pat += 2; - } else if (c == k) - ok = 1; - if (ok == negate_range) - return(0); - break; - default: - if (*name++ != c) - return(0); - break; - } - } - return(*name == EOS); -} - -/* Free allocated data belonging to a glob_t structure. */ -void -globfree(glob_t *pglob) -{ - int i; - char **pp; - - if (pglob->gl_pathv != NULL) { - pp = pglob->gl_pathv + pglob->gl_offs; - for (i = pglob->gl_pathc; i--; ++pp) - if (*pp) - free(*pp); - free(pglob->gl_pathv); - pglob->gl_pathv = NULL; - } -} - -static DIR * -g_opendir(Char *str, glob_t *pglob) -{ - char buf[MAXPATHLEN]; - - if (!*str) - strcpy(buf, "."); - else { - if (g_Ctoc(str, buf, sizeof(buf))) - return (NULL); - } - - if (pglob->gl_flags & GLOB_ALTDIRFUNC) - return (DIR*)((*pglob->gl_opendir)(buf)); - - return(opendir(buf)); -} - -static int -g_lstat(Char *fn, struct stat *sb, glob_t *pglob) -{ - char buf[MAXPATHLEN]; - - if (g_Ctoc(fn, buf, sizeof(buf))) { - errno = ENAMETOOLONG; - return (-1); - } - if (pglob->gl_flags & GLOB_ALTDIRFUNC) - return((*pglob->gl_lstat)(buf, sb)); - return(lstat(buf, sb)); -} - -static Char * -g_strchr(Char *str, wchar_t ch) -{ - do { - if (*str == ch) - return (str); - } while (*str++); - return (NULL); -} - -static int -g_Ctoc(const Char *str, char *buf, u_int len) -{ - mbstate_t mbs; - size_t clen; - - memset(&mbs, 0, sizeof(mbs)); - while ((int)len >= MB_CUR_MAX) { - clen = wcrtomb(buf, (wchar_t)*str, &mbs); - if (clen == (size_t)-1) - return (1); - if (*str == L'\0') - return (0); - str++; - buf += clen; - len -= (u_int)clen; - } - return (1); -} - -#ifdef DEBUG -static void -qprintf(const char *str, Char *s) -{ - Char *p; - - (void)printf("%s:\n", str); - for (p = s; *p; p++) - (void)printf("%c", CHAR(*p)); - (void)printf("\n"); - for (p = s; *p; p++) - (void)printf("%c", *p & M_PROTECT ? '"' : ' '); - (void)printf("\n"); - for (p = s; *p; p++) - (void)printf("%c", ismeta(*p) ? '_' : ' '); - (void)printf("\n"); -} -#endif -#endif diff --git a/library/cpp/regex/glob/glob_compat.h b/library/cpp/regex/glob/glob_compat.h deleted file mode 100644 index 0dc518d51b..0000000000 --- a/library/cpp/regex/glob/glob_compat.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include <util/system/defaults.h> - -#if defined(_MSC_VER) || defined(_bionic_) -#define USE_INTERNAL_GLOB -#endif - -#if !defined(USE_INTERNAL_GLOB) -#include <glob.h> -#else - -struct stat; -typedef struct { - int gl_pathc; /* Count of total paths so far. */ - int gl_matchc; /* Count of paths matching pattern. */ - int gl_offs; /* Reserved at beginning of gl_pathv. */ - int gl_flags; /* Copy of flags parameter to glob. */ - char** gl_pathv; /* List of paths matching pattern. */ - /* Copy of errfunc parameter to glob. */ - int (*gl_errfunc)(const char*, int); - - /* - * Alternate filesystem access methods for glob; replacement - * versions of closedir(3), readdir(3), opendir(3), stat(2) - * and lstat(2). - */ - void (*gl_closedir)(void*); - struct dirent* (*gl_readdir)(void*); - void* (*gl_opendir)(const char*); - int (*gl_lstat)(const char*, struct stat*); - int (*gl_stat)(const char*, struct stat*); -} glob_t; - -//#if __POSIX_VISIBLE >= 199209 -/* Believed to have been introduced in 1003.2-1992 */ -#define GLOB_APPEND 0x0001 /* Append to output from previous call. */ -#define GLOB_DOOFFS 0x0002 /* Use gl_offs. */ -#define GLOB_ERR 0x0004 /* Return on error. */ -#define GLOB_MARK 0x0008 /* Append / to matching directories. */ -#define GLOB_NOCHECK 0x0010 /* Return pattern itself if nothing matches. */ -#define GLOB_NOSORT 0x0020 /* Don't sort. */ -#define GLOB_NOESCAPE 0x2000 /* Disable backslash escaping. */ - -/* Error values returned by glob(3) */ -#define GLOB_NOSPACE (-1) /* Malloc call failed. */ -#define GLOB_ABORTED (-2) /* Unignored error. */ -#define GLOB_NOMATCH (-3) /* No match and GLOB_NOCHECK was not set. */ -#define GLOB_NOSYS (-4) /* Obsolete: source comptability only. */ -//#endif /* __POSIX_VISIBLE >= 199209 */ - -//#if __BSD_VISIBLE -#define GLOB_ALTDIRFUNC 0x0040 /* Use alternately specified directory funcs. */ -#define GLOB_BRACE 0x0080 /* Expand braces ala csh. */ -#define GLOB_MAGCHAR 0x0100 /* Pattern had globbing characters. */ -#define GLOB_NOMAGIC 0x0200 /* GLOB_NOCHECK without magic chars (csh). */ -#define GLOB_QUOTE 0x0400 /* Quote special chars with \. */ -#define GLOB_TILDE 0x0800 /* Expand tilde names from the passwd file. */ -#define GLOB_LIMIT 0x1000 /* limit number of returned paths */ - -/* source compatibility, these are the old names */ -#define GLOB_MAXPATH GLOB_LIMIT -#define GLOB_ABEND GLOB_ABORTED -//#endif /* __BSD_VISIBLE */ - -int glob(const char*, int, int (*)(const char*, int), glob_t*); -void globfree(glob_t*); - -#endif /* _MSC_VER */ - -#if !defined(FROM_IMPLEMENTATION) -#undef USE_INTERNAL_GLOB -#endif diff --git a/library/cpp/regex/glob/glob_iterator.cpp b/library/cpp/regex/glob/glob_iterator.cpp deleted file mode 100644 index 746b49f397..0000000000 --- a/library/cpp/regex/glob/glob_iterator.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "glob_iterator.h" diff --git a/library/cpp/regex/glob/glob_iterator.h b/library/cpp/regex/glob/glob_iterator.h deleted file mode 100644 index e25481e594..0000000000 --- a/library/cpp/regex/glob/glob_iterator.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include "glob_compat.h" - -#include <util/generic/noncopyable.h> -#include <util/generic/string.h> -#include <util/generic/yexception.h> - -class TGlobPaths : TNonCopyable { -public: - TGlobPaths(const char* pattern) { - Impl.gl_pathc = 0; - int result = glob(pattern, 0, nullptr, &Impl); - Y_ENSURE(result == 0 || result == GLOB_NOMATCH, "glob failed"); - } - - TGlobPaths(const TString& pattern) - : TGlobPaths(pattern.data()) - { - } - - ~TGlobPaths() { - globfree(&Impl); - } - - const char** begin() { - return const_cast<const char**>(Impl.gl_pathv); - } - - const char** end() { - return const_cast<const char**>(Impl.gl_pathv + Impl.gl_pathc); - } - -private: - glob_t Impl; -}; diff --git a/library/cpp/regex/glob/ya.make b/library/cpp/regex/glob/ya.make deleted file mode 100644 index 9379742d99..0000000000 --- a/library/cpp/regex/glob/ya.make +++ /dev/null @@ -1,12 +0,0 @@ -LIBRARY() - -SRCS( - glob.cpp - glob_iterator.cpp -) - -PEERDIR( - library/cpp/charset -) - -END() diff --git a/library/cpp/reverse_geocoder/CMakeLists.txt b/library/cpp/reverse_geocoder/CMakeLists.txt deleted file mode 100644 index 621e95fdb2..0000000000 --- a/library/cpp/reverse_geocoder/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -add_subdirectory(core) -add_subdirectory(library) -add_subdirectory(proto) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 17f6e79c96..0000000000 --- a/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,35 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-core) -target_link_libraries(cpp-reverse_geocoder-core PUBLIC - contrib-libs-cxxsupp - yutil - cpp-reverse_geocoder-library - cpp-reverse_geocoder-proto - cpp-digest-crc32c -) -target_sources(cpp-reverse_geocoder-core PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp -) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 02361a0a1a..0000000000 --- a/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,36 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-core) -target_link_libraries(cpp-reverse_geocoder-core PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-reverse_geocoder-library - cpp-reverse_geocoder-proto - cpp-digest-crc32c -) -target_sources(cpp-reverse_geocoder-core PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp -) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 02361a0a1a..0000000000 --- a/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,36 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-core) -target_link_libraries(cpp-reverse_geocoder-core PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-reverse_geocoder-library - cpp-reverse_geocoder-proto - cpp-digest-crc32c -) -target_sources(cpp-reverse_geocoder-core PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp -) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.txt b/library/cpp/reverse_geocoder/core/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/reverse_geocoder/core/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 17f6e79c96..0000000000 --- a/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,35 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-core) -target_link_libraries(cpp-reverse_geocoder-core PUBLIC - contrib-libs-cxxsupp - yutil - cpp-reverse_geocoder-library - cpp-reverse_geocoder-proto - cpp-digest-crc32c -) -target_sources(cpp-reverse_geocoder-core PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp -) diff --git a/library/cpp/reverse_geocoder/core/area_box.cpp b/library/cpp/reverse_geocoder/core/area_box.cpp deleted file mode 100644 index 67038fe4f8..0000000000 --- a/library/cpp/reverse_geocoder/core/area_box.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "area_box.h" - -using namespace NReverseGeocoder; - -TRef NReverseGeocoder::LookupAreaBox(const TPoint& point) { - const TRef boxX = (point.X - NAreaBox::LowerX) / NAreaBox::DeltaX; - const TRef boxY = (point.Y - NAreaBox::LowerY) / NAreaBox::DeltaY; - return boxX * NAreaBox::NumberY + boxY; -} diff --git a/library/cpp/reverse_geocoder/core/area_box.h b/library/cpp/reverse_geocoder/core/area_box.h deleted file mode 100644 index 1077a65fef..0000000000 --- a/library/cpp/reverse_geocoder/core/area_box.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include "common.h" -#include "point.h" - -namespace NReverseGeocoder { - namespace NAreaBox { - const TCoordinate LowerX = ToCoordinate(-180.0); - const TCoordinate UpperX = ToCoordinate(180.0); - const TCoordinate LowerY = ToCoordinate(-90.0); - const TCoordinate UpperY = ToCoordinate(90.0); - const TCoordinate DeltaX = ToCoordinate(0.1); - const TCoordinate DeltaY = ToCoordinate(0.1); - const TCoordinate NumberX = (UpperX - LowerX) / DeltaX; - const TCoordinate NumberY = (UpperY - LowerY) / DeltaY; - const TCoordinate Number = NumberX * NumberY; - - } - - // Area of geo territory. Variable PolygonRefsOffset refers to the polygons lying inside this - // area. Geo map is divided into equal bounding boxes from (NAreaBox::LowerX, NAreaBox::LowerY) - // to (NAreaBox::UpperX, NAreaBox::UpperY) with DeltaX and DeltaY sizes. Logic of filling is in - // generator. - struct Y_PACKED TAreaBox { - TNumber PolygonRefsOffset; - TNumber PolygonRefsNumber; - }; - - static_assert(sizeof(TAreaBox) == 8, "NReverseGeocoder::TAreaBox size mismatch"); - - // Determine in wich area box in geoData is point. - TRef LookupAreaBox(const TPoint& point); - -} diff --git a/library/cpp/reverse_geocoder/core/bbox.cpp b/library/cpp/reverse_geocoder/core/bbox.cpp deleted file mode 100644 index aa4258ac22..0000000000 --- a/library/cpp/reverse_geocoder/core/bbox.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "bbox.h" diff --git a/library/cpp/reverse_geocoder/core/bbox.h b/library/cpp/reverse_geocoder/core/bbox.h deleted file mode 100644 index e8b6e00aa3..0000000000 --- a/library/cpp/reverse_geocoder/core/bbox.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include "common.h" -#include "point.h" - -#include <util/generic/utility.h> - -namespace NReverseGeocoder { - struct Y_PACKED TBoundingBox { - TCoordinate X1; - TCoordinate Y1; - TCoordinate X2; - TCoordinate Y2; - - TBoundingBox() - : X1(0) - , Y1(0) - , X2(0) - , Y2(0) - { - } - - TBoundingBox(TCoordinate x1, TCoordinate y1, TCoordinate x2, TCoordinate y2) - : X1(x1) - , Y1(y1) - , X2(x2) - , Y2(y2) - { - } - - TBoundingBox(const TPoint* points, TNumber number) { - Init(); - for (TNumber i = 0; i < number; ++i) - Relax(points[i]); - } - - void Init() { - X1 = ToCoordinate(180.0); - Y1 = ToCoordinate(90.0); - X2 = ToCoordinate(-180.0); - Y2 = ToCoordinate(-90.0); - } - - void Relax(const TPoint& p) { - X1 = Min(X1, p.X); - Y1 = Min(Y1, p.Y); - X2 = Max(X2, p.X); - Y2 = Max(Y2, p.Y); - } - - bool HasIntersection(const TBoundingBox& r) const { - if (X1 > r.X2 || X2 < r.X1 || Y1 > r.Y2 || Y2 < r.Y1) - return false; - return true; - } - - bool Contains(const TPoint& p) const { - if (p.X < X1 || p.X > X2 || p.Y < Y1 || p.Y > Y2) - return false; - return true; - } - }; - - static_assert(sizeof(TBoundingBox) == 16, "NReverseGeocoder::TBoundingBox size mismatch"); - -} diff --git a/library/cpp/reverse_geocoder/core/common.cpp b/library/cpp/reverse_geocoder/core/common.cpp deleted file mode 100644 index 67c02a20a0..0000000000 --- a/library/cpp/reverse_geocoder/core/common.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "common.h" diff --git a/library/cpp/reverse_geocoder/core/common.h b/library/cpp/reverse_geocoder/core/common.h deleted file mode 100644 index 090407ffd9..0000000000 --- a/library/cpp/reverse_geocoder/core/common.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include <util/system/compiler.h> -#include <util/system/types.h> - -namespace NReverseGeocoder { - using TCoordinate = i32; - using TGeoId = ui64; - using TNumber = ui32; - using TRef = ui32; - using TSquare = i64; - using TVersion = ui64; - - const double EARTH_RADIUS = 6371000.0; - - inline TCoordinate ToCoordinate(double x) { - return x * 1e6; - } - - inline double ToDouble(TCoordinate x) { - return x / 1e6; - } - -} diff --git a/library/cpp/reverse_geocoder/core/edge.cpp b/library/cpp/reverse_geocoder/core/edge.cpp deleted file mode 100644 index 86c6ab8535..0000000000 --- a/library/cpp/reverse_geocoder/core/edge.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "edge.h" diff --git a/library/cpp/reverse_geocoder/core/edge.h b/library/cpp/reverse_geocoder/core/edge.h deleted file mode 100644 index 9d20928857..0000000000 --- a/library/cpp/reverse_geocoder/core/edge.h +++ /dev/null @@ -1,101 +0,0 @@ -#pragma once - -#include "common.h" -#include "point.h" - -#include <util/generic/utility.h> -#include <util/system/yassert.h> - -namespace NReverseGeocoder { - // TEdge is a type, which represent polygon edge, Beg/End refers on begin/End edge points in - // geographical data. - struct Y_PACKED TEdge { - TRef Beg; - TRef End; - - TEdge() - : Beg(0) - , End(0) - { - } - - TEdge(const TRef& a, const TRef& b) - : Beg(a) - , End(b) - { - } - - bool operator==(const TEdge& e) const { - return Beg == e.Beg && End == e.End; - } - - bool operator!=(const TEdge& e) const { - return Beg != e.Beg || End != e.End; - } - - bool operator<(const TEdge& e) const { - return Beg < e.Beg || (Beg == e.Beg && End < e.End); - } - - // Checks that current edge is lying lower then other edge. Both edges must have a common X - // values, otherwise the behavior is undefined. - bool Lower(const TEdge& e, const TPoint* points) const { - if (*this == e) - return false; - - const TPoint& a1 = points[Beg]; - const TPoint& a2 = points[End]; - const TPoint& b1 = points[e.Beg]; - const TPoint& b2 = points[e.End]; - - Y_ASSERT(a1.X <= a2.X && b1.X <= b2.X); - - if (a1 == b1) { - return (a2 - a1).Cross(b2 - a1) > 0; - } else if (a2 == b2) { - return (a1 - b1).Cross(b2 - b1) > 0; - } else if (b1.X >= a1.X && b1.X <= a2.X) { - return (a2 - a1).Cross(b1 - a1) > 0; - } else if (b2.X >= a1.X && b2.X <= a2.X) { - return (a2 - a1).Cross(b2 - a1) > 0; - } else if (a1.X >= b1.X && a1.X <= b2.X) { - return (a1 - b1).Cross(b2 - b1) > 0; - } else if (a2.X >= b1.X && a2.X <= b2.X) { - return (a2 - b1).Cross(b2 - b1) > 0; - } else { - return false; - } - } - - // Checks that current edge lying lower then given point. Edge and point must have a common X - // values, otherwise the behavior is undefined. - bool Lower(const TPoint& p, const TPoint* points) const { - if (Contains(p, points)) - return false; - - TPoint a = points[Beg]; - TPoint b = points[End]; - - if (a.X > b.X) - DoSwap(a, b); - - return (b - a).Cross(p - a) > 0; - } - - bool Contains(const TPoint& p, const TPoint* points) const { - TPoint a = points[Beg]; - TPoint b = points[End]; - - if (a.X > b.X) - DoSwap(a, b); - - if (p.X < a.X || p.X > b.X) - return false; - - return (b - a).Cross(p - a) == 0; - } - }; - - static_assert(sizeof(TEdge) == 8, "NReverseGeocoder::TEdge size mismatch"); - -} diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.cpp b/library/cpp/reverse_geocoder/core/geo_data/debug.cpp deleted file mode 100644 index 4db0534b22..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/debug.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "debug.h" - -#include <library/cpp/reverse_geocoder/library/log.h> -#include <library/cpp/reverse_geocoder/library/memory.h> - -using namespace NReverseGeocoder; -using namespace NGeoData; - -size_t NReverseGeocoder::NGeoData::Space(const IGeoData& g) { - size_t space = 0; - -#define GEO_BASE_DEF_VAR(TVar, Var) \ - space += sizeof(TVar); - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - space += sizeof(TNumber) + sizeof(TArr) * g.Arr##Number(); - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - return space; -} - -template <typename TArr> -static float ArraySpace(TNumber number) { - return number * sizeof(TArr) * 1.0 / MB; -} - -void NReverseGeocoder::NGeoData::Show(IOutputStream& out, const IGeoData& g) { - out << "GeoData = " << NGeoData::Space(g) * 1.0 / GB << " GB" << '\n'; - -#define GEO_BASE_DEF_VAR(TVar, Var) \ - out << " GeoData." << #Var << " = " << (unsigned long long)g.Var() << '\n'; - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - out << " GeoData." << #Arr << " = " \ - << g.Arr##Number() << " x " << sizeof(TArr) << " = " \ - << ArraySpace<TArr>(g.Arr##Number()) << " MB" \ - << '\n'; - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR -} - -template <typename TArr> -static bool Equals(const TArr* a, const TArr* b, size_t count) { - return !memcmp(a, b, sizeof(TArr) * count); -} - -bool NReverseGeocoder::NGeoData::Equals(const IGeoData& a, const IGeoData& b) { -#define GEO_BASE_DEF_VAR(TVar, Var) \ - if (a.Var() != b.Var()) { \ - LogError(#Var " not equal"); \ - return false; \ - } - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ - if (!::Equals(a.Arr(), b.Arr(), a.Arr##Number())) { \ - LogError(#Arr " not equal"); \ - return false; \ - } - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - return true; -} diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.h b/library/cpp/reverse_geocoder/core/geo_data/debug.h deleted file mode 100644 index e7a4d9029c..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/debug.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include "geo_data.h" - -#include <util/stream/output.h> - -namespace NReverseGeocoder { - namespace NGeoData { - size_t Space(const IGeoData& g); - - void Show(IOutputStream& out, const IGeoData& g); - - bool Equals(const IGeoData& a, const IGeoData& b); - - } -} diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.cpp b/library/cpp/reverse_geocoder/core/geo_data/def.cpp deleted file mode 100644 index bb9f760d73..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/def.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "def.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.h b/library/cpp/reverse_geocoder/core/geo_data/def.h deleted file mode 100644 index d3e331d873..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/def.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include <library/cpp/reverse_geocoder/core/area_box.h> -#include <library/cpp/reverse_geocoder/core/common.h> -#include <library/cpp/reverse_geocoder/core/edge.h> -#include <library/cpp/reverse_geocoder/core/kv.h> -#include <library/cpp/reverse_geocoder/core/part.h> -#include <library/cpp/reverse_geocoder/core/point.h> -#include <library/cpp/reverse_geocoder/core/polygon.h> -#include <library/cpp/reverse_geocoder/core/region.h> - -namespace NReverseGeocoder { - const TVersion GEO_DATA_VERSION_0 = 0; - const TVersion GEO_DATA_VERSION_1 = 1; - - const TVersion GEO_DATA_CURRENT_VERSION = GEO_DATA_VERSION_1; - -// Geographical data definition. This define need for reflection in map/unmap, show, etc. -#define GEO_BASE_DEF_GEO_DATA \ - GEO_BASE_DEF_VAR(TVersion, Version); \ - GEO_BASE_DEF_ARR(TPoint, Points); \ - GEO_BASE_DEF_ARR(TEdge, Edges); \ - GEO_BASE_DEF_ARR(TRef, EdgeRefs); \ - GEO_BASE_DEF_ARR(TPart, Parts); \ - GEO_BASE_DEF_ARR(TPolygon, Polygons); \ - GEO_BASE_DEF_ARR(TRef, PolygonRefs); \ - GEO_BASE_DEF_ARR(TAreaBox, Boxes); \ - GEO_BASE_DEF_ARR(char, Blobs); \ - GEO_BASE_DEF_ARR(TKv, Kvs); \ - GEO_BASE_DEF_ARR(TRegion, Regions); \ - GEO_BASE_DEF_ARR(TRawPolygon, RawPolygons); \ - GEO_BASE_DEF_ARR(TRef, RawEdgeRefs); \ - // #define GEO_BASE_DEF_GEO_DATA - -} diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp b/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp deleted file mode 100644 index be3310b291..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "geo_data.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.h b/library/cpp/reverse_geocoder/core/geo_data/geo_data.h deleted file mode 100644 index 7cb76bcddc..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/geo_data.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include "def.h" - -namespace NReverseGeocoder { - class IGeoData { -#define GEO_BASE_DEF_VAR(TVar, Var) \ - virtual const TVar& Var() const = 0; - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - virtual const TArr* Arr() const = 0; \ - virtual TNumber Arr##Number() const = 0; - - public: - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - virtual ~IGeoData() { - } - }; - -} diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.cpp b/library/cpp/reverse_geocoder/core/geo_data/map.cpp deleted file mode 100644 index 312f7d7cb0..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/map.cpp +++ /dev/null @@ -1,203 +0,0 @@ -#include "map.h" - -#include <library/cpp/reverse_geocoder/library/log.h> -#include <library/cpp/reverse_geocoder/library/system.h> -#include <library/cpp/reverse_geocoder/proto/geo_data.pb.h> - -#include <library/cpp/digest/crc32c/crc32c.h> - -#include <util/generic/algorithm.h> -#include <util/generic/buffer.h> -#include <util/generic/vector.h> -#include <util/network/address.h> -#include <util/system/filemap.h> -#include <util/system/unaligned_mem.h> - -using namespace NReverseGeocoder; - -static const TNumber CRC_SIZE = 3; - -void NReverseGeocoder::TGeoDataMap::Init() { -#define GEO_BASE_DEF_VAR(TVar, Var) \ - Var##_ = TVar(); - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - Arr##_ = nullptr; \ - Arr##Number_ = 0; - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR -} - -NReverseGeocoder::TGeoDataMap::TGeoDataMap() - : Data_(nullptr) - , Size_(0) -{ - Init(); -} - -static bool CheckMemoryConsistency(const NProto::TGeoData& g) { - TVector<std::pair<intptr_t, intptr_t>> segments; - -#define GEO_BASE_DEF_VAR(TVar, Var) \ - // undef - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - if (g.Get##Arr##Number() > 0) { \ - intptr_t const beg = g.Get##Arr(); \ - intptr_t const end = g.Get##Arr() + g.Get##Arr##Number() * sizeof(TArr); \ - segments.emplace_back(beg, end); \ - } - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - Sort(segments.begin(), segments.end()); - - for (size_t i = 0; i + 1 < segments.size(); ++i) - if (segments[i].second > segments[i + 1].first) - return false; - - return true; -} - -void NReverseGeocoder::TGeoDataMap::Remap() { - Init(); - - if (!Data_) - return; - - const ui64 headerSize = ntohl(ReadUnaligned<ui64>(Data_)); - - NProto::TGeoData header; - if (!header.ParseFromArray(Data_ + sizeof(ui64), headerSize)) - ythrow yexception() << "Unable parse geoData header"; - - if (header.GetMagic() != SYSTEM_ENDIAN_FLAG) - ythrow yexception() << "Different endianness in geoData and host"; - - if (!CheckMemoryConsistency(header)) - ythrow yexception() << "Memory is not consistent!"; - -#define GEO_BASE_DEF_VAR(TVar, Var) \ - Var##_ = header.Get##Var(); - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ - if (Arr##Number() > 0) { \ - const intptr_t offset = header.Get##Arr(); \ - Arr##_ = (TArr*)(((intptr_t)Data_) + offset); \ - const ui32 hash = Crc32c(Arr##_, std::min(Arr##Number_, CRC_SIZE) * sizeof(TArr)); \ - if (hash != header.Get##Arr##Crc32()) \ - ythrow yexception() << "Wrong crc32 for " << #Arr; \ - } - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - if (Version() != GEO_DATA_CURRENT_VERSION) - ythrow yexception() << "Unable use version " << Version() - << "(current version is " << GEO_DATA_CURRENT_VERSION << ")"; -} - -static size_t HeaderSize() { - NProto::TGeoData header; - header.SetMagic(std::numeric_limits<decltype(header.GetMagic())>::max()); - -#define GEO_BASE_DEF_VAR(TVar, Var) \ - header.Set##Var(std::numeric_limits<decltype(header.Get##Var())>::max()); - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ - header.Set##Arr(std::numeric_limits<decltype(header.Get##Arr())>::max()); \ - header.Set##Arr##Crc32(std::numeric_limits<decltype(header.Get##Arr##Crc32())>::max()); - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - return header.ByteSize(); -} - -static const char* Serialize(const IGeoData& g, TBlockAllocator* allocator, size_t* size) { - size_t const preAllocatedSize = allocator->TotalAllocatedSize(); - char* data = (char*)allocator->Allocate(HeaderSize() + sizeof(ui64)); - - NProto::TGeoData header; - header.SetMagic(SYSTEM_ENDIAN_FLAG); - -#define GEO_BASE_DEF_VAR(TVar, Var) \ - header.Set##Var(g.Var()); - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ - if (g.Arr##Number() > 0) { \ - TArr* arr = (TArr*)allocator->Allocate(sizeof(TArr) * g.Arr##Number()); \ - memcpy(arr, g.Arr(), sizeof(TArr) * g.Arr##Number()); \ - header.Set##Arr((ui64)(((intptr_t)arr) - ((intptr_t)data))); \ - header.Set##Arr##Crc32(Crc32c(arr, std::min(g.Arr##Number(), CRC_SIZE) * sizeof(TArr))); \ - }; - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - const auto str = header.SerializeAsString(); - WriteUnaligned<ui64>(data, (ui64)htonl(str.size())); - memcpy(data + sizeof(ui64), str.data(), str.size()); - - if (size) - *size = allocator->TotalAllocatedSize() - preAllocatedSize; - - return data; -} - -static size_t TotalByteSize(const IGeoData& g) { - size_t total_size = TBlockAllocator::AllocateSize(HeaderSize() + sizeof(ui64)); - -#define GEO_BASE_DEF_VAR(TVar, Var) \ - // undef - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ - total_size += TBlockAllocator::AllocateSize(sizeof(TArr) * g.Arr##Number()); - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - return total_size; -} - -NReverseGeocoder::TGeoDataMap::TGeoDataMap(const IGeoData& geoData, TBlockAllocator* allocator) - : TGeoDataMap() -{ - Data_ = Serialize(geoData, allocator, &Size_); - Remap(); -} - -void NReverseGeocoder::TGeoDataMap::SerializeToFile(const TString& path, const IGeoData& data) { - TBlob data_blob = SerializeToBlob(data); - - TFile file(path, CreateAlways | RdWr); - file.Write(data_blob.Data(), data_blob.Length()); -} - -TBlob NReverseGeocoder::TGeoDataMap::SerializeToBlob(const IGeoData& data) { - TBuffer buf; - buf.Resize(TotalByteSize(data)); - memset(buf.data(), 0, buf.size()); - - TBlockAllocator allocator(buf.Data(), buf.Size()); - TGeoDataMap(data, &allocator); - - return TBlob::FromBuffer(buf); -} diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.h b/library/cpp/reverse_geocoder/core/geo_data/map.h deleted file mode 100644 index e466bd912e..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/map.h +++ /dev/null @@ -1,89 +0,0 @@ -#pragma once - -#include "geo_data.h" - -#include <library/cpp/reverse_geocoder/library/block_allocator.h> - -#include <util/memory/blob.h> - -namespace NReverseGeocoder { - class TGeoDataMap: public IGeoData, public TNonCopyable { -#define GEO_BASE_DEF_VAR(TVar, Var) \ -public: \ - const TVar& Var() const override { \ - return Var##_; \ - } \ - \ -private: \ - TVar Var##_; - -#define GEO_BASE_DEF_ARR(TArr, Arr) \ -public: \ - const TArr* Arr() const override { \ - return Arr##_; \ - } \ - TNumber Arr##Number() const override { \ - return Arr##Number_; \ - } \ - \ -private: \ - TNumber Arr##Number_; \ - const TArr* Arr##_; - - GEO_BASE_DEF_GEO_DATA - -#undef GEO_BASE_DEF_VAR -#undef GEO_BASE_DEF_ARR - - public: - TGeoDataMap(); - - static void SerializeToFile(const TString& path, const IGeoData& data); - - static TBlob SerializeToBlob(const IGeoData& data); - - TGeoDataMap(const IGeoData& data, TBlockAllocator* allocator); - - TGeoDataMap(const char* data, size_t size) - : TGeoDataMap() - { - Data_ = data; - Size_ = size; - Remap(); - } - - TGeoDataMap(TGeoDataMap&& dat) - : TGeoDataMap() - { - DoSwap(Data_, dat.Data_); - DoSwap(Size_, dat.Size_); - Remap(); - dat.Remap(); - } - - TGeoDataMap& operator=(TGeoDataMap&& dat) { - DoSwap(Data_, dat.Data_); - DoSwap(Size_, dat.Size_); - Remap(); - dat.Remap(); - return *this; - } - - const char* Data() const { - return Data_; - } - - size_t Size() const { - return Size_; - } - - private: - void Init(); - - void Remap(); - - const char* Data_; - size_t Size_; - }; - -} diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp b/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp deleted file mode 100644 index 5ff2d13783..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "proxy.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.h b/library/cpp/reverse_geocoder/core/geo_data/proxy.h deleted file mode 100644 index fecb9fc7cf..0000000000 --- a/library/cpp/reverse_geocoder/core/geo_data/proxy.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once - -#include "geo_data.h" -#include "map.h" - -#include <util/generic/ptr.h> -#include <util/system/filemap.h> - -namespace NReverseGeocoder { - class IGeoDataProxy { - public: - virtual const IGeoData* GeoData() const = 0; - - virtual ~IGeoDataProxy() { - } - }; - - using TGeoDataProxyPtr = THolder<IGeoDataProxy>; - - class TGeoDataMapProxy: public IGeoDataProxy, public TNonCopyable { - public: - explicit TGeoDataMapProxy(const char* path) - : MemFile_(path) - { - MemFile_.Map(0, MemFile_.Length()); - GeoData_ = TGeoDataMap((const char*)MemFile_.Ptr(), MemFile_.MappedSize()); - } - - const IGeoData* GeoData() const override { - return &GeoData_; - } - - private: - TFileMap MemFile_; - TGeoDataMap GeoData_; - }; - - class TGeoDataWrapper: public IGeoDataProxy, public TNonCopyable { - public: - explicit TGeoDataWrapper(const IGeoData& g) - : GeoData_(&g) - { - } - - const IGeoData* GeoData() const override { - return GeoData_; - } - - private: - const IGeoData* GeoData_; - }; - - class TGeoDataRawProxy: public IGeoDataProxy, public TNonCopyable { - public: - TGeoDataRawProxy(const char* data, size_t dataSize) - : GeoData_(data, dataSize) - { - } - - const IGeoData* GeoData() const override { - return &GeoData_; - } - - private: - TGeoDataMap GeoData_; - }; - -} diff --git a/library/cpp/reverse_geocoder/core/kv.cpp b/library/cpp/reverse_geocoder/core/kv.cpp deleted file mode 100644 index a48e9c947e..0000000000 --- a/library/cpp/reverse_geocoder/core/kv.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "kv.h" diff --git a/library/cpp/reverse_geocoder/core/kv.h b/library/cpp/reverse_geocoder/core/kv.h deleted file mode 100644 index 639c21de52..0000000000 --- a/library/cpp/reverse_geocoder/core/kv.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "common.h" - -namespace NReverseGeocoder { - // k and v is offsets on blobs in geographical data blobs array. See geo_data.h - // for details. - struct TKv { - TNumber K; - TNumber V; - }; - -} diff --git a/library/cpp/reverse_geocoder/core/location.cpp b/library/cpp/reverse_geocoder/core/location.cpp deleted file mode 100644 index b2d2f54d12..0000000000 --- a/library/cpp/reverse_geocoder/core/location.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "location.h" diff --git a/library/cpp/reverse_geocoder/core/location.h b/library/cpp/reverse_geocoder/core/location.h deleted file mode 100644 index 5aa3198684..0000000000 --- a/library/cpp/reverse_geocoder/core/location.h +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -namespace NReverseGeocoder { - struct TLocation { - double Lon; - double Lat; - - TLocation() - : Lon(0) - , Lat(0) - { - } - - TLocation(double lon, double lat) - : Lon(lon) - , Lat(lat) - { - } - }; - -} diff --git a/library/cpp/reverse_geocoder/core/part.cpp b/library/cpp/reverse_geocoder/core/part.cpp deleted file mode 100644 index c973d2171a..0000000000 --- a/library/cpp/reverse_geocoder/core/part.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "part.h" - -#include <library/cpp/reverse_geocoder/library/unaligned_iter.h> - -#include <util/generic/algorithm.h> - -using namespace NReverseGeocoder; - -bool NReverseGeocoder::TPart::Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs, - const TEdge* edges, const TPoint* points) const { - auto edgeRefsBegin = UnalignedIter(edgeRefs) + EdgeRefsOffset; - auto edgeRefsEnd = edgeRefsBegin + edgeRefsNumber; - - // Find lower bound edge, which lying below given point. - auto cmp = [&](const TRef& e, const TPoint& p) { - return edges[e].Lower(p, points); - }; - - auto edgeRef = LowerBound(edgeRefsBegin, edgeRefsEnd, point, cmp); - - if (edgeRef == edgeRefsEnd) - return false; - - if (edges[*edgeRef].Contains(point, points)) - return true; - - // If the point is inside of the polygon then it will intersect the edge an odd number of times. - return (edgeRef - edgeRefsBegin) % 2 == 1; -} diff --git a/library/cpp/reverse_geocoder/core/part.h b/library/cpp/reverse_geocoder/core/part.h deleted file mode 100644 index 9b24fee96f..0000000000 --- a/library/cpp/reverse_geocoder/core/part.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include "common.h" -#include "edge.h" -#include "point.h" - -namespace NReverseGeocoder { - // TPart contains version of persistent scanline. Parts lying in geofraphical data parts array, - // ordered by Coordinate for each polygon. Variable EdgeRefsOffset refers on EdgeRefs array for - // this part. For optimal usage of memory, part does not contain "EdgeRefsNumber" variable, because - // it's can be computed as parts[i + 1].EdgeRefsOffset - parts[i].EdgeRefsOffset for every part - // in geographical data. Especially for this, added fake part into IGeoData with correct - // EdgeRefsOffset. Refs in EdgeRefs are in increasing order for each part. It is necessary to - // quickly determine how many edges is under the point. See generator/ for details. - struct Y_PACKED TPart { - TCoordinate Coordinate; - TNumber EdgeRefsOffset; - - // Checks point lying under odd numbers of edges or on edge. - bool Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs, - const TEdge* edges, const TPoint* points) const; - }; - - static_assert(sizeof(TPart) == 8, "NReverseGeocoder::TPart size mismatch"); - -} diff --git a/library/cpp/reverse_geocoder/core/point.cpp b/library/cpp/reverse_geocoder/core/point.cpp deleted file mode 100644 index 396e27e596..0000000000 --- a/library/cpp/reverse_geocoder/core/point.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "point.h" diff --git a/library/cpp/reverse_geocoder/core/point.h b/library/cpp/reverse_geocoder/core/point.h deleted file mode 100644 index 75f1dfc1b4..0000000000 --- a/library/cpp/reverse_geocoder/core/point.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include "common.h" -#include "location.h" - -namespace NReverseGeocoder { - struct Y_PACKED TPoint { - TCoordinate X; - TCoordinate Y; - - TPoint() - : X(0) - , Y(0) - { - } - - TPoint(const TCoordinate& x1, const TCoordinate& y1) - : X(x1) - , Y(y1) - { - } - - explicit TPoint(const TLocation& l) - : X(ToCoordinate(l.Lon)) - , Y(ToCoordinate(l.Lat)) - { - } - - TPoint operator-(const TPoint& p) const { - return TPoint(X - p.X, Y - p.Y); - } - - bool operator==(const TPoint& b) const { - return X == b.X && Y == b.Y; - } - - bool operator!=(const TPoint& b) const { - return X != b.X || Y != b.Y; - } - - bool operator<(const TPoint& b) const { - return X < b.X || (X == b.X && Y < b.Y); - } - - TSquare Cross(const TPoint& p) const { - return 1ll * X * p.Y - 1ll * Y * p.X; - } - }; - - static_assert(sizeof(TPoint) == 8, "NReverseGeocoder::TPoint size mismatch"); - -} diff --git a/library/cpp/reverse_geocoder/core/polygon.cpp b/library/cpp/reverse_geocoder/core/polygon.cpp deleted file mode 100644 index 2baac2d229..0000000000 --- a/library/cpp/reverse_geocoder/core/polygon.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "polygon.h" - -#include <util/generic/algorithm.h> - -using namespace NReverseGeocoder; - -static bool Check(const TPart* part, const TPoint& point, const TRef* edgeRefs, - const TEdge* edges, const TPoint* points) { - const TNumber edgeRefsNumber = (part + 1)->EdgeRefsOffset - part->EdgeRefsOffset; - return part->Contains(point, edgeRefsNumber, edgeRefs, edges, points); -} - -bool NReverseGeocoder::TPolygon::Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs, - const TEdge* edges, const TPoint* points) const { - if (!Bbox.Contains(point)) - return false; - - parts += PartsOffset; - const TPart* partsEnd = parts + PartsNumber; - - // Find lower bound part, which can contains given point. - const TPart* part = LowerBound(parts, partsEnd, point, [&](const TPart& a, const TPoint& b) { - return a.Coordinate < b.X; - }); - - if (part->Coordinate > point.X) { - if (part == parts) - return false; - --part; - } - - if (point.X < part->Coordinate || point.X > (part + 1)->Coordinate) - return false; - - if (point.X == part->Coordinate) - if (part != parts && Check(part - 1, point, edgeRefs, edges, points)) - return true; - - return Check(part, point, edgeRefs, edges, points); -} - -bool NReverseGeocoder::TPolygonBase::Better(const TPolygonBase& p, const TRegion* regions, - TNumber regionsNumber) const { - if (Square < p.Square) - return true; - - if (Square == p.Square) { - const TRegion* begin = regions; - const TRegion* end = regions + regionsNumber; - - const TRegion* r1 = LowerBound(begin, end, TGeoId(RegionId)); - const TRegion* r2 = LowerBound(begin, end, TGeoId(p.RegionId)); - - if (r1 == end || r1->RegionId != RegionId) - return false; - - if (r2 == end || r2->RegionId != p.RegionId) - return false; - - return r1->Better(*r2); - } - - return false; -} - -bool NReverseGeocoder::TRawPolygon::Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges, - const TPoint* points) const { - if (!Bbox.Contains(point)) - return false; - - edgeRefs += EdgeRefsOffset; - - TNumber intersections = 0; - for (TNumber i = 0; i < EdgeRefsNumber; ++i) { - const TEdge& e = edges[edgeRefs[i]]; - - if (e.Contains(point, points)) - return true; - - TPoint a = points[e.Beg]; - TPoint b = points[e.End]; - - if (a.X > b.X) - DoSwap(a, b); - - if (a.X < point.X && b.X >= point.X && e.Lower(point, points)) - ++intersections; - } - - return intersections % 2 == 1; -} diff --git a/library/cpp/reverse_geocoder/core/polygon.h b/library/cpp/reverse_geocoder/core/polygon.h deleted file mode 100644 index 065bba1e38..0000000000 --- a/library/cpp/reverse_geocoder/core/polygon.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include "bbox.h" -#include "common.h" -#include "edge.h" -#include "part.h" -#include "point.h" -#include "region.h" - -namespace NReverseGeocoder { -#pragma pack(push, 1) - - struct TPolygonBase { - enum EType { - TYPE_UNKNOWN = 0, - TYPE_INNER = 1, - TYPE_OUTER = 2, - }; - - // If TYPE_INNER and polygon contains given point, this means that region with RegionId - // does not contains point. - EType Type; - - ui32 Unused1; - - // Geographical data indetifiers. - TGeoId RegionId; - TGeoId PolygonId; - - // Rectangle in which lies that polygon. - TBoundingBox Bbox; - - // Square of polygon. Need for determine which polygon is better. See better member function. - TSquare Square; - - // Total points number of given polygon. - TNumber PointsNumber; - - // Check that this polygon better then given polygon, which means that this polygons lying - // deeper then given in polygons hierarchy. - bool Better(const TPolygonBase& p, const TRegion* regions, TNumber regionsNumber) const; - }; - - // Polygon is a representation of persistent scanline data structure. - struct TPolygon: public TPolygonBase { - // Versions of persistent scanline. - TNumber PartsOffset; - TNumber PartsNumber; - ui32 Unused2; - - // Fast point in polygon test using persistent scanline. You can see how this data structure - // generated in generator/. - bool Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs, - const TEdge* edges, const TPoint* points) const; - }; - - static_assert(sizeof(TPolygon) == 64, "NReverseGeocoder::TPolygon size mismatch"); - - // Raw polygon is a polygon representation for slow tests. - struct TRawPolygon: public TPolygonBase { - // Raw polygon edge refs. - TNumber EdgeRefsOffset; - TNumber EdgeRefsNumber; - ui32 Unused2; - - bool Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges, - const TPoint* points) const; - }; - - static_assert(sizeof(TRawPolygon) == 64, "NReverseGeocoder::TRawPolygon size mismatch"); - -#pragma pack(pop) -} diff --git a/library/cpp/reverse_geocoder/core/region.cpp b/library/cpp/reverse_geocoder/core/region.cpp deleted file mode 100644 index 62b4acd0a1..0000000000 --- a/library/cpp/reverse_geocoder/core/region.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "region.h" diff --git a/library/cpp/reverse_geocoder/core/region.h b/library/cpp/reverse_geocoder/core/region.h deleted file mode 100644 index 4b010c7103..0000000000 --- a/library/cpp/reverse_geocoder/core/region.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include "common.h" - -namespace NReverseGeocoder { - struct Y_PACKED TRegion { - TGeoId RegionId; - TNumber KvsOffset; - TNumber KvsNumber; - TSquare Square; - TNumber PolygonsNumber; - ui32 Unused; - - bool operator==(const TRegion& r) const { - return RegionId == r.RegionId; - } - - bool operator<(const TRegion& r) const { - return RegionId < r.RegionId; - } - - bool operator<(const TGeoId& r) const { - return RegionId < r; - } - - friend bool operator<(const TGeoId& regionId, const TRegion& r) { - return regionId < r.RegionId; - } - - bool Better(const TRegion& r) const { - return Square < r.Square; - } - }; - - static_assert(sizeof(TRegion) == 32, "NReverseGeocoder::TRegion size mismatch"); - -} diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp b/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp deleted file mode 100644 index d73e4f2648..0000000000 --- a/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp +++ /dev/null @@ -1,182 +0,0 @@ -#include "reverse_geocoder.h" -#include "geo_data/geo_data.h" - -#include <library/cpp/reverse_geocoder/library/unaligned_iter.h> - -#include <util/generic/algorithm.h> -#include <util/system/unaligned_mem.h> - -using namespace NReverseGeocoder; - -static bool PolygonContains(const TPolygon& p, const TPoint& point, const IGeoData& geoData) { - const TPart* parts = geoData.Parts(); - const TRef* edgeRefs = geoData.EdgeRefs(); - const TEdge* edges = geoData.Edges(); - const TPoint* points = geoData.Points(); - return p.Contains(point, parts, edgeRefs, edges, points); -} - -template <typename TAnswer> -static void UpdateAnswer(const TAnswer** answer, const TAnswer& polygon, - const IGeoData& geoData) { - if (!*answer) { - *answer = &polygon; - } else { - const TRegion* regions = geoData.Regions(); - const TNumber regionsNumber = geoData.RegionsNumber(); - if (!(*answer)->Better(polygon, regions, regionsNumber)) - *answer = &polygon; - } -} - -static void SortDebug(TReverseGeocoder::TDebug* debug, const IGeoData& geoData) { - const TRegion* regions = geoData.Regions(); - const TNumber regionsNumber = geoData.RegionsNumber(); - - auto cmp = [&](const TGeoId& a, const TGeoId& b) { - const TRegion* r1 = LowerBound(regions, regions + regionsNumber, a); - const TRegion* r2 = LowerBound(regions, regions + regionsNumber, b); - return r1->Better(*r2); - }; - - Sort(debug->begin(), debug->end(), cmp); -} - -TGeoId NReverseGeocoder::TReverseGeocoder::Lookup(const TLocation& location, TDebug* debug) const { - const IGeoData& geoData = *GeoDataProxy_->GeoData(); - - if (debug) - debug->clear(); - - const TPoint point(location); - const TRef boxRef = LookupAreaBox(point); - - if (boxRef >= geoData.BoxesNumber()) - return UNKNOWN_GEO_ID; - - const TNumber refsOffset = geoData.Boxes()[boxRef].PolygonRefsOffset; - const TNumber refsNumber = geoData.Boxes()[boxRef].PolygonRefsNumber; - - const TPolygon* answer = nullptr; - - const TPolygon* p = geoData.Polygons(); - const auto refsBegin = UnalignedIter(geoData.PolygonRefs()) + refsOffset; - const auto refsEnd = refsBegin + refsNumber; - - for (auto iterL = refsBegin, iterR = refsBegin; iterL < refsEnd; iterL = iterR) { - iterR = iterL + 1; - - if (PolygonContains(p[*iterL], point, geoData)) { - if (p[*iterL].Type == TPolygon::TYPE_INNER) { - // All polygons with same RegionId must be skipped if polygon is inner. - // In geoData small inner polygons stored before big outer polygons. - while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId) - ++iterR; - - } else { - UpdateAnswer(&answer, p[*iterL], geoData); - - if (debug) - debug->push_back(p[*iterL].RegionId); - - while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId) - ++iterR; - } - } - } - - if (debug) - SortDebug(debug, geoData); - - return answer ? answer->RegionId : UNKNOWN_GEO_ID; -} - -TGeoId NReverseGeocoder::TReverseGeocoder::RawLookup(const TLocation& location, TDebug* debug) const { - const IGeoData& geoData = *GeoDataProxy_->GeoData(); - - if (debug) - debug->clear(); - - const TPoint point(location); - - const TRawPolygon* borders = geoData.RawPolygons(); - const TNumber bordersNumber = geoData.RawPolygonsNumber(); - - const TRawPolygon* answer = nullptr; - - TNumber i = 0; - while (i < bordersNumber) { - if (borders[i].Contains(point, geoData.RawEdgeRefs(), geoData.Edges(), geoData.Points())) { - if (borders[i].Type == TRawPolygon::TYPE_INNER) { - TNumber j = i + 1; - while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId) - ++j; - - i = j; - - } else { - UpdateAnswer(&answer, borders[i], geoData); - - if (debug) - debug->push_back(borders[i].RegionId); - - TNumber j = i + 1; - while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId) - ++j; - - i = j; - } - } else { - ++i; - } - } - - if (debug) - SortDebug(debug, geoData); - - return answer ? answer->RegionId : UNKNOWN_GEO_ID; -} - -bool NReverseGeocoder::TReverseGeocoder::EachKv(TGeoId regionId, TKvCallback callback) const { - const IGeoData& g = *GeoDataProxy_->GeoData(); - - const TRegion* begin = g.Regions(); - const TRegion* end = begin + g.RegionsNumber(); - - const TRegion* region = LowerBound(begin, end, regionId); - - if (region == end || region->RegionId != regionId) - return false; - - const TKv* kvs = g.Kvs() + region->KvsOffset; - const char* blobs = g.Blobs(); - - for (TNumber i = 0; i < region->KvsNumber; ++i) { - const char* k = blobs + kvs[i].K; - const char* v = blobs + kvs[i].V; - callback(k, v); - } - - return true; -} - -void NReverseGeocoder::TReverseGeocoder::EachPolygon(TPolygonCallback callback) const { - const IGeoData& g = *GeoDataProxy_->GeoData(); - - for (TNumber i = 0; i < g.PolygonsNumber(); ++i) - callback(g.Polygons()[i]); -} - -void NReverseGeocoder::TReverseGeocoder::EachPart(const TPolygon& polygon, TPartCallback callback) const { - const IGeoData& g = *GeoDataProxy_->GeoData(); - - const TNumber partsOffset = polygon.PartsOffset; - const TNumber partsNumber = polygon.PartsNumber; - - for (TNumber i = partsOffset; i < partsOffset + partsNumber; ++i) { - const TPart& part = g.Parts()[i]; - const TPart& npart = g.Parts()[i + 1]; - const TNumber edgeRefsNumber = npart.EdgeRefsOffset - part.EdgeRefsOffset; - callback(part, edgeRefsNumber); - } -} diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.h b/library/cpp/reverse_geocoder/core/reverse_geocoder.h deleted file mode 100644 index c74eddb40e..0000000000 --- a/library/cpp/reverse_geocoder/core/reverse_geocoder.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include "common.h" -#include "geo_data/geo_data.h" -#include "geo_data/proxy.h" - -#include <util/generic/noncopyable.h> -#include <util/generic/vector.h> - -#include <functional> - -namespace NReverseGeocoder { - const TGeoId UNKNOWN_GEO_ID = static_cast<TGeoId>(-1); - - // NOTE: Be careful! It's work fine and fast on real world dataset. - // But in theory it's can spent O(n^2) memory (on real world dataset it's just 6n). - // Point in polygon test will be O(log n) always. Memory spent will be O(n) in future! - class TReverseGeocoder: public TNonCopyable { - public: - using TDebug = TVector<TGeoId>; - using TKvCallback = std::function<void(const char*, const char*)>; - using TPolygonCallback = std::function<void(const TPolygon&)>; - using TPartCallback = std::function<void(const TPart&, TNumber)>; - - TReverseGeocoder() - : GeoDataProxy_() - { - } - - TReverseGeocoder(TReverseGeocoder&& g) - : GeoDataProxy_() - { - DoSwap(GeoDataProxy_, g.GeoDataProxy_); - } - - TReverseGeocoder& operator=(TReverseGeocoder&& g) { - DoSwap(GeoDataProxy_, g.GeoDataProxy_); - return *this; - } - - explicit TReverseGeocoder(const char* path) - : GeoDataProxy_(new TGeoDataMapProxy(path)) - { - } - - explicit TReverseGeocoder(const IGeoData& geoData) - : GeoDataProxy_(new TGeoDataWrapper(geoData)) - { - } - - TReverseGeocoder(const char* data, size_t dataSize) - : GeoDataProxy_(new TGeoDataRawProxy(data, dataSize)) - { - } - - TGeoId Lookup(const TLocation& location, TDebug* debug = nullptr) const; - - TGeoId RawLookup(const TLocation& location, TDebug* debug = nullptr) const; - - bool EachKv(TGeoId regionId, TKvCallback callback) const; - - void EachPolygon(TPolygonCallback callback) const; - - void EachPart(const TPolygon& polygon, TPartCallback callback) const; - - const IGeoData& GeoData() const { - return *GeoDataProxy_->GeoData(); - } - - private: - TGeoDataProxyPtr GeoDataProxy_; - }; -} diff --git a/library/cpp/reverse_geocoder/core/ya.make b/library/cpp/reverse_geocoder/core/ya.make deleted file mode 100644 index 9f7dc67464..0000000000 --- a/library/cpp/reverse_geocoder/core/ya.make +++ /dev/null @@ -1,28 +0,0 @@ -LIBRARY() - -PEERDIR( - library/cpp/reverse_geocoder/library - library/cpp/reverse_geocoder/proto - library/cpp/digest/crc32c -) - -SRCS( - area_box.cpp - bbox.cpp - common.cpp - edge.cpp - reverse_geocoder.cpp - kv.cpp - location.cpp - part.cpp - point.cpp - polygon.cpp - region.cpp - geo_data/debug.cpp - geo_data/def.cpp - geo_data/geo_data.cpp - geo_data/map.cpp - geo_data/proxy.cpp -) - -END() diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index f82b4b8cd1..0000000000 --- a/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,21 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-library) -target_link_libraries(cpp-reverse_geocoder-library PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-reverse_geocoder-library PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp -) diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 4b45fce452..0000000000 --- a/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,22 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-library) -target_link_libraries(cpp-reverse_geocoder-library PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-reverse_geocoder-library PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp -) diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 4b45fce452..0000000000 --- a/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,22 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-library) -target_link_libraries(cpp-reverse_geocoder-library PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-reverse_geocoder-library PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp -) diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.txt b/library/cpp/reverse_geocoder/library/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/reverse_geocoder/library/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt deleted file mode 100644 index f82b4b8cd1..0000000000 --- a/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,21 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-reverse_geocoder-library) -target_link_libraries(cpp-reverse_geocoder-library PUBLIC - contrib-libs-cxxsupp - yutil -) -target_sources(cpp-reverse_geocoder-library PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp -) diff --git a/library/cpp/reverse_geocoder/library/block_allocator.cpp b/library/cpp/reverse_geocoder/library/block_allocator.cpp deleted file mode 100644 index 56f61dc566..0000000000 --- a/library/cpp/reverse_geocoder/library/block_allocator.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "block_allocator.h" - -using namespace NReverseGeocoder; - -static size_t const MEMORY_IS_USED_FLAG = ~0ull; -static size_t const SIZEOF_SIZE = AlignMemory(sizeof(size_t)); - -void* NReverseGeocoder::TBlockAllocator::Allocate(size_t number) { - number = AlignMemory(number); - if (BytesAllocated_ + number + SIZEOF_SIZE > BytesLimit_) - ythrow yexception() << "Unable allocate memory"; - char* begin = ((char*)Data_) + BytesAllocated_; - char* end = begin + number; - *((size_t*)end) = MEMORY_IS_USED_FLAG; - BytesAllocated_ += number + SIZEOF_SIZE; - return begin; -} - -size_t NReverseGeocoder::TBlockAllocator::AllocateSize(size_t number) { - return AlignMemory(number) + SIZEOF_SIZE; -} - -static void RelaxBlock(char* begin, size_t* number) { - while (*number > 0) { - char* ptr = begin + *number - SIZEOF_SIZE; - if (*((size_t*)ptr) == MEMORY_IS_USED_FLAG) - return; - *number -= *((size_t*)ptr) + SIZEOF_SIZE; - } -} - -void NReverseGeocoder::TBlockAllocator::Deallocate(void* ptr, size_t number) { - number = AlignMemory(number); - char* begin = (char*)ptr; - char* end = begin + number; - if (*((size_t*)end) != MEMORY_IS_USED_FLAG) - ythrow yexception() << "Trying to deallocate not allocated pointer " << ptr; - *((size_t*)end) = number; - RelaxBlock((char*)Data_, &BytesAllocated_); -} diff --git a/library/cpp/reverse_geocoder/library/block_allocator.h b/library/cpp/reverse_geocoder/library/block_allocator.h deleted file mode 100644 index 1189d6b25c..0000000000 --- a/library/cpp/reverse_geocoder/library/block_allocator.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include "memory.h" - -#include <util/generic/yexception.h> - -namespace NReverseGeocoder { - class TBlockAllocator: public TNonCopyable { - public: - TBlockAllocator() - : Data_(nullptr) - , BytesAllocated_(0) - , BytesLimit_(0) - { - } - - TBlockAllocator(void* data, size_t bytesLimit) - : Data_(data) - , BytesAllocated_(0) - , BytesLimit_(bytesLimit) - { - } - - TBlockAllocator(TBlockAllocator&& a) - : TBlockAllocator() - { - DoSwap(Data_, a.Data_); - DoSwap(BytesAllocated_, a.BytesAllocated_); - DoSwap(BytesLimit_, a.BytesLimit_); - } - - TBlockAllocator& operator=(TBlockAllocator&& a) { - DoSwap(Data_, a.Data_); - DoSwap(BytesAllocated_, a.BytesAllocated_); - DoSwap(BytesLimit_, a.BytesLimit_); - return *this; - } - - virtual ~TBlockAllocator() { - } - - virtual void* Allocate(size_t number); - - static size_t AllocateSize(size_t number); - - virtual void Deallocate(void* ptr, size_t number); - - size_t TotalAllocatedSize() const { - return BytesAllocated_; - } - - void Setup(void* data, size_t bytesLimit) { - Data_ = data; - BytesLimit_ = bytesLimit; - BytesAllocated_ = 0; - } - - private: - void* Data_; - size_t BytesAllocated_; - size_t BytesLimit_; - }; - -} diff --git a/library/cpp/reverse_geocoder/library/fs.cpp b/library/cpp/reverse_geocoder/library/fs.cpp deleted file mode 100644 index 98c3b9ef81..0000000000 --- a/library/cpp/reverse_geocoder/library/fs.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "fs.h" - -#include <util/folder/dirut.h> -#include <util/string/split.h> - -namespace NReverseGeocoder { - TVector<TString> GetDataFilesList(const char* input) { - if (IsDir(input)) { - return GetFileListInDirectory<TVector<TString>>(input); - } - - TVector<TString> result; - for (const auto& partIt : StringSplitter(input).Split(',')) { - result.push_back(TString(partIt.Token())); - } - return result; - } -} diff --git a/library/cpp/reverse_geocoder/library/fs.h b/library/cpp/reverse_geocoder/library/fs.h deleted file mode 100644 index 4435f960c8..0000000000 --- a/library/cpp/reverse_geocoder/library/fs.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include <util/folder/iterator.h> -#include <util/string/vector.h> - -namespace NReverseGeocoder { - template <typename Cont> - Cont GetFileListInDirectory(const char* dirName) { - TDirIterator dirIt(dirName, TDirIterator::TOptions(FTS_LOGICAL)); - Cont dirContent; - for (auto file = dirIt.begin(); file != dirIt.end(); ++file) { - if (strcmp(file->fts_path, dirName)) - dirContent.push_back(file->fts_path); - } - return dirContent; - } - - TVector<TString> GetDataFilesList(const char* input); -} diff --git a/library/cpp/reverse_geocoder/library/log.cpp b/library/cpp/reverse_geocoder/library/log.cpp deleted file mode 100644 index 44e6ddf287..0000000000 --- a/library/cpp/reverse_geocoder/library/log.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "log.h" - -#include <util/datetime/systime.h> -#include <util/generic/yexception.h> -#include <util/system/guard.h> -#include <util/system/mutex.h> - -using namespace NReverseGeocoder; - -static size_t const TIMESTAMP_LIMIT = 32; - -class TLogger { -public: - static TLogger& Inst() { - static TLogger logger; - return logger; - } - - void Setup(IOutputStream& out, ELogLevel level) { - Out_ = &out; - Level_ = level; - } - - void Write(ELogLevel level, const char* message) { - if (level <= Level_) { - TGuard<TMutex> Lock(Lock_); - Out_->Write(message, strlen(message)); - } - } - - IOutputStream& OutputStream() const { - return *Out_; - } - - ELogLevel Level() const { - return Level_; - } - -private: - TLogger() - : Out_() - , Level_(LOG_LEVEL_DISABLE) - { - } - - IOutputStream* Out_; - ELogLevel Level_; - TMutex Lock_; -}; - -ELogLevel NReverseGeocoder::LogLevel() { - return TLogger::Inst().Level(); -} - -void NReverseGeocoder::LogSetup(IOutputStream& out, ELogLevel level) { - TLogger::Inst().Setup(out, level); -} - -IOutputStream& NReverseGeocoder::LogOutputStream() { - return TLogger::Inst().OutputStream(); -} - -static const char* T(char* buffer) { - struct timeval timeVal; - gettimeofday(&timeVal, nullptr); - - struct tm timeInfo; - const time_t sec = timeVal.tv_sec; - localtime_r(&sec, &timeInfo); - - snprintf(buffer, TIMESTAMP_LIMIT, "%02d:%02d:%02d.%06d", - timeInfo.tm_hour, timeInfo.tm_min, timeInfo.tm_sec, (int)timeVal.tv_usec); - - return buffer; -} - -void NReverseGeocoder::LogWrite(ELogLevel level, const char* message) { - if (level > LogLevel()) - return; - - static const char* A[LOG_LEVEL_COUNT] = { - "", // LOG_LEVEL_DISABLE - "\033[90m", // LOG_LEVEL_ERROR - "\033[90m", // LOG_LEVEL_WARNING - "\033[90m", // LOG_LEVEL_INFO - "\033[90m", // LOG_LEVEL_DEBUG - }; - - static const char* B[LOG_LEVEL_COUNT] = { - "", // LOG_LEVEL_DISABLE - "\033[31;1mError\033[0m", // LOG_LEVEL_ERROR - "\033[33;1mWarn\033[0m", // LOG_LEVEL_WARNING - "\033[32;1mInfo\033[0m", // LOG_LEVEL_INFO - "Debug", // LOG_LEVEL_DEBUG - }; - - static const char* C[LOG_LEVEL_COUNT] = { - "", // LOG_LEVEL_DISABLE - "\n", // LOG_LEVEL_ERROR - "\n", // LOG_LEVEL_WARNING - "\n", // LOG_LEVEL_INFO - "\033[0m\n", // LOG_LEVEL_DEBUG - }; - - char buffer[LOG_MESSAGE_LIMIT], tbuffer[TIMESTAMP_LIMIT]; - // Ignore logger snprintf errors. - snprintf(buffer, LOG_MESSAGE_LIMIT, "%s(%s) %s: %s%s", - A[level], T(tbuffer), B[level], message, C[level]); - - TLogger::Inst().Write(level, buffer); -} diff --git a/library/cpp/reverse_geocoder/library/log.h b/library/cpp/reverse_geocoder/library/log.h deleted file mode 100644 index 44cb0cefcf..0000000000 --- a/library/cpp/reverse_geocoder/library/log.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include <util/generic/yexception.h> -#include <util/stream/output.h> - -#include <cstdio> - -namespace NReverseGeocoder { - size_t const LOG_MESSAGE_LIMIT = 1024; - - enum ELogLevel { - LOG_LEVEL_DISABLE = 0, - LOG_LEVEL_ERROR, - LOG_LEVEL_WARNING, - LOG_LEVEL_INFO, - LOG_LEVEL_DEBUG, - LOG_LEVEL_COUNT - }; - - // Init logger. Setup OutputStream and logger level. - void LogSetup(IOutputStream& out, ELogLevel level); - - // Write log message with colors, level and current time. - // Example: - // (13:24:11.123456) Info: Good job! - // (13:24:11.323456) Warn: Ooops :( - // (13:24:22.456789) Error: Hello, world! - void LogWrite(ELogLevel level, const char* message); - - // Log output file descriptor. - IOutputStream& LogOutputStream(); - - // Current log level. - ELogLevel LogLevel(); - - template <typename... TArgs> - void LogWrite(ELogLevel level, const char* fmt, TArgs... args) { - if (level <= LogLevel()) { - char buffer[LOG_MESSAGE_LIMIT]; - // Ignore logger snprintf errors. - snprintf(buffer, LOG_MESSAGE_LIMIT, fmt, std::forward<TArgs>(args)...); - LogWrite(level, buffer); - } - } - - template <typename... TArgs> - void LogError(TArgs... args) { - LogWrite(LOG_LEVEL_ERROR, std::forward<TArgs>(args)...); - } - - template <typename... TArgs> - void LogWarning(TArgs... args) { - LogWrite(LOG_LEVEL_WARNING, std::forward<TArgs>(args)...); - } - - template <typename... TArgs> - void LogInfo(TArgs... args) { - LogWrite(LOG_LEVEL_INFO, std::forward<TArgs>(args)...); - } - - template <typename... TArgs> - void LogDebug(TArgs... args) { - LogWrite(LOG_LEVEL_DEBUG, std::forward<TArgs>(args)...); - } -} diff --git a/library/cpp/reverse_geocoder/library/memory.h b/library/cpp/reverse_geocoder/library/memory.h deleted file mode 100644 index ecbe8bcd66..0000000000 --- a/library/cpp/reverse_geocoder/library/memory.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include <util/system/types.h> - -namespace NReverseGeocoder { - constexpr ui64 B = 1ull; - constexpr ui64 KB = 1024 * B; - constexpr ui64 MB = 1024 * KB; - constexpr ui64 GB = 1024 * MB; - - constexpr size_t MEMORY_ALIGNMENT = 16ull; - - inline unsigned long long AlignMemory(unsigned long long x) { - if (x % MEMORY_ALIGNMENT == 0) - return x; - return x + MEMORY_ALIGNMENT - x % MEMORY_ALIGNMENT; - } - - inline bool IsAlignedMemory(void* ptr) { - return ((uintptr_t)ptr) % MEMORY_ALIGNMENT == 0; - } - -} diff --git a/library/cpp/reverse_geocoder/library/pool_allocator.cpp b/library/cpp/reverse_geocoder/library/pool_allocator.cpp deleted file mode 100644 index 0d841f7db0..0000000000 --- a/library/cpp/reverse_geocoder/library/pool_allocator.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "memory.h" -#include "pool_allocator.h" - -#include <util/generic/yexception.h> - -using namespace NReverseGeocoder; - -NReverseGeocoder::TPoolAllocator::TPoolAllocator(size_t poolSize) { - Ptr_ = new char[poolSize]; - Size_ = poolSize; - Setup(Ptr_, Size_); -} - -NReverseGeocoder::TPoolAllocator::~TPoolAllocator() { - if (Ptr_) - delete[] Ptr_; -} diff --git a/library/cpp/reverse_geocoder/library/pool_allocator.h b/library/cpp/reverse_geocoder/library/pool_allocator.h deleted file mode 100644 index f98bbcd3c1..0000000000 --- a/library/cpp/reverse_geocoder/library/pool_allocator.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include "block_allocator.h" - -#include <util/generic/utility.h> -#include <util/generic/noncopyable.h> - -namespace NReverseGeocoder { - class TPoolAllocator: public TBlockAllocator { - public: - TPoolAllocator() - : Ptr_(nullptr) - , Size_(0) - { - } - - TPoolAllocator(TPoolAllocator&& a) - : TBlockAllocator(std::forward<TBlockAllocator>(a)) - , Ptr_(nullptr) - , Size_(0) - { - DoSwap(Ptr_, a.Ptr_); - DoSwap(Size_, a.Size_); - } - - TPoolAllocator& operator=(TPoolAllocator&& a) { - TBlockAllocator::operator=(std::forward<TBlockAllocator>(a)); - DoSwap(Ptr_, a.Ptr_); - DoSwap(Size_, a.Size_); - return *this; - } - - explicit TPoolAllocator(size_t poolSize); - - ~TPoolAllocator() override; - - private: - char* Ptr_; - size_t Size_; - }; - -} diff --git a/library/cpp/reverse_geocoder/library/system.h b/library/cpp/reverse_geocoder/library/system.h deleted file mode 100644 index 499fb2bd91..0000000000 --- a/library/cpp/reverse_geocoder/library/system.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#define SYSTEM_ENDIAN_FLAG (htonl(337)) diff --git a/library/cpp/reverse_geocoder/library/unaligned_iter.cpp b/library/cpp/reverse_geocoder/library/unaligned_iter.cpp deleted file mode 100644 index 0322b677dc..0000000000 --- a/library/cpp/reverse_geocoder/library/unaligned_iter.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "unaligned_iter.h" diff --git a/library/cpp/reverse_geocoder/library/unaligned_iter.h b/library/cpp/reverse_geocoder/library/unaligned_iter.h deleted file mode 100644 index 827a3e2fd2..0000000000 --- a/library/cpp/reverse_geocoder/library/unaligned_iter.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include <util/system/unaligned_mem.h> -#include <iterator> - -namespace NReverseGeocoder { - /** - * Random-access iterator over a read-only memory range - * of trivially copyable items that may be not aligned properly. - * - * When dereferencing, a copy of item is returned, not a reference. - * Be sure that sizeof(T) is small enough. - * - * Iterator is useful for LowerBound/UpperBound STL algorithms. - */ - template <class T> - class TUnalignedIter: public std::iterator<std::random_access_iterator_tag, T> { - public: - using TSelf = TUnalignedIter<T>; - - explicit TUnalignedIter(const T* ptr) - : Ptr(ptr) - { - } - - T operator*() const { - return ReadUnaligned<T>(Ptr); - } - - bool operator==(TSelf other) const { - return Ptr == other.Ptr; - } - - bool operator<(TSelf other) const { - return Ptr < other.Ptr; - } - - TSelf operator+(ptrdiff_t delta) const { - return TSelf{Ptr + delta}; - } - - ptrdiff_t operator-(TSelf other) const { - return Ptr - other.Ptr; - } - - TSelf& operator+=(ptrdiff_t delta) { - Ptr += delta; - return *this; - } - - TSelf& operator++() { - ++Ptr; - return *this; - } - - private: - const T* Ptr; - }; - - template <class T> - TUnalignedIter<T> UnalignedIter(const T* ptr) { - return TUnalignedIter<T>(ptr); - } -} diff --git a/library/cpp/reverse_geocoder/library/ya.make b/library/cpp/reverse_geocoder/library/ya.make deleted file mode 100644 index ec2eb205a8..0000000000 --- a/library/cpp/reverse_geocoder/library/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -LIBRARY() - -SRCS( - block_allocator.cpp - fs.cpp - log.cpp - pool_allocator.cpp - unaligned_iter.cpp -) - -END() diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 8d1df0fdf8..0000000000 --- a/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,56 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-reverse_geocoder-proto) -target_link_libraries(cpp-reverse_geocoder-proto PUBLIC - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-reverse_geocoder-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto -) -target_proto_addincls(cpp-reverse_geocoder-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-reverse_geocoder-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt deleted file mode 100644 index b53c1692ee..0000000000 --- a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,57 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-reverse_geocoder-proto) -target_link_libraries(cpp-reverse_geocoder-proto PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-reverse_geocoder-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto -) -target_proto_addincls(cpp-reverse_geocoder-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-reverse_geocoder-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt deleted file mode 100644 index b53c1692ee..0000000000 --- a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,57 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-reverse_geocoder-proto) -target_link_libraries(cpp-reverse_geocoder-proto PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-reverse_geocoder-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto -) -target_proto_addincls(cpp-reverse_geocoder-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-reverse_geocoder-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/reverse_geocoder/proto/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 8d1df0fdf8..0000000000 --- a/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,56 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) -get_built_tool_path( - TOOL_protoc_bin - TOOL_protoc_dependency - contrib/tools/protoc/bin - protoc -) -get_built_tool_path( - TOOL_cpp_styleguide_bin - TOOL_cpp_styleguide_dependency - contrib/tools/protoc/plugins/cpp_styleguide - cpp_styleguide -) - -add_library(cpp-reverse_geocoder-proto) -target_link_libraries(cpp-reverse_geocoder-proto PUBLIC - contrib-libs-cxxsupp - yutil - contrib-libs-protobuf -) -target_proto_messages(cpp-reverse_geocoder-proto PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto - ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto -) -target_proto_addincls(cpp-reverse_geocoder-proto - ./ - ${CMAKE_SOURCE_DIR}/ - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src - ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src -) -target_proto_outs(cpp-reverse_geocoder-proto - --cpp_out=${CMAKE_BINARY_DIR}/ - --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ -) diff --git a/library/cpp/reverse_geocoder/proto/geo_data.proto b/library/cpp/reverse_geocoder/proto/geo_data.proto deleted file mode 100644 index 00ecb48bec..0000000000 --- a/library/cpp/reverse_geocoder/proto/geo_data.proto +++ /dev/null @@ -1,42 +0,0 @@ -package NReverseGeocoder.NProto; - -message TGeoData { - required uint64 Magic = 1; - required uint64 Version = 2; - optional uint64 Points = 3; - optional uint64 PointsNumber = 4; - optional uint64 PointsCrc32 = 5; - optional uint64 Edges = 6; - optional uint64 EdgesNumber = 7; - optional uint64 EdgesCrc32 = 8; - optional uint64 EdgeRefs = 9; - optional uint64 EdgeRefsNumber = 10; - optional uint64 EdgeRefsCrc32 = 11; - optional uint64 Parts = 12; - optional uint64 PartsNumber = 13; - optional uint64 PartsCrc32 = 14; - optional uint64 Polygons = 15; - optional uint64 PolygonsNumber = 16; - optional uint64 PolygonsCrc32 = 17; - optional uint64 PolygonRefs = 18; - optional uint64 PolygonRefsNumber = 19; - optional uint64 PolygonRefsCrc32 = 20; - optional uint64 Boxes = 21; - optional uint64 BoxesNumber = 22; - optional uint64 BoxesCrc32 = 23; - optional uint64 Blobs = 24; - optional uint64 BlobsNumber = 25; - optional uint64 BlobsCrc32 = 26; - optional uint64 Kvs = 27; - optional uint64 KvsNumber = 28; - optional uint64 KvsCrc32 = 29; - optional uint64 Regions = 30; - optional uint64 RegionsNumber = 31; - optional uint64 RegionsCrc32 = 32; - optional uint64 RawPolygons = 33; - optional uint64 RawPolygonsNumber = 34; - optional uint64 RawPolygonsCrc32 = 35; - optional uint64 RawEdgeRefs = 36; - optional uint64 RawEdgeRefsNumber = 37; - optional uint64 RawEdgeRefsCrc32 = 38; -}; diff --git a/library/cpp/reverse_geocoder/proto/region.proto b/library/cpp/reverse_geocoder/proto/region.proto deleted file mode 100644 index b782331628..0000000000 --- a/library/cpp/reverse_geocoder/proto/region.proto +++ /dev/null @@ -1,32 +0,0 @@ -package NReverseGeocoder.NProto; - -message TLocation { - required double Lat = 1; - required double Lon = 2; -} - -message TPolygon { - required uint64 PolygonId = 1; - repeated TLocation Locations = 2; - - enum EType { - TYPE_UNKNOWN = 0; - TYPE_INNER = 1; - TYPE_OUTER = 2; - } - - required EType Type = 3; -} - -message TKv { - required string K = 1; - required string V = 2; -} - -message TRegion { - required uint64 RegionId = 1; - optional uint64 ParentId = 2; - repeated TPolygon Polygons = 3; - repeated TKv Kvs = 4; - repeated string Blobs = 5; -} diff --git a/library/cpp/reverse_geocoder/proto/ya.make b/library/cpp/reverse_geocoder/proto/ya.make deleted file mode 100644 index b6f7156210..0000000000 --- a/library/cpp/reverse_geocoder/proto/ya.make +++ /dev/null @@ -1,10 +0,0 @@ -PROTO_LIBRARY() - -SRCS( - geo_data.proto - region.proto -) - -EXCLUDE_TAGS(GO_PROTO) - -END() diff --git a/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt b/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 408bf12f04..0000000000 --- a/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,26 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -add_subdirectory(robotstxtcfg) - -add_library(library-cpp-robots_txt) -target_link_libraries(library-cpp-robots_txt PUBLIC - contrib-libs-cxxsupp - yutil - cpp-robots_txt-robotstxtcfg - library-cpp-case_insensitive_string - library-cpp-charset - cpp-string_utils-url - library-cpp-uri -) -target_sources(library-cpp-robots_txt PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp -) diff --git a/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt b/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 73a209cbbe..0000000000 --- a/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,27 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -add_subdirectory(robotstxtcfg) - -add_library(library-cpp-robots_txt) -target_link_libraries(library-cpp-robots_txt PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-robots_txt-robotstxtcfg - library-cpp-case_insensitive_string - library-cpp-charset - cpp-string_utils-url - library-cpp-uri -) -target_sources(library-cpp-robots_txt PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp -) diff --git a/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt b/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 73a209cbbe..0000000000 --- a/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,27 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -add_subdirectory(robotstxtcfg) - -add_library(library-cpp-robots_txt) -target_link_libraries(library-cpp-robots_txt PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - cpp-robots_txt-robotstxtcfg - library-cpp-case_insensitive_string - library-cpp-charset - cpp-string_utils-url - library-cpp-uri -) -target_sources(library-cpp-robots_txt PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp -) diff --git a/library/cpp/robots_txt/CMakeLists.txt b/library/cpp/robots_txt/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/robots_txt/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt b/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 408bf12f04..0000000000 --- a/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,26 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -add_subdirectory(robotstxtcfg) - -add_library(library-cpp-robots_txt) -target_link_libraries(library-cpp-robots_txt PUBLIC - contrib-libs-cxxsupp - yutil - cpp-robots_txt-robotstxtcfg - library-cpp-case_insensitive_string - library-cpp-charset - cpp-string_utils-url - library-cpp-uri -) -target_sources(library-cpp-robots_txt PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp -) diff --git a/library/cpp/robots_txt/constants.h b/library/cpp/robots_txt/constants.h deleted file mode 100644 index e5e2a57e18..0000000000 --- a/library/cpp/robots_txt/constants.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include <util/generic/size_literals.h> -#include <util/system/defaults.h> - - -constexpr auto robots_max = 500_KB; -constexpr auto max_rules_count = 10'000; -constexpr auto max_rule_length = 10_KB; diff --git a/library/cpp/robots_txt/prefix_tree.cpp b/library/cpp/robots_txt/prefix_tree.cpp deleted file mode 100644 index f7b1848a43..0000000000 --- a/library/cpp/robots_txt/prefix_tree.cpp +++ /dev/null @@ -1,172 +0,0 @@ -#include <cstring> -#include <algorithm> - -#include "prefix_tree.h" - -TPrefixTreeNodeElement::TPrefixTreeNodeElement() - : Key(nullptr) - , KeyLen(0) - , Val(-1) - , Index(-1) -{ -} - -TPrefixTreeNodeElement::TPrefixTreeNodeElement(const char* key, i32 keyLen = 0, i32 val = -1, i32 index = -1) - : Key(key) - , KeyLen(keyLen) - , Val(val) - , Index(index) -{ -} - -TPrefixTreeNode::TPrefixTreeNode() - : Elements() -{ -} - -int TPrefixTreeNode::Find(char ch) const { - for (size_t i = 0; i < Elements.size(); ++i) - if (ch == *(Elements[i].Key)) - return i; - return -1; -} - -void TPrefixTreeNode::Set(const char* key, i32 keyLen, i32 val, i32 index) { - TPrefixTreeNodeElement element(key, keyLen, val, index); - int i = Find(*key); - if (i < 0) - Elements.push_back(element); - else - Elements[i] = element; -} - -void TPrefixTreeNode::Dump(FILE* logFile) const { - if (!logFile) - logFile = stderr; - fprintf(logFile, "size=%" PRISZT "\n", Elements.size()); - static char b[1234]; - for (size_t i = 0; i < Elements.size(); ++i) { - strncpy(b, Elements[i].Key, Elements[i].KeyLen); - b[Elements[i].KeyLen] = 0; - fprintf(logFile, "{key=[%s]:%d, val=%d, index=%d}\n", b, Elements[i].KeyLen, Elements[i].Val, Elements[i].Index); - } -} - -void TPrefixTree::Dump(FILE* logFile) const { - if (!logFile) - logFile = stderr; - fprintf(logFile, "%" PRISZT " nodes\n", Nodes.size()); - for (size_t i = 0; i < Nodes.size(); ++i) { - fprintf(logFile, "%" PRISZT ": ", i); - Nodes[i].Dump(logFile); - fprintf(logFile, "\n"); - } -} - -TPrefixTree::TPrefixTree(int maxSize) { - Init(maxSize); -} - -void TPrefixTree::Init(int maxSize) { - Nodes.clear(); - Nodes.reserve(std::max(maxSize + 1, 1)); - Nodes.push_back(TPrefixTreeNode()); -} - -void TPrefixTree::Clear() { - Nodes.clear(); - Init(0); -} - -void TPrefixTree::Add(const char* s, i32 index) { - AddInternal(s, Nodes[0], index); -} - -void TPrefixTree::AddInternal(const char* s, TPrefixTreeNode& node, i32 index) { - if (!s || !*s) - return; - - int i = node.Find(*s); - if (i >= 0) { - TPrefixTreeNodeElement& d = node.Elements[i]; - const char* p = d.Key; - while (*s && (p - d.Key) < d.KeyLen && *s == *p) - ++s, ++p; - - if (*s) { - if ((p - d.Key) < d.KeyLen) { - Nodes.push_back(TPrefixTreeNode()); - Nodes.back().Set(p, d.KeyLen - (p - d.Key), d.Val, d.Index); - Nodes.back().Set(s, strlen(s), -1, index); - - d.Val = Nodes.size() - 1; - d.KeyLen = p - d.Key; - d.Index = INDEX_BOUND; - } else { - if (d.Val != -1 && index < d.Index) - AddInternal(s, Nodes[d.Val], index); - } - } else { - if ((p - d.Key) < d.KeyLen) { - Nodes.push_back(TPrefixTreeNode()); - Nodes.back().Set(p, d.KeyLen - (p - d.Key), d.Val, d.Index); - d.Val = Nodes.size() - 1; - d.KeyLen = p - d.Key; - d.Index = index; - } else { - d.Index = std::min(d.Index, index); - } - } - } else { - node.Set(s, strlen(s), -1, index); - } -} - -int TPrefixTree::GetMemorySize() const { - int res = Nodes.capacity() * sizeof(TPrefixTreeNode); - for (size_t i = 0; i < Nodes.size(); ++i) - res += Nodes[i].Elements.capacity() * sizeof(TPrefixTreeNodeElement); - return res; -} - -void TPrefixTree::Compress() { - Nodes.shrink_to_fit(); - for (size_t i = 0; i < Nodes.size(); ++i) - Nodes[i].Elements.shrink_to_fit(); -} - -i32 TPrefixTree::MinPrefixIndex(const char* s) const { - if (!*s) - return -1; - int i = Nodes[0].Find(*s); - if (i < 0) - return -1; - const TPrefixTreeNodeElement* d = &Nodes[0].Elements[i]; - - const char* p = d->Key; - if (!p || !*p) - return -1; - - i32 result = INDEX_BOUND; - i32 nodeIndex = 0; - while (*s == *p) { - if (++p - d->Key >= d->KeyLen) - result = std::min(result, d->Index); - if (!*++s) - break; - - if (p - d->Key >= d->KeyLen) { - nodeIndex = d->Val; - if (nodeIndex == -1) - break; - i = Nodes[nodeIndex].Find(*s); - if (i < 0) - break; - d = &Nodes[nodeIndex].Elements[i]; - p = d->Key; - if (!p || !*p) - break; - } - } - return result < INDEX_BOUND ? result : -1; -} diff --git a/library/cpp/robots_txt/prefix_tree.h b/library/cpp/robots_txt/prefix_tree.h deleted file mode 100644 index 5feafcb74d..0000000000 --- a/library/cpp/robots_txt/prefix_tree.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include <util/generic/ptr.h> -#include <util/generic/vector.h> -#include <cstdio> -#include <util/generic/noncopyable.h> - -struct TPrefixTreeNodeElement { - const char* Key; - i32 KeyLen; - i32 Val; - i32 Index; - - TPrefixTreeNodeElement(); - TPrefixTreeNodeElement(const char*, i32, i32, i32); -}; - -class TPrefixTreeNode { -public: - TVector<TPrefixTreeNodeElement> Elements; - TPrefixTreeNode(); - - int Find(char) const; - void Set(const char*, i32, i32, i32); - void Dump(FILE*) const; -}; - -class TPrefixTree : TNonCopyable { -private: - static const i32 INDEX_BOUND = 1 << 30; - - TVector<TPrefixTreeNode> Nodes; - -public: - void Init(int); - TPrefixTree(int); - - void Add(const char*, i32); - i32 MinPrefixIndex(const char*) const; - void Clear(); - void Dump(FILE*) const; - int GetMemorySize() const; - void Compress(); - -private: - void AddInternal(const char*, TPrefixTreeNode&, i32); -}; diff --git a/library/cpp/robots_txt/prefix_tree_rules_handler.cpp b/library/cpp/robots_txt/prefix_tree_rules_handler.cpp deleted file mode 100644 index 8dd579d060..0000000000 --- a/library/cpp/robots_txt/prefix_tree_rules_handler.cpp +++ /dev/null @@ -1,706 +0,0 @@ -#include "robots_txt.h" - -#include <util/digest/fnv.h> -#include <util/system/tls.h> -#include <util/generic/buffer.h> -#include <util/generic/yexception.h> - -namespace { - -TString NormalizeRule(TStringBuf rule) { - TString result; - result.reserve(rule.size() + 1); - - // remove consecutive '*' - for (auto c : rule) { - if (c != '*' || !result.EndsWith('*')) { - result.append(c); - } - } - - if (rule == "*") { - result = "/*"; - return result; - } - - // unify suffix - if (result.EndsWith('$')) { - result.pop_back(); - } else if (!result.EndsWith('*')) { - result.append('*'); - } - - return result; -} - -// Prefix rules -bool IsPrefixRule(TStringBuf rule) { - return rule.EndsWith('*') && !TStringBuf(rule.begin(), rule.end() - 1).Contains('*'); -} - -// Converts rule to internal representation, i.e. -// For prefix rules: "/foo", 'D' -> 'D', "/foo" -// For generic rules: "/*foo", 'D' -> ("/*/*foo*", 'd') or ("/*foo$", 'A') -> ("/*foo", 'a') -// The distinction is in uppercase/lowercase rule type -std::pair<TString, char> ConvertRule(TStringBuf rule, char type) { - switch (type) { - case 'H': - case 'S': - case 'C': - case 'P': - return {TString(rule), type}; - case 'A': - case 'D': - break; - default: - return {{}, type}; - } - - auto result = NormalizeRule(rule); - if (IsPrefixRule(result)) { - result.pop_back(); // remove extra '*' from the end - } else { - type = tolower(type); - } - - return {std::move(result), type}; -} - -} // namespace - -TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler( - TBotIdSet supportedBotIds, - int robotsMaxSize, - int maxRulesNumber, - bool saveDataForAnyBot) - : TRobotsTxtRulesHandlerBase(supportedBotIds, robotsMaxSize, maxRulesNumber, saveDataForAnyBot) -{} - -TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler( - std::initializer_list<ui32> supportedBotIds, - int robotsMaxSize, - int maxRulesNumber, - bool saveDataForAnyBot) - : TRobotsTxtRulesHandlerBase(TBotIdSet(supportedBotIds), robotsMaxSize, maxRulesNumber, saveDataForAnyBot) -{} - -TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler( - const TSet<ui32>& supportedBotIds, - int robotsMaxSize, - int maxRulesNumber, - bool saveDataForAnyBot) - : TRobotsTxtRulesHandlerBase(supportedBotIds, robotsMaxSize, maxRulesNumber, saveDataForAnyBot) -{} - -bool TPrefixTreeRobotsTxtRulesHandler::Empty(const ui32 botId) const { - const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; - return !botInfo || (botInfo->BufferPosition <= sizeof(botInfo->BufferPosition)); -} - -TRobotsTxtRulesIterator TPrefixTreeRobotsTxtRulesHandler::GetRulesIterator(const ui32 botId) const { - const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; - if (!botInfo) { - return {}; - } - return TRobotsTxtRulesIterator(botInfo->Buffer.Get() + sizeof(botInfo->BufferPosition), botInfo->Buffer.Get() + botInfo->BufferPosition); -} - -size_t TPrefixTreeRobotsTxtRulesHandler::GetMemorySize() { - size_t allBotsSize = 0; - for (const auto& botInfo : BotIdToPrefixTreeBotInfo) { - if (!botInfo) { - continue; - } - - allBotsSize += botInfo->PrefixRules.GetMemorySize() - + botInfo->BufferSize * sizeof(char) - + botInfo->ComplexRulesSize * sizeof(char**) - + botInfo->RulesSize * sizeof(char*) + (1 << 8); - } - return allBotsSize; -} - -void TPrefixTreeRobotsTxtRulesHandler::ClearInternal(const ui32 botId) { - if (botId >= BotIdToPrefixTreeBotInfo.size()) { - return; - } - BotIdToPrefixTreeBotInfo[botId].Reset(); - TRobotsTxtRulesHandlerBase::ClearInternal(botId); -} - -bool TPrefixTreeRobotsTxtRulesHandler::OptimizeSize() { - ResetOptimized(); - - TMap<ui64, ui32> hashToBotId; - for (auto botId : LoadedBotIds) { - auto& botInfo = BotIdToPrefixTreeBotInfo[botId]; - if (botInfo->BufferPosition <= sizeof(ui32)) { - botInfo.Reset(); - LoadedBotIds.remove(botId); - continue; - } - - ui64 hash = FnvHash<ui64>(botInfo->Buffer.Get(), botInfo->BufferPosition); - if (auto p = hashToBotId.FindPtr(hash)) { - OptimizedBotIdToStoredBotId[botId] = *p; - ClearInternal(botId); - botInfo.Reset(); - } else { - hashToBotId[hash] = botId; - } - } - - if (IsFullTotal()) { - DoAllowAll(); - return false; - } - - return true; -} - -void TPrefixTreeRobotsTxtRulesHandler::Clear() { - for (size_t botId = 0; botId < robotstxtcfg::max_botid; ++botId) - if (IsBotIdSupported(botId)) - ClearInternal(botId); - TRobotsTxtRulesHandlerBase::Clear(); -} - -void TPrefixTreeRobotsTxtRulesHandler::ResizeBuffer(const ui32 botId, int newSize) { - auto& botInfo = GetInfo(botId); - TArrayHolder<char> newBuffer(new char[newSize]); - memcpy(newBuffer.Get(), botInfo.Buffer.Get(), std::min(botInfo.BufferSize, newSize)); - botInfo.Buffer.Swap(newBuffer); - botInfo.BufferSize = newSize; -} - -bool TPrefixTreeRobotsTxtRulesHandler::AddRule(const ui32 botId, TStringBuf rule, char type) { - if (rule.empty() || rule.Contains('\0')) { - return true; - } - - auto& botInfo = GetInfo(botId); - - if (IsFull(botId, rule.size())) { - DoAllowAll(); - return false; - } - - auto [convertedRule, convertedType] = ConvertRule(rule, type); - const auto len = convertedRule.size() + 2; // 1 byte for convertedType and another for '\0' - - if (auto newPos = botInfo.BufferPosition + len; newPos >= size_t(botInfo.BufferSize)) { - size_t newSize = botInfo.BufferSize; - while (newPos >= newSize) - newSize *= 2; - ResizeBuffer(botId, newSize); - } - - auto out = botInfo.Buffer.Get() + botInfo.BufferPosition; - *out++ = convertedType; - strcpy(out, convertedRule.data()); - botInfo.BufferPosition += len; - - if (type == 'A' || type == 'D') { - botInfo.RulesPosition++; - } - - return true; -} - -const char* TPrefixTreeRobotsTxtRulesHandler::GetRule(const ui32 botId, const char* s, char type) const { - const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; - if (!botInfo) { - return nullptr; - } - - int m = botInfo->RulesPosition + 1; - int k = botInfo->PrefixRules.MinPrefixIndex(s); - if (k >= 0) - m = k; - char* rule; - int j; - for (int i = 0; i < botInfo->ComplexRulesPosition; ++i) { - rule = *botInfo->ComplexRules.Get()[i]; - j = botInfo->ComplexRules.Get()[i] - botInfo->Rules.Get(); - if (j >= m) - break; - if (CheckRule(s, rule)) { - m = j; - break; - } - } - if (m >= botInfo->RulesPosition) - return nullptr; - return toupper(*(botInfo->Rules.Get()[m] - 1)) == type ? botInfo->Rules.Get()[m] : nullptr; -} - -inline bool TPrefixTreeRobotsTxtRulesHandler::IsAllowAll(const ui32 botId) const { - const auto id = GetMappedBotId(botId, false); - auto& botInfo = BotIdToPrefixTreeBotInfo[id ? *id : robotstxtcfg::id_anybot]; - return botInfo && botInfo->AllowAll; -} - -inline bool TPrefixTreeRobotsTxtRulesHandler::IsAllowAll() const { - for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) - if (robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId) && !IsAllowAll(botId)) { - return false; - } - - return true; -} - -inline bool TPrefixTreeRobotsTxtRulesHandler::IsDisallowAll(const ui32 botId, bool useAny) const { - const auto id = GetMappedBotId(botId, false); - if (id) { - const auto& botInfo = BotIdToPrefixTreeBotInfo[*id]; - return botInfo && botInfo->DisallowAll; - } - - auto& botInfo = BotIdToPrefixTreeBotInfo[robotstxtcfg::id_anybot]; - return useAny && botInfo && botInfo->DisallowAll; -} - -inline bool TPrefixTreeRobotsTxtRulesHandler::IsDisallowAll() const { - for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) - if (robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId) && !IsDisallowAll(botId)) - return false; - - return true; -} - -void TPrefixTreeRobotsTxtRulesHandler::DoAllowAll() { - using robotstxtcfg::id_anybot; - - // Drop all bots to default - SupportedBotIds.insert(id_anybot); - for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) { - if (IsBotIdSupported(botId)) { - ClearInternal(botId); - OptimizedBotIdToStoredBotId[botId] = id_anybot; - LoadedBotIds.insert(botId); - } - } - - // Initialize anybot with "allow all" rule - AddRule(id_anybot, "/", 'A'); - GetInfo(id_anybot).AllowAll = true; - SaveRulesToBuffer(); -} - -void TPrefixTreeRobotsTxtRulesHandler::DoDisallowAll() { - for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) { - if (!IsBotIdSupported(botId)) - continue; - ClearInternal(botId); - if (botId == robotstxtcfg::id_anybot) { - auto& botInfo = GetInfo(botId); - AddRule(botId, "/", 'D'); - botInfo.DisallowAll = true; - SaveRulesToBuffer(); - } else { - OptimizedBotIdToStoredBotId[botId] = robotstxtcfg::id_anybot; - } - LoadedBotIds.insert(botId); - } -} - -const char* TPrefixTreeRobotsTxtRulesHandler::IsDisallow(const ui32 botId, const char* s, bool useAny) const { - const auto id = GetMappedBotId(botId, useAny); - if (!id) - return nullptr; - - const auto& botInfo = BotIdToPrefixTreeBotInfo[*id]; - if (botInfo && IsDisallowAll(*id, useAny)) { - int index = (const_cast<TPrefixTreeRobotsTxtRulesHandler*>(this))->FindRuleAll(*botInfo, 'D'); - if (index < 0) { //o_O - return botInfo->Rules.Get()[0]; - } else { - return botInfo->Rules.Get()[index]; - } - } - - return GetRule(*id, s, 'D'); -} - -const char* TPrefixTreeRobotsTxtRulesHandler::IsAllow(const ui32 botId, const char* s) const { - const auto id = GetMappedBotId(botId, true); - if (auto p = GetRule(*id, s, 'A')) - return p; - return GetRule(*id, s, 'D') ? nullptr : "/"; -} - -int TPrefixTreeRobotsTxtRulesHandler::StrLenWithoutStars(const char* s) { - int len = 0; - - for (size_t index = 0; s[index]; ++index) { - if (s[index] != '*') { - ++len; - } - } - - return len; -} - -int TPrefixTreeRobotsTxtRulesHandler::TraceBuffer(const ui32 botId, int countRules, const TArrayHolder<TRuleInfo>* ruleInfos) { - CheckBotIdValidity(botId); - auto& prefixBotInfo = GetInfo(botId); - TBotInfo& botInfo = BotIdToInfo[botId]; - - bool store = countRules >= 0; - if (store) { - prefixBotInfo.Rules.Reset(new char*[prefixBotInfo.RulesSize = countRules]); - } - - int beg = -1, n = 0; - *((int*)prefixBotInfo.Buffer.Get()) = prefixBotInfo.BufferSize; - for (size_t i = sizeof(prefixBotInfo.BufferPosition); i < prefixBotInfo.BufferPosition; ++i) - if (prefixBotInfo.Buffer.Get()[i] == '\n' || prefixBotInfo.Buffer.Get()[i] == 0) { - if (beg < 0 || beg + 1 == (int)i) - continue; - - char* s = prefixBotInfo.Buffer.Get() + beg; - if (store) { - switch (*s) { - case 'H': - HostDirective = s + 1; - break; - case 'S': - SiteMaps.insert(s + 1); - break; - case 'C': - ParseCrawlDelay(s + 1, botInfo.CrawlDelay); - break; - case 'P': - CleanParams.insert(s + 1); - break; - default: - prefixBotInfo.Rules.Get()[n] = s + 1; - (*ruleInfos).Get()[n].Len = StrLenWithoutStars(s + 1); - (*ruleInfos).Get()[n].Allow = toupper(*s) == 'A'; - - prefixBotInfo.HasAllow |= toupper(*s) == 'A'; - prefixBotInfo.HasDisallow |= toupper(*s) == 'D'; - break; - } - } - n += (*s != 'H' && *s != 'S' && *s != 'C' && *s != 'P'); - beg = -1; - } else if (beg < 0) - beg = i; - - return n; -} - -int TPrefixTreeRobotsTxtRulesHandler::FindRuleAll(const TPrefixTreeBotInfo& prefixBotInfo, const char neededType) { - static const char* all[] = {"*", "/", "*/", "/*", "*/*"}; - for (int ruleNumber = prefixBotInfo.RulesSize - 1; ruleNumber >= 0; --ruleNumber) { - const char* curRule = prefixBotInfo.Rules.Get()[ruleNumber]; - char ruleType = *(curRule - 1); - - if (strlen(curRule) > 3) - break; - if (neededType != ruleType) - continue; - - for (size_t i = 0; i < sizeof(all) / sizeof(char*); ++i) - if (strcmp(all[i], curRule) == 0) - return ruleNumber; - } - return -1; -} - -bool TPrefixTreeRobotsTxtRulesHandler::HasDisallowRulePrevAllowAll(const TPrefixTreeBotInfo& prefixBotInfo, int ruleAllAllow) { - for (int ruleNumber = ruleAllAllow - 1; ruleNumber >= 0; --ruleNumber) { - const char* curRule = prefixBotInfo.Rules.Get()[ruleNumber]; - char ruleType = *(curRule - 1); - if (tolower(ruleType) == 'd') - return true; - } - return false; -} - -bool TPrefixTreeRobotsTxtRulesHandler::CheckAllowDisallowAll(const ui32 botId, const bool checkDisallow) { - CheckBotIdValidity(botId); - - auto& botInfo = GetInfo(botId); - - if (botInfo.RulesSize == 0) - return !checkDisallow; - if (botInfo.RulesPosition <= 0) - return 0; - - if (checkDisallow) - return !botInfo.HasAllow && FindRuleAll(botInfo, 'D') >= 0; - int ruleAllAllow = FindRuleAll(botInfo, 'A'); - if (ruleAllAllow == -1) - return !botInfo.HasDisallow; - return !HasDisallowRulePrevAllowAll(botInfo, ruleAllAllow); -} - -void TPrefixTreeRobotsTxtRulesHandler::SortRules( - TPrefixTreeBotInfo& prefixBotInfo, - size_t count, - const TArrayHolder<TRuleInfo>* ruleInfos) { - TVector<size_t> indexes(count); - for (size_t index = 0; index < count; ++index) - indexes[index] = index; - - TRulesSortFunc sortFunc(ruleInfos); - std::sort(indexes.begin(), indexes.end(), sortFunc); - - TArrayHolder<char*> workingCopy; - workingCopy.Reset(new char*[count]); - - for (size_t index = 0; index < count; ++index) - workingCopy.Get()[index] = prefixBotInfo.Rules.Get()[index]; - for (size_t index = 0; index < count; ++index) - prefixBotInfo.Rules.Get()[index] = workingCopy.Get()[indexes[index]]; -} - -void TPrefixTreeRobotsTxtRulesHandler::SaveRulesToBuffer() { - // as sitemaps, clean-params and HostDirective from prefix tree was deleted - for (const auto& sitemap: SiteMaps) - AddRule(robotstxtcfg::id_anybot, sitemap, 'S'); - for (const auto& param : CleanParams) - AddRule(robotstxtcfg::id_anybot, param, 'P'); - if (!HostDirective.empty()) - AddRule(robotstxtcfg::id_anybot, HostDirective, 'H'); -} - -void TPrefixTreeRobotsTxtRulesHandler::SaveRulesFromBuffer(const ui32 botId) { - CheckBotIdValidity(botId); - - auto& botInfo = GetInfo(botId); - - TArrayHolder<TRuleInfo> ruleInfos; - - int n = TraceBuffer(botId, -1, nullptr), countPrefix = 0; - ruleInfos.Reset(new TRuleInfo[n]); - botInfo.RulesPosition = TraceBuffer(botId, n, &ruleInfos); - assert(botInfo.RulesPosition == n); - - SortRules(botInfo, n, &ruleInfos); - - botInfo.DisallowAll = CheckAllowDisallowAll(botId, true); - botInfo.AllowAll = CheckAllowDisallowAll(botId, false); - - for (int i = 0; i < n; ++i) - countPrefix += !!isupper(*(botInfo.Rules.Get()[i] - 1)); - - botInfo.PrefixRules.Init(countPrefix); - botInfo.ComplexRules.Reset(new char**[botInfo.ComplexRulesSize = n - countPrefix]); - botInfo.ComplexRulesPosition = 0; - - for (int i = 0; i < n; ++i) { - char* s = botInfo.Rules.Get()[i]; - if (isupper(*(s - 1))) - botInfo.PrefixRules.Add(s, i); - else - botInfo.ComplexRules.Get()[botInfo.ComplexRulesPosition++] = &botInfo.Rules.Get()[i]; - } - botInfo.PrefixRules.Compress(); -} - -void TPrefixTreeRobotsTxtRulesHandler::AfterParse(const ui32 botId) { - CheckBotIdValidity(botId); - - auto& botInfo = GetInfo(botId); - - ResizeBuffer(botId, botInfo.BufferPosition); - SaveRulesFromBuffer(botId); - - if (botInfo.RulesPosition == 0) { - AddRule(botId, "/", 'A'); - } -} - -TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeBotInfo& TPrefixTreeRobotsTxtRulesHandler::GetInfo(ui32 botId) { - Y_ENSURE(botId < robotstxtcfg::max_botid); - auto& res = BotIdToPrefixTreeBotInfo[botId]; - if (!res) { - res = MakeHolder<TPrefixTreeBotInfo>(); - } - return *res; -} - -bool TPrefixTreeRobotsTxtRulesHandler::CheckRule(const char* s, const char* rule) { - const char* r = rule; - const char* s_end = s + strlen(s); - const char* r_end = r + strlen(r); - // assert( r && !strstr(r, "**") ); - for (; *s; ++s) { - if ((s_end - s + 1) * 2 < (r_end - r)) - return 0; - while (*r == '*') - ++r; - - if (*s == *r) { - ++r; - } else { - while (r != rule && *r != '*') - --r; - - if (*r != '*') - return 0; - if (*r == '*') - ++r; - if (*r == *s) - ++r; - } - } - return !*r || (!*(r + 1) && *r == '*'); -} - -bool TPrefixTreeRobotsTxtRulesHandler::IsFull(ui32 botId, size_t length) const { - Y_ENSURE(botId < robotstxtcfg::max_botid); - const auto& botInfo = BotIdToPrefixTreeBotInfo[botId]; - if (!botInfo) { - return false; - } - - return (size_t(botInfo->RulesPosition) >= MaxRulesNumber) || (botInfo->BufferPosition + length + 300 > size_t(RobotsMaxSize)); -} - -bool TPrefixTreeRobotsTxtRulesHandler::IsFullTotal() const { - size_t allBotsRulesCount = 0; - size_t allBotsBufferSize = 0; - - for (const auto& botInfo : BotIdToPrefixTreeBotInfo) { - if (botInfo) { - allBotsRulesCount += botInfo->RulesPosition; - allBotsBufferSize += botInfo->BufferPosition; - } - } - - return (allBotsRulesCount >= MaxRulesNumber) || (allBotsBufferSize + 300 > size_t(RobotsMaxSize)); -} - -size_t TPrefixTreeRobotsTxtRulesHandler::GetPacked(const char*& data) const { - Y_STATIC_THREAD(TBuffer) - packedRepresentation; - - // calculate size, needed for packed data - size_t totalPackedSize = sizeof(ui32); // num of botids - ui32 numOfSupportedBots = 0; - - for (size_t botId = 0; botId < robotstxtcfg::max_botid; ++botId) { - if (!IsBotIdSupported(botId)) { - continue; - } - - const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; - // botId + packedDataSize + packedData - totalPackedSize += sizeof(ui32) + (botInfo ? botInfo->BufferPosition : sizeof(ui32)); - ++numOfSupportedBots; - } - - ((TBuffer&)packedRepresentation).Reserve(totalPackedSize); - - // fill packed data - char* packedPtr = ((TBuffer&)packedRepresentation).Data(); - - *((ui32*)packedPtr) = numOfSupportedBots; - packedPtr += sizeof(ui32); - - for (size_t botId = 0; botId < robotstxtcfg::max_botid; ++botId) { - if (!IsBotIdSupported(botId)) { - continue; - } - - const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; - memcpy(packedPtr, &botId, sizeof(ui32)); - packedPtr += sizeof(ui32); - - if (botInfo) { - *((ui32*)botInfo->Buffer.Get()) = botInfo->BufferPosition; - memcpy(packedPtr, botInfo->Buffer.Get(), botInfo->BufferPosition); - packedPtr += botInfo->BufferPosition; - } else { - // In absense of bot info we serialize only size of its buffer, which is 4 because it takes 4 bytes - ui32 emptyBufferPosition = sizeof(ui32); - memcpy(packedPtr, &emptyBufferPosition, sizeof(ui32)); - packedPtr += sizeof(ui32); - } - } - - data = ((TBuffer&)packedRepresentation).Data(); - return totalPackedSize; -} - -void TPrefixTreeRobotsTxtRulesHandler::LoadPacked(const char* botsData, const char* botsDataEnd) { - Clear(); - - if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { - ythrow yexception() << "Buffer overflow"; - } - - ui32 numOfBots = *((ui32*)botsData); - botsData += sizeof(ui32); - - for (ui32 botIndex = 0; botIndex < numOfBots; ++botIndex) { - if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { - ythrow yexception() << "Buffer overflow"; - } - - ui32 botId = 0; - memcpy(&botId, botsData, sizeof(ui32)); - botsData += sizeof(ui32); - - // skip bot id's, that not supported for now - if (botId >= robotstxtcfg::max_botid || !IsBotIdSupported(botId)) { - if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { - ythrow yexception() << "Buffer overflow"; - } - - ui32 oneBotPackedSize = 0; - memcpy(&oneBotPackedSize, botsData, sizeof(ui32)); - botsData += oneBotPackedSize; - - continue; - } - - //SupportedBotIds.insert(botId); - - auto& botInfo = GetInfo(botId); - - if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { - ythrow yexception() << "Buffer overflow"; - } - - static_assert(sizeof(botInfo.BufferSize) == sizeof(ui32), "BufferSize must be 4 bytes"); - static_assert(sizeof(botInfo.BufferPosition) == sizeof(ui32), "BufferPosition must be 4 bytes"); - - memcpy(&botInfo.BufferSize, botsData, sizeof(ui32)); - memcpy(&botInfo.BufferPosition, botsData, sizeof(ui32)); - - if (Y_UNLIKELY(botsDataEnd != nullptr && (botsData + botInfo.BufferSize) > botsDataEnd)) { - ythrow yexception() << "Buffer overflow"; - } - - botInfo.Buffer.Reset(new char[botInfo.BufferSize]); - memcpy(botInfo.Buffer.Get(), botsData, botInfo.BufferSize); - SaveRulesFromBuffer(botId); - - if (botInfo.BufferSize > (int)sizeof(ui32)) { // empty data for robots means, that we don't have section for this bot - LoadedBotIds.insert(botId); - } - - botsData += botInfo.BufferSize; - } - - OptimizeSize(); -} - -void TPrefixTreeRobotsTxtRulesHandler::Dump(const ui32 botId, FILE* dumpFile) { - if (!dumpFile) - dumpFile = stderr; - fprintf(dumpFile, "User-Agent: %s\n", robotstxtcfg::GetFullName(botId).data()); - for (TRobotsTxtRulesIterator it = GetRulesIterator(botId); it.HasRule(); it.Next()) - fprintf(dumpFile, "%s: %s\n", DirTypeToName(it.GetRuleType()), it.GetInitialRule().data()); -} - -void TPrefixTreeRobotsTxtRulesHandler::Dump(const ui32 botId, IOutputStream& out) { - out << "User-Agent: " << robotstxtcfg::GetFullName(botId) << Endl; - for (TRobotsTxtRulesIterator it = GetRulesIterator(botId); it.HasRule(); it.Next()) - out << DirTypeToName(it.GetRuleType()) << ": " << it.GetInitialRule() << Endl; -} diff --git a/library/cpp/robots_txt/robots_txt.h b/library/cpp/robots_txt/robots_txt.h deleted file mode 100644 index 5ee48fb14f..0000000000 --- a/library/cpp/robots_txt/robots_txt.h +++ /dev/null @@ -1,605 +0,0 @@ -#pragma once - -#include "constants.h" -#include "robots_txt_parser.h" -#include "prefix_tree.h" -#include "robotstxtcfg.h" - -#include <util/generic/noncopyable.h> -#include <util/generic/map.h> -#include <util/generic/maybe.h> -#include <util/generic/ptr.h> -#include <util/generic/set.h> - -#include <array> -#include <utility> - - -enum EDirectiveType { - USER_AGENT = 1, - DISALLOW = 2, - ALLOW = 3, - HOST = 4, - SITEMAP = 5, - CRAWL_DELAY = 6, - CLEAN_PARAM = 7, - UNKNOWN = 9, -}; - -enum EFormatErrorType { - ERROR_RULE_NOT_SLASH = 1, - ERROR_ASTERISK_MULTI = 2, - ERROR_HOST_MULTI = 3, - ERROR_ROBOTS_HUGE = 4, - ERROR_RULE_BEFORE_USER_AGENT = 5, - ERROR_RULE_HUGE = 6, - ERROR_HOST_FORMAT = 7, - ERROR_TRASH = 8, - ERROR_SITEMAP_FORMAT = 9, - ERROR_CRAWL_DELAY_FORMAT = 10, - ERROR_CRAWL_DELAY_MULTI = 11, - ERROR_CLEAN_PARAM_FORMAT = 12, - - WARNING_EMPTY_RULE = 30, - WARNING_SUSPECT_SYMBOL = 31, - WARNING_UNKNOWN_FIELD = 33, - WARNING_UPPER_REGISTER = 34, - WARNING_SITEMAP = 35, -}; - -class TRobotsTxtRulesIterator { -private: - const char* Begin = nullptr; - const char* End = nullptr; - -public: - TRobotsTxtRulesIterator() = default; - TRobotsTxtRulesIterator(const char* begin, const char* end); - void Next(); - bool HasRule() const; - const char* GetRule() const; - TString GetInitialRule() const; // unlike GetRule(), it neither omits trailing '$' nor adds redundant '*' - EDirectiveType GetRuleType() const; - - static EDirectiveType CharToDirType(char ch); -}; - -class TRobotsTxtRulesHandlerBase { -public: - typedef TVector<std::pair<EFormatErrorType, int>> TErrorVector; - - TRobotsTxtRulesHandlerBase( - TBotIdSet supportedBotIds, - int robotsMaxSize, - int maxRulesNumber, - bool saveDataForAnyBot); - - TRobotsTxtRulesHandlerBase( - const TSet<ui32>& supportedBotIds, - int robotsMaxSize, - int maxRulesNumber, - bool saveDataForAnyBot); - - virtual ~TRobotsTxtRulesHandlerBase(); - - int GetCrawlDelay(ui32 botId, bool* realInfo = nullptr) const; - int GetMinCrawlDelay(int defaultCrawlDelay = -1) const; - bool IsHandlingErrors() const; - const TString& GetHostDirective() const; - const TVector<TString> GetSiteMaps() const; - const TVector<TString> GetCleanParams() const; - const TErrorVector& GetErrors() const; - TVector<int> GetAcceptedLines(ui32 botId = robotstxtcfg::id_yandexbot) const; - - template <class THostHandler> - static int ParseRules(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, THostHandler* hostHandler, const char* host = nullptr); - static inline void ClearAllExceptCrossSection(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, ui32 botId); - static int CheckHost(const char* host); - static int CheckSitemapUrl(const char* url, const char* host, TString& modifiedUrl); - static int CheckRule(const char* value, int line, TRobotsTxtRulesHandlerBase* rulesHandler); - static int CheckAndNormCleanParam(TString& s); - static int ParseCrawlDelay(const char* value, int& crawlDelay); - static EDirectiveType NameToDirType(const char* d); - static const char* DirTypeToName(EDirectiveType t); - - void SetErrorsHandling(bool handleErrors); - void SetHostDirective(const char* hostDirective); - void SetCrawlDelay(ui32 botId, int crawlDelay); - void AddAcceptedLine(ui32 line, const TBotIdSet& botIds, bool isCrossSection); - void AddSiteMap(const char* sitemap); - void AddCleanParam(const char* cleanParam); - bool AddRuleWithErrorCheck(ui32 botId, TStringBuf rule, char type, TRobotsTxtParser& parser); - int OnHost(ui32 botId, TRobotsTxtParser& parser, const char* value, TRobotsTxtRulesHandlerBase*& rulesHandler); - - virtual void Clear(); - virtual bool IsAllowAll(ui32 botId) const = 0; - virtual bool IsAllowAll() const = 0; - virtual bool IsDisallowAll(ui32 botId, bool useAny = true) const = 0; - virtual bool IsDisallowAll() const = 0; - virtual const char* IsDisallow(ui32 botId, const char* s, bool useAny = true) const = 0; - virtual const char* IsAllow(ui32 botId, const char* s) const = 0; - virtual TRobotsTxtRulesIterator GetRulesIterator(ui32 botId) const = 0; - virtual void Dump(ui32 botId, FILE* logFile) = 0; - virtual void Dump(ui32 botId, IOutputStream& out) = 0; - virtual bool Empty(ui32 botId) const = 0; - virtual void LoadPacked(const char* botsData, const char* botsDataEnd = nullptr) = 0; - virtual size_t GetPacked(const char*& data) const = 0; - virtual void AfterParse(ui32 botId) = 0; - virtual void DoAllowAll() = 0; - virtual void DoDisallowAll() = 0; - bool IsBotIdLoaded(ui32 botId) const; - bool IsBotIdSupported(ui32 botId) const; - ui32 GetNotOptimizedBotId(ui32 botId) const; - TMaybe<ui32> GetMappedBotId(ui32 botId, bool useAny = true) const; - -protected: - void CheckBotIdValidity(ui32 botId) const; - virtual bool OptimizeSize() = 0; - -private: - bool HandleErrors; - -protected: - struct TBotInfo { - int CrawlDelay; - - TBotInfo() - : CrawlDelay(-1) - { - } - }; - - TBotIdSet LoadedBotIds; - TSet<TString> SiteMaps; - TSet<TString> CleanParams; - TString HostDirective; - TErrorVector Errors; - typedef std::pair<ui32, ui32> TBotIdAcceptedLine; - TVector<TBotIdAcceptedLine> AcceptedLines; - TVector<ui32> CrossSectionAcceptedLines; - - TVector<TBotInfo> BotIdToInfo; - int CrawlDelay; - size_t RobotsMaxSize; - size_t MaxRulesNumber; - bool SaveDataForAnyBot; - - TBotIdSet SupportedBotIds; - std::array<ui8, robotstxtcfg::max_botid> OptimizedBotIdToStoredBotId; - - virtual bool IsFull(ui32 botId, size_t length) const = 0; - virtual bool IsFullTotal() const = 0; - virtual bool AddRule(ui32 botId, TStringBuf rule, char type) = 0; - //parts of ParseRules - inline static void CheckRobotsLines(TRobotsTxtRulesHandlerBase* rulesHandler, TVector<int>& nonRobotsLines); - inline static void CheckAsterisk(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber, bool& wasAsterisk); - inline static bool CheckWasUserAgent(TRobotsTxtRulesHandlerBase* rulesHandler, bool wasUserAgent, bool& ruleBeforeUserAgent, bool& wasRule, ui32 lineNumber); - inline static bool CheckRuleNotSlash(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber); - inline static bool CheckSupportedBots(const TBotIdSet& currentBotIds, TBotIdSet& wasRuleForBot, const TBotIdSet& isSupportedBot); - inline static bool CheckEmptyRule(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, EDirectiveType& type, ui32 lineNumber); - inline static bool ProcessSitemap(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, const char* value, const char* host); - inline static bool ProcessCleanParam(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, TString& value); - inline static bool AddRules( - TRobotsTxtRulesHandlerBase* rulesHandler, - TRobotsTxtParser& parser, - const char* value, - char type, - const TBotIdSet& currentBotIds, - const TBotIdSet& isSupportedBot); - - inline static bool ProcessCrawlDelay( - TRobotsTxtRulesHandlerBase* rulesHandler, - TRobotsTxtParser& parser, - const TBotIdSet& currentBotIds, - const TBotIdSet& isSupportedBot, - const char* value); - - inline static void ProcessUserAgent( - TRobotsTxtRulesHandlerBase* rulesHandler, - TRobotsTxtParser& parser, - const TBotIdSet& currentBotIds, - TBotIdSet& wasRuleForBot, - TBotIdSet& isSupportedBot, - TVector<ui32>& botIdToMaxAppropriateUserAgentNameLength, - const char* value); - - bool CheckRobot( - const char* userAgent, - TBotIdSet& botIds, - const TVector<ui32>* botIdToMaxAppropriateUserAgentNameLength = nullptr) const; - - virtual void ClearInternal(ui32 botId); - - void AddError(EFormatErrorType type, int line); - - void ResetOptimized() noexcept; -}; - -class TPrefixTreeRobotsTxtRulesHandler: public TRobotsTxtRulesHandlerBase, TNonCopyable { -private: - static const int INIT_BUFFER_SIZE = 1 << 6; - - struct TRuleInfo { - size_t Len; - bool Allow; - }; - - bool IsFull(ui32 botId, size_t length) const override; - bool IsFullTotal() const override; - bool AddRule(ui32 botId, TStringBuf rule, char type) override; - const char* GetRule(ui32 botId, const char* s, char type) const; - void ResizeBuffer(ui32 botId, int newSize); - void SaveRulesFromBuffer(ui32 botId); - int TraceBuffer(ui32 botId, int countRules, const TArrayHolder<TRuleInfo>* ruleInfos); - bool CheckAllowDisallowAll(ui32 botId, bool checkDisallow); - void SaveRulesToBuffer(); - int StrLenWithoutStars(const char* s); - -protected: - class TRulesSortFunc { - private: - const TArrayHolder<TRuleInfo>* RuleInfos; - - public: - TRulesSortFunc(const TArrayHolder<TRuleInfo>* ruleInfos) - : RuleInfos(ruleInfos) - { - } - bool operator()(const size_t& lhs, const size_t& rhs) { - const TRuleInfo& left = (*RuleInfos).Get()[lhs]; - const TRuleInfo& right = (*RuleInfos).Get()[rhs]; - return (left.Len == right.Len) ? left.Allow && !right.Allow : left.Len > right.Len; - } - }; - - struct TPrefixTreeBotInfo { - bool DisallowAll = false; - bool AllowAll = false; - bool HasDisallow = false; - bool HasAllow = false; - - TArrayHolder<char> Buffer{new char[INIT_BUFFER_SIZE]}; - ui32 BufferPosition = sizeof(BufferPosition); - int BufferSize = INIT_BUFFER_SIZE; - - TArrayHolder<char*> Rules = nullptr; - int RulesPosition = 0; - int RulesSize = 0; - - TArrayHolder<char**> ComplexRules = nullptr; - int ComplexRulesPosition = 0; - int ComplexRulesSize = 0; - - TPrefixTree PrefixRules {0}; - }; - - std::array<THolder<TPrefixTreeBotInfo>, robotstxtcfg::max_botid> BotIdToPrefixTreeBotInfo; - - TPrefixTreeBotInfo& GetInfo(ui32 botId); - static bool CheckRule(const char* s, const char* rule); - void ClearInternal(ui32 botId) override; - bool OptimizeSize() override; - -private: - void SortRules(TPrefixTreeBotInfo& prefixBotInfo, size_t count, const TArrayHolder<TRuleInfo>* ruleInfos); - bool HasDisallowRulePrevAllowAll(const TPrefixTreeBotInfo& prefixBotInfo, int ruleAllAllow); - int FindRuleAll(const TPrefixTreeBotInfo& prefixBotInfo, char neededType); - -public: - TPrefixTreeRobotsTxtRulesHandler( - TBotIdSet supportedBotIds = robotstxtcfg::defaultSupportedBotIds, - int robotsMaxSize = robots_max, - int maxRulesCount = -1, - bool saveDataForAnyBot = true); - - TPrefixTreeRobotsTxtRulesHandler( - std::initializer_list<ui32> supportedBotIds, - int robotsMaxSize = robots_max, - int maxRulesCount = -1, - bool saveDataForAnyBot = true); - - TPrefixTreeRobotsTxtRulesHandler( - const TSet<ui32>& supportedBotIds, - int robotsMaxSize = robots_max, - int maxRulesCount = -1, - bool saveDataForAnyBot = true); - - void Clear() override; - void AfterParse(ui32 botId) override; - bool IsAllowAll(ui32 botId) const override; - bool IsAllowAll() const override; - bool IsDisallowAll(ui32 botId, bool useAny = true) const override; - bool IsDisallowAll() const override; - const char* IsDisallow(ui32 botId, const char* s, bool useAny = true) const override; - const char* IsAllow(ui32 botId, const char* s) const override; - TRobotsTxtRulesIterator GetRulesIterator(ui32 botId) const override; - void DoAllowAll() override; - void DoDisallowAll() override; - bool Empty(ui32 botId) const override; - - void LoadPacked(const char* botsData, const char* botsDataEnd = nullptr) override; - size_t GetPacked(const char*& data) const override; - void Dump(ui32 botId, FILE* logFile) override; - void Dump(ui32 botId, IOutputStream& out) override; - size_t GetMemorySize(); -}; - -using TRobotsTxt = TPrefixTreeRobotsTxtRulesHandler; - -void TRobotsTxtRulesHandlerBase::ClearAllExceptCrossSection(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, ui32 botId) { - rulesHandler->ClearInternal(botId); - if (botId == robotstxtcfg::id_anybot) { - // as sitemaps, clean-params and HostDirective from prefix tree was deleted - for (const auto& sitemap : rulesHandler->SiteMaps) { - rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, sitemap, 'S', parser); - } - for (const auto& param : rulesHandler->CleanParams) { - rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, param, 'P', parser); - } - if (!rulesHandler->HostDirective.empty()) { - rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, rulesHandler->HostDirective, 'H', parser); - } - } -} - -void TRobotsTxtRulesHandlerBase::CheckRobotsLines(TRobotsTxtRulesHandlerBase* rulesHandler, TVector<int>& nonRobotsLines) { - if (rulesHandler->IsHandlingErrors()) { - for (size_t i = 0; i < nonRobotsLines.size(); ++i) - rulesHandler->AddError(ERROR_TRASH, nonRobotsLines[i]); - nonRobotsLines.clear(); - } -} - -void TRobotsTxtRulesHandlerBase::CheckAsterisk(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber, bool& wasAsterisk) { - if (strcmp(value, "*") == 0) { - if (wasAsterisk) - rulesHandler->AddError(ERROR_ASTERISK_MULTI, lineNumber); - wasAsterisk = true; - } -} - -bool TRobotsTxtRulesHandlerBase::CheckWasUserAgent(TRobotsTxtRulesHandlerBase* rulesHandler, bool wasUserAgent, bool& ruleBeforeUserAgent, bool& wasRule, ui32 lineNumber) { - if (wasUserAgent) { - wasRule = true; - return false; - } - if (!ruleBeforeUserAgent) { - ruleBeforeUserAgent = true; - rulesHandler->AddError(ERROR_RULE_BEFORE_USER_AGENT, lineNumber); - } - return true; -} - -bool TRobotsTxtRulesHandlerBase::CheckRuleNotSlash(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber) { - if (*value && *value != '/' && *value != '*') { - rulesHandler->AddError(ERROR_RULE_NOT_SLASH, lineNumber); - return true; - } - return false; -} - -bool TRobotsTxtRulesHandlerBase::CheckSupportedBots( - const TBotIdSet& currentBotIds, - TBotIdSet& wasRuleForBot, - const TBotIdSet& isSupportedBot) -{ - bool hasAtLeastOneSupportedBot = false; - for (ui32 currentBotId : currentBotIds) { - wasRuleForBot.insert(currentBotId); - hasAtLeastOneSupportedBot = hasAtLeastOneSupportedBot || isSupportedBot.contains(currentBotId); - } - return hasAtLeastOneSupportedBot; -} - -bool TRobotsTxtRulesHandlerBase::CheckEmptyRule(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, EDirectiveType& type, ui32 lineNumber) { - if (value && strlen(value) == 0) { - rulesHandler->AddError(WARNING_EMPTY_RULE, lineNumber); - type = type == ALLOW ? DISALLOW : ALLOW; - return true; - } - return false; -} - -bool TRobotsTxtRulesHandlerBase::AddRules( - TRobotsTxtRulesHandlerBase* rulesHandler, - TRobotsTxtParser& parser, - const char* value, - char type, - const TBotIdSet& currentBotIds, - const TBotIdSet& isSupportedBot) -{ - for (ui32 currentBotId : currentBotIds) { - if (!isSupportedBot.contains(currentBotId)) - continue; - if (!rulesHandler->AddRuleWithErrorCheck(currentBotId, value, type, parser)) - return true; - } - return false; -} - -bool TRobotsTxtRulesHandlerBase::ProcessSitemap(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, const char* value, const char* host) { - TString modifiedUrl; - if (!CheckSitemapUrl(value, host, modifiedUrl)) - rulesHandler->AddError(ERROR_SITEMAP_FORMAT, parser.GetLineNumber()); - else { - rulesHandler->AddSiteMap(modifiedUrl.data()); - if (!rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, modifiedUrl.data(), 'S', parser)) - return true; - } - return false; -} - -bool TRobotsTxtRulesHandlerBase::ProcessCleanParam(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, TString& value) { - if (!CheckAndNormCleanParam(value)) - rulesHandler->AddError(ERROR_CLEAN_PARAM_FORMAT, parser.GetLineNumber()); - else { - rulesHandler->AddCleanParam(value.data()); - if (!rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, value.data(), 'P', parser)) - return true; - } - return false; -} - -bool TRobotsTxtRulesHandlerBase::ProcessCrawlDelay( - TRobotsTxtRulesHandlerBase* rulesHandler, - TRobotsTxtParser& parser, - const TBotIdSet& currentBotIds, - const TBotIdSet& isSupportedBot, - const char* value) { - for (ui32 currentBotId : currentBotIds) { - if (!isSupportedBot.contains(currentBotId)) - continue; - if (rulesHandler->BotIdToInfo[currentBotId].CrawlDelay >= 0) { - rulesHandler->AddError(ERROR_CRAWL_DELAY_MULTI, parser.GetLineNumber()); - break; - } - int crawlDelay = -1; - if (!ParseCrawlDelay(value, crawlDelay)) - rulesHandler->AddError(ERROR_CRAWL_DELAY_FORMAT, parser.GetLineNumber()); - else { - rulesHandler->SetCrawlDelay(currentBotId, crawlDelay); - if (!rulesHandler->AddRuleWithErrorCheck(currentBotId, value, 'C', parser)) - return true; - } - } - return false; -} - -void TRobotsTxtRulesHandlerBase::ProcessUserAgent( - TRobotsTxtRulesHandlerBase* rulesHandler, - TRobotsTxtParser& parser, - const TBotIdSet& currentBotIds, - TBotIdSet& wasSupportedBot, - TBotIdSet& isSupportedBot, - TVector<ui32>& botIdToMaxAppropriateUserAgentNameLength, - const char* value) -{ - ui32 userAgentNameLength = (ui32)strlen(value); - - for (ui32 currentBotId : currentBotIds) { - bool userAgentNameLonger = userAgentNameLength > botIdToMaxAppropriateUserAgentNameLength[currentBotId]; - bool userAgentNameSame = userAgentNameLength == botIdToMaxAppropriateUserAgentNameLength[currentBotId]; - - if (!wasSupportedBot.contains(currentBotId) || userAgentNameLonger) - ClearAllExceptCrossSection(parser, rulesHandler, currentBotId); - - wasSupportedBot.insert(currentBotId); - if (userAgentNameLonger || userAgentNameSame) { - isSupportedBot.insert(currentBotId); // Allow multiple blocks for the same user agent - } - botIdToMaxAppropriateUserAgentNameLength[currentBotId] = Max(userAgentNameLength, botIdToMaxAppropriateUserAgentNameLength[currentBotId]); - } -} - -template <class THostHandler> -int TRobotsTxtRulesHandlerBase::ParseRules(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, THostHandler* hostHandler, const char* host) { - rulesHandler->Clear(); - - TBotIdSet wasSupportedBot; - TBotIdSet wasRuleForBot; - bool wasAsterisk = false; - TVector<int> nonRobotsLines; - TVector<ui32> botIdToMaxAppropriateUserAgentNameLength(robotstxtcfg::max_botid, 0); - static char all[] = "/"; - EDirectiveType prevType = USER_AGENT; - while (parser.HasRecord()) { - TRobotsTxtRulesRecord record = parser.NextRecord(); - bool wasUserAgent = false; - bool isRobotsRecordUseful = false; - TBotIdSet isSupportedBot; - TBotIdSet currentBotIds; - TString field; - TString value; - bool ruleBeforeUserAgent = false; - int ret = 0; - bool wasRule = false; - bool wasBlank = false; - while (record.NextPair(field, value, isRobotsRecordUseful && rulesHandler->IsHandlingErrors(), nonRobotsLines, &wasBlank)) { - CheckRobotsLines(rulesHandler, nonRobotsLines); - EDirectiveType type = NameToDirType(field.data()); - EDirectiveType typeBeforeChange = type; - - if ((prevType != type || wasBlank) && type == USER_AGENT) { - currentBotIds.clear(); - } - prevType = type; - - switch (type) { - case USER_AGENT: - if (wasUserAgent && wasRule) { - wasRule = false; - currentBotIds.clear(); - isSupportedBot.clear(); - } - wasUserAgent = true; - value.to_lower(); - CheckAsterisk(rulesHandler, value.data(), parser.GetLineNumber(), wasAsterisk); - isRobotsRecordUseful = rulesHandler->CheckRobot(value.data(), currentBotIds, &botIdToMaxAppropriateUserAgentNameLength); - if (isRobotsRecordUseful) - ProcessUserAgent(rulesHandler, parser, currentBotIds, wasSupportedBot, isSupportedBot, botIdToMaxAppropriateUserAgentNameLength, value.data()); - break; - - case DISALLOW: - case ALLOW: - if (CheckWasUserAgent(rulesHandler, wasUserAgent, ruleBeforeUserAgent, wasRule, parser.GetLineNumber())) - break; - if (CheckRuleNotSlash(rulesHandler, value.data(), parser.GetLineNumber())) - break; - CheckRule(value.data(), parser.GetLineNumber(), rulesHandler); - if (!CheckSupportedBots(currentBotIds, wasRuleForBot, isSupportedBot)) { - break; - } - if (CheckEmptyRule(rulesHandler, value.data(), type, parser.GetLineNumber())) { - value = all; - if (typeBeforeChange == ALLOW) - continue; - } - - if (AddRules(rulesHandler, parser, value.data(), type == ALLOW ? 'A' : 'D', currentBotIds, isSupportedBot)) - return 2; - break; - - case HOST: - value.to_lower(); - ret = hostHandler->OnHost(robotstxtcfg::id_anybot, parser, value.data(), rulesHandler); - if (ret) - return ret; - break; - - case SITEMAP: - if (ProcessSitemap(rulesHandler, parser, value.data(), host)) - return 2; - break; - - case CLEAN_PARAM: - if (ProcessCleanParam(rulesHandler, parser, value)) - return 2; - break; - - case CRAWL_DELAY: - if (ProcessCrawlDelay(rulesHandler, parser, currentBotIds, isSupportedBot, value.data())) - return 2; - break; - - default: - rulesHandler->AddError(WARNING_UNKNOWN_FIELD, parser.GetLineNumber()); - break; - } - bool isCrossSection = type == SITEMAP || type == HOST || type == CLEAN_PARAM; - if (rulesHandler->IsHandlingErrors() && (isRobotsRecordUseful || isCrossSection)) - rulesHandler->AddAcceptedLine(parser.GetLineNumber(), currentBotIds, isCrossSection); - } - } - - for (auto botId : wasSupportedBot) { - rulesHandler->LoadedBotIds.insert(botId); - if (rulesHandler->IsBotIdSupported(botId)) - rulesHandler->AfterParse(botId); - } - - if (!rulesHandler->OptimizeSize()) { - return 2; - } - - return 1; -} diff --git a/library/cpp/robots_txt/robots_txt_parser.cpp b/library/cpp/robots_txt/robots_txt_parser.cpp deleted file mode 100644 index 8e2fe6073d..0000000000 --- a/library/cpp/robots_txt/robots_txt_parser.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "robots_txt_parser.h" -#include <util/generic/string.h> -#include <util/stream/output.h> - -TRobotsTxtParser::TRobotsTxtParser(IInputStream& inputStream) - : InputStream(inputStream) - , LineNumber(0) - , IsLastSymbolCR(false) -{ -} - -int TRobotsTxtParser::GetLineNumber() { - return LineNumber; -} - -const char* TRobotsTxtParser::ReadLine() { - Line = ""; - char c; - - if (IsLastSymbolCR) { - if (!InputStream.ReadChar(c)) - return nullptr; - if (c != '\n') - Line.append(c); - } - - bool hasMoreSymbols; - while (hasMoreSymbols = InputStream.ReadChar(c)) { - if (c == '\r') { - IsLastSymbolCR = true; - break; - } else { - IsLastSymbolCR = false; - if (c == '\n') - break; - Line.append(c); - } - } - if (!hasMoreSymbols && Line.empty()) - return nullptr; - - // BOM UTF-8: EF BB BF - if (0 == LineNumber && Line.size() >= 3 && Line[0] == '\xEF' && Line[1] == '\xBB' && Line[2] == '\xBF') - Line = Line.substr(3, Line.size() - 3); - - ++LineNumber; - int i = Line.find('#'); - if (i == 0) - Line = ""; - else if (i > 0) - Line = Line.substr(0, i); - return Line.data(); -} - -bool TRobotsTxtParser::IsBlankLine(const char* s) { - for (const char* p = s; *p; ++p) - if (!isspace(*p)) - return 0; - return 1; -} - -char* TRobotsTxtParser::Trim(char* s) { - while (isspace(*s)) - ++s; - char* p = s + strlen(s) - 1; - while (s < p && isspace(*p)) - --p; - *(p + 1) = 0; - return s; -} - -inline bool TRobotsTxtParser::IsRobotsLine(const char* s) { - return strchr(s, ':'); -} - -bool TRobotsTxtParser::HasRecord() { - while (!IsRobotsLine(Line.data())) - if (!ReadLine()) - return 0; - return 1; -} - -TRobotsTxtRulesRecord TRobotsTxtParser::NextRecord() { - return TRobotsTxtRulesRecord(*this); -} - -TRobotsTxtRulesRecord::TRobotsTxtRulesRecord(TRobotsTxtParser& parser) - : Parser(parser) -{ -} - -bool TRobotsTxtRulesRecord::NextPair(TString& field, TString& value, bool handleErrors, TVector<int>& nonRobotsLines, bool* wasBlank) { - if (wasBlank) { - *wasBlank = false; - } - while (!Parser.IsRobotsLine(Parser.Line.data())) { - if (!Parser.ReadLine()) - return 0; - if (Parser.IsBlankLine(Parser.Line.data())) { - if (wasBlank) { - *wasBlank = true; - } - continue; - } - if (handleErrors && !Parser.IsRobotsLine(Parser.Line.data())) - nonRobotsLines.push_back(Parser.GetLineNumber()); - } - - char* s = strchr(Parser.Line.begin(), ':'); - *s = 0; - char* p = s + 1; - - field = TRobotsTxtParser::Trim(strlwr(Parser.Line.begin())); - value = TRobotsTxtParser::Trim(p); - return 1; -} diff --git a/library/cpp/robots_txt/robots_txt_parser.h b/library/cpp/robots_txt/robots_txt_parser.h deleted file mode 100644 index 8032d0d20b..0000000000 --- a/library/cpp/robots_txt/robots_txt_parser.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include <algorithm> -#include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/stream/input.h> - -class TRobotsTxtParser; - -class TRobotsTxtRulesRecord { -private: - TRobotsTxtParser& Parser; - -public: - TRobotsTxtRulesRecord(TRobotsTxtParser& parser); - bool NextPair(TString& field, TString& value, bool handleErrors, TVector<int>& nonRobotsLines, bool* wasBlank = nullptr); -}; - -class TRobotsTxtParser { - friend class TRobotsTxtRulesRecord; - -private: - IInputStream& InputStream; - TString Line; - int LineNumber; - bool IsLastSymbolCR; - - const char* ReadLine(); - static bool IsBlankLine(const char*); - static bool IsRobotsLine(const char*); - -public: - static char* Trim(char*); - TRobotsTxtParser(IInputStream& inputStream); - bool HasRecord(); - TRobotsTxtRulesRecord NextRecord(); - int GetLineNumber(); -}; diff --git a/library/cpp/robots_txt/robotstxtcfg.h b/library/cpp/robots_txt/robotstxtcfg.h deleted file mode 100644 index 5ca1682a0c..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#include <library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h> diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 09cfd4b3f1..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-robots_txt-robotstxtcfg) -target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-case_insensitive_string -) -target_sources(cpp-robots_txt-robotstxtcfg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp -) diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 6fe7e7a7ad..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,21 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-robots_txt-robotstxtcfg) -target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-case_insensitive_string -) -target_sources(cpp-robots_txt-robotstxtcfg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp -) diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 6fe7e7a7ad..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,21 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-robots_txt-robotstxtcfg) -target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-case_insensitive_string -) -target_sources(cpp-robots_txt-robotstxtcfg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp -) diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 09cfd4b3f1..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(cpp-robots_txt-robotstxtcfg) -target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-case_insensitive_string -) -target_sources(cpp-robots_txt-robotstxtcfg PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp - ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp -) diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp deleted file mode 100644 index aec668582c..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#include "bot_id_set.h" -// header compile test diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h deleted file mode 100644 index 08aaa68a50..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h +++ /dev/null @@ -1,132 +0,0 @@ -#pragma once - -#include "user_agents.h" - -#include <bitset> - - -/// Simple vector-based set for bot ids, meant to optimize memory and lookups -class TBotIdSet -{ -public: - using TData = std::bitset<robotstxtcfg::max_botid>; - - constexpr TBotIdSet() noexcept = default; - constexpr TBotIdSet(const TBotIdSet&) noexcept = default; - constexpr TBotIdSet(TBotIdSet&&) noexcept = default; - constexpr TBotIdSet& operator = (const TBotIdSet&) noexcept = default; - constexpr TBotIdSet& operator = (TBotIdSet&&) noexcept = default; - - TBotIdSet(std::initializer_list<ui32> botIds) { - for (auto id : botIds) { - insert(id); - } - } - - static TBotIdSet All() noexcept { - TBotIdSet res; - res.Bots.set(); - return res; - } - - constexpr bool contains(ui32 botId) const noexcept { - return (botId < Bots.size()) && Bots[botId]; - } - - bool insert(ui32 botId) noexcept { - if (botId >= Bots.size() || Bots[botId]) { - return false; - } - Bots[botId] = true; - return true; - } - - bool remove(ui32 botId) noexcept { - if (botId >= Bots.size() || !Bots[botId]) { - return false; - } - Bots[botId] = false; - return true; - } - - void clear() noexcept { - Bots.reset(); - } - - size_t size() const noexcept { - return Bots.count(); - } - - bool empty() const noexcept { - return Bots.none(); - } - - bool operator==(const TBotIdSet& rhs) const noexcept = default; - - TBotIdSet operator&(TBotIdSet rhs) const noexcept { - rhs.Bots &= Bots; - return rhs; - } - - TBotIdSet operator|(TBotIdSet rhs) const noexcept { - rhs.Bots |= Bots; - return rhs; - } - - TBotIdSet operator~() const noexcept { - TBotIdSet result; - result.Bots = ~Bots; - return result; - } - - class iterator - { - public: - auto operator * () const noexcept { - return BotId; - } - - iterator& operator ++ () noexcept { - while (BotId < Bots.size()) { - if (Bots[++BotId]) { - break; - } - } - return *this; - } - - bool operator == (const iterator& rhs) const noexcept { - return (&Bots == &rhs.Bots) && (BotId == rhs.BotId); - } - - bool operator != (const iterator& rhs) const noexcept { - return !(*this == rhs); - } - - private: - friend class TBotIdSet; - iterator(const TData& bots, ui32 botId) - : Bots(bots) - , BotId(botId) - { - while (BotId < Bots.size() && !Bots[BotId]) { - ++BotId; - } - } - - private: - const TData& Bots; - ui32 BotId; - }; - - iterator begin() const noexcept { - return {Bots, robotstxtcfg::id_anybot}; - } - - iterator end() const noexcept { - return {Bots, robotstxtcfg::max_botid}; - } - -private: - TData Bots {}; -}; diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp deleted file mode 100644 index c5652b81c5..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#include "robotstxtcfg.h" -// header compile test diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h deleted file mode 100644 index 2cf9430d7c..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include "bot_id_set.h" - - -namespace robotstxtcfg { - -static const TBotIdSet defaultSupportedBotIds = {id_defbot}; -static const TBotIdSet allSupportedBotIds = TBotIdSet::All(); - -} // namespace robotstxtcfg diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp b/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp deleted file mode 100644 index 60b353a427..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#include "user_agents.h" -// header compile test diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.h b/library/cpp/robots_txt/robotstxtcfg/user_agents.h deleted file mode 100644 index 59245d07cb..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/user_agents.h +++ /dev/null @@ -1,303 +0,0 @@ -#pragma once - -#include <library/cpp/case_insensitive_string/case_insensitive_string.h> - - -namespace robotstxtcfg { - // robots.txt agents and identifiers - - enum EBots : ui32 { - id_anybot = 0, - id_yandexbot = 1, - id_yandexmediabot = 2, - id_yandeximagesbot = 3, - id_googlebot = 4, - id_yandexbotmirr = 5, - id_yahooslurp = 6, - id_msnbot = 7, - id_yandexcatalogbot = 8, - id_yandexdirectbot = 9, - id_yandexblogsbot = 10, - id_yandexnewsbot = 11, - id_yandexpagechk = 12, - id_yandexmetrikabot = 13, - id_yandexbrowser = 14, - id_yandexmarketbot = 15, - id_yandexcalendarbot = 16, - id_yandexwebmasterbot = 17, - id_yandexvideobot = 18, - id_yandeximageresizerbot = 19, - id_yandexadnetbot = 20, - id_yandexpartnerbot = 21, - id_yandexdirectdbot = 22, - id_yandextravelbot = 23, - id_yandexmobilebot = 24, - id_yandexrcabot = 25, - id_yandexdirectdynbot = 26, - id_yandexmobilebot_ed = 27, - id_yandexaccessibilitybot = 28, - id_baidubot = 29, - id_yandexscreenshotbot = 30, - id_yandexmetrikayabs = 31, - id_yandexvideoparserbot = 32, - id_yandexnewsbot4 = 33, - id_yandexmarketbot2 = 34, - id_yandexmedianabot = 35, - id_yandexsearchshopbot = 36, - id_yandexontodbbot = 37, - id_yandexontodbapibot = 38, - id_yandexampbot = 39, - id_yandexvideohosting = 40, - id_yandexmediaselling = 41, - id_yandexverticals = 42, - id_yandexturbobot = 43, - id_yandexzenbot = 44, - id_yandextrackerbot = 45, - id_yandexmetrikabot4 = 46, - id_yandexmobilescreenshotbot = 47, - id_yandexfaviconsbot = 48, - id_yandexrenderresourcesbot = 49, - id_yandexactivity = 50, - max_botid - }; - - static const ui32 id_defbot = id_yandexbot; - - struct TBotInfo { - TCaseInsensitiveStringBuf ReqPrefix; - TCaseInsensitiveStringBuf FullName; - TStringBuf FromField = {}; - TStringBuf UserAgent = {}; - TStringBuf RotorUserAgent = {}; - bool ExplicitDisallow = false; - }; - - static constexpr TStringBuf UserAgentFrom("support@search.yandex.ru"); - - static constexpr TBotInfo BotInfoArr[] = { - {"*", "*"}, - {"Yandex", "YandexBot/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexMedia/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexImages/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Google", "GoogleBot"}, - {"Yandex", "YandexBot/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Slurp", "Slurp"}, - {"msn", "msnbot"}, - {"Yandex", "YandexCatalog/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - - {"Yandex", "YandexBlogs/0.99", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexNews/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexPagechecker/2.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexMetrika/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexBrowser/1.0", UserAgentFrom, - "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5", - "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5", - false}, - {"Yandex", "YandexMarket/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"YandexCalendar", "YandexCalendar/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"Yandex", "YandexWebmaster/2.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexVideo/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexImageResizer/2.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - - {"YandexDirect", "YandexDirect/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexPartner", "YandexPartner/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"Yandex", "YandexTravel/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"Yandex", "YandexBot/3.0", UserAgentFrom, - "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", - false}, - {"YandexRCA", "YandexRCA/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexDirectDyn", "YandexDirectDyn/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexMobileBot", "YandexMobileBot/3.0", UserAgentFrom, - "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)", - true}, - {"YandexAccessibilityBot", "YandexAccessibilityBot/3.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"Baidu", "Baiduspider"}, - - {"YandexScreenshotBot", "YandexScreenshotBot/3.0", UserAgentFrom, - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)", - true}, - {"YandexMetrika", "YandexMetrika/2.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01)", - "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexVideoParser", "YandexVideoParser/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"Yandex", "YandexNews/4.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexMarket", "YandexMarket/2.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexMedianaBot", "YandexMedianaBot/1.0", UserAgentFrom, - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)", - true}, - {"YandexSearchShop", "YandexSearchShop/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"Yandex", "YandexOntoDB/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - false}, - {"YandexOntoDBAPI", "YandexOntoDBAPI/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"Yandex-AMPHTML", "Yandex-AMPHTML", UserAgentFrom, - "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - - {"YandexVideoHosting", "YandexVideoHosting/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexMediaSelling", "YandexMediaSelling/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexVerticals", "YandexVerticals/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexTurbo", "YandexTurbo/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexZenRss", "YandexZenRss/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexTracker", "YandexTracker/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexMetrika", "YandexMetrika/4.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexMobileScreenShotBot", "YandexMobileScreenShotBot/1.0", UserAgentFrom, - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", - "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", - true}, - {"YandexFavicons", "YandexFavicons/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexRenderResourcesBot", "YandexRenderResourcesBot/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true}, - {"YandexActivity", "YandexActivity/1.0", UserAgentFrom, - "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots)", - "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", - true} - }; - - static_assert(std::size(BotInfoArr) == max_botid); - - constexpr auto GetReqPrefix(ui32 botId) { - return BotInfoArr[botId].ReqPrefix; - } - - constexpr auto GetFullName(ui32 botId) { - return BotInfoArr[botId].FullName; - } - - constexpr auto GetFromField(ui32 botId) { - return BotInfoArr[botId].FromField; - } - - constexpr auto GetUserAgent(ui32 botId) { - return BotInfoArr[botId].UserAgent; - } - - constexpr auto GetRotorUserAgent(ui32 botId) { - return BotInfoArr[botId].RotorUserAgent; - } - - constexpr bool IsExplicitDisallow(ui32 botId) { - return BotInfoArr[botId].ExplicitDisallow; - } - - constexpr bool IsYandexBotId(ui32 botId) { - return !BotInfoArr[botId].UserAgent.empty(); - } - -} // namespace robotstxtcfg diff --git a/library/cpp/robots_txt/robotstxtcfg/ya.make b/library/cpp/robots_txt/robotstxtcfg/ya.make deleted file mode 100644 index 61c731be42..0000000000 --- a/library/cpp/robots_txt/robotstxtcfg/ya.make +++ /dev/null @@ -1,13 +0,0 @@ -LIBRARY() - -SRCS( - bot_id_set.cpp - robotstxtcfg.cpp - user_agents.cpp -) - -PEERDIR( - library/cpp/case_insensitive_string -) - -END() diff --git a/library/cpp/robots_txt/rules_handler.cpp b/library/cpp/robots_txt/rules_handler.cpp deleted file mode 100644 index 4297db9d21..0000000000 --- a/library/cpp/robots_txt/rules_handler.cpp +++ /dev/null @@ -1,514 +0,0 @@ -#include "robots_txt.h" -#include "constants.h" - -#include <library/cpp/uri/http_url.h> -#include <library/cpp/charset/ci_string.h> -#include <library/cpp/string_utils/url/url.h> -#include <util/system/maxlen.h> -#include <util/generic/yexception.h> -#include <util/generic/algorithm.h> - - -namespace { - -TBotIdSet ConvertBotIdSet(const TSet<ui32>& botIds) noexcept { - TBotIdSet result; - for (auto id : botIds) { - result.insert(id); - } - return result; -} - -} // namespace - -TRobotsTxtRulesIterator::TRobotsTxtRulesIterator(const char* begin, const char* end) - : Begin(begin) - , End(end) -{ -} - -void TRobotsTxtRulesIterator::Next() { - while (Begin < End && *Begin) - ++Begin; - while (Begin < End && !isalpha(*Begin)) - ++Begin; -} - -bool TRobotsTxtRulesIterator::HasRule() const { - return Begin < End; -} - -const char* TRobotsTxtRulesIterator::GetRule() const { - return Begin + 1; -} - -TString TRobotsTxtRulesIterator::GetInitialRule() const { - auto begin = Begin + 1; - TStringBuf rule(begin, strlen(begin)); - - switch (*Begin) { - case 'a': - case 'd': - return rule.EndsWith('*') ? TString(rule.Chop(1)) : TString::Join(rule, '$'); - default: - return TString(rule); - } -} - -EDirectiveType TRobotsTxtRulesIterator::GetRuleType() const { - return CharToDirType(*Begin); -} - -EDirectiveType TRobotsTxtRulesIterator::CharToDirType(char ch) { - switch (toupper(ch)) { - case 'A': - return ALLOW; - case 'C': - return CRAWL_DELAY; - case 'D': - return DISALLOW; - case 'H': - return HOST; - case 'P': - return CLEAN_PARAM; - case 'S': - return SITEMAP; - } - return UNKNOWN; -} - -TRobotsTxtRulesHandlerBase::TRobotsTxtRulesHandlerBase( - TBotIdSet supportedBotIds, - int robotsMaxSize, - int maxRulesNumber, - bool saveDataForAnyBot) - : HandleErrors(false) - , SiteMaps() - , CleanParams() - , HostDirective("") - , Errors() - , AcceptedLines() - , CrossSectionAcceptedLines() - , BotIdToInfo(robotstxtcfg::max_botid) - , RobotsMaxSize(robotsMaxSize) - , MaxRulesNumber(maxRulesNumber) - , SaveDataForAnyBot(saveDataForAnyBot) - , SupportedBotIds(supportedBotIds) -{ - Y_ENSURE(!supportedBotIds.empty()); - - if (RobotsMaxSize <= 0) - RobotsMaxSize = robots_max; - if (MaxRulesNumber <= 0) - MaxRulesNumber = max_rules_count; - - ResetOptimized(); -} - -TRobotsTxtRulesHandlerBase::TRobotsTxtRulesHandlerBase( - const TSet<ui32>& supportedBotIds, - int robotsMaxSize, - int maxRulesNumber, - bool saveDataForAnyBot) - : TRobotsTxtRulesHandlerBase(ConvertBotIdSet(supportedBotIds), robotsMaxSize, maxRulesNumber, saveDataForAnyBot) -{} - -TRobotsTxtRulesHandlerBase::~TRobotsTxtRulesHandlerBase() = default; - -void TRobotsTxtRulesHandlerBase::CheckBotIdValidity(const ui32 botId) const { - if (botId >= robotstxtcfg::max_botid || !IsBotIdSupported(botId)) - ythrow yexception() << "robots.txt parser requested for invalid or unsupported botId = " << botId << Endl; - ; -} - -int TRobotsTxtRulesHandlerBase::GetCrawlDelay(const ui32 botId, bool* realInfo) const { - const auto id = GetMappedBotId(botId, false); - if (realInfo) - *realInfo = bool(id); - return BotIdToInfo[id.GetOrElse(robotstxtcfg::id_anybot)].CrawlDelay; -} - -int TRobotsTxtRulesHandlerBase::GetMinCrawlDelay(int defaultCrawlDelay) const { - int res = INT_MAX; - bool useDefault = false; - for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) { - if (robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId) && !IsDisallowAll(botId)) { - bool realInfo; - int curCrawlDelay = GetCrawlDelay(botId, &realInfo); - if (realInfo) { - if (curCrawlDelay == -1) { - useDefault = true; - } else { - res = Min(res, curCrawlDelay); - } - } - } - } - - if (useDefault && defaultCrawlDelay < res) { - return -1; - } - - if (res == INT_MAX) { - res = GetCrawlDelay(robotstxtcfg::id_anybot); - } - - return res; -} - -void TRobotsTxtRulesHandlerBase::SetCrawlDelay(const ui32 botId, int crawlDelay) { - CheckBotIdValidity(botId); - BotIdToInfo[botId].CrawlDelay = crawlDelay; -} - -const TVector<TString> TRobotsTxtRulesHandlerBase::GetSiteMaps() const { - return TVector<TString>(SiteMaps.begin(), SiteMaps.end()); -} - -void TRobotsTxtRulesHandlerBase::AddSiteMap(const char* sitemap) { - SiteMaps.insert(sitemap); -} - -const TVector<TString> TRobotsTxtRulesHandlerBase::GetCleanParams() const { - return TVector<TString>(CleanParams.begin(), CleanParams.end()); -} - -void TRobotsTxtRulesHandlerBase::AddCleanParam(const char* cleanParam) { - CleanParams.insert(cleanParam); -} - -const TString& TRobotsTxtRulesHandlerBase::GetHostDirective() const { - return HostDirective; -} - -void TRobotsTxtRulesHandlerBase::SetHostDirective(const char* hostDirective) { - HostDirective = hostDirective; -} - -const TRobotsTxtRulesHandlerBase::TErrorVector& TRobotsTxtRulesHandlerBase::GetErrors() const { - return Errors; -} - -TVector<int> TRobotsTxtRulesHandlerBase::GetAcceptedLines(const ui32 botId) const { - TVector<int> ret; - for (size_t i = 0; i < CrossSectionAcceptedLines.size(); ++i) - ret.push_back(CrossSectionAcceptedLines[i]); - - bool hasLinesForBotId = false; - for (size_t i = 0; i < AcceptedLines.size(); ++i) { - if (AcceptedLines[i].first == botId) { - hasLinesForBotId = true; - break; - } - } - - for (size_t i = 0; i < AcceptedLines.size(); ++i) { - if (hasLinesForBotId && AcceptedLines[i].first == botId) { - ret.push_back(AcceptedLines[i].second); - } else if (!hasLinesForBotId && AcceptedLines[i].first == robotstxtcfg::id_anybot) { - ret.push_back(AcceptedLines[i].second); - } - } - - Sort(ret.begin(), ret.end()); - - return ret; -} - -void TRobotsTxtRulesHandlerBase::AddAcceptedLine(ui32 line, const TBotIdSet& botIds, bool isCrossSection) { - if (isCrossSection) { - CrossSectionAcceptedLines.push_back(line); - return; - } - - for (auto botId : botIds) { - AcceptedLines.push_back(TBotIdAcceptedLine(botId, line)); - } -} - -void TRobotsTxtRulesHandlerBase::SetErrorsHandling(bool handleErrors) { - HandleErrors = handleErrors; -} - -bool TRobotsTxtRulesHandlerBase::IsHandlingErrors() const { - return HandleErrors; -} - -EDirectiveType TRobotsTxtRulesHandlerBase::NameToDirType(const char* d) { - if (!strcmp("disallow", d)) - return DISALLOW; - if (!strcmp("allow", d)) - return ALLOW; - if (!strcmp("user-agent", d)) - return USER_AGENT; - if (!strcmp("host", d)) - return HOST; - if (!strcmp("sitemap", d)) - return SITEMAP; - if (!strcmp("clean-param", d)) - return CLEAN_PARAM; - if (!strcmp("crawl-delay", d)) - return CRAWL_DELAY; - return UNKNOWN; -} - -const char* TRobotsTxtRulesHandlerBase::DirTypeToName(EDirectiveType t) { - static const char* name[] = {"Allow", "Crawl-Delay", "Disallow", "Host", "Clean-Param", "Sitemap", "User-Agent", "Unknown"}; - switch (t) { - case ALLOW: - return name[0]; - case CRAWL_DELAY: - return name[1]; - case DISALLOW: - return name[2]; - case HOST: - return name[3]; - case CLEAN_PARAM: - return name[4]; - case SITEMAP: - return name[5]; - case USER_AGENT: - return name[6]; - case UNKNOWN: - return name[7]; - } - return name[7]; -} - -bool TRobotsTxtRulesHandlerBase::CheckRobot( - const char* userAgent, - TBotIdSet& botIds, - const TVector<ui32>* botIdToMaxAppropriateUserAgentNameLength) const -{ - TCaseInsensitiveStringBuf agent(userAgent); - - for (size_t botIndex = 0; botIndex < robotstxtcfg::max_botid; ++botIndex) { - if (!IsBotIdSupported(botIndex)) - continue; - - bool hasRequiredAgentNamePrefix = agent.StartsWith(robotstxtcfg::GetReqPrefix(botIndex)); - bool isContainedInFullName = robotstxtcfg::GetFullName(botIndex).StartsWith(agent); - bool wasMoreImportantAgent = false; - if (botIdToMaxAppropriateUserAgentNameLength) - wasMoreImportantAgent = agent.size() < (*botIdToMaxAppropriateUserAgentNameLength)[botIndex]; - - if (hasRequiredAgentNamePrefix && isContainedInFullName && !wasMoreImportantAgent) { - botIds.insert(botIndex); - } - } - - return !botIds.empty(); -} - -int TRobotsTxtRulesHandlerBase::CheckRule(const char* value, int line, TRobotsTxtRulesHandlerBase* rulesHandler) { - if (!rulesHandler->IsHandlingErrors()) - return 0; - - if (auto len = strlen(value); len > max_rule_length) { - rulesHandler->AddError(ERROR_RULE_HUGE, line); - } - - bool upper = false, suspect = false; - for (const char* r = value; *r; ++r) { - if (!upper && isupper(*r)) - upper = true; - if (!suspect && !isalnum(*r) && !strchr("/_?=.-*%&~[]:;@", *r) && (*(r + 1) || *r != '$')) - suspect = true; - } - if (suspect) - rulesHandler->AddError(WARNING_SUSPECT_SYMBOL, line); - if (upper) - rulesHandler->AddError(WARNING_UPPER_REGISTER, line); - return suspect || upper; -} - -void TRobotsTxtRulesHandlerBase::AddError(EFormatErrorType type, int line) { - if (!HandleErrors) - return; - Errors.push_back(std::make_pair(type, line)); -} - -void TRobotsTxtRulesHandlerBase::ResetOptimized() noexcept { - for (ui32 i = 0; i < OptimizedBotIdToStoredBotId.size(); ++i) { - OptimizedBotIdToStoredBotId[i] = i; // by default, every bot maps to itself - } -} - -void TRobotsTxtRulesHandlerBase::Clear() { - SiteMaps.clear(); - CleanParams.clear(); - HostDirective = ""; - if (HandleErrors) { - AcceptedLines.clear(); - CrossSectionAcceptedLines.clear(); - Errors.clear(); - } - - for (size_t botId = 0; botId < BotIdToInfo.size(); ++botId) { - BotIdToInfo[botId].CrawlDelay = -1; - } - - LoadedBotIds.clear(); -} - -void TRobotsTxtRulesHandlerBase::ClearInternal(const ui32 botId) { - CheckBotIdValidity(botId); - BotIdToInfo[botId].CrawlDelay = -1; - - TVector<TBotIdAcceptedLine> newAcceptedLines; - for (size_t i = 0; i < AcceptedLines.size(); ++i) - if (AcceptedLines[i].first != botId) - newAcceptedLines.push_back(AcceptedLines[i]); - - AcceptedLines.swap(newAcceptedLines); -} - -int TRobotsTxtRulesHandlerBase::CheckHost(const char* host) { - THttpURL parsed; - TString copyHost = host; - - if (GetHttpPrefixSize(copyHost) == 0) { - copyHost = TString("http://") + copyHost; - } - - return parsed.Parse(copyHost.data(), THttpURL::FeaturesRobot) == THttpURL::ParsedOK && parsed.GetField(THttpURL::FieldHost) != TString(""); -} - -int TRobotsTxtRulesHandlerBase::CheckSitemapUrl(const char* url, const char* host, TString& modifiedUrl) { - if (host != nullptr && strlen(url) > 0 && url[0] == '/') { - modifiedUrl = TString(host) + url; - } else { - modifiedUrl = url; - } - - url = modifiedUrl.data(); - - if (strlen(url) >= URL_MAX - 8) - return 0; - THttpURL parsed; - if (parsed.Parse(url, THttpURL::FeaturesRobot) || !parsed.IsValidAbs()) - return 0; - if (parsed.GetScheme() != THttpURL::SchemeHTTP && parsed.GetScheme() != THttpURL::SchemeHTTPS) - return 0; - return CheckHost(parsed.PrintS(THttpURL::FlagHostPort).data()); -} - -// s - is space separated pair of clean-params (separated by &) and path prefix -int TRobotsTxtRulesHandlerBase::CheckAndNormCleanParam(TString& value) { - if (value.find(' ') == TString::npos) { - value.push_back(' '); - } - - const char* s = value.data(); - if (!s || !*s || strlen(s) > URL_MAX / 2 - 9) - return 0; - const char* p = s; - while (*p && !isspace(*p)) - ++p; - for (; s != p; ++s) { - // allowed only following not alpha-numerical symbols - if (!isalnum(*s) && !strchr("+-=_&%[]{}():.", *s)) - return 0; - // clean-params for prefix can be enumerated by & symbol, && not allowed syntax - if (*s == '&' && *(s + 1) == '&') - return 0; - } - const char* pathPrefix = p + 1; - while (isspace(*p)) - ++p; - char r[URL_MAX]; - char* pr = r; - for (; *p; ++p) { - if (!isalnum(*p) && !strchr(".-/*_,;:%", *p)) - return 0; - if (*p == '*') - *pr++ = '.'; - if (*p == '.') - *pr++ = '\\'; - *pr++ = *p; - } - *pr++ = '.'; - *pr++ = '*'; - *pr = 0; - TString params = value.substr(0, pathPrefix - value.data()); - value = params + r; - return 1; -} - -int TRobotsTxtRulesHandlerBase::ParseCrawlDelay(const char* value, int& crawlDelay) { - static const int MAX_CRAWL_DELAY = 1 << 10; - int val = 0; - const char* p = value; - for (; isdigit(*p); ++p) { - val = val * 10 + *p - '0'; - if (val > MAX_CRAWL_DELAY) - return 0; - } - if (*p) { - if (*p++ != '.') - return 0; - if (strspn(p, "1234567890") != strlen(p)) - return 0; - } - for (const char* s = p; s - p < 3; ++s) - val = val * 10 + (s < p + strlen(p) ? *s - '0' : 0); - crawlDelay = val; - return 1; -} - -bool TRobotsTxtRulesHandlerBase::AddRuleWithErrorCheck(const ui32 botId, TStringBuf rule, char type, TRobotsTxtParser& parser) { - if (!IsBotIdSupported(botId)) - return true; - - if (!AddRule(botId, rule, type)) { - AddError(ERROR_ROBOTS_HUGE, parser.GetLineNumber()); - AfterParse(botId); - return false; - } - return true; -} - -int TRobotsTxtRulesHandlerBase::OnHost(const ui32 botId, TRobotsTxtParser& parser, const char* value, TRobotsTxtRulesHandlerBase*& rulesHandler) { - // Temporary hack for correct repacking robots.txt from new format to old - // Remove it, when robot-stable-2010-10-17 will be deployed in production - if (!IsBotIdSupported(botId)) - return 0; - // end of hack - - if (rulesHandler->HostDirective != "") - rulesHandler->AddError(ERROR_HOST_MULTI, parser.GetLineNumber()); - else { - if (!CheckHost(value)) - rulesHandler->AddError(ERROR_HOST_FORMAT, parser.GetLineNumber()); - else { - rulesHandler->SetHostDirective(value); - if (!rulesHandler->AddRuleWithErrorCheck(botId, value, 'H', parser)) - return 2; - } - } - return 0; -} - -bool TRobotsTxtRulesHandlerBase::IsBotIdLoaded(const ui32 botId) const { - return LoadedBotIds.contains(botId); -} - -bool TRobotsTxtRulesHandlerBase::IsBotIdSupported(const ui32 botId) const { - return (SaveDataForAnyBot && botId == robotstxtcfg::id_anybot) || SupportedBotIds.contains(botId); -} - -ui32 TRobotsTxtRulesHandlerBase::GetNotOptimizedBotId(const ui32 botId) const { - return (botId < OptimizedBotIdToStoredBotId.size()) - ? OptimizedBotIdToStoredBotId[botId] - : botId; -} - -TMaybe<ui32> TRobotsTxtRulesHandlerBase::GetMappedBotId(ui32 botId, bool useAny) const { - botId = GetNotOptimizedBotId(botId); - CheckBotIdValidity(botId); - if (IsBotIdLoaded(botId)) - return botId; - if (useAny) - return robotstxtcfg::id_anybot; - return {}; -} diff --git a/library/cpp/robots_txt/ya.make b/library/cpp/robots_txt/ya.make deleted file mode 100644 index c12b57ea04..0000000000 --- a/library/cpp/robots_txt/ya.make +++ /dev/null @@ -1,18 +0,0 @@ -LIBRARY() - -SRCS( - prefix_tree.cpp - prefix_tree_rules_handler.cpp - robots_txt_parser.cpp - rules_handler.cpp -) - -PEERDIR( - library/cpp/robots_txt/robotstxtcfg - library/cpp/case_insensitive_string - library/cpp/charset - library/cpp/string_utils/url - library/cpp/uri -) - -END() diff --git a/library/cpp/yconf/CMakeLists.darwin-x86_64.txt b/library/cpp/yconf/CMakeLists.darwin-x86_64.txt deleted file mode 100644 index 4e5bbf836d..0000000000 --- a/library/cpp/yconf/CMakeLists.darwin-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-yconf) -target_link_libraries(library-cpp-yconf PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-charset - library-cpp-logger -) -target_sources(library-cpp-yconf PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp -) diff --git a/library/cpp/yconf/CMakeLists.linux-aarch64.txt b/library/cpp/yconf/CMakeLists.linux-aarch64.txt deleted file mode 100644 index 8ddf881133..0000000000 --- a/library/cpp/yconf/CMakeLists.linux-aarch64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-yconf) -target_link_libraries(library-cpp-yconf PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-charset - library-cpp-logger -) -target_sources(library-cpp-yconf PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp -) diff --git a/library/cpp/yconf/CMakeLists.linux-x86_64.txt b/library/cpp/yconf/CMakeLists.linux-x86_64.txt deleted file mode 100644 index 8ddf881133..0000000000 --- a/library/cpp/yconf/CMakeLists.linux-x86_64.txt +++ /dev/null @@ -1,20 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-yconf) -target_link_libraries(library-cpp-yconf PUBLIC - contrib-libs-linux-headers - contrib-libs-cxxsupp - yutil - library-cpp-charset - library-cpp-logger -) -target_sources(library-cpp-yconf PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp -) diff --git a/library/cpp/yconf/CMakeLists.txt b/library/cpp/yconf/CMakeLists.txt deleted file mode 100644 index f8b31df0c1..0000000000 --- a/library/cpp/yconf/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-aarch64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - include(CMakeLists.darwin-x86_64.txt) -elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) - include(CMakeLists.windows-x86_64.txt) -elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) - include(CMakeLists.linux-x86_64.txt) -endif() diff --git a/library/cpp/yconf/CMakeLists.windows-x86_64.txt b/library/cpp/yconf/CMakeLists.windows-x86_64.txt deleted file mode 100644 index 4e5bbf836d..0000000000 --- a/library/cpp/yconf/CMakeLists.windows-x86_64.txt +++ /dev/null @@ -1,19 +0,0 @@ - -# This file was generated by the build system used internally in the Yandex monorepo. -# Only simple modifications are allowed (adding source-files to targets, adding simple properties -# like target_include_directories). These modifications will be ported to original -# ya.make files by maintainers. Any complex modifications which can't be ported back to the -# original buildsystem will not be accepted. - - - -add_library(library-cpp-yconf) -target_link_libraries(library-cpp-yconf PUBLIC - contrib-libs-cxxsupp - yutil - library-cpp-charset - library-cpp-logger -) -target_sources(library-cpp-yconf PRIVATE - ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp -) |