diff options
author | vvvv <vvvv@ydb.tech> | 2023-07-31 18:21:04 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2023-07-31 18:21:04 +0300 |
commit | dec41c40e51aa407edef81a3c566a5a15780fc49 (patch) | |
tree | 4f197b596b32f35eca368121f0dff913419da9af /library/cpp/reverse_geocoder/core | |
parent | 3ca8b54c96e09eb2b65be7f09675623438d559c7 (diff) | |
download | ydb-dec41c40e51aa407edef81a3c566a5a15780fc49.tar.gz |
YQL-16239 Move purecalc to public
Diffstat (limited to 'library/cpp/reverse_geocoder/core')
38 files changed, 1537 insertions, 0 deletions
diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..17f6e79c96 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,35 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..02361a0a1a --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt @@ -0,0 +1,36 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..02361a0a1a --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt @@ -0,0 +1,36 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.txt b/library/cpp/reverse_geocoder/core/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..17f6e79c96 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt @@ -0,0 +1,35 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/area_box.cpp b/library/cpp/reverse_geocoder/core/area_box.cpp new file mode 100644 index 0000000000..67038fe4f8 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/area_box.cpp @@ -0,0 +1,9 @@ +#include "area_box.h" + +using namespace NReverseGeocoder; + +TRef NReverseGeocoder::LookupAreaBox(const TPoint& point) { + const TRef boxX = (point.X - NAreaBox::LowerX) / NAreaBox::DeltaX; + const TRef boxY = (point.Y - NAreaBox::LowerY) / NAreaBox::DeltaY; + return boxX * NAreaBox::NumberY + boxY; +} diff --git a/library/cpp/reverse_geocoder/core/area_box.h b/library/cpp/reverse_geocoder/core/area_box.h new file mode 100644 index 0000000000..1077a65fef --- /dev/null +++ b/library/cpp/reverse_geocoder/core/area_box.h @@ -0,0 +1,34 @@ +#pragma once + +#include "common.h" +#include "point.h" + +namespace NReverseGeocoder { + namespace NAreaBox { + const TCoordinate LowerX = ToCoordinate(-180.0); + const TCoordinate UpperX = ToCoordinate(180.0); + const TCoordinate LowerY = ToCoordinate(-90.0); + const TCoordinate UpperY = ToCoordinate(90.0); + const TCoordinate DeltaX = ToCoordinate(0.1); + const TCoordinate DeltaY = ToCoordinate(0.1); + const TCoordinate NumberX = (UpperX - LowerX) / DeltaX; + const TCoordinate NumberY = (UpperY - LowerY) / DeltaY; + const TCoordinate Number = NumberX * NumberY; + + } + + // Area of geo territory. Variable PolygonRefsOffset refers to the polygons lying inside this + // area. Geo map is divided into equal bounding boxes from (NAreaBox::LowerX, NAreaBox::LowerY) + // to (NAreaBox::UpperX, NAreaBox::UpperY) with DeltaX and DeltaY sizes. Logic of filling is in + // generator. + struct Y_PACKED TAreaBox { + TNumber PolygonRefsOffset; + TNumber PolygonRefsNumber; + }; + + static_assert(sizeof(TAreaBox) == 8, "NReverseGeocoder::TAreaBox size mismatch"); + + // Determine in wich area box in geoData is point. + TRef LookupAreaBox(const TPoint& point); + +} diff --git a/library/cpp/reverse_geocoder/core/bbox.cpp b/library/cpp/reverse_geocoder/core/bbox.cpp new file mode 100644 index 0000000000..aa4258ac22 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/bbox.cpp @@ -0,0 +1 @@ +#include "bbox.h" diff --git a/library/cpp/reverse_geocoder/core/bbox.h b/library/cpp/reverse_geocoder/core/bbox.h new file mode 100644 index 0000000000..e8b6e00aa3 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/bbox.h @@ -0,0 +1,66 @@ +#pragma once + +#include "common.h" +#include "point.h" + +#include <util/generic/utility.h> + +namespace NReverseGeocoder { + struct Y_PACKED TBoundingBox { + TCoordinate X1; + TCoordinate Y1; + TCoordinate X2; + TCoordinate Y2; + + TBoundingBox() + : X1(0) + , Y1(0) + , X2(0) + , Y2(0) + { + } + + TBoundingBox(TCoordinate x1, TCoordinate y1, TCoordinate x2, TCoordinate y2) + : X1(x1) + , Y1(y1) + , X2(x2) + , Y2(y2) + { + } + + TBoundingBox(const TPoint* points, TNumber number) { + Init(); + for (TNumber i = 0; i < number; ++i) + Relax(points[i]); + } + + void Init() { + X1 = ToCoordinate(180.0); + Y1 = ToCoordinate(90.0); + X2 = ToCoordinate(-180.0); + Y2 = ToCoordinate(-90.0); + } + + void Relax(const TPoint& p) { + X1 = Min(X1, p.X); + Y1 = Min(Y1, p.Y); + X2 = Max(X2, p.X); + Y2 = Max(Y2, p.Y); + } + + bool HasIntersection(const TBoundingBox& r) const { + if (X1 > r.X2 || X2 < r.X1 || Y1 > r.Y2 || Y2 < r.Y1) + return false; + return true; + } + + bool Contains(const TPoint& p) const { + if (p.X < X1 || p.X > X2 || p.Y < Y1 || p.Y > Y2) + return false; + return true; + } + }; + + static_assert(sizeof(TBoundingBox) == 16, "NReverseGeocoder::TBoundingBox size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/common.cpp b/library/cpp/reverse_geocoder/core/common.cpp new file mode 100644 index 0000000000..67c02a20a0 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/common.cpp @@ -0,0 +1 @@ +#include "common.h" diff --git a/library/cpp/reverse_geocoder/core/common.h b/library/cpp/reverse_geocoder/core/common.h new file mode 100644 index 0000000000..090407ffd9 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/common.h @@ -0,0 +1,24 @@ +#pragma once + +#include <util/system/compiler.h> +#include <util/system/types.h> + +namespace NReverseGeocoder { + using TCoordinate = i32; + using TGeoId = ui64; + using TNumber = ui32; + using TRef = ui32; + using TSquare = i64; + using TVersion = ui64; + + const double EARTH_RADIUS = 6371000.0; + + inline TCoordinate ToCoordinate(double x) { + return x * 1e6; + } + + inline double ToDouble(TCoordinate x) { + return x / 1e6; + } + +} diff --git a/library/cpp/reverse_geocoder/core/edge.cpp b/library/cpp/reverse_geocoder/core/edge.cpp new file mode 100644 index 0000000000..86c6ab8535 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/edge.cpp @@ -0,0 +1 @@ +#include "edge.h" diff --git a/library/cpp/reverse_geocoder/core/edge.h b/library/cpp/reverse_geocoder/core/edge.h new file mode 100644 index 0000000000..9d20928857 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/edge.h @@ -0,0 +1,101 @@ +#pragma once + +#include "common.h" +#include "point.h" + +#include <util/generic/utility.h> +#include <util/system/yassert.h> + +namespace NReverseGeocoder { + // TEdge is a type, which represent polygon edge, Beg/End refers on begin/End edge points in + // geographical data. + struct Y_PACKED TEdge { + TRef Beg; + TRef End; + + TEdge() + : Beg(0) + , End(0) + { + } + + TEdge(const TRef& a, const TRef& b) + : Beg(a) + , End(b) + { + } + + bool operator==(const TEdge& e) const { + return Beg == e.Beg && End == e.End; + } + + bool operator!=(const TEdge& e) const { + return Beg != e.Beg || End != e.End; + } + + bool operator<(const TEdge& e) const { + return Beg < e.Beg || (Beg == e.Beg && End < e.End); + } + + // Checks that current edge is lying lower then other edge. Both edges must have a common X + // values, otherwise the behavior is undefined. + bool Lower(const TEdge& e, const TPoint* points) const { + if (*this == e) + return false; + + const TPoint& a1 = points[Beg]; + const TPoint& a2 = points[End]; + const TPoint& b1 = points[e.Beg]; + const TPoint& b2 = points[e.End]; + + Y_ASSERT(a1.X <= a2.X && b1.X <= b2.X); + + if (a1 == b1) { + return (a2 - a1).Cross(b2 - a1) > 0; + } else if (a2 == b2) { + return (a1 - b1).Cross(b2 - b1) > 0; + } else if (b1.X >= a1.X && b1.X <= a2.X) { + return (a2 - a1).Cross(b1 - a1) > 0; + } else if (b2.X >= a1.X && b2.X <= a2.X) { + return (a2 - a1).Cross(b2 - a1) > 0; + } else if (a1.X >= b1.X && a1.X <= b2.X) { + return (a1 - b1).Cross(b2 - b1) > 0; + } else if (a2.X >= b1.X && a2.X <= b2.X) { + return (a2 - b1).Cross(b2 - b1) > 0; + } else { + return false; + } + } + + // Checks that current edge lying lower then given point. Edge and point must have a common X + // values, otherwise the behavior is undefined. + bool Lower(const TPoint& p, const TPoint* points) const { + if (Contains(p, points)) + return false; + + TPoint a = points[Beg]; + TPoint b = points[End]; + + if (a.X > b.X) + DoSwap(a, b); + + return (b - a).Cross(p - a) > 0; + } + + bool Contains(const TPoint& p, const TPoint* points) const { + TPoint a = points[Beg]; + TPoint b = points[End]; + + if (a.X > b.X) + DoSwap(a, b); + + if (p.X < a.X || p.X > b.X) + return false; + + return (b - a).Cross(p - a) == 0; + } + }; + + static_assert(sizeof(TEdge) == 8, "NReverseGeocoder::TEdge size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.cpp b/library/cpp/reverse_geocoder/core/geo_data/debug.cpp new file mode 100644 index 0000000000..4db0534b22 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/debug.cpp @@ -0,0 +1,74 @@ +#include "debug.h" + +#include <library/cpp/reverse_geocoder/library/log.h> +#include <library/cpp/reverse_geocoder/library/memory.h> + +using namespace NReverseGeocoder; +using namespace NGeoData; + +size_t NReverseGeocoder::NGeoData::Space(const IGeoData& g) { + size_t space = 0; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + space += sizeof(TVar); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + space += sizeof(TNumber) + sizeof(TArr) * g.Arr##Number(); + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return space; +} + +template <typename TArr> +static float ArraySpace(TNumber number) { + return number * sizeof(TArr) * 1.0 / MB; +} + +void NReverseGeocoder::NGeoData::Show(IOutputStream& out, const IGeoData& g) { + out << "GeoData = " << NGeoData::Space(g) * 1.0 / GB << " GB" << '\n'; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + out << " GeoData." << #Var << " = " << (unsigned long long)g.Var() << '\n'; + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + out << " GeoData." << #Arr << " = " \ + << g.Arr##Number() << " x " << sizeof(TArr) << " = " \ + << ArraySpace<TArr>(g.Arr##Number()) << " MB" \ + << '\n'; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR +} + +template <typename TArr> +static bool Equals(const TArr* a, const TArr* b, size_t count) { + return !memcmp(a, b, sizeof(TArr) * count); +} + +bool NReverseGeocoder::NGeoData::Equals(const IGeoData& a, const IGeoData& b) { +#define GEO_BASE_DEF_VAR(TVar, Var) \ + if (a.Var() != b.Var()) { \ + LogError(#Var " not equal"); \ + return false; \ + } + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + if (!::Equals(a.Arr(), b.Arr(), a.Arr##Number())) { \ + LogError(#Arr " not equal"); \ + return false; \ + } + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return true; +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.h b/library/cpp/reverse_geocoder/core/geo_data/debug.h new file mode 100644 index 0000000000..e7a4d9029c --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/debug.h @@ -0,0 +1,16 @@ +#pragma once + +#include "geo_data.h" + +#include <util/stream/output.h> + +namespace NReverseGeocoder { + namespace NGeoData { + size_t Space(const IGeoData& g); + + void Show(IOutputStream& out, const IGeoData& g); + + bool Equals(const IGeoData& a, const IGeoData& b); + + } +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.cpp b/library/cpp/reverse_geocoder/core/geo_data/def.cpp new file mode 100644 index 0000000000..bb9f760d73 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/def.cpp @@ -0,0 +1 @@ +#include "def.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.h b/library/cpp/reverse_geocoder/core/geo_data/def.h new file mode 100644 index 0000000000..d3e331d873 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/def.h @@ -0,0 +1,35 @@ +#pragma once + +#include <library/cpp/reverse_geocoder/core/area_box.h> +#include <library/cpp/reverse_geocoder/core/common.h> +#include <library/cpp/reverse_geocoder/core/edge.h> +#include <library/cpp/reverse_geocoder/core/kv.h> +#include <library/cpp/reverse_geocoder/core/part.h> +#include <library/cpp/reverse_geocoder/core/point.h> +#include <library/cpp/reverse_geocoder/core/polygon.h> +#include <library/cpp/reverse_geocoder/core/region.h> + +namespace NReverseGeocoder { + const TVersion GEO_DATA_VERSION_0 = 0; + const TVersion GEO_DATA_VERSION_1 = 1; + + const TVersion GEO_DATA_CURRENT_VERSION = GEO_DATA_VERSION_1; + +// Geographical data definition. This define need for reflection in map/unmap, show, etc. +#define GEO_BASE_DEF_GEO_DATA \ + GEO_BASE_DEF_VAR(TVersion, Version); \ + GEO_BASE_DEF_ARR(TPoint, Points); \ + GEO_BASE_DEF_ARR(TEdge, Edges); \ + GEO_BASE_DEF_ARR(TRef, EdgeRefs); \ + GEO_BASE_DEF_ARR(TPart, Parts); \ + GEO_BASE_DEF_ARR(TPolygon, Polygons); \ + GEO_BASE_DEF_ARR(TRef, PolygonRefs); \ + GEO_BASE_DEF_ARR(TAreaBox, Boxes); \ + GEO_BASE_DEF_ARR(char, Blobs); \ + GEO_BASE_DEF_ARR(TKv, Kvs); \ + GEO_BASE_DEF_ARR(TRegion, Regions); \ + GEO_BASE_DEF_ARR(TRawPolygon, RawPolygons); \ + GEO_BASE_DEF_ARR(TRef, RawEdgeRefs); \ + // #define GEO_BASE_DEF_GEO_DATA + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp b/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp new file mode 100644 index 0000000000..be3310b291 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp @@ -0,0 +1 @@ +#include "geo_data.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.h b/library/cpp/reverse_geocoder/core/geo_data/geo_data.h new file mode 100644 index 0000000000..7cb76bcddc --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/geo_data.h @@ -0,0 +1,24 @@ +#pragma once + +#include "def.h" + +namespace NReverseGeocoder { + class IGeoData { +#define GEO_BASE_DEF_VAR(TVar, Var) \ + virtual const TVar& Var() const = 0; + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + virtual const TArr* Arr() const = 0; \ + virtual TNumber Arr##Number() const = 0; + + public: + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + virtual ~IGeoData() { + } + }; + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.cpp b/library/cpp/reverse_geocoder/core/geo_data/map.cpp new file mode 100644 index 0000000000..312f7d7cb0 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/map.cpp @@ -0,0 +1,203 @@ +#include "map.h" + +#include <library/cpp/reverse_geocoder/library/log.h> +#include <library/cpp/reverse_geocoder/library/system.h> +#include <library/cpp/reverse_geocoder/proto/geo_data.pb.h> + +#include <library/cpp/digest/crc32c/crc32c.h> + +#include <util/generic/algorithm.h> +#include <util/generic/buffer.h> +#include <util/generic/vector.h> +#include <util/network/address.h> +#include <util/system/filemap.h> +#include <util/system/unaligned_mem.h> + +using namespace NReverseGeocoder; + +static const TNumber CRC_SIZE = 3; + +void NReverseGeocoder::TGeoDataMap::Init() { +#define GEO_BASE_DEF_VAR(TVar, Var) \ + Var##_ = TVar(); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + Arr##_ = nullptr; \ + Arr##Number_ = 0; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR +} + +NReverseGeocoder::TGeoDataMap::TGeoDataMap() + : Data_(nullptr) + , Size_(0) +{ + Init(); +} + +static bool CheckMemoryConsistency(const NProto::TGeoData& g) { + TVector<std::pair<intptr_t, intptr_t>> segments; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + // undef + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + if (g.Get##Arr##Number() > 0) { \ + intptr_t const beg = g.Get##Arr(); \ + intptr_t const end = g.Get##Arr() + g.Get##Arr##Number() * sizeof(TArr); \ + segments.emplace_back(beg, end); \ + } + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + Sort(segments.begin(), segments.end()); + + for (size_t i = 0; i + 1 < segments.size(); ++i) + if (segments[i].second > segments[i + 1].first) + return false; + + return true; +} + +void NReverseGeocoder::TGeoDataMap::Remap() { + Init(); + + if (!Data_) + return; + + const ui64 headerSize = ntohl(ReadUnaligned<ui64>(Data_)); + + NProto::TGeoData header; + if (!header.ParseFromArray(Data_ + sizeof(ui64), headerSize)) + ythrow yexception() << "Unable parse geoData header"; + + if (header.GetMagic() != SYSTEM_ENDIAN_FLAG) + ythrow yexception() << "Different endianness in geoData and host"; + + if (!CheckMemoryConsistency(header)) + ythrow yexception() << "Memory is not consistent!"; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + Var##_ = header.Get##Var(); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + if (Arr##Number() > 0) { \ + const intptr_t offset = header.Get##Arr(); \ + Arr##_ = (TArr*)(((intptr_t)Data_) + offset); \ + const ui32 hash = Crc32c(Arr##_, std::min(Arr##Number_, CRC_SIZE) * sizeof(TArr)); \ + if (hash != header.Get##Arr##Crc32()) \ + ythrow yexception() << "Wrong crc32 for " << #Arr; \ + } + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + if (Version() != GEO_DATA_CURRENT_VERSION) + ythrow yexception() << "Unable use version " << Version() + << "(current version is " << GEO_DATA_CURRENT_VERSION << ")"; +} + +static size_t HeaderSize() { + NProto::TGeoData header; + header.SetMagic(std::numeric_limits<decltype(header.GetMagic())>::max()); + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + header.Set##Var(std::numeric_limits<decltype(header.Get##Var())>::max()); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + header.Set##Arr(std::numeric_limits<decltype(header.Get##Arr())>::max()); \ + header.Set##Arr##Crc32(std::numeric_limits<decltype(header.Get##Arr##Crc32())>::max()); + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return header.ByteSize(); +} + +static const char* Serialize(const IGeoData& g, TBlockAllocator* allocator, size_t* size) { + size_t const preAllocatedSize = allocator->TotalAllocatedSize(); + char* data = (char*)allocator->Allocate(HeaderSize() + sizeof(ui64)); + + NProto::TGeoData header; + header.SetMagic(SYSTEM_ENDIAN_FLAG); + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + header.Set##Var(g.Var()); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + if (g.Arr##Number() > 0) { \ + TArr* arr = (TArr*)allocator->Allocate(sizeof(TArr) * g.Arr##Number()); \ + memcpy(arr, g.Arr(), sizeof(TArr) * g.Arr##Number()); \ + header.Set##Arr((ui64)(((intptr_t)arr) - ((intptr_t)data))); \ + header.Set##Arr##Crc32(Crc32c(arr, std::min(g.Arr##Number(), CRC_SIZE) * sizeof(TArr))); \ + }; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + const auto str = header.SerializeAsString(); + WriteUnaligned<ui64>(data, (ui64)htonl(str.size())); + memcpy(data + sizeof(ui64), str.data(), str.size()); + + if (size) + *size = allocator->TotalAllocatedSize() - preAllocatedSize; + + return data; +} + +static size_t TotalByteSize(const IGeoData& g) { + size_t total_size = TBlockAllocator::AllocateSize(HeaderSize() + sizeof(ui64)); + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + // undef + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + total_size += TBlockAllocator::AllocateSize(sizeof(TArr) * g.Arr##Number()); + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return total_size; +} + +NReverseGeocoder::TGeoDataMap::TGeoDataMap(const IGeoData& geoData, TBlockAllocator* allocator) + : TGeoDataMap() +{ + Data_ = Serialize(geoData, allocator, &Size_); + Remap(); +} + +void NReverseGeocoder::TGeoDataMap::SerializeToFile(const TString& path, const IGeoData& data) { + TBlob data_blob = SerializeToBlob(data); + + TFile file(path, CreateAlways | RdWr); + file.Write(data_blob.Data(), data_blob.Length()); +} + +TBlob NReverseGeocoder::TGeoDataMap::SerializeToBlob(const IGeoData& data) { + TBuffer buf; + buf.Resize(TotalByteSize(data)); + memset(buf.data(), 0, buf.size()); + + TBlockAllocator allocator(buf.Data(), buf.Size()); + TGeoDataMap(data, &allocator); + + return TBlob::FromBuffer(buf); +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.h b/library/cpp/reverse_geocoder/core/geo_data/map.h new file mode 100644 index 0000000000..e466bd912e --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/map.h @@ -0,0 +1,89 @@ +#pragma once + +#include "geo_data.h" + +#include <library/cpp/reverse_geocoder/library/block_allocator.h> + +#include <util/memory/blob.h> + +namespace NReverseGeocoder { + class TGeoDataMap: public IGeoData, public TNonCopyable { +#define GEO_BASE_DEF_VAR(TVar, Var) \ +public: \ + const TVar& Var() const override { \ + return Var##_; \ + } \ + \ +private: \ + TVar Var##_; + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ +public: \ + const TArr* Arr() const override { \ + return Arr##_; \ + } \ + TNumber Arr##Number() const override { \ + return Arr##Number_; \ + } \ + \ +private: \ + TNumber Arr##Number_; \ + const TArr* Arr##_; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + public: + TGeoDataMap(); + + static void SerializeToFile(const TString& path, const IGeoData& data); + + static TBlob SerializeToBlob(const IGeoData& data); + + TGeoDataMap(const IGeoData& data, TBlockAllocator* allocator); + + TGeoDataMap(const char* data, size_t size) + : TGeoDataMap() + { + Data_ = data; + Size_ = size; + Remap(); + } + + TGeoDataMap(TGeoDataMap&& dat) + : TGeoDataMap() + { + DoSwap(Data_, dat.Data_); + DoSwap(Size_, dat.Size_); + Remap(); + dat.Remap(); + } + + TGeoDataMap& operator=(TGeoDataMap&& dat) { + DoSwap(Data_, dat.Data_); + DoSwap(Size_, dat.Size_); + Remap(); + dat.Remap(); + return *this; + } + + const char* Data() const { + return Data_; + } + + size_t Size() const { + return Size_; + } + + private: + void Init(); + + void Remap(); + + const char* Data_; + size_t Size_; + }; + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp b/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp new file mode 100644 index 0000000000..5ff2d13783 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp @@ -0,0 +1 @@ +#include "proxy.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.h b/library/cpp/reverse_geocoder/core/geo_data/proxy.h new file mode 100644 index 0000000000..fecb9fc7cf --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/proxy.h @@ -0,0 +1,68 @@ +#pragma once + +#include "geo_data.h" +#include "map.h" + +#include <util/generic/ptr.h> +#include <util/system/filemap.h> + +namespace NReverseGeocoder { + class IGeoDataProxy { + public: + virtual const IGeoData* GeoData() const = 0; + + virtual ~IGeoDataProxy() { + } + }; + + using TGeoDataProxyPtr = THolder<IGeoDataProxy>; + + class TGeoDataMapProxy: public IGeoDataProxy, public TNonCopyable { + public: + explicit TGeoDataMapProxy(const char* path) + : MemFile_(path) + { + MemFile_.Map(0, MemFile_.Length()); + GeoData_ = TGeoDataMap((const char*)MemFile_.Ptr(), MemFile_.MappedSize()); + } + + const IGeoData* GeoData() const override { + return &GeoData_; + } + + private: + TFileMap MemFile_; + TGeoDataMap GeoData_; + }; + + class TGeoDataWrapper: public IGeoDataProxy, public TNonCopyable { + public: + explicit TGeoDataWrapper(const IGeoData& g) + : GeoData_(&g) + { + } + + const IGeoData* GeoData() const override { + return GeoData_; + } + + private: + const IGeoData* GeoData_; + }; + + class TGeoDataRawProxy: public IGeoDataProxy, public TNonCopyable { + public: + TGeoDataRawProxy(const char* data, size_t dataSize) + : GeoData_(data, dataSize) + { + } + + const IGeoData* GeoData() const override { + return &GeoData_; + } + + private: + TGeoDataMap GeoData_; + }; + +} diff --git a/library/cpp/reverse_geocoder/core/kv.cpp b/library/cpp/reverse_geocoder/core/kv.cpp new file mode 100644 index 0000000000..a48e9c947e --- /dev/null +++ b/library/cpp/reverse_geocoder/core/kv.cpp @@ -0,0 +1 @@ +#include "kv.h" diff --git a/library/cpp/reverse_geocoder/core/kv.h b/library/cpp/reverse_geocoder/core/kv.h new file mode 100644 index 0000000000..639c21de52 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/kv.h @@ -0,0 +1,13 @@ +#pragma once + +#include "common.h" + +namespace NReverseGeocoder { + // k and v is offsets on blobs in geographical data blobs array. See geo_data.h + // for details. + struct TKv { + TNumber K; + TNumber V; + }; + +} diff --git a/library/cpp/reverse_geocoder/core/location.cpp b/library/cpp/reverse_geocoder/core/location.cpp new file mode 100644 index 0000000000..b2d2f54d12 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/location.cpp @@ -0,0 +1 @@ +#include "location.h" diff --git a/library/cpp/reverse_geocoder/core/location.h b/library/cpp/reverse_geocoder/core/location.h new file mode 100644 index 0000000000..5aa3198684 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/location.h @@ -0,0 +1,21 @@ +#pragma once + +namespace NReverseGeocoder { + struct TLocation { + double Lon; + double Lat; + + TLocation() + : Lon(0) + , Lat(0) + { + } + + TLocation(double lon, double lat) + : Lon(lon) + , Lat(lat) + { + } + }; + +} diff --git a/library/cpp/reverse_geocoder/core/part.cpp b/library/cpp/reverse_geocoder/core/part.cpp new file mode 100644 index 0000000000..c973d2171a --- /dev/null +++ b/library/cpp/reverse_geocoder/core/part.cpp @@ -0,0 +1,29 @@ +#include "part.h" + +#include <library/cpp/reverse_geocoder/library/unaligned_iter.h> + +#include <util/generic/algorithm.h> + +using namespace NReverseGeocoder; + +bool NReverseGeocoder::TPart::Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const { + auto edgeRefsBegin = UnalignedIter(edgeRefs) + EdgeRefsOffset; + auto edgeRefsEnd = edgeRefsBegin + edgeRefsNumber; + + // Find lower bound edge, which lying below given point. + auto cmp = [&](const TRef& e, const TPoint& p) { + return edges[e].Lower(p, points); + }; + + auto edgeRef = LowerBound(edgeRefsBegin, edgeRefsEnd, point, cmp); + + if (edgeRef == edgeRefsEnd) + return false; + + if (edges[*edgeRef].Contains(point, points)) + return true; + + // If the point is inside of the polygon then it will intersect the edge an odd number of times. + return (edgeRef - edgeRefsBegin) % 2 == 1; +} diff --git a/library/cpp/reverse_geocoder/core/part.h b/library/cpp/reverse_geocoder/core/part.h new file mode 100644 index 0000000000..9b24fee96f --- /dev/null +++ b/library/cpp/reverse_geocoder/core/part.h @@ -0,0 +1,26 @@ +#pragma once + +#include "common.h" +#include "edge.h" +#include "point.h" + +namespace NReverseGeocoder { + // TPart contains version of persistent scanline. Parts lying in geofraphical data parts array, + // ordered by Coordinate for each polygon. Variable EdgeRefsOffset refers on EdgeRefs array for + // this part. For optimal usage of memory, part does not contain "EdgeRefsNumber" variable, because + // it's can be computed as parts[i + 1].EdgeRefsOffset - parts[i].EdgeRefsOffset for every part + // in geographical data. Especially for this, added fake part into IGeoData with correct + // EdgeRefsOffset. Refs in EdgeRefs are in increasing order for each part. It is necessary to + // quickly determine how many edges is under the point. See generator/ for details. + struct Y_PACKED TPart { + TCoordinate Coordinate; + TNumber EdgeRefsOffset; + + // Checks point lying under odd numbers of edges or on edge. + bool Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const; + }; + + static_assert(sizeof(TPart) == 8, "NReverseGeocoder::TPart size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/point.cpp b/library/cpp/reverse_geocoder/core/point.cpp new file mode 100644 index 0000000000..396e27e596 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/point.cpp @@ -0,0 +1 @@ +#include "point.h" diff --git a/library/cpp/reverse_geocoder/core/point.h b/library/cpp/reverse_geocoder/core/point.h new file mode 100644 index 0000000000..75f1dfc1b4 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/point.h @@ -0,0 +1,52 @@ +#pragma once + +#include "common.h" +#include "location.h" + +namespace NReverseGeocoder { + struct Y_PACKED TPoint { + TCoordinate X; + TCoordinate Y; + + TPoint() + : X(0) + , Y(0) + { + } + + TPoint(const TCoordinate& x1, const TCoordinate& y1) + : X(x1) + , Y(y1) + { + } + + explicit TPoint(const TLocation& l) + : X(ToCoordinate(l.Lon)) + , Y(ToCoordinate(l.Lat)) + { + } + + TPoint operator-(const TPoint& p) const { + return TPoint(X - p.X, Y - p.Y); + } + + bool operator==(const TPoint& b) const { + return X == b.X && Y == b.Y; + } + + bool operator!=(const TPoint& b) const { + return X != b.X || Y != b.Y; + } + + bool operator<(const TPoint& b) const { + return X < b.X || (X == b.X && Y < b.Y); + } + + TSquare Cross(const TPoint& p) const { + return 1ll * X * p.Y - 1ll * Y * p.X; + } + }; + + static_assert(sizeof(TPoint) == 8, "NReverseGeocoder::TPoint size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/polygon.cpp b/library/cpp/reverse_geocoder/core/polygon.cpp new file mode 100644 index 0000000000..2baac2d229 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/polygon.cpp @@ -0,0 +1,91 @@ +#include "polygon.h" + +#include <util/generic/algorithm.h> + +using namespace NReverseGeocoder; + +static bool Check(const TPart* part, const TPoint& point, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) { + const TNumber edgeRefsNumber = (part + 1)->EdgeRefsOffset - part->EdgeRefsOffset; + return part->Contains(point, edgeRefsNumber, edgeRefs, edges, points); +} + +bool NReverseGeocoder::TPolygon::Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const { + if (!Bbox.Contains(point)) + return false; + + parts += PartsOffset; + const TPart* partsEnd = parts + PartsNumber; + + // Find lower bound part, which can contains given point. + const TPart* part = LowerBound(parts, partsEnd, point, [&](const TPart& a, const TPoint& b) { + return a.Coordinate < b.X; + }); + + if (part->Coordinate > point.X) { + if (part == parts) + return false; + --part; + } + + if (point.X < part->Coordinate || point.X > (part + 1)->Coordinate) + return false; + + if (point.X == part->Coordinate) + if (part != parts && Check(part - 1, point, edgeRefs, edges, points)) + return true; + + return Check(part, point, edgeRefs, edges, points); +} + +bool NReverseGeocoder::TPolygonBase::Better(const TPolygonBase& p, const TRegion* regions, + TNumber regionsNumber) const { + if (Square < p.Square) + return true; + + if (Square == p.Square) { + const TRegion* begin = regions; + const TRegion* end = regions + regionsNumber; + + const TRegion* r1 = LowerBound(begin, end, TGeoId(RegionId)); + const TRegion* r2 = LowerBound(begin, end, TGeoId(p.RegionId)); + + if (r1 == end || r1->RegionId != RegionId) + return false; + + if (r2 == end || r2->RegionId != p.RegionId) + return false; + + return r1->Better(*r2); + } + + return false; +} + +bool NReverseGeocoder::TRawPolygon::Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges, + const TPoint* points) const { + if (!Bbox.Contains(point)) + return false; + + edgeRefs += EdgeRefsOffset; + + TNumber intersections = 0; + for (TNumber i = 0; i < EdgeRefsNumber; ++i) { + const TEdge& e = edges[edgeRefs[i]]; + + if (e.Contains(point, points)) + return true; + + TPoint a = points[e.Beg]; + TPoint b = points[e.End]; + + if (a.X > b.X) + DoSwap(a, b); + + if (a.X < point.X && b.X >= point.X && e.Lower(point, points)) + ++intersections; + } + + return intersections % 2 == 1; +} diff --git a/library/cpp/reverse_geocoder/core/polygon.h b/library/cpp/reverse_geocoder/core/polygon.h new file mode 100644 index 0000000000..065bba1e38 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/polygon.h @@ -0,0 +1,73 @@ +#pragma once + +#include "bbox.h" +#include "common.h" +#include "edge.h" +#include "part.h" +#include "point.h" +#include "region.h" + +namespace NReverseGeocoder { +#pragma pack(push, 1) + + struct TPolygonBase { + enum EType { + TYPE_UNKNOWN = 0, + TYPE_INNER = 1, + TYPE_OUTER = 2, + }; + + // If TYPE_INNER and polygon contains given point, this means that region with RegionId + // does not contains point. + EType Type; + + ui32 Unused1; + + // Geographical data indetifiers. + TGeoId RegionId; + TGeoId PolygonId; + + // Rectangle in which lies that polygon. + TBoundingBox Bbox; + + // Square of polygon. Need for determine which polygon is better. See better member function. + TSquare Square; + + // Total points number of given polygon. + TNumber PointsNumber; + + // Check that this polygon better then given polygon, which means that this polygons lying + // deeper then given in polygons hierarchy. + bool Better(const TPolygonBase& p, const TRegion* regions, TNumber regionsNumber) const; + }; + + // Polygon is a representation of persistent scanline data structure. + struct TPolygon: public TPolygonBase { + // Versions of persistent scanline. + TNumber PartsOffset; + TNumber PartsNumber; + ui32 Unused2; + + // Fast point in polygon test using persistent scanline. You can see how this data structure + // generated in generator/. + bool Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const; + }; + + static_assert(sizeof(TPolygon) == 64, "NReverseGeocoder::TPolygon size mismatch"); + + // Raw polygon is a polygon representation for slow tests. + struct TRawPolygon: public TPolygonBase { + // Raw polygon edge refs. + TNumber EdgeRefsOffset; + TNumber EdgeRefsNumber; + ui32 Unused2; + + bool Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges, + const TPoint* points) const; + }; + + static_assert(sizeof(TRawPolygon) == 64, "NReverseGeocoder::TRawPolygon size mismatch"); + +#pragma pack(pop) +} diff --git a/library/cpp/reverse_geocoder/core/region.cpp b/library/cpp/reverse_geocoder/core/region.cpp new file mode 100644 index 0000000000..62b4acd0a1 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/region.cpp @@ -0,0 +1 @@ +#include "region.h" diff --git a/library/cpp/reverse_geocoder/core/region.h b/library/cpp/reverse_geocoder/core/region.h new file mode 100644 index 0000000000..4b010c7103 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/region.h @@ -0,0 +1,37 @@ +#pragma once + +#include "common.h" + +namespace NReverseGeocoder { + struct Y_PACKED TRegion { + TGeoId RegionId; + TNumber KvsOffset; + TNumber KvsNumber; + TSquare Square; + TNumber PolygonsNumber; + ui32 Unused; + + bool operator==(const TRegion& r) const { + return RegionId == r.RegionId; + } + + bool operator<(const TRegion& r) const { + return RegionId < r.RegionId; + } + + bool operator<(const TGeoId& r) const { + return RegionId < r; + } + + friend bool operator<(const TGeoId& regionId, const TRegion& r) { + return regionId < r.RegionId; + } + + bool Better(const TRegion& r) const { + return Square < r.Square; + } + }; + + static_assert(sizeof(TRegion) == 32, "NReverseGeocoder::TRegion size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp b/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp new file mode 100644 index 0000000000..d73e4f2648 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp @@ -0,0 +1,182 @@ +#include "reverse_geocoder.h" +#include "geo_data/geo_data.h" + +#include <library/cpp/reverse_geocoder/library/unaligned_iter.h> + +#include <util/generic/algorithm.h> +#include <util/system/unaligned_mem.h> + +using namespace NReverseGeocoder; + +static bool PolygonContains(const TPolygon& p, const TPoint& point, const IGeoData& geoData) { + const TPart* parts = geoData.Parts(); + const TRef* edgeRefs = geoData.EdgeRefs(); + const TEdge* edges = geoData.Edges(); + const TPoint* points = geoData.Points(); + return p.Contains(point, parts, edgeRefs, edges, points); +} + +template <typename TAnswer> +static void UpdateAnswer(const TAnswer** answer, const TAnswer& polygon, + const IGeoData& geoData) { + if (!*answer) { + *answer = &polygon; + } else { + const TRegion* regions = geoData.Regions(); + const TNumber regionsNumber = geoData.RegionsNumber(); + if (!(*answer)->Better(polygon, regions, regionsNumber)) + *answer = &polygon; + } +} + +static void SortDebug(TReverseGeocoder::TDebug* debug, const IGeoData& geoData) { + const TRegion* regions = geoData.Regions(); + const TNumber regionsNumber = geoData.RegionsNumber(); + + auto cmp = [&](const TGeoId& a, const TGeoId& b) { + const TRegion* r1 = LowerBound(regions, regions + regionsNumber, a); + const TRegion* r2 = LowerBound(regions, regions + regionsNumber, b); + return r1->Better(*r2); + }; + + Sort(debug->begin(), debug->end(), cmp); +} + +TGeoId NReverseGeocoder::TReverseGeocoder::Lookup(const TLocation& location, TDebug* debug) const { + const IGeoData& geoData = *GeoDataProxy_->GeoData(); + + if (debug) + debug->clear(); + + const TPoint point(location); + const TRef boxRef = LookupAreaBox(point); + + if (boxRef >= geoData.BoxesNumber()) + return UNKNOWN_GEO_ID; + + const TNumber refsOffset = geoData.Boxes()[boxRef].PolygonRefsOffset; + const TNumber refsNumber = geoData.Boxes()[boxRef].PolygonRefsNumber; + + const TPolygon* answer = nullptr; + + const TPolygon* p = geoData.Polygons(); + const auto refsBegin = UnalignedIter(geoData.PolygonRefs()) + refsOffset; + const auto refsEnd = refsBegin + refsNumber; + + for (auto iterL = refsBegin, iterR = refsBegin; iterL < refsEnd; iterL = iterR) { + iterR = iterL + 1; + + if (PolygonContains(p[*iterL], point, geoData)) { + if (p[*iterL].Type == TPolygon::TYPE_INNER) { + // All polygons with same RegionId must be skipped if polygon is inner. + // In geoData small inner polygons stored before big outer polygons. + while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId) + ++iterR; + + } else { + UpdateAnswer(&answer, p[*iterL], geoData); + + if (debug) + debug->push_back(p[*iterL].RegionId); + + while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId) + ++iterR; + } + } + } + + if (debug) + SortDebug(debug, geoData); + + return answer ? answer->RegionId : UNKNOWN_GEO_ID; +} + +TGeoId NReverseGeocoder::TReverseGeocoder::RawLookup(const TLocation& location, TDebug* debug) const { + const IGeoData& geoData = *GeoDataProxy_->GeoData(); + + if (debug) + debug->clear(); + + const TPoint point(location); + + const TRawPolygon* borders = geoData.RawPolygons(); + const TNumber bordersNumber = geoData.RawPolygonsNumber(); + + const TRawPolygon* answer = nullptr; + + TNumber i = 0; + while (i < bordersNumber) { + if (borders[i].Contains(point, geoData.RawEdgeRefs(), geoData.Edges(), geoData.Points())) { + if (borders[i].Type == TRawPolygon::TYPE_INNER) { + TNumber j = i + 1; + while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId) + ++j; + + i = j; + + } else { + UpdateAnswer(&answer, borders[i], geoData); + + if (debug) + debug->push_back(borders[i].RegionId); + + TNumber j = i + 1; + while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId) + ++j; + + i = j; + } + } else { + ++i; + } + } + + if (debug) + SortDebug(debug, geoData); + + return answer ? answer->RegionId : UNKNOWN_GEO_ID; +} + +bool NReverseGeocoder::TReverseGeocoder::EachKv(TGeoId regionId, TKvCallback callback) const { + const IGeoData& g = *GeoDataProxy_->GeoData(); + + const TRegion* begin = g.Regions(); + const TRegion* end = begin + g.RegionsNumber(); + + const TRegion* region = LowerBound(begin, end, regionId); + + if (region == end || region->RegionId != regionId) + return false; + + const TKv* kvs = g.Kvs() + region->KvsOffset; + const char* blobs = g.Blobs(); + + for (TNumber i = 0; i < region->KvsNumber; ++i) { + const char* k = blobs + kvs[i].K; + const char* v = blobs + kvs[i].V; + callback(k, v); + } + + return true; +} + +void NReverseGeocoder::TReverseGeocoder::EachPolygon(TPolygonCallback callback) const { + const IGeoData& g = *GeoDataProxy_->GeoData(); + + for (TNumber i = 0; i < g.PolygonsNumber(); ++i) + callback(g.Polygons()[i]); +} + +void NReverseGeocoder::TReverseGeocoder::EachPart(const TPolygon& polygon, TPartCallback callback) const { + const IGeoData& g = *GeoDataProxy_->GeoData(); + + const TNumber partsOffset = polygon.PartsOffset; + const TNumber partsNumber = polygon.PartsNumber; + + for (TNumber i = partsOffset; i < partsOffset + partsNumber; ++i) { + const TPart& part = g.Parts()[i]; + const TPart& npart = g.Parts()[i + 1]; + const TNumber edgeRefsNumber = npart.EdgeRefsOffset - part.EdgeRefsOffset; + callback(part, edgeRefsNumber); + } +} diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.h b/library/cpp/reverse_geocoder/core/reverse_geocoder.h new file mode 100644 index 0000000000..c74eddb40e --- /dev/null +++ b/library/cpp/reverse_geocoder/core/reverse_geocoder.h @@ -0,0 +1,73 @@ +#pragma once + +#include "common.h" +#include "geo_data/geo_data.h" +#include "geo_data/proxy.h" + +#include <util/generic/noncopyable.h> +#include <util/generic/vector.h> + +#include <functional> + +namespace NReverseGeocoder { + const TGeoId UNKNOWN_GEO_ID = static_cast<TGeoId>(-1); + + // NOTE: Be careful! It's work fine and fast on real world dataset. + // But in theory it's can spent O(n^2) memory (on real world dataset it's just 6n). + // Point in polygon test will be O(log n) always. Memory spent will be O(n) in future! + class TReverseGeocoder: public TNonCopyable { + public: + using TDebug = TVector<TGeoId>; + using TKvCallback = std::function<void(const char*, const char*)>; + using TPolygonCallback = std::function<void(const TPolygon&)>; + using TPartCallback = std::function<void(const TPart&, TNumber)>; + + TReverseGeocoder() + : GeoDataProxy_() + { + } + + TReverseGeocoder(TReverseGeocoder&& g) + : GeoDataProxy_() + { + DoSwap(GeoDataProxy_, g.GeoDataProxy_); + } + + TReverseGeocoder& operator=(TReverseGeocoder&& g) { + DoSwap(GeoDataProxy_, g.GeoDataProxy_); + return *this; + } + + explicit TReverseGeocoder(const char* path) + : GeoDataProxy_(new TGeoDataMapProxy(path)) + { + } + + explicit TReverseGeocoder(const IGeoData& geoData) + : GeoDataProxy_(new TGeoDataWrapper(geoData)) + { + } + + TReverseGeocoder(const char* data, size_t dataSize) + : GeoDataProxy_(new TGeoDataRawProxy(data, dataSize)) + { + } + + TGeoId Lookup(const TLocation& location, TDebug* debug = nullptr) const; + + TGeoId RawLookup(const TLocation& location, TDebug* debug = nullptr) const; + + bool EachKv(TGeoId regionId, TKvCallback callback) const; + + void EachPolygon(TPolygonCallback callback) const; + + void EachPart(const TPolygon& polygon, TPartCallback callback) const; + + const IGeoData& GeoData() const { + return *GeoDataProxy_->GeoData(); + } + + private: + TGeoDataProxyPtr GeoDataProxy_; + }; +} diff --git a/library/cpp/reverse_geocoder/core/ya.make b/library/cpp/reverse_geocoder/core/ya.make new file mode 100644 index 0000000000..9f7dc67464 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/ya.make @@ -0,0 +1,28 @@ +LIBRARY() + +PEERDIR( + library/cpp/reverse_geocoder/library + library/cpp/reverse_geocoder/proto + library/cpp/digest/crc32c +) + +SRCS( + area_box.cpp + bbox.cpp + common.cpp + edge.cpp + reverse_geocoder.cpp + kv.cpp + location.cpp + part.cpp + point.cpp + polygon.cpp + region.cpp + geo_data/debug.cpp + geo_data/def.cpp + geo_data/geo_data.cpp + geo_data/map.cpp + geo_data/proxy.cpp +) + +END() |