aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/robots_txt/robotstxtcfg
diff options
context:
space:
mode:
authorvvvv <vvvv@ydb.tech>2023-07-31 18:21:04 +0300
committervvvv <vvvv@ydb.tech>2023-07-31 18:21:04 +0300
commitdec41c40e51aa407edef81a3c566a5a15780fc49 (patch)
tree4f197b596b32f35eca368121f0dff913419da9af /library/cpp/robots_txt/robotstxtcfg
parent3ca8b54c96e09eb2b65be7f09675623438d559c7 (diff)
downloadydb-dec41c40e51aa407edef81a3c566a5a15780fc49.tar.gz
YQL-16239 Move purecalc to public
Diffstat (limited to 'library/cpp/robots_txt/robotstxtcfg')
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt20
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt21
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt21
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt17
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt20
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp2
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/bot_id_set.h132
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp2
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h11
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/user_agents.cpp2
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/user_agents.h303
-rw-r--r--library/cpp/robots_txt/robotstxtcfg/ya.make13
12 files changed, 564 insertions, 0 deletions
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..09cfd4b3f1
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-robots_txt-robotstxtcfg)
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..6fe7e7a7ad
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-robots_txt-robotstxtcfg)
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..6fe7e7a7ad
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-robots_txt-robotstxtcfg)
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..09cfd4b3f1
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,20 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-robots_txt-robotstxtcfg)
+target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-case_insensitive_string
+)
+target_sources(cpp-robots_txt-robotstxtcfg PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
+)
diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
new file mode 100644
index 0000000000..aec668582c
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp
@@ -0,0 +1,2 @@
+#include "bot_id_set.h"
+// header compile test
diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h
new file mode 100644
index 0000000000..08aaa68a50
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#include "user_agents.h"
+
+#include <bitset>
+
+
+/// Simple bitset-based set of bot ids, meant to optimize memory and lookups
+class TBotIdSet
+{
+public:
+ using TData = std::bitset<robotstxtcfg::max_botid>;
+
+ constexpr TBotIdSet() noexcept = default;
+ constexpr TBotIdSet(const TBotIdSet&) noexcept = default;
+ constexpr TBotIdSet(TBotIdSet&&) noexcept = default;
+ constexpr TBotIdSet& operator = (const TBotIdSet&) noexcept = default;
+ constexpr TBotIdSet& operator = (TBotIdSet&&) noexcept = default;
+
+ TBotIdSet(std::initializer_list<ui32> botIds) {
+ for (auto id : botIds) {
+ insert(id);
+ }
+ }
+
+ static TBotIdSet All() noexcept {
+ TBotIdSet res;
+ res.Bots.set();
+ return res;
+ }
+
+ constexpr bool contains(ui32 botId) const noexcept {
+ return (botId < Bots.size()) && Bots[botId];
+ }
+
+ bool insert(ui32 botId) noexcept {
+ if (botId >= Bots.size() || Bots[botId]) {
+ return false;
+ }
+ Bots[botId] = true;
+ return true;
+ }
+
+ bool remove(ui32 botId) noexcept {
+ if (botId >= Bots.size() || !Bots[botId]) {
+ return false;
+ }
+ Bots[botId] = false;
+ return true;
+ }
+
+ void clear() noexcept {
+ Bots.reset();
+ }
+
+ size_t size() const noexcept {
+ return Bots.count();
+ }
+
+ bool empty() const noexcept {
+ return Bots.none();
+ }
+
+ bool operator==(const TBotIdSet& rhs) const noexcept = default;
+
+ TBotIdSet operator&(TBotIdSet rhs) const noexcept {
+ rhs.Bots &= Bots;
+ return rhs;
+ }
+
+ TBotIdSet operator|(TBotIdSet rhs) const noexcept {
+ rhs.Bots |= Bots;
+ return rhs;
+ }
+
+ TBotIdSet operator~() const noexcept {
+ TBotIdSet result;
+ result.Bots = ~Bots;
+ return result;
+ }
+
+ class iterator
+ {
+ public:
+ auto operator * () const noexcept {
+ return BotId;
+ }
+
+ iterator& operator ++ () noexcept { // step to the next set bit, or to end (BotId == Bots.size())
+ while (BotId < Bots.size()) {
+ if (++BotId == Bots.size() || Bots[BotId]) { // test the bound first: bitset::operator[] is unchecked
+ break;
+ }
+ }
+ return *this;
+ }
+
+ bool operator == (const iterator& rhs) const noexcept {
+ return (&Bots == &rhs.Bots) && (BotId == rhs.BotId);
+ }
+
+ bool operator != (const iterator& rhs) const noexcept {
+ return !(*this == rhs);
+ }
+
+ private:
+ friend class TBotIdSet;
+ iterator(const TData& bots, ui32 botId)
+ : Bots(bots)
+ , BotId(botId)
+ {
+ while (BotId < Bots.size() && !Bots[BotId]) {
+ ++BotId;
+ }
+ }
+
+ private:
+ const TData& Bots;
+ ui32 BotId;
+ };
+
+ iterator begin() const noexcept {
+ return {Bots, robotstxtcfg::id_anybot};
+ }
+
+ iterator end() const noexcept {
+ return {Bots, robotstxtcfg::max_botid};
+ }
+
+private:
+ TData Bots {};
+};
diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
new file mode 100644
index 0000000000..c5652b81c5
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp
@@ -0,0 +1,2 @@
+#include "robotstxtcfg.h"
+// header compile test
diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h
new file mode 100644
index 0000000000..2cf9430d7c
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "bot_id_set.h"
+
+
+namespace robotstxtcfg {
+
+static const TBotIdSet defaultSupportedBotIds = {id_defbot};
+static const TBotIdSet allSupportedBotIds = TBotIdSet::All();
+
+} // namespace robotstxtcfg
diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp b/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
new file mode 100644
index 0000000000..60b353a427
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp
@@ -0,0 +1,2 @@
+#include "user_agents.h"
+// header compile test
diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.h b/library/cpp/robots_txt/robotstxtcfg/user_agents.h
new file mode 100644
index 0000000000..59245d07cb
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/user_agents.h
@@ -0,0 +1,303 @@
+#pragma once
+
+#include <library/cpp/case_insensitive_string/case_insensitive_string.h>
+
+
+namespace robotstxtcfg {
+ // robots.txt agents and identifiers
+
+ enum EBots : ui32 {
+ id_anybot = 0,
+ id_yandexbot = 1,
+ id_yandexmediabot = 2,
+ id_yandeximagesbot = 3,
+ id_googlebot = 4,
+ id_yandexbotmirr = 5,
+ id_yahooslurp = 6,
+ id_msnbot = 7,
+ id_yandexcatalogbot = 8,
+ id_yandexdirectbot = 9,
+ id_yandexblogsbot = 10,
+ id_yandexnewsbot = 11,
+ id_yandexpagechk = 12,
+ id_yandexmetrikabot = 13,
+ id_yandexbrowser = 14,
+ id_yandexmarketbot = 15,
+ id_yandexcalendarbot = 16,
+ id_yandexwebmasterbot = 17,
+ id_yandexvideobot = 18,
+ id_yandeximageresizerbot = 19,
+ id_yandexadnetbot = 20,
+ id_yandexpartnerbot = 21,
+ id_yandexdirectdbot = 22,
+ id_yandextravelbot = 23,
+ id_yandexmobilebot = 24,
+ id_yandexrcabot = 25,
+ id_yandexdirectdynbot = 26,
+ id_yandexmobilebot_ed = 27,
+ id_yandexaccessibilitybot = 28,
+ id_baidubot = 29,
+ id_yandexscreenshotbot = 30,
+ id_yandexmetrikayabs = 31,
+ id_yandexvideoparserbot = 32,
+ id_yandexnewsbot4 = 33,
+ id_yandexmarketbot2 = 34,
+ id_yandexmedianabot = 35,
+ id_yandexsearchshopbot = 36,
+ id_yandexontodbbot = 37,
+ id_yandexontodbapibot = 38,
+ id_yandexampbot = 39,
+ id_yandexvideohosting = 40,
+ id_yandexmediaselling = 41,
+ id_yandexverticals = 42,
+ id_yandexturbobot = 43,
+ id_yandexzenbot = 44,
+ id_yandextrackerbot = 45,
+ id_yandexmetrikabot4 = 46,
+ id_yandexmobilescreenshotbot = 47,
+ id_yandexfaviconsbot = 48,
+ id_yandexrenderresourcesbot = 49,
+ id_yandexactivity = 50,
+ max_botid
+ };
+
+ static const ui32 id_defbot = id_yandexbot;
+
+ struct TBotInfo {
+ TCaseInsensitiveStringBuf ReqPrefix;
+ TCaseInsensitiveStringBuf FullName;
+ TStringBuf FromField = {};
+ TStringBuf UserAgent = {};
+ TStringBuf RotorUserAgent = {};
+ bool ExplicitDisallow = false;
+ };
+
+ static constexpr TStringBuf UserAgentFrom("support@search.yandex.ru");
+
+ static constexpr TBotInfo BotInfoArr[] = {
+ {"*", "*"},
+ {"Yandex", "YandexBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexMedia/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexImages/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Google", "GoogleBot"},
+ {"Yandex", "YandexBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Slurp", "Slurp"},
+ {"msn", "msnbot"},
+ {"Yandex", "YandexCatalog/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+
+ {"Yandex", "YandexBlogs/0.99", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexNews/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexPagechecker/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexMetrika/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexBrowser/1.0", UserAgentFrom,
+ "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5",
+ "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5",
+ false},
+ {"Yandex", "YandexMarket/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"YandexCalendar", "YandexCalendar/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexWebmaster/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexVideo/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexImageResizer/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+
+ {"YandexDirect", "YandexDirect/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexPartner", "YandexPartner/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexTravel/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"Yandex", "YandexBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ false},
+ {"YandexRCA", "YandexRCA/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexDirectDyn", "YandexDirectDyn/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMobileBot", "YandexMobileBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)",
+ true},
+ {"YandexAccessibilityBot", "YandexAccessibilityBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Baidu", "Baiduspider"},
+
+ {"YandexScreenshotBot", "YandexScreenshotBot/3.0", UserAgentFrom,
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)",
+ true},
+ {"YandexMetrika", "YandexMetrika/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01)",
+ "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexVideoParser", "YandexVideoParser/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexNews/4.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMarket", "YandexMarket/2.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMedianaBot", "YandexMedianaBot/1.0", UserAgentFrom,
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)",
+ true},
+ {"YandexSearchShop", "YandexSearchShop/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex", "YandexOntoDB/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ false},
+ {"YandexOntoDBAPI", "YandexOntoDBAPI/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"Yandex-AMPHTML", "Yandex-AMPHTML", UserAgentFrom,
+ "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+
+ {"YandexVideoHosting", "YandexVideoHosting/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMediaSelling", "YandexMediaSelling/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexVerticals", "YandexVerticals/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexTurbo", "YandexTurbo/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexZenRss", "YandexZenRss/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexTracker", "YandexTracker/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMetrika", "YandexMetrika/4.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexMobileScreenShotBot", "YandexMobileScreenShotBot/1.0", UserAgentFrom,
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
+ true},
+ {"YandexFavicons", "YandexFavicons/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexRenderResourcesBot", "YandexRenderResourcesBot/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true},
+ {"YandexActivity", "YandexActivity/1.0", UserAgentFrom,
+ "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots)",
+ "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0",
+ true}
+ };
+
+ static_assert(std::size(BotInfoArr) == max_botid);
+
+ constexpr auto GetReqPrefix(ui32 botId) {
+ return BotInfoArr[botId].ReqPrefix;
+ }
+
+ constexpr auto GetFullName(ui32 botId) {
+ return BotInfoArr[botId].FullName;
+ }
+
+ constexpr auto GetFromField(ui32 botId) {
+ return BotInfoArr[botId].FromField;
+ }
+
+ constexpr auto GetUserAgent(ui32 botId) {
+ return BotInfoArr[botId].UserAgent;
+ }
+
+ constexpr auto GetRotorUserAgent(ui32 botId) {
+ return BotInfoArr[botId].RotorUserAgent;
+ }
+
+ constexpr bool IsExplicitDisallow(ui32 botId) {
+ return BotInfoArr[botId].ExplicitDisallow;
+ }
+
+ constexpr bool IsYandexBotId(ui32 botId) {
+ return !BotInfoArr[botId].UserAgent.empty();
+ }
+
+} // namespace robotstxtcfg
diff --git a/library/cpp/robots_txt/robotstxtcfg/ya.make b/library/cpp/robots_txt/robotstxtcfg/ya.make
new file mode 100644
index 0000000000..61c731be42
--- /dev/null
+++ b/library/cpp/robots_txt/robotstxtcfg/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+SRCS(
+ bot_id_set.cpp
+ robotstxtcfg.cpp
+ user_agents.cpp
+)
+
+PEERDIR(
+ library/cpp/case_insensitive_string
+)
+
+END()