about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/http/fetch
diff options
context:
space:
mode:
author marakasov <marakasov@yandex-team.ru> 2022-02-10 16:49:50 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:49:50 +0300
commit 359de4829c1d82b06fad5e0ad82470e4e9f27c89 (patch)
tree a58ba6d37e60106215ea04536f6f33d3e2fe4f6f /library/cpp/http/fetch
parent d94c2eed82b3c1259ac7320eb28e9731cd990c22 (diff)
download ydb-359de4829c1d82b06fad5e0ad82470e4e9f27c89.tar.gz
Restoring authorship annotation for <marakasov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/http/fetch')
-rw-r--r-- library/cpp/http/fetch/httpfsm.h 4
-rw-r--r-- library/cpp/http/fetch/httpfsm.rl6 42
-rw-r--r-- library/cpp/http/fetch/httpfsm_ut.cpp 268
3 files changed, 157 insertions, 157 deletions
diff --git a/library/cpp/http/fetch/httpfsm.h b/library/cpp/http/fetch/httpfsm.h
index c4abdcd0d2..e1b109d26f 100644
--- a/library/cpp/http/fetch/httpfsm.h
+++ b/library/cpp/http/fetch/httpfsm.h
@@ -58,8 +58,8 @@ struct THttpHeaderParser {
char* hreflangpos;
size_t hreflangspace;
- bool AcceptingXRobots;
-
+ bool AcceptingXRobots;
+
THttpAuthHeader* auth_hd;
THttpRequestHeader* request_hd;
diff --git a/library/cpp/http/fetch/httpfsm.rl6 b/library/cpp/http/fetch/httpfsm.rl6
index eab0328b18..e2447a6dfa 100644
--- a/library/cpp/http/fetch/httpfsm.rl6
+++ b/library/cpp/http/fetch/httpfsm.rl6
@@ -227,9 +227,9 @@ mime_type = "text/plain"i %{c(MIME_TEXT)}
| "application/x-gzip"i %{c(MIME_GZIP)}
| "application/json"i %{c(MIME_JSON)}
| ("application/javascript"i | "text/javascript"i) %{c(MIME_JAVASCRIPT)}
- | "application/vnd.android.package-archive"i %{c(MIME_APK)}
- | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)}
- ;
+ | "application/vnd.android.package-archive"i %{c(MIME_APK)}
+ | ("image/x-icon"i | "image/vnd.microsoft.icon"i) %{c(MIME_IMAGE_ICON)}
+ ;
charset_name = token_char+ >clear_buf $update_buf;
@@ -266,7 +266,7 @@ refresh = "refresh"i def int ';' lws "url="i loc_url eoh %set_location;
################# x-robots-tag ################
action set_x_robots {
- if (hd && AcceptingXRobots) {
+ if (hd && AcceptingXRobots) {
if (I > 0)
hd->x_robots_tag |= I;
@@ -277,10 +277,10 @@ action set_x_robots {
}
}
-action accept_x_robots {
- AcceptingXRobots = (bool)I;
-}
-
+action accept_x_robots {
+ AcceptingXRobots = (bool)I;
+}
+
x_robots_directive = "none"i %{c(3)} | "all"i %{c(-3)}
| "noindex"i %{c(1)} | "index"i %{c(-1)}
| "nofollow"i %{c(2)} | "follow"i %{c(-2)}
@@ -288,19 +288,19 @@ x_robots_directive = "none"i %{c(3)} | "all"i %{c(-3)}
| "noyaca"i %{c(16)}
| "noodp"i %{c(8)};
-any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*;
-any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*;
-
-unavailable_after_directive = "unavailable_after"i def any_value;
-
-yandex_robot = "yandex"i | "yandexbot"i;
-other_robot = any_key - "unavailable_after"i - yandex_robot;
-robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)};
-
-x_robots_value = (robot_specifier def %accept_x_robots)? (unavailable_after_directive | (x_robots_directive %set_x_robots) | any_value? );
-
-x_robots_tag = "x-robots-tag"i def >{ AcceptingXRobots = true; } x_robots_value (lws ',' lws x_robots_value)* eoh;
-
+any_value = (any_text_char - [, \t])+ (lws (any_text_char - [, \t])+)*;
+any_key = (any_text_char - [:, \t])+ (lws (any_text_char - [:, \t])+)*;
+
+unavailable_after_directive = "unavailable_after"i def any_value;
+
+yandex_robot = "yandex"i | "yandexbot"i;
+other_robot = any_key - "unavailable_after"i - yandex_robot;
+robot_specifier = yandex_robot %{c(1)} | other_robot %{c(0)};
+
+x_robots_value = (robot_specifier def %accept_x_robots)? (unavailable_after_directive | (x_robots_directive %set_x_robots) | any_value? );
+
+x_robots_tag = "x-robots-tag"i def >{ AcceptingXRobots = true; } x_robots_value (lws ',' lws x_robots_value)* eoh;
+
################# rel_canonical ###############
action set_canonical {
if (hd && buflen < FETCHER_URL_MAX) {
diff --git a/library/cpp/http/fetch/httpfsm_ut.cpp b/library/cpp/http/fetch/httpfsm_ut.cpp
index b018e80101..b1904be14c 100644
--- a/library/cpp/http/fetch/httpfsm_ut.cpp
+++ b/library/cpp/http/fetch/httpfsm_ut.cpp
@@ -18,13 +18,13 @@ class THttpHeaderParserTestSuite: public TTestBase {
UNIT_TEST(TestResponseHeaderOnRequest);
UNIT_TEST(TestRequestHeaderOnResponse);
UNIT_TEST(TestXRobotsTagUnknownTags);
- UNIT_TEST(TestXRobotsTagMyBot);
- UNIT_TEST(TestXRobotsTagOtherBot);
- UNIT_TEST(TestXRobotsTagUnavailableAfterAware);
- UNIT_TEST(TestXRobotsTagUnavailableAfterWorks);
+ UNIT_TEST(TestXRobotsTagMyBot);
+ UNIT_TEST(TestXRobotsTagOtherBot);
+ UNIT_TEST(TestXRobotsTagUnavailableAfterAware);
+ UNIT_TEST(TestXRobotsTagUnavailableAfterWorks);
UNIT_TEST(TestXRobotsTagOverridePriority);
- UNIT_TEST(TestXRobotsTagDoesNotBreakCharset);
- UNIT_TEST(TestXRobotsTagAllowsMultiline);
+ UNIT_TEST(TestXRobotsTagDoesNotBreakCharset);
+ UNIT_TEST(TestXRobotsTagAllowsMultiline);
UNIT_TEST(TestRelCanonical);
UNIT_TEST(TestHreflang);
UNIT_TEST(TestHreflangOnLongInput);
@@ -50,13 +50,13 @@ public:
void TestResponseHeaderOnRequest();
void TestRequestHeaderOnResponse();
void TestXRobotsTagUnknownTags();
- void TestXRobotsTagMyBot();
- void TestXRobotsTagOtherBot();
- void TestXRobotsTagUnavailableAfterAware();
- void TestXRobotsTagUnavailableAfterWorks();
+ void TestXRobotsTagMyBot();
+ void TestXRobotsTagOtherBot();
+ void TestXRobotsTagUnavailableAfterAware();
+ void TestXRobotsTagUnavailableAfterWorks();
void TestXRobotsTagOverridePriority();
- void TestXRobotsTagDoesNotBreakCharset();
- void TestXRobotsTagAllowsMultiline();
+ void TestXRobotsTagDoesNotBreakCharset();
+ void TestXRobotsTagAllowsMultiline();
void TestRelCanonical();
void TestHreflang();
void TestHreflangOnLongInput();
@@ -238,80 +238,80 @@ void THttpHeaderParserTestSuite::TestXRobotsTagUnknownTags() {
const char* headers =
"HTTP/1.1 200 OK\r\n"
"Content-Type: text/html\r\n"
- "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n";
+ "x-robots-tag: asdfasdf asdf asdf,,, , noindex,noodpXXX , NOFOLLOW ,noodpnofollow\r\n\r\n";
i32 result = httpHeaderParser->Execute(headers, strlen(headers));
UNIT_ASSERT_EQUAL(result, 2);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");
- TestFinish();
-}
-
-void THttpHeaderParserTestSuite::TestXRobotsTagMyBot() {
- TestStart();
- THttpHeader httpHeader;
- httpHeaderParser->Init(&httpHeader);
- const char* headers =
- "HTTP/1.1 200 OK\r\n"
- "Content-Type: text/html\r\n"
- "x-robots-tag: yandex: noindex, nofollow\r\n"
- "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n";
- i32 result = httpHeaderParser->Execute(headers, strlen(headers));
- UNIT_ASSERT_EQUAL(result, 2);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");
- TestFinish();
-}
-
-void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() {
- TestStart();
- THttpHeader httpHeader;
- httpHeaderParser->Init(&httpHeader);
- const char* headers =
- "HTTP/1.1 200 OK\r\n"
- "Content-Type: text/html\r\n"
- "x-robots-tag: google: noindex, nofollow\r\n"
- "x-robots-tag: googlebot: noarchive, noodp\r\n"
- "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n";
- i32 result = httpHeaderParser->Execute(headers, strlen(headers));
- UNIT_ASSERT_EQUAL(result, 2);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx");
- TestFinish();
-}
-
-void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() {
- TestStart();
- THttpHeader httpHeader;
- httpHeaderParser->Init(&httpHeader);
- // проверяем только что unavailable_after ничего не ломает
- const char* headers =
- "HTTP/1.1 200 OK\r\n"
- "Content-Type: text/html\r\n"
- "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n"
- "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n";
- i32 result = httpHeaderParser->Execute(headers, strlen(headers));
- UNIT_ASSERT_EQUAL(result, 2);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");
- TestFinish();
-}
-
-void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() {
- TestStart();
- THttpHeader httpHeader;
- httpHeaderParser->Init(&httpHeader);
- // пока не поддерживается
- const char* headers =
- "HTTP/1.1 200 OK\r\n"
- "Content-Type: text/html\r\n"
- "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n";
- i32 result = httpHeaderParser->Execute(headers, strlen(headers));
- UNIT_ASSERT_EQUAL(result, 2);
- //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1);
- //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx");
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 3);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");
TestFinish();
}
+void THttpHeaderParserTestSuite::TestXRobotsTagMyBot() {
+ TestStart();
+ THttpHeader httpHeader;
+ httpHeaderParser->Init(&httpHeader);
+ const char* headers =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-Type: text/html\r\n"
+ "x-robots-tag: yandex: noindex, nofollow\r\n"
+ "x-robots-tag: yandexbot: noarchive, noodp\r\n\r\n";
+ i32 result = httpHeaderParser->Execute(headers, strlen(headers));
+ UNIT_ASSERT_EQUAL(result, 2);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");
+ TestFinish();
+}
+
+void THttpHeaderParserTestSuite::TestXRobotsTagOtherBot() {
+ TestStart();
+ THttpHeader httpHeader;
+ httpHeaderParser->Init(&httpHeader);
+ const char* headers =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-Type: text/html\r\n"
+ "x-robots-tag: google: noindex, nofollow\r\n"
+ "x-robots-tag: googlebot: noarchive, noodp\r\n"
+ "x-robots-tag: !still(-other) bot_: foo, noyaca\r\n\r\n";
+ i32 result = httpHeaderParser->Execute(headers, strlen(headers));
+ UNIT_ASSERT_EQUAL(result, 2);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 0);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "xxxxx");
+ TestFinish();
+}
+
+void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterAware() {
+ TestStart();
+ THttpHeader httpHeader;
+ httpHeaderParser->Init(&httpHeader);
+ // проверяем только что unavailable_after ничего не ломает
+ const char* headers =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-Type: text/html\r\n"
+ "x-robots-tag: unavailable_after: 01 Jan 2999 00:00 UTC, noindex, nofollow\r\n"
+ "x-robots-tag: yandex: unavailable_after: 01 Jan 2999 00:00 UTC, noarchive, noodp\r\n\r\n";
+ i32 result = httpHeaderParser->Execute(headers, strlen(headers));
+ UNIT_ASSERT_EQUAL(result, 2);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 15);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0000x");
+ TestFinish();
+}
+
+void THttpHeaderParserTestSuite::TestXRobotsTagUnavailableAfterWorks() {
+ TestStart();
+ THttpHeader httpHeader;
+ httpHeaderParser->Init(&httpHeader);
+ // пока не поддерживается
+ const char* headers =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-Type: text/html\r\n"
+ "x-robots-tag: unavailable_after: 01 Jan 2000 00:00 UTC\r\n\r\n";
+ i32 result = httpHeaderParser->Execute(headers, strlen(headers));
+ UNIT_ASSERT_EQUAL(result, 2);
+ //UNIT_ASSERT_EQUAL(httpHeader.x_robots_tag, 1);
+ //UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "0xxxx");
+ TestFinish();
+}
+
void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() {
TestStart();
THttpHeader httpHeader;
@@ -327,59 +327,59 @@ void THttpHeaderParserTestSuite::TestXRobotsTagOverridePriority() {
TestFinish();
}
-void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() {
- TestStart();
- THttpHeader httpHeader;
- httpHeaderParser->Init(&httpHeader);
- const char* headers =
- "HTTP/1.1 200 OK\r\n"
- "X-Robots-Tag: noarchive\r\n"
- "Content-Type: application/json; charset=utf-8\r\n\r\n";
- i32 result = httpHeaderParser->Execute(headers, strlen(headers));
- UNIT_ASSERT_EQUAL(result, 2);
- UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));
- UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));
- TestFinish();
-}
-
-void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() {
- TestStart();
- THttpHeader httpHeader;
- httpHeaderParser->Init(&httpHeader);
- const char* headers =
- "HTTP/1.1 200 OK\r\n"
- "X-Robots-Tag\r\n"
- " :\r\n"
- " unavailable_since\r\n"
- " :\r\n"
- " ,\r\n"
- " unavailable_since\r\n"
- " :\r\n"
- " 01 Jan 2000\r\n"
- " 00:00 UTC\r\n"
- " ,\r\n"
- " yandexbot\r\n"
- " :\r\n"
- " noindex\r\n"
- " ,\r\n"
- " garbage\r\n"
- " ,\r\n"
- " nofollow\r\n"
- " ,\r\n"
- " other\r\n"
- " bot\r\n"
- " :\r\n"
- " noarchive\r\n"
- " ,\r\n"
- "Content-Type: application/json; charset=utf-8\r\n\r\n";
- i32 result = httpHeaderParser->Execute(headers, strlen(headers));
- UNIT_ASSERT_EQUAL(result, 2);
- UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");
- UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));
- UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));
- TestFinish();
-}
-
+void THttpHeaderParserTestSuite::TestXRobotsTagDoesNotBreakCharset() {
+ TestStart();
+ THttpHeader httpHeader;
+ httpHeaderParser->Init(&httpHeader);
+ const char* headers =
+ "HTTP/1.1 200 OK\r\n"
+ "X-Robots-Tag: noarchive\r\n"
+ "Content-Type: application/json; charset=utf-8\r\n\r\n";
+ i32 result = httpHeaderParser->Execute(headers, strlen(headers));
+ UNIT_ASSERT_EQUAL(result, 2);
+ UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));
+ UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));
+ TestFinish();
+}
+
+void THttpHeaderParserTestSuite::TestXRobotsTagAllowsMultiline() {
+ TestStart();
+ THttpHeader httpHeader;
+ httpHeaderParser->Init(&httpHeader);
+ const char* headers =
+ "HTTP/1.1 200 OK\r\n"
+ "X-Robots-Tag\r\n"
+ " :\r\n"
+ " unavailable_since\r\n"
+ " :\r\n"
+ " ,\r\n"
+ " unavailable_since\r\n"
+ " :\r\n"
+ " 01 Jan 2000\r\n"
+ " 00:00 UTC\r\n"
+ " ,\r\n"
+ " yandexbot\r\n"
+ " :\r\n"
+ " noindex\r\n"
+ " ,\r\n"
+ " garbage\r\n"
+ " ,\r\n"
+ " nofollow\r\n"
+ " ,\r\n"
+ " other\r\n"
+ " bot\r\n"
+ " :\r\n"
+ " noarchive\r\n"
+ " ,\r\n"
+ "Content-Type: application/json; charset=utf-8\r\n\r\n";
+ i32 result = httpHeaderParser->Execute(headers, strlen(headers));
+ UNIT_ASSERT_EQUAL(result, 2);
+ UNIT_ASSERT_EQUAL(httpHeader.x_robots_state, "00xxx");
+ UNIT_ASSERT_EQUAL(httpHeader.mime_type, static_cast<ui8>(MIME_JSON));
+ UNIT_ASSERT_EQUAL(httpHeader.charset, static_cast<ui8>(CODES_UTF8));
+ TestFinish();
+}
+
void THttpHeaderParserTestSuite::TestHreflang() {
TestStart();
THttpHeader httpHeader;