diff options
author | arcadia-devtools <arcadia-devtools@yandex-team.ru> | 2022-02-18 18:53:55 +0300 |
---|---|---|
committer | arcadia-devtools <arcadia-devtools@yandex-team.ru> | 2022-02-18 18:53:55 +0300 |
commit | 27ee4528d5db1a125f8d8def9e1bdf7a477b7492 (patch) | |
tree | 6e2278cdb027f115086f1d35768473d8b2b87515 | |
parent | 6bb39bdb243edc3adf72eedf5700c691617c615d (diff) | |
download | ydb-27ee4528d5db1a125f8d8def9e1bdf7a477b7492.tar.gz |
intermediate changes
ref:4c3887b325a2b9c0b2481e44130c092b3c8d545f
-rw-r--r-- | build/rules/flake8/migrations.yaml | 2 | ||||
-rw-r--r-- | contrib/python/six/.yandex_meta/yamaker.yaml | 2 | ||||
-rw-r--r-- | contrib/python/six/patches/01-arcadia.patch | 11 | ||||
-rw-r--r-- | contrib/python/six/ya.make | 10 | ||||
-rw-r--r-- | library/cpp/http/misc/httpreqdata.cpp | 136 |
5 files changed, 120 insertions, 41 deletions
diff --git a/build/rules/flake8/migrations.yaml b/build/rules/flake8/migrations.yaml index 206e5fd79c..6663023ff2 100644 --- a/build/rules/flake8/migrations.yaml +++ b/build/rules/flake8/migrations.yaml @@ -418,6 +418,7 @@ migrations: - market/mstat/ch-cache/lib/database - market/reductor/configure/lib - market/reductor/www + - market/report/lite - market/sre/library/python/maaslib - market/sre/services/balancer_api/lib - market/tools/report_stats/lib @@ -3594,7 +3595,6 @@ migrations: - infra/walle/server/tests - market/forecaster/lite - market/idx/marketindexer/medium_tests/yatf - - market/report/lite - ofd/notifier/tests/test_unit - saas/rtyserver_test/tests - search/lingboost/saas/codecs/test_bundle_codecs/tests diff --git a/contrib/python/six/.yandex_meta/yamaker.yaml b/contrib/python/six/.yandex_meta/yamaker.yaml new file mode 100644 index 0000000000..2502bc85b9 --- /dev/null +++ b/contrib/python/six/.yandex_meta/yamaker.yaml @@ -0,0 +1,2 @@ +keep: +- tests/test_six.py diff --git a/contrib/python/six/patches/01-arcadia.patch b/contrib/python/six/patches/01-arcadia.patch new file mode 100644 index 0000000000..57f85cb6d9 --- /dev/null +++ b/contrib/python/six/patches/01-arcadia.patch @@ -0,0 +1,11 @@ +--- contrib/python/six/six.py (index) ++++ contrib/python/six/six.py (working tree) +@@ -274,7 +274,7 @@ _moved_attributes = [ + MovedModule("collections_abc", "collections", "collections.abc" if sys.version_info >= (3, 3) else "collections"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), +- MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"), ++ # MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread" if sys.version_info < (3, 9) else "_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), diff --git a/contrib/python/six/ya.make b/contrib/python/six/ya.make index e0c7849214..096129b1e5 100644 --- 
a/contrib/python/six/ya.make +++ b/contrib/python/six/ya.make @@ -1,11 +1,15 @@ -OWNER(g:python-contrib) +# Generated by devtools/yamaker (pypi). PY23_LIBRARY() -LICENSE(MIT) +OWNER(g:python-contrib) VERSION(1.16.0) +LICENSE(MIT) + +NO_LINT() + PY_SRCS( TOP_LEVEL six.py @@ -17,8 +21,6 @@ RESOURCE_FILES( .dist-info/top_level.txt ) -NO_LINT() - END() RECURSE_FOR_TESTS( diff --git a/library/cpp/http/misc/httpreqdata.cpp b/library/cpp/http/misc/httpreqdata.cpp index 38bdad3996..e8bf6df1a1 100644 --- a/library/cpp/http/misc/httpreqdata.cpp +++ b/library/cpp/http/misc/httpreqdata.cpp @@ -1,7 +1,15 @@ #include "httpreqdata.h" #include <library/cpp/case_insensitive_string/case_insensitive_string.h> + #include <util/stream/mem.h> +#include <util/string/join.h> + +#include <array> + +#ifdef _sse4_2_ +#include <smmintrin.h> +#endif TBaseServerRequestData::TBaseServerRequestData(SOCKET s) : Socket_(s) @@ -98,25 +106,29 @@ TStringBuf TBaseServerRequestData::Environment(TStringBuf key) const { const TString& TBaseServerRequestData::GetCurPage() const { if (!CurPage_ && Host_) { - CurPage_ = "http://"; - CurPage_ += Host_; + std::array<TStringBuf, 7> fragments; + auto fragmentIt = fragments.begin(); + *fragmentIt++ = "http://"sv; + *fragmentIt++ = Host_; if (Port_) { - CurPage_ += ':'; - CurPage_ += Port_; + *fragmentIt++ = ":"sv; + *fragmentIt++ = Port_; } - CurPage_ += Path_; - if (Query_) { - CurPage_ += '?'; - CurPage_ += Query_; + *fragmentIt++ = Path_; + if (!Query_.empty()) { + *fragmentIt++ = "?"sv; + *fragmentIt++ = Query_; } + + CurPage_ = JoinRange(""sv, fragments.begin(), fragmentIt); } return CurPage_; } -bool TBaseServerRequestData::Parse(TStringBuf origReqBuf) { - ParseBuf_.reserve(origReqBuf.size() + 1); - ParseBuf_.assign(origReqBuf.begin(), origReqBuf.end()); - ParseBuf_.push_back('\0'); +bool TBaseServerRequestData::Parse(TStringBuf origReq) { + ParseBuf_.reserve(origReq.size() + 16); + ParseBuf_.assign(origReq.begin(), origReq.end()); + 
ParseBuf_.insert(ParseBuf_.end(), 16, ' '); char* req = ParseBuf_.data(); while (*req == ' ' || *req == '\t') @@ -126,33 +138,85 @@ bool TBaseServerRequestData::Parse(TStringBuf origReqBuf) { while (req[1] == '/') // remove redundant slashes req++; - // detect url end (can contain some garbage after whitespace, e.g. 'HTTP 1.1') - char* urlEnd = req; - while (*urlEnd && *urlEnd != ' ' && *urlEnd != '\t') - urlEnd++; - if (*urlEnd) - *urlEnd = 0; - - // cut fragment if exists - char* fragment = strchr(req, '#'); - if (fragment) - *fragment = 0; // ignore fragment - else - fragment = urlEnd; - char* path = req; - - // calculate Search length without additional strlen-ing - char* query = strchr(path, '?'); - if (query) { - *query++ = 0; - ptrdiff_t delta = fragment - query; - // indeed, second case is a parse error - Query_ = {query, static_cast<size_t>(delta >= 0 ? delta : (urlEnd - query))}; + char* pathBegin = req; + char* queryBegin = nullptr; + +#ifdef _sse4_2_ + const __m128i simdSpace = _mm_set1_epi8(' '); + const __m128i simdTab = _mm_set1_epi8('\t'); + const __m128i simdHash = _mm_set1_epi8('#'); + const __m128i simdQuestion = _mm_set1_epi8('?'); + + auto isEnd = [=](__m128i x) { + const auto v = _mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(x, simdSpace), _mm_cmpeq_epi8(x, simdTab)), + _mm_cmpeq_epi8(x, simdHash)); + return !_mm_testz_si128(v, v); + }; + + // No need for the range check because we have padding of spaces at the end. + for (;; req += 16) { + const auto x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(req)); + const auto isQuestionSimd = _mm_cmpeq_epi8(x, simdQuestion); + const auto isQuestion = !_mm_testz_si128(isQuestionSimd, isQuestionSimd); + if (isEnd(x)) { + if (isQuestion) { + // The prospective query end and a question sign are both in the + // current block. Need to find out which comes first. 
+ for (;*req != ' ' && *req != '\t' && *req != '#'; ++req) { + if (*req == '?') { + queryBegin = req + 1; + break; + } + } + } + break; + } + if (isQuestion) { + // Find the exact query beginning + for (queryBegin = req; *queryBegin != '?'; ++queryBegin) { + } + ++queryBegin; + + break; + } + } + + // If we bailed out because we found the query string beginning, now look for the end of the query + if (queryBegin) { + for (;; req += 16) { + const auto x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(req)); + if (isEnd(x)) { + break; + } + } + } +#else + for (;*req != ' ' && *req != '\t' && *req != '#'; ++req) { + if (*req == '?') { + queryBegin = req + 1; + break; + } + } +#endif + + while (*req != ' ' && *req != '\t' && *req != '#') { + ++req; + } + + char* pathEnd = queryBegin ? queryBegin - 1 : req; + // Make sure Path_ and Query_ are actually zero-terminated. + *pathEnd = '\0'; + *req = '\0'; + Path_ = TStringBuf{pathBegin, pathEnd}; + if (queryBegin) { + Query_ = TStringBuf{queryBegin, req}; + OrigQuery_ = Query_; } else { Query_ = {}; + OrigQuery_ = {}; } - Path_ = path; - OrigQuery_ = Query_; return true; } |