aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/html/pcdata
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/html/pcdata
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/html/pcdata')
-rw-r--r--library/cpp/html/pcdata/pcdata.cpp81
-rw-r--r--library/cpp/html/pcdata/pcdata.h10
-rw-r--r--library/cpp/html/pcdata/pcdata_ut.cpp48
-rw-r--r--library/cpp/html/pcdata/ut/ya.make9
-rw-r--r--library/cpp/html/pcdata/ya.make10
5 files changed, 158 insertions, 0 deletions
diff --git a/library/cpp/html/pcdata/pcdata.cpp b/library/cpp/html/pcdata/pcdata.cpp
new file mode 100644
index 0000000000..740c240fd2
--- /dev/null
+++ b/library/cpp/html/pcdata/pcdata.cpp
@@ -0,0 +1,81 @@
+#include "pcdata.h"
+
+#include <util/string/strspn.h>
+
+// Character set that has to be replaced with HTML entities: " < > & '
+static TCompactStrSpn sspn("\"<>&'");
+
+/// Appends str to strout, writing an entity in place of every special character.
+/// With qAmp == false the ampersand is copied through as a plain '&'.
+static void EncodeHtmlPcdataAppendInternal(const TStringBuf str, TString& strout, bool qAmp) {
+    const char* cur = str.data();
+    const char* const end = cur + str.length();
+
+    while (true) {
+        // Copy the run of characters that precedes the next special one (or the end).
+        const char* const special = sspn.FindFirstOf(cur, end);
+        strout.AppendNoAlias(cur, special - cur);
+        cur = special;
+
+        if (cur == end) {
+            return;
+        }
+
+        const char ch = *cur;
+        if (ch == '\"') {
+            strout += TStringBuf("&quot;");
+        } else if (ch == '<') {
+            strout += TStringBuf("&lt;");
+        } else if (ch == '>') {
+            strout += TStringBuf("&gt;");
+        } else if (ch == '\'') {
+            strout += TStringBuf("&#39;");
+        } else if (ch == '&') {
+            strout += qAmp ? TStringBuf("&amp;") : TStringBuf("&");
+        } else {
+            // No other character is handled: the position is left untouched.
+            continue;
+        }
+
+        // Step over the character that has just been replaced.
+        ++cur;
+    }
+}
+
+/// Appends the HTML-encoded form of str to strout; '&' is always written as "&amp;".
+void EncodeHtmlPcdataAppend(const TStringBuf str, TString& strout) {
+    constexpr bool escapeAmpersand = true;
+    EncodeHtmlPcdataAppendInternal(str, strout, escapeAmpersand);
+}
+
+/// Returns a new string with the HTML-encoded form of str.
+/// qAmp selects whether '&' becomes "&amp;" (true) or is copied unchanged (false).
+TString EncodeHtmlPcdata(const TStringBuf str, bool qAmp) {
+    TString encoded;
+    EncodeHtmlPcdataAppendInternal(str, encoded, qAmp);
+    return encoded;
+}
+
+TString DecodeHtmlPcdata(const TString& sz) {
+ TString res;
+ const char* codes[] = {"&quot;", "&lt;", "&gt;", "&#39;", "&#039;", "&amp;", "&apos;", nullptr};
+ const char chars[] = {'\"', '<', '>', '\'', '\'', '&', '\''};
+ for (size_t i = 0; i < sz.length(); ++i) {
+ char c = sz[i];
+ if (c == '&') {
+ for (const char** p = codes; *p; ++p) {
+ size_t len = strlen(*p);
+ if (strncmp(sz.c_str() + i, *p, len) == 0) {
+ i += len - 1;
+ c = chars[p - codes];
+ break;
+ }
+ }
+ }
+ res += c;
+ }
+ return res;
+}
diff --git a/library/cpp/html/pcdata/pcdata.h b/library/cpp/html/pcdata/pcdata.h
new file mode 100644
index 0000000000..7dd741f53d
--- /dev/null
+++ b/library/cpp/html/pcdata/pcdata.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+/// Converts a text into HTML-code. Special characters of HTML («"», «<», «>», «'», «&») are replaced with entities.
+/// Pass qAmp = false to leave «&» unescaped.
+TString EncodeHtmlPcdata(const TStringBuf str, bool qAmp = true);
+/// Same encoding with «&» always escaped; the result is appended to strout.
+void EncodeHtmlPcdataAppend(const TStringBuf str, TString& strout);
+
+/// Reverse of EncodeHtmlPcdata(): replaces &quot; &lt; &gt; &#39; &#039; &amp; and &apos; with their characters.
+TString DecodeHtmlPcdata(const TString& sz);
diff --git a/library/cpp/html/pcdata/pcdata_ut.cpp b/library/cpp/html/pcdata/pcdata_ut.cpp
new file mode 100644
index 0000000000..5833f8bc59
--- /dev/null
+++ b/library/cpp/html/pcdata/pcdata_ut.cpp
@@ -0,0 +1,48 @@
+#include "pcdata.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+// Unit tests for EncodeHtmlPcdata(), EncodeHtmlPcdataAppend() and DecodeHtmlPcdata().
+Y_UNIT_TEST_SUITE(TPcdata) {
+ // Encodes raw binary data and checks that encode -> decode round-trips generated strings.
+ Y_UNIT_TEST(TestStress) {
+ {
+ // The raw bytes of the integer (zero bytes included) are fed to the encoder;
+ // the result is not inspected, the call only has to complete.
+ ui64 key = 0x000017C0B76C4E87ull;
+ TString res = EncodeHtmlPcdata(TStringBuf((const char*)&key, sizeof(key)));
+ }
+
+ for (size_t i = 0; i < 1000; ++i) {
+ // NOTE(review): presumably RandomString(len, seed) - confirm against the unittest library.
+ const TString s = NUnitTest::RandomString(i, i);
+
+ UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata(EncodeHtmlPcdata(s)), s);
+ }
+ }
+
+ // Round-trips short strings in which '&' sits next to other special characters.
+ Y_UNIT_TEST(Test1) {
+ const TString tests[] = {
+ "qw&qw",
+ "&<",
+ ">&qw",
+ "\'&aaa"};
+
+ for (auto s : tests) {
+ UNIT_ASSERT_VALUES_EQUAL(DecodeHtmlPcdata(EncodeHtmlPcdata(s)), s);
+ }
+ }
+
+ // A leading '&' is escaped as "&amp;" by default.
+ Y_UNIT_TEST(Test2) {
+ UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("&qqq"), "&amp;qqq");
+ }
+
+ // Two consecutive appends must give the same text as encoding the concatenation at once.
+ Y_UNIT_TEST(TestEncodeHtmlPcdataAppend) {
+ TString s;
+ EncodeHtmlPcdataAppend("m&m", s);
+ EncodeHtmlPcdataAppend("'s", s);
+ UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's"), s);
+ UNIT_ASSERT_VALUES_EQUAL("m&amp;m&#39;s", s);
+ }
+
+ // qAmp controls only the ampersand: with false, '&' stays as is while the apostrophe is still escaped.
+ Y_UNIT_TEST(TestStrangeAmpParameter) {
+ UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's", true), "m&amp;m&#39;s");
+ UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's"), "m&amp;m&#39;s"); //default
+ UNIT_ASSERT_VALUES_EQUAL(EncodeHtmlPcdata("m&m's", false), "m&m&#39;s");
+ }
+}
diff --git a/library/cpp/html/pcdata/ut/ya.make b/library/cpp/html/pcdata/ut/ya.make
new file mode 100644
index 0000000000..bc947baa89
--- /dev/null
+++ b/library/cpp/html/pcdata/ut/ya.make
@@ -0,0 +1,9 @@
+# Unit-test target for library/cpp/html/pcdata, built from pcdata_ut.cpp.
+UNITTEST_FOR(library/cpp/html/pcdata)
+
+OWNER(vladon)
+
+SRCS(
+ pcdata_ut.cpp
+)
+
+END()
diff --git a/library/cpp/html/pcdata/ya.make b/library/cpp/html/pcdata/ya.make
new file mode 100644
index 0000000000..637220e706
--- /dev/null
+++ b/library/cpp/html/pcdata/ya.make
@@ -0,0 +1,10 @@
+# Library target for the HTML PCDATA encode/decode helpers (pcdata.cpp, pcdata.h).
+LIBRARY()
+
+OWNER(vladon)
+
+SRCS(
+ pcdata.cpp
+ pcdata.h
+)
+
+END()