aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/unicode/punycode/punycode.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/unicode/punycode/punycode.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/unicode/punycode/punycode.cpp')
-rw-r--r--library/cpp/unicode/punycode/punycode.cpp143
1 files changed, 143 insertions, 0 deletions
diff --git a/library/cpp/unicode/punycode/punycode.cpp b/library/cpp/unicode/punycode/punycode.cpp
new file mode 100644
index 0000000000..800d1f19fe
--- /dev/null
+++ b/library/cpp/unicode/punycode/punycode.cpp
@@ -0,0 +1,143 @@
+#include "punycode.h"
+#include <contrib/libs/libidn/idna.h>
+#include <contrib/libs/libidn/punycode.h>
+#include <util/charset/wide.h>
+#include <util/generic/ptr.h>
+#include <util/generic/vector.h>
+
+#include <cstdlib>
+
+// Turns a libidn punycode status code into an exception.
+// Returns normally when rc is PUNYCODE_SUCCESS; for any other value throws
+// TPunycodeError carrying the text punycode_strerror() reports for that status.
+static inline void CheckPunycodeResult(int rc) {
+    if (rc == PUNYCODE_SUCCESS) {
+        return;
+    }
+
+    const auto status = static_cast<Punycode_status>(rc);
+    ythrow TPunycodeError() << punycode_strerror(status);
+}
+
+// Turns a libidn IDNA status code into an exception.
+// Returns normally when rc is IDNA_SUCCESS; for any other value throws
+// TPunycodeError carrying the text idna_strerror() reports for that status.
+static inline void CheckIdnaResult(int rc) {
+    if (rc == IDNA_SUCCESS) {
+        return;
+    }
+
+    const auto status = static_cast<Idna_rc>(rc);
+    ythrow TPunycodeError() << idna_strerror(status);
+}
+
+// UTF-32 helpers
+
+// Appends the symbols of the UTF-16 buffer `in` to `out`, one ui32 per symbol
+// returned by ReadSymbolAndAdvance(). Existing contents of `out` are kept.
+// Capacity is reserved for one element more than the input length, so a caller
+// can push a terminating 0 afterwards without triggering a reallocation.
+static inline void AppendWideToUtf32(const TWtringBuf& in, TVector<ui32>& out) {
+    out.reserve(out.size() + in.size() + 1);
+
+    const wchar16* const last = in.end();
+    for (const wchar16* cursor = in.begin(); cursor < last;) {
+        // ReadSymbolAndAdvance() moves `cursor` forward itself.
+        const wchar32 symbol = ReadSymbolAndAdvance(cursor, last);
+        out.push_back(symbol);
+    }
+}
+
+// Appends `len` 32-bit code points starting at `in` to the UTF-16 string `out`
+// by passing each one to WriteSymbol(). Existing contents of `out` are kept.
+static inline void AppendUtf32ToWide(const ui32* in, size_t len, TUtf16String& out) {
+    // At least one UTF-16 unit is needed per code point; reserve that up front.
+    out.reserve(out.size() + len);
+
+    for (size_t i = 0; i < len; ++i) {
+        WriteSymbol(wchar32(in[i]), out);
+    }
+}
+
+// Punycode-encodes the UTF-16 text `in16` into `out` (previous contents of
+// `out` are overwritten) and returns a view over `out`.
+// Throws TPunycodeError if punycode_encode() reports anything other than
+// success or "output buffer too small".
+TStringBuf WideToPunycode(const TWtringBuf& in16, TString& out) {
+    TVector<ui32> codePoints;
+    AppendWideToUtf32(in16, codePoints);
+
+    // Start from twice the number of code points and keep doubling the
+    // buffer for as long as the encoder says it does not fit.
+    size_t encodedLen = codePoints.size();
+    int status = PUNYCODE_BIG_OUTPUT;
+    while (status == PUNYCODE_BIG_OUTPUT) {
+        encodedLen *= 2;
+        out.ReserveAndResize(encodedLen);
+        status = punycode_encode(codePoints.size(), codePoints.data(), nullptr, &encodedLen, out.begin());
+    }
+
+    CheckPunycodeResult(status);
+
+    // On success the encoder stored the number of bytes actually written.
+    out.resize(encodedLen);
+    return out;
+}
+
+// Decodes the punycode text `in` and appends the result to `out16`.
+// The returned view covers the whole of `out16`, including anything it held
+// before the call. Throws TPunycodeError if punycode_decode() fails.
+TWtringBuf PunycodeToWide(const TStringBuf& in, TUtf16String& out16) {
+    // The scratch buffer holds one slot per input byte; the decoder reports
+    // the number of slots it actually used through `decodedLen`.
+    size_t decodedLen = in.size();
+    TVector<ui32> decoded(decodedLen);
+
+    const int status = punycode_decode(in.size(), in.data(), &decodedLen, decoded.begin(), nullptr);
+    CheckPunycodeResult(status);
+
+    AppendUtf32ToWide(decoded.begin(), decodedLen, out16);
+    return out16;
+}
+
+namespace {
+    // Owner of a buffer that a libidn call hands back through an out-pointer.
+    // `Data` starts as nullptr, is filled in by passing `&Data` to the library,
+    // and is released with free() when the holder goes out of scope.
+    template <typename TChar>
+    struct TIdnaResult {
+        TChar* Data = nullptr;
+
+        TIdnaResult() = default;
+
+        // Copying would leave two holders freeing the same pointer.
+        TIdnaResult(const TIdnaResult&) = delete;
+        TIdnaResult& operator=(const TIdnaResult&) = delete;
+
+        ~TIdnaResult() {
+            free(Data); // free(nullptr) is a no-op
+        }
+    };
+}
+
+// Converts a Unicode host name to its ASCII form with idna_to_ascii_4z().
+// Throws TPunycodeError if the library call does not return IDNA_SUCCESS.
+TString HostNameToPunycode(const TWtringBuf& unicodeHost) {
+    // The "4z" entry point takes a zero-terminated array of 32-bit code points.
+    TVector<ui32> host32;
+    AppendWideToUtf32(unicodeHost, host32);
+    host32.push_back(0);
+
+    TIdnaResult<char> ascii;
+    const int status = idna_to_ascii_4z(host32.begin(), &ascii.Data, 0);
+    CheckIdnaResult(status);
+
+    // Copy into a TString before `ascii` frees the library-allocated buffer.
+    return TString(ascii.Data);
+}
+
+// Converts an ASCII (possibly punycode-encoded) host name to UTF-16 with
+// idna_to_unicode_4z4z().
+// Throws TPunycodeError if the input contains non-ASCII bytes or if the
+// library call does not return IDNA_SUCCESS.
+TUtf16String PunycodeToHostName(const TStringBuf& punycodeHost) {
+    if (!IsStringASCII(punycodeHost.begin(), punycodeHost.end()))
+        ythrow TPunycodeError() << "Non-ASCII punycode input";
+
+    // Widen each input byte to a 32-bit unit. The vector is zero-filled and one
+    // element longer than the input, so the last slot is already the terminator.
+    const size_t len = punycodeHost.size();
+    TVector<ui32> in32(len + 1, 0);
+    for (size_t i = 0; i < len; ++i)
+        in32[i] = static_cast<ui8>(punycodeHost[i]);
+
+    TIdnaResult<ui32> out;
+    int rc = idna_to_unicode_4z4z(in32.begin(), &out.Data, 0);
+    CheckIdnaResult(rc);
+
+    // The result is zero-terminated; count its length by hand.
+    // std::char_traits<ui32> is not used here: the standard only provides
+    // char_traits for character types, and instantiating it for an integer
+    // type does not compile on every standard library.
+    size_t outLen = 0;
+    while (out.Data[outLen] != 0)
+        ++outLen;
+
+    TUtf16String decoded;
+    AppendUtf32ToWide(out.Data, outLen, decoded);
+    return decoded;
+}
+
+// Same as HostNameToPunycode(), except that a TPunycodeError is swallowed and
+// the UTF-8 encoding of the input is returned in its place.
+TString ForceHostNameToPunycode(const TWtringBuf& unicodeHost) {
+    TString result;
+    try {
+        result = HostNameToPunycode(unicodeHost);
+    } catch (const TPunycodeError&) {
+        // Conversion failed: fall back to the input re-encoded as UTF-8.
+        result = WideToUTF8(unicodeHost);
+    }
+    return result;
+}
+
+// Same as PunycodeToHostName(), except that a TPunycodeError is swallowed and
+// the input, interpreted as UTF-8, is returned in its place.
+TUtf16String ForcePunycodeToHostName(const TStringBuf& punycodeHost) {
+    TUtf16String result;
+    try {
+        result = PunycodeToHostName(punycodeHost);
+    } catch (const TPunycodeError&) {
+        // Conversion failed: fall back to decoding the input as UTF-8.
+        result = UTF8ToWide(punycodeHost);
+    }
+    return result;
+}
+
+// Returns true when `host` is pure ASCII and at least one of its dot-separated
+// labels starts with the "xn--" prefix; returns false otherwise.
+// NOTE(review): the prefix comparison is case-sensitive, so a label starting
+// with "XN--" is not recognised - confirm that callers lowercase the host first.
+bool CanBePunycodeHostName(const TStringBuf& host) {
+    static constexpr TStringBuf ACE = "xn--";
+
+    if (!IsStringASCII(host.begin(), host.end())) {
+        return false;
+    }
+
+    // NextTok() cuts the leading label off `rest` on every iteration.
+    for (TStringBuf rest(host); !rest.empty();) {
+        if (rest.NextTok('.').StartsWith(ACE)) {
+            return true;
+        }
+    }
+
+    return false;
+}