YQL-13710 try to switch to new shiny PG

ref:2bed0445c8e8bd6c575883632adf19445f0a72a7
author: vvvv <vvvv@yandex-team.ru> 2022-03-03 23:09:11 +0300
committer: vvvv <vvvv@yandex-team.ru> 2022-03-03 23:09:11 +0300
commit: 8dbdbab054b6eb352cfffdad09991052008062fc (patch)
tree: 71befed92d327f2cc409bff4986ec510d0530d92 /contrib/libs/icu/i18n/csr2022.h
parent: ed3f4e21f3a3837059607ec2606e1dc910269ac0 (diff)
download: ydb-8dbdbab054b6eb352cfffdad09991052008062fc.tar.gz
1 files changed, 95 insertions, 0 deletions
diff --git a/contrib/libs/icu/i18n/csr2022.h b/contrib/libs/icu/i18n/csr2022.h
new file mode 100644
index 0000000000..cde9019b46
--- /dev/null
+++ b/contrib/libs/icu/i18n/csr2022.h
@@ -0,0 +1,95 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2015, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSR2022_H
+#define __CSR2022_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+class CharsetMatch;
+
+/**
+ *  class CharsetRecog_2022  part of the ICU charset detection implementation.
+ *                           This is a superclass for the individual detectors for
+ *                           each of the detectable members of the ISO 2022 family
+ *                           of encodings.
+ * 
+ *                           The individual detectors are separate subclasses declared below.
+ * 
+ * @internal
+ */
+class CharsetRecog_2022 : public CharsetRecognizer
+{
+
+public:    
+    virtual ~CharsetRecog_2022() = 0;  // pure virtual: this class is abstract
+
+protected:
+
+    /**
+     * Matching function shared among the 2022 detectors JP, CN and KR.
+     * Counts up the number of legal and unrecognized escape sequences in
+     * the sample of text, and computes a score based on the total number &
+     * the proportion that fit the encoding.
+     *
+     * @param text the byte buffer containing text to analyse
+     * @param textLen the size of the text, in bytes.
+     * @param escapeSequences the byte escape sequences to test for.
+     * @param escapeSequences_length length of escapeSequences (presumably its number of 5-byte rows; confirm against the definition).
+     * @return match quality, in the range of 0-100.
+     */
+    int32_t match_2022(const uint8_t *text,
+                       int32_t textLen,
+                       const uint8_t escapeSequences[][5],
+                       int32_t escapeSequences_length) const;
+
+};
+
+// Detector for the JP member of the ISO 2022 family of encodings.
+class CharsetRecog_2022JP :public CharsetRecog_2022
+{
+public:
+    virtual ~CharsetRecog_2022JP();
+    // Overrides of the CharsetRecognizer interface (csrecog.h); defined out of line.
+    const char *getName() const override;
+    UBool match(InputText *textIn, CharsetMatch *results) const override;
+};
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+// Detector for the KR member of the ISO 2022 family of encodings.
+class CharsetRecog_2022KR :public CharsetRecog_2022 {
+public:
+    virtual ~CharsetRecog_2022KR();
+
+    // Overrides of the CharsetRecognizer interface (csrecog.h); defined out of line.
+    const char *getName() const override;
+    UBool match(InputText *textIn, CharsetMatch *results) const override;
+};
+
+// Detector for the CN member of the ISO 2022 family of encodings.
+class CharsetRecog_2022CN :public CharsetRecog_2022
+{
+public:
+    virtual ~CharsetRecog_2022CN();
+    // Overrides of the CharsetRecognizer interface (csrecog.h); defined out of line.
+    const char* getName() const override;
+    UBool match(InputText *textIn, CharsetMatch *results) const override;
+};
+#endif
+
+U_NAMESPACE_END
+
+#endif
+#endif /* __CSR2022_H */
author	vvvv <vvvv@yandex-team.ru>	2022-03-03 23:09:11 +0300
committer	vvvv <vvvv@yandex-team.ru>	2022-03-03 23:09:11 +0300
commit	8dbdbab054b6eb352cfffdad09991052008062fc (patch)
tree	71befed92d327f2cc409bff4986ec510d0530d92 /contrib/libs/icu/i18n/csr2022.h
parent	ed3f4e21f3a3837059607ec2606e1dc910269ac0 (diff)
download	ydb-8dbdbab054b6eb352cfffdad09991052008062fc.tar.gz