diff options
author | vvvv <vvvv@yandex-team.ru> | 2022-03-03 23:09:11 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.ru> | 2022-03-03 23:09:11 +0300 |
commit | 8dbdbab054b6eb352cfffdad09991052008062fc (patch) | |
tree | 71befed92d327f2cc409bff4986ec510d0530d92 /contrib/libs/icu/i18n/csr2022.h | |
parent | ed3f4e21f3a3837059607ec2606e1dc910269ac0 (diff) | |
download | ydb-8dbdbab054b6eb352cfffdad09991052008062fc.tar.gz |
YQL-13710 try to switch to new shiny PG
ref:2bed0445c8e8bd6c575883632adf19445f0a72a7
Diffstat (limited to 'contrib/libs/icu/i18n/csr2022.h')
-rw-r--r-- | contrib/libs/icu/i18n/csr2022.h | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/contrib/libs/icu/i18n/csr2022.h b/contrib/libs/icu/i18n/csr2022.h
new file mode 100644
index 0000000000..cde9019b46
--- /dev/null
+++ b/contrib/libs/icu/i18n/csr2022.h
@@ -0,0 +1,95 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2015, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __CSR2022_H
+#define __CSR2022_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "csrecog.h"
+
+U_NAMESPACE_BEGIN
+
+class CharsetMatch;
+
+/**
+ *  class CharsetRecog_2022  part of the ICU charset detection implementation.
+ *                          This is a superclass for the individual detectors for
+ *                          each of the detectable members of the ISO 2022 family
+ *                          of encodings.
+ *
+ *                          The individual detectors are declared below as separate
+ *                          classes derived from this one (they are not nested in it).
+ * @internal
+ */
+class CharsetRecog_2022 : public CharsetRecognizer
+{
+public:
+    // Pure virtual destructor: this class can only be used as a base class.
+    virtual ~CharsetRecog_2022() = 0;
+
+protected:
+    /**
+     * Matching function shared among the 2022 detectors JP, CN and KR.
+     * Counts up the number of legal and unrecognized escape sequences in
+     * the sample of text, and computes a score based on the total number &
+     * the proportion that fit the encoding.
+     *
+     * @param text the byte buffer containing text to analyse
+     * @param textLen  the size of the text in bytes.
+     * @param escapeSequences the byte escape sequences to test for (rows of 5 bytes).
+     * @param escapeSequences_length presumably the number of rows in escapeSequences - TODO confirm.
+     * @return match quality, in the range of 0-100.
+     */
+    int32_t match_2022(const uint8_t *text,
+                        int32_t textLen,
+                        const uint8_t escapeSequences[][5],
+                        int32_t escapeSequences_length) const;
+};
+
+/** Detector for the JP member of the ISO 2022 family; member functions are only declared here. */
+class CharsetRecog_2022JP :public CharsetRecog_2022
+{
+public:
+    virtual ~CharsetRecog_2022JP();
+
+    const char *getName() const;
+
+    UBool match(InputText *textIn, CharsetMatch *results) const;
+};
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
+/** Detector for the KR member of the ISO 2022 family; compiled only when UCONFIG_ONLY_HTML_CONVERSION is false. */
+class CharsetRecog_2022KR :public CharsetRecog_2022 {
+public:
+    virtual ~CharsetRecog_2022KR();
+
+    const char *getName() const;
+
+    UBool match(InputText *textIn, CharsetMatch *results) const;
+};
+
+/** Detector for the CN member of the ISO 2022 family; compiled only when UCONFIG_ONLY_HTML_CONVERSION is false. */
+class CharsetRecog_2022CN :public CharsetRecog_2022
+{
+public:
+    virtual ~CharsetRecog_2022CN();
+
+    const char* getName() const;
+
+    UBool match(InputText *textIn, CharsetMatch *results) const;
+};
+#endif /* !UCONFIG_ONLY_HTML_CONVERSION */
+
+U_NAMESPACE_END
+
+#endif /* !UCONFIG_NO_CONVERSION */
+#endif /* __CSR2022_H */
|