Moved other yql/essentials libs YQL-19206

init commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
author: vvvv <[email protected]> 2024-11-07 12:29:36 +0300
committer: vvvv <[email protected]> 2024-11-07 13:49:47 +0300
commit: d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
tree: b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb
parent: 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
5 files changed, 2884 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/conv.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/conv.c
new file mode 100644
index 00000000000..82bc1ac6af3
--- /dev/null
+++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/conv.c
@@ -0,0 +1,838 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  Utility functions for conversion procs.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mb/conv.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "mb/pg_wchar.h"
+
+
+/*
+ * local2local: a generic single byte charset encoding
+ * conversion between two ASCII-superset encodings.
+ *
+ * l points to the source string of length len
+ * p is the output area (must be large enough: up to len bytes plus a NUL)
+ * src_encoding is the PG identifier for the source encoding
+ * dest_encoding is the PG identifier for the target encoding
+ * tab holds conversion entries for the source charset
+ * starting from 128 (0x80). each entry in the table holds the corresponding
+ * code point for the target charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len': conversion stops quietly at the first bad input byte.
+ */
+int
+local2local(const unsigned char *l,
+			unsigned char *p,
+			int len,
+			int src_encoding,
+			int dest_encoding,
+			const unsigned char *tab,
+			bool noError)
+{
+	const unsigned char *start = l;
+	unsigned char c1,
+				c2;
+
+	while (len > 0)
+	{
+		c1 = *l;
+		if (c1 == 0)			/* an embedded zero byte is invalid input */
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(src_encoding, (const char *) l, len);
+		}
+		if (!IS_HIGHBIT_SET(c1))
+			*p++ = c1;			/* ASCII is copied through unchanged */
+		else
+		{
+			c2 = tab[c1 - HIGHBIT];	/* table index is relative to HIGHBIT */
+			if (c2)
+				*p++ = c2;
+			else				/* entry 0: no equivalent in the target charset */
+			{
+				if (noError)
+					break;
+				report_untranslatable_char(src_encoding, dest_encoding,
+										   (const char *) l, len);
+			}
+		}
+		l++;
+		len--;
+	}
+	*p = '\0';					/* output is always NUL-terminated */
+
+	return l - start;			/* number of input bytes consumed */
+}
+
+/*
+ * LATINn ---> MIC when the charset's local codes map directly to MIC
+ *
+ * l points to the source string of length len
+ * p is the output area (must be large enough: up to 2 * len bytes plus a NUL)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len': conversion stops quietly at the first zero byte.
+ */
+int
+latin2mic(const unsigned char *l, unsigned char *p, int len,
+		  int lc, int encoding, bool noError)
+{
+	const unsigned char *start = l;
+	int			c1;
+
+	while (len > 0)
+	{
+		c1 = *l;
+		if (c1 == 0)			/* an embedded zero byte is invalid input */
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(encoding, (const char *) l, len);
+		}
+		if (IS_HIGHBIT_SET(c1))
+			*p++ = lc;			/* high-bit bytes are prefixed with the charset id */
+		*p++ = c1;				/* the byte itself is stored unchanged */
+		l++;
+		len--;
+	}
+	*p = '\0';					/* output is always NUL-terminated */
+
+	return l - start;			/* number of input bytes consumed */
+}
+
+/*
+ * MIC ---> LATINn when the charset's local codes map directly to MIC
+ *
+ * mic points to the source string of length len
+ * p is the output area (must be large enough: up to len bytes plus a NUL)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len': conversion stops quietly at the first bad character.
+ */
+int
+mic2latin(const unsigned char *mic, unsigned char *p, int len,
+		  int lc, int encoding, bool noError)
+{
+	const unsigned char *start = mic;
+	int			c1;
+
+	while (len > 0)
+	{
+		c1 = *mic;
+		if (c1 == 0)			/* an embedded zero byte is invalid input */
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+		}
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* easy for ASCII */
+			*p++ = c1;
+			mic++;
+			len--;
+		}
+		else
+		{
+			int			l = pg_mule_mblen(mic);	/* length of this MIC character */
+
+			if (len < l)		/* character is cut off by the end of input */
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
+										len);
+			}
+			if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))	/* only lc + high-bit byte converts */
+			{
+				if (noError)
+					break;
+				report_untranslatable_char(PG_MULE_INTERNAL, encoding,
+										   (const char *) mic, len);
+			}
+			*p++ = mic[1];		/* drop the leading charset id byte */
+			mic += 2;
+			len -= 2;
+		}
+	}
+	*p = '\0';					/* output is always NUL-terminated */
+
+	return mic - start;			/* number of input bytes consumed */
+}
+
+
+/*
+ * latin2mic_with_table: a generic single byte charset encoding
+ * conversion from a local charset to the mule internal code.
+ *
+ * l points to the source string of length len
+ * p is the output area (must be large enough: up to 2 * len bytes plus a NUL)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
+ * tab holds conversion entries for the local charset
+ * starting from 128 (0x80). each entry in the table holds the corresponding
+ * code point for the mule encoding, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len': conversion stops quietly at the first bad input byte.
+ */
+int
+latin2mic_with_table(const unsigned char *l,
+					 unsigned char *p,
+					 int len,
+					 int lc,
+					 int encoding,
+					 const unsigned char *tab,
+					 bool noError)
+{
+	const unsigned char *start = l;
+	unsigned char c1,
+				c2;
+
+	while (len > 0)
+	{
+		c1 = *l;
+		if (c1 == 0)			/* an embedded zero byte is invalid input */
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(encoding, (const char *) l, len);
+		}
+		if (!IS_HIGHBIT_SET(c1))
+			*p++ = c1;			/* ASCII is copied through unchanged */
+		else
+		{
+			c2 = tab[c1 - HIGHBIT];	/* table index is relative to HIGHBIT */
+			if (c2)
+			{
+				*p++ = lc;		/* charset id byte first ... */
+				*p++ = c2;		/* ... then the translated code */
+			}
+			else				/* entry 0: no equivalent in the mule encoding */
+			{
+				if (noError)
+					break;
+				report_untranslatable_char(encoding, PG_MULE_INTERNAL,
+										   (const char *) l, len);
+			}
+		}
+		l++;
+		len--;
+	}
+	*p = '\0';					/* output is always NUL-terminated */
+
+	return l - start;			/* number of input bytes consumed */
+}
+
+/*
+ * mic2latin_with_table: a generic single byte charset encoding
+ * conversion from the mule internal code to a local charset.
+ *
+ * mic points to the source string of length len
+ * p is the output area (must be large enough: up to len bytes plus a NUL)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
+ * tab holds conversion entries for the mule internal code's second byte,
+ * starting from 128 (0x80). each entry in the table holds the corresponding
+ * code point for the local charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len': conversion stops quietly at the first bad character.
+ */
+int
+mic2latin_with_table(const unsigned char *mic,
+					 unsigned char *p,
+					 int len,
+					 int lc,
+					 int encoding,
+					 const unsigned char *tab,
+					 bool noError)
+{
+	const unsigned char *start = mic;
+	unsigned char c1,
+				c2;
+
+	while (len > 0)
+	{
+		c1 = *mic;
+		if (c1 == 0)			/* an embedded zero byte is invalid input */
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+		}
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* easy for ASCII */
+			*p++ = c1;
+			mic++;
+			len--;
+		}
+		else
+		{
+			int			l = pg_mule_mblen(mic);	/* length of this MIC character */
+
+			if (len < l)		/* character is cut off by the end of input */
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
+										len);
+			}
+			if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
+				(c2 = tab[mic[1] - HIGHBIT]) == 0)	/* c2 is set only if the earlier tests pass */
+			{
+				if (noError)
+					break;
+				report_untranslatable_char(PG_MULE_INTERNAL, encoding,
+										   (const char *) mic, len);
+				break;			/* keep compiler quiet */
+			}
+			*p++ = c2;			/* translated code replaces the two MIC bytes */
+			mic += 2;
+			len -= 2;
+		}
+	}
+	*p = '\0';					/* output is always NUL-terminated */
+
+	return mic - start;			/* number of input bytes consumed */
+}
+
+/*
+ * comparison routine for bsearch() over pg_utf_to_local_combined entries;
+ * intended for combined UTF8 -> local code.  p1 is the key (two uint32s), p2 a table entry.
+ */
+static int
+compare3(const void *p1, const void *p2)
+{
+	uint32		s1,
+				s2,
+				d1,
+				d2;
+
+	s1 = *(const uint32 *) p1;	/* key: first code */
+	s2 = *((const uint32 *) p1 + 1);	/* key: second code */
+	d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
+	d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
+	return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);	/* order by utf1, then utf2 */
+}
+
+/*
+ * comparison routine for bsearch() over pg_local_to_utf_combined entries;
+ * intended for local code -> combined UTF8.  p1 is the key (one uint32), p2 a table entry.
+ */
+static int
+compare4(const void *p1, const void *p2)
+{
+	uint32		v1,
+				v2;
+
+	v1 = *(const uint32 *) p1;	/* key: local code being looked up */
+	v2 = ((const pg_local_to_utf_combined *) p2)->code;
+	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);	/* order by code */
+}
+
+/*
+ * store 32bit character representation into multibyte stream, high byte first
+ */
+static inline unsigned char *
+store_coded_char(unsigned char *dest, uint32 code)
+{
+	if (code & 0xff000000)		/* each byte is stored only if it is nonzero */
+		*dest++ = code >> 24;
+	if (code & 0x00ff0000)
+		*dest++ = code >> 16;
+	if (code & 0x0000ff00)
+		*dest++ = code >> 8;
+	if (code & 0x000000ff)
+		*dest++ = code;
+	return dest;				/* points just past the last byte stored */
+}
+
+/*
+ * Convert a character using a conversion radix tree.  'l' is the length of
+ * the input character in bytes, and b1-b4 are the input character's bytes,
+ * right-aligned so that the last byte is always b4.  Returns the looked-up
+ * value, or 0 if any byte is outside the range the tree covers for length l.
+ */
+static inline uint32
+pg_mb_radix_conv(const pg_mb_radix_tree *rt,
+				 int l,
+				 unsigned char b1,
+				 unsigned char b2,
+				 unsigned char b3,
+				 unsigned char b4)
+{
+	if (l == 4)
+	{
+		/* 4-byte code */
+
+		/* check code validity */
+		if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
+			b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
+			b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
+			b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
+			return 0;
+
+		/* perform lookup */
+		if (rt->chars32)		/* use the 32-bit table if present, else chars16 */
+		{
+			uint32		idx = rt->b4root;	/* start at the root for 4-byte codes */
+
+			idx = rt->chars32[b1 + idx - rt->b4_1_lower];	/* each entry indexes the next level */
+			idx = rt->chars32[b2 + idx - rt->b4_2_lower];
+			idx = rt->chars32[b3 + idx - rt->b4_3_lower];
+			return rt->chars32[b4 + idx - rt->b4_4_lower];	/* last level holds the result */
+		}
+		else
+		{
+			uint16		idx = rt->b4root;
+
+			idx = rt->chars16[b1 + idx - rt->b4_1_lower];
+			idx = rt->chars16[b2 + idx - rt->b4_2_lower];
+			idx = rt->chars16[b3 + idx - rt->b4_3_lower];
+			return rt->chars16[b4 + idx - rt->b4_4_lower];
+		}
+	}
+	else if (l == 3)
+	{
+		/* 3-byte code */
+
+		/* check code validity */
+		if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
+			b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
+			b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
+			return 0;
+
+		/* perform lookup */
+		if (rt->chars32)
+		{
+			uint32		idx = rt->b3root;	/* start at the root for 3-byte codes */
+
+			idx = rt->chars32[b2 + idx - rt->b3_1_lower];
+			idx = rt->chars32[b3 + idx - rt->b3_2_lower];
+			return rt->chars32[b4 + idx - rt->b3_3_lower];
+		}
+		else
+		{
+			uint16		idx = rt->b3root;
+
+			idx = rt->chars16[b2 + idx - rt->b3_1_lower];
+			idx = rt->chars16[b3 + idx - rt->b3_2_lower];
+			return rt->chars16[b4 + idx - rt->b3_3_lower];
+		}
+	}
+	else if (l == 2)
+	{
+		/* 2-byte code */
+
+		/* check code validity - both bytes */
+		if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
+			b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
+			return 0;
+
+		/* perform lookup */
+		if (rt->chars32)
+		{
+			uint32		idx = rt->b2root;	/* start at the root for 2-byte codes */
+
+			idx = rt->chars32[b3 + idx - rt->b2_1_lower];
+			return rt->chars32[b4 + idx - rt->b2_2_lower];
+		}
+		else
+		{
+			uint16		idx = rt->b2root;
+
+			idx = rt->chars16[b3 + idx - rt->b2_1_lower];
+			return rt->chars16[b4 + idx - rt->b2_2_lower];
+		}
+	}
+	else if (l == 1)
+	{
+		/* 1-byte code */
+
+		/* check code validity - the only byte */
+		if (b4 < rt->b1_lower || b4 > rt->b1_upper)
+			return 0;
+
+		/* perform lookup */
+		if (rt->chars32)
+			return rt->chars32[b4 + rt->b1root - rt->b1_lower];
+		else
+			return rt->chars16[b4 + rt->b1root - rt->b1_lower];
+	}
+	return 0;					/* shouldn't happen */
+}
+
+/*
+ * UTF8 ---> local code
+ *
+ * utf: input string in UTF8 encoding (need not be null-terminated)
+ * len: length of input string (in bytes)
+ * iso: pointer to the output area (must be large enough!)
+ *		  (output string will be null-terminated)
+ * map: conversion map for single characters (skipped if NULL)
+ * cmap: conversion map for combined characters
+ *		  (optional, pass NULL if none)
+ * cmapsize: number of entries in the conversion map for combined characters
+ *		  (optional, pass 0 if none)
+ * conv_func: algorithmic encoding conversion function
+ *		  (optional, pass NULL if none)
+ * encoding: PG identifier for the local encoding
+ *
+ * For each character, the cmap (if provided) is consulted first; if no match,
+ * the map is consulted next; if still no match, the conv_func (if provided)
+ * is applied.  If no match is found, an error is raised unless noError is true.
+ *
+ * See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len': conversion stops at the first invalid or untranslatable character.
+ */
+int
+UtfToLocal(const unsigned char *utf, int len,
+		   unsigned char *iso,
+		   const pg_mb_radix_tree *map,
+		   const pg_utf_to_local_combined *cmap, int cmapsize,
+		   utf_local_conversion_func conv_func,
+		   int encoding, bool noError)
+{
+	uint32		iutf;
+	int			l;
+	const pg_utf_to_local_combined *cp;
+	const unsigned char *start = utf;
+
+	if (!PG_VALID_ENCODING(encoding))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid encoding number: %d", encoding)));
+
+	for (; len > 0; len -= l)	/* every "continue" consumes l more bytes */
+	{
+		unsigned char b1 = 0;
+		unsigned char b2 = 0;
+		unsigned char b3 = 0;
+		unsigned char b4 = 0;
+
+		/* "break" cases all represent errors */
+		if (*utf == '\0')
+			break;
+
+		l = pg_utf_mblen(utf);
+		if (len < l)			/* character is cut off by the end of input */
+			break;
+
+		if (!pg_utf8_islegal(utf, l))
+			break;
+
+		if (l == 1)
+		{
+			/* ASCII case is easy, assume it's one-to-one conversion */
+			*iso++ = *utf++;
+			continue;
+		}
+
+		/* collect coded char of length l, right-aligned in b1..b4 */
+		if (l == 2)
+		{
+			b3 = *utf++;
+			b4 = *utf++;
+		}
+		else if (l == 3)
+		{
+			b2 = *utf++;
+			b3 = *utf++;
+			b4 = *utf++;
+		}
+		else if (l == 4)
+		{
+			b1 = *utf++;
+			b2 = *utf++;
+			b3 = *utf++;
+			b4 = *utf++;
+		}
+		else
+		{
+			elog(ERROR, "unsupported character length %d", l);
+			iutf = 0;			/* keep compiler quiet */
+		}
+		iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);	/* last byte ends up in the low-order bits */
+
+		/* First, try with combined map if possible */
+		if (cmap && len > l)	/* needs at least one more byte after this character */
+		{
+			const unsigned char *utf_save = utf;
+			int			len_save = len;
+			int			l_save = l;
+
+			/* collect next character, same as above */
+			len -= l;			/* len now counts only the bytes after the first character */
+
+			l = pg_utf_mblen(utf);
+			if (len < l)
+			{
+				/* need more data to decide if this is a combined char */
+				utf -= l_save;	/* un-consume the first character */
+				break;			/* NOTE(review): len is not restored, so it excludes the first character here */
+			}
+
+			if (!pg_utf8_islegal(utf, l))
+			{
+				if (!noError)
+					report_invalid_encoding(PG_UTF8, (const char *) utf, len);
+				utf -= l_save;	/* un-consume the first character */
+				break;
+			}
+
+			/* We assume ASCII character cannot be in combined map */
+			if (l > 1)
+			{
+				uint32		iutf2;
+				uint32		cutf[2];
+
+				if (l == 2)
+				{
+					iutf2 = *utf++ << 8;
+					iutf2 |= *utf++;
+				}
+				else if (l == 3)
+				{
+					iutf2 = *utf++ << 16;
+					iutf2 |= *utf++ << 8;
+					iutf2 |= *utf++;
+				}
+				else if (l == 4)
+				{
+					iutf2 = *utf++ << 24;
+					iutf2 |= *utf++ << 16;
+					iutf2 |= *utf++ << 8;
+					iutf2 |= *utf++;
+				}
+				else
+				{
+					elog(ERROR, "unsupported character length %d", l);
+					iutf2 = 0;	/* keep compiler quiet */
+				}
+
+				cutf[0] = iutf;	/* search key: the pair of codes, in input order */
+				cutf[1] = iutf2;
+
+				cp = bsearch(cutf, cmap, cmapsize,
+							 sizeof(pg_utf_to_local_combined), compare3);
+
+				if (cp)
+				{
+					iso = store_coded_char(iso, cp->code);
+					continue;	/* loop step subtracts the second character's length */
+				}
+			}
+
+			/* fail, so back up to reprocess second character next time */
+			utf = utf_save;
+			len = len_save;
+			l = l_save;
+		}
+
+		/* Now check ordinary map */
+		if (map)
+		{
+			uint32		converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
+
+			if (converted)		/* zero means no mapping was found */
+			{
+				iso = store_coded_char(iso, converted);
+				continue;
+			}
+		}
+
+		/* if there's a conversion function, try that */
+		if (conv_func)
+		{
+			uint32		converted = (*conv_func) (iutf);
+
+			if (converted)		/* zero means the function had no result */
+			{
+				iso = store_coded_char(iso, converted);
+				continue;
+			}
+		}
+
+		/* failed to translate this character */
+		utf -= l;				/* point back at the start of the failed character */
+		if (noError)
+			break;
+		report_untranslatable_char(PG_UTF8, encoding,
+								   (const char *) utf, len);
+	}
+
+	/* if we broke out of loop early, must be invalid input */
+	if (len > 0 && !noError)
+		report_invalid_encoding(PG_UTF8, (const char *) utf, len);
+
+	*iso = '\0';				/* output is always NUL-terminated */
+
+	return utf - start;			/* number of input bytes consumed */
+}
+
+/*
+ * local code ---> UTF8
+ *
+ * iso: input string in local encoding (need not be null-terminated)
+ * len: length of input string (in bytes)
+ * utf: pointer to the output area (must be large enough!)
+ *		  (output string will be null-terminated)
+ * map: conversion map for single characters (skipped if NULL)
+ * cmap: conversion map for combined characters
+ *		  (optional, pass NULL if none)
+ * cmapsize: number of entries in the conversion map for combined characters
+ *		  (optional, pass 0 if none)
+ * conv_func: algorithmic encoding conversion function
+ *		  (optional, pass NULL if none)
+ * encoding: PG identifier for the local encoding
+ *
+ * For each character, the map is consulted first; if no match, the cmap
+ * (if provided, and only when a map was given) is consulted next; if still no match, the conv_func
+ * (if provided) is applied.  If no match is found, an error is raised unless noError is true.
+ *
+ * See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed.  If noError is true, this can
+ * be less than 'len': conversion stops at the first invalid or untranslatable character.
+ */
+int
+LocalToUtf(const unsigned char *iso, int len,
+		   unsigned char *utf,
+		   const pg_mb_radix_tree *map,
+		   const pg_local_to_utf_combined *cmap, int cmapsize,
+		   utf_local_conversion_func conv_func,
+		   int encoding,
+		   bool noError)
+{
+	uint32		iiso;
+	int			l;
+	const pg_local_to_utf_combined *cp;
+	const unsigned char *start = iso;
+
+	if (!PG_VALID_ENCODING(encoding))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid encoding number: %d", encoding)));
+
+	for (; len > 0; len -= l)	/* every "continue" consumes l more bytes */
+	{
+		unsigned char b1 = 0;
+		unsigned char b2 = 0;
+		unsigned char b3 = 0;
+		unsigned char b4 = 0;
+
+		/* "break" cases all represent errors */
+		if (*iso == '\0')
+			break;
+
+		if (!IS_HIGHBIT_SET(*iso))
+		{
+			/* ASCII case is easy, assume it's one-to-one conversion */
+			*utf++ = *iso++;
+			l = 1;				/* subtracted from len by the loop step */
+			continue;
+		}
+
+		l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
+		if (l < 0)				/* verification failed */
+			break;
+
+		/* collect coded char of length l, right-aligned in b1..b4 */
+		if (l == 1)
+			b4 = *iso++;
+		else if (l == 2)
+		{
+			b3 = *iso++;
+			b4 = *iso++;
+		}
+		else if (l == 3)
+		{
+			b2 = *iso++;
+			b3 = *iso++;
+			b4 = *iso++;
+		}
+		else if (l == 4)
+		{
+			b1 = *iso++;
+			b2 = *iso++;
+			b3 = *iso++;
+			b4 = *iso++;
+		}
+		else
+		{
+			elog(ERROR, "unsupported character length %d", l);
+			iiso = 0;			/* keep compiler quiet */
+		}
+		iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);	/* last byte ends up in the low-order bits */
+
+		if (map)
+		{
+			uint32		converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
+
+			if (converted)		/* zero means no mapping was found */
+			{
+				utf = store_coded_char(utf, converted);
+				continue;
+			}
+
+			/* If there's a combined character map, try that */
+			if (cmap)
+			{
+				cp = bsearch(&iiso, cmap, cmapsize,
+							 sizeof(pg_local_to_utf_combined), compare4);
+
+				if (cp)
+				{
+					utf = store_coded_char(utf, cp->utf1);	/* one input character yields two codes */
+					utf = store_coded_char(utf, cp->utf2);
+					continue;
+				}
+			}
+		}
+
+		/* if there's a conversion function, try that */
+		if (conv_func)
+		{
+			uint32		converted = (*conv_func) (iiso);
+
+			if (converted)		/* zero means the function had no result */
+			{
+				utf = store_coded_char(utf, converted);
+				continue;
+			}
+		}
+
+		/* failed to translate this character */
+		iso -= l;				/* point back at the start of the failed character */
+		if (noError)
+			break;
+		report_untranslatable_char(encoding, PG_UTF8,
+								   (const char *) iso, len);
+	}
+
+	/* if we broke out of loop early, must be invalid input */
+	if (len > 0 && !noError)
+		report_invalid_encoding(encoding, (const char *) iso, len);
+
+	*utf = '\0';				/* output is always NUL-terminated */
+
+	return iso - start;			/* number of input bytes consumed */
+}
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/mbutils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/mbutils.c
new file mode 100644
index 00000000000..e4f8eaef365
--- /dev/null
+++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/mbutils.c
@@ -0,0 +1,1836 @@
+/*-------------------------------------------------------------------------
+ *
+ * mbutils.c
+ *	  This file contains functions for encoding conversion.
+ *
+ * The string-conversion functions in this file share some API quirks.
+ * Note the following:
+ *
+ * The functions return a palloc'd, null-terminated string if conversion
+ * is required.  However, if no conversion is performed, the given source
+ * string pointer is returned as-is.
+ *
+ * Although the presence of a length argument means that callers can pass
+ * non-null-terminated strings, care is required because the same string
+ * will be passed back if no conversion occurs.  Such callers *must* check
+ * whether result == src and handle that case differently.
+ *
+ * If the source and destination encodings are the same, the source string
+ * is returned without any verification; it's assumed to be valid data.
+ * If that might not be the case, the caller is responsible for validating
+ * the string using a separate call to pg_verify_mbstr().  Whenever the
+ * source and destination encodings are different, the functions ensure that
+ * the result is validly encoded according to the destination encoding.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mb/mbutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+#include "varatt.h"
+
+/*
+ * We maintain a simple linked list caching the fmgr lookup info for the
+ * currently selected conversion functions, as well as any that have been
+ * selected previously in the current session.  (We remember previous
+ * settings because we must be able to restore a previous setting during
+ * transaction rollback, without doing any fresh catalog accesses.)
+ *
+ * Entries are allocated in TopMemoryContext; only superseded duplicates are ever freed.
+ */
+typedef struct ConvProcInfo
+{
+	int			s_encoding;		/* server and client encoding IDs */
+	int			c_encoding;
+	FmgrInfo	to_server_info; /* lookup info for conversion procs */
+	FmgrInfo	to_client_info;	/* (client -> server above, server -> client here) */
+} ConvProcInfo;
+
+static __thread List *ConvProcList = NIL;	/* List of ConvProcInfo, newest entry first */
+
+/*
+ * These variables point to the currently active conversion functions,
+ * or are NULL when no conversion is needed.  (Like all state here, they are per-thread.)
+ */
+static __thread FmgrInfo *ToServerConvProc = NULL;	/* client -> server */
+static __thread FmgrInfo *ToClientConvProc = NULL;	/* server -> client */
+
+/*
+ * This variable stores the conversion function to convert from UTF-8
+ * to the server encoding.  It's NULL if the server encoding *is* UTF-8,
+ * or if we lack a conversion function for this.
+ */
+static __thread FmgrInfo *Utf8ToServerConvProc = NULL;
+
+/*
+ * These variables track the currently-selected encodings.
+ */
+static __thread const pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
+static __thread const pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
+static __thread const pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
+
+/*
+ * During backend startup we can't set client encoding because we (a)
+ * can't look up the conversion functions, and (b) may not know the database
+ * encoding yet either.  So SetClientEncoding() just accepts anything and
+ * remembers it for InitializeClientEncoding() to apply later.
+ */
+static __thread bool backend_startup_complete = false;	/* set by InitializeClientEncoding() */
+static __thread int	pending_client_encoding = PG_SQL_ASCII;	/* request saved during startup */
+
+
+/* Internal functions (defined later in this file) */
+static char *perform_default_encoding_conversion(const char *src,
+												 int len, bool is_client_to_server);
+static int	cliplen(const char *str, int len, int limit);
+
+
+/*
+ * Prepare for a future call to SetClientEncoding.  Success should mean
+ * that SetClientEncoding is guaranteed to succeed for this encoding request.
+ * In a transaction the conversion procs are looked up and cached; otherwise only the cache is checked.
+ * (But note that success before backend_startup_complete does not guarantee
+ * success after ...)
+ *
+ * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
+ */
+int
+PrepareClientEncoding(int encoding)
+{
+	int			current_server_encoding;
+	ListCell   *lc;
+
+	if (!PG_VALID_FE_ENCODING(encoding))
+		return -1;
+
+	/* Can't do anything during startup, per notes above */
+	if (!backend_startup_complete)
+		return 0;
+
+	current_server_encoding = GetDatabaseEncoding();
+
+	/*
+	 * Check for cases that require no conversion function.
+	 */
+	if (current_server_encoding == encoding ||
+		current_server_encoding == PG_SQL_ASCII ||
+		encoding == PG_SQL_ASCII)
+		return 0;
+
+	if (IsTransactionState())
+	{
+		/*
+		 * If we're in a live transaction, it's safe to access the catalogs,
+		 * so look up the functions.  We repeat the lookup even if the info is
+		 * already cached, so that we can react to changes in the contents of
+		 * pg_conversion.
+		 */
+		Oid			to_server_proc,
+					to_client_proc;
+		ConvProcInfo *convinfo;
+		MemoryContext oldcontext;
+
+		to_server_proc = FindDefaultConversionProc(encoding,
+												   current_server_encoding);
+		if (!OidIsValid(to_server_proc))	/* no client -> server conversion */
+			return -1;
+		to_client_proc = FindDefaultConversionProc(current_server_encoding,
+												   encoding);
+		if (!OidIsValid(to_client_proc))	/* no server -> client conversion */
+			return -1;
+
+		/*
+		 * Load the fmgr info into TopMemoryContext (could still fail here)
+		 */
+		convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
+													   sizeof(ConvProcInfo));
+		convinfo->s_encoding = current_server_encoding;
+		convinfo->c_encoding = encoding;
+		fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
+					  TopMemoryContext);
+		fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
+					  TopMemoryContext);
+
+		/* Attach new info to head of list */
+		oldcontext = MemoryContextSwitchTo(TopMemoryContext);	/* so the list cell is allocated there too */
+		ConvProcList = lcons(convinfo, ConvProcList);
+		MemoryContextSwitchTo(oldcontext);
+
+		/*
+		 * We cannot yet remove any older entry for the same encoding pair,
+		 * since it could still be in use.  SetClientEncoding will clean up.
+		 */
+
+		return 0;				/* success */
+	}
+	else
+	{
+		/*
+		 * If we're not in a live transaction, the only thing we can do is
+		 * restore a previous setting using the cache.  This covers all
+		 * transaction-rollback cases.  The only case it might not work for is
+		 * trying to change client_encoding on the fly by editing
+		 * postgresql.conf and SIGHUP'ing.  Which would probably be a stupid
+		 * thing to do anyway.
+		 */
+		foreach(lc, ConvProcList)
+		{
+			ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
+
+			if (oldinfo->s_encoding == current_server_encoding &&
+				oldinfo->c_encoding == encoding)
+				return 0;		/* a matching pair is already cached */
+		}
+
+		return -1;				/* it's not cached, so fail */
+	}
+}
+
+/*
+ * Set the active client encoding and set up the conversion-function pointers.
+ * PrepareClientEncoding should have been called previously for this encoding.
+ * Before startup completes, the request is only recorded in pending_client_encoding.
+ * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
+ */
+int
+SetClientEncoding(int encoding)
+{
+	int			current_server_encoding;
+	bool		found;
+	ListCell   *lc;
+
+	if (!PG_VALID_FE_ENCODING(encoding))
+		return -1;
+
+	/* Can't do anything during startup, per notes above */
+	if (!backend_startup_complete)
+	{
+		pending_client_encoding = encoding;	/* applied by InitializeClientEncoding() */
+		return 0;
+	}
+
+	current_server_encoding = GetDatabaseEncoding();
+
+	/*
+	 * Check for cases that require no conversion function.
+	 */
+	if (current_server_encoding == encoding ||
+		current_server_encoding == PG_SQL_ASCII ||
+		encoding == PG_SQL_ASCII)
+	{
+		ClientEncoding = &pg_enc2name_tbl[encoding];
+		ToServerConvProc = NULL;	/* NULL means no conversion is needed */
+		ToClientConvProc = NULL;
+		return 0;
+	}
+
+	/*
+	 * Search the cache for the entry previously prepared by
+	 * PrepareClientEncoding; if there isn't one, we lose.  While at it,
+	 * release any duplicate entries so that repeated Prepare/Set cycles don't
+	 * leak memory.
+	 */
+	found = false;
+	foreach(lc, ConvProcList)
+	{
+		ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
+
+		if (convinfo->s_encoding == current_server_encoding &&
+			convinfo->c_encoding == encoding)
+		{
+			if (!found)
+			{
+				/* Found newest entry, so set up */
+				ClientEncoding = &pg_enc2name_tbl[encoding];
+				ToServerConvProc = &convinfo->to_server_info;
+				ToClientConvProc = &convinfo->to_client_info;
+				found = true;
+			}
+			else
+			{
+				/* Duplicate (older) entry for the same pair, release it */
+				ConvProcList = foreach_delete_current(ConvProcList, lc);
+				pfree(convinfo);
+			}
+		}
+	}
+
+	if (found)
+		return 0;				/* success */
+	else
+		return -1;				/* it's not cached, so fail */
+}
+
+/*
+ * Initialize client encoding conversions: apply the pending client encoding and cache the UTF8 proc.
+ *		Called from InitPostgres() once during backend startup.
+ */
+void
+InitializeClientEncoding(void)
+{
+	int			current_server_encoding;
+
+	Assert(!backend_startup_complete);
+	backend_startup_complete = true;	/* from here on Prepare/Set do real work */
+
+	if (PrepareClientEncoding(pending_client_encoding) < 0 ||
+		SetClientEncoding(pending_client_encoding) < 0)
+	{
+		/*
+		 * Oops, the requested conversion is not available. We couldn't fail
+		 * before, but we can now.
+		 */
+		ereport(FATAL,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("conversion between %s and %s is not supported",
+						pg_enc2name_tbl[pending_client_encoding].name,
+						GetDatabaseEncodingName())));
+	}
+
+	/*
+	 * Also look up the UTF8-to-server conversion function if needed.  Since
+	 * the server encoding is fixed within any one backend process, we don't
+	 * have to do this more than once.
+	 */
+	current_server_encoding = GetDatabaseEncoding();
+	if (current_server_encoding != PG_UTF8 &&
+		current_server_encoding != PG_SQL_ASCII)
+	{
+		Oid			utf8_to_server_proc;
+
+		Assert(IsTransactionState());	/* the lookup below is expected to run in a transaction */
+		utf8_to_server_proc =
+			FindDefaultConversionProc(PG_UTF8,
+									  current_server_encoding);
+		/* If there's no such conversion, just leave the pointer as NULL */
+		if (OidIsValid(utf8_to_server_proc))
+		{
+			FmgrInfo   *finfo;
+
+			finfo = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext,
+													sizeof(FmgrInfo));
+			fmgr_info_cxt(utf8_to_server_proc, finfo,
+						  TopMemoryContext);
+			/* Set Utf8ToServerConvProc only after data is fully valid */
+			Utf8ToServerConvProc = finfo;
+		}
+	}
+}
+
+/*
+ * returns the current client encoding (the encoding field of ClientEncoding)
+ */
+int
+pg_get_client_encoding(void)
+{
+	return ClientEncoding->encoding;
+}
+
+/*
+ * returns the current client encoding name (points into the ClientEncoding entry; not a copy)
+ */
+const char *
+pg_get_client_encoding_name(void)
+{
+	return ClientEncoding->name;
+}
+
+/*
+ * Convert src string to another encoding (general case).
+ *
+ * src, len       - input buffer and its length in bytes
+ * src_encoding   - encoding to convert from
+ * dest_encoding  - encoding to convert to
+ *
+ * Returns src itself when no conversion is performed (len <= 0, identical
+ * encodings, or either side being SQL_ASCII); otherwise returns a buffer
+ * newly allocated in CurrentMemoryContext.  Callers can thus tell whether a
+ * conversion happened by comparing the result pointer with src.
+ *
+ * Raises an error if a conversion is needed but we are not inside a
+ * transaction, if no default conversion function exists for the encoding
+ * pair, or if the input (or the converted result) is too long.
+ *
+ * See the notes about string conversion functions at the top of this file.
+ */
+unsigned char *
+pg_do_encoding_conversion(unsigned char *src, int len,
+						  int src_encoding, int dest_encoding)
+{
+	unsigned char *result;
+	Oid			proc;
+
+	if (len <= 0)
+		return src;				/* empty string is always valid */
+
+	if (src_encoding == dest_encoding)
+		return src;				/* no conversion required, assume valid */
+
+	if (dest_encoding == PG_SQL_ASCII)
+		return src;				/* any string is valid in SQL_ASCII */
+
+	if (src_encoding == PG_SQL_ASCII)
+	{
+		/* No conversion is possible, but we must validate the result */
+		(void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
+		return src;
+	}
+
+	/* The lookup below is done only inside a transaction. */
+	if (!IsTransactionState())	/* shouldn't happen */
+		elog(ERROR, "cannot perform encoding conversion outside a transaction");
+
+	proc = FindDefaultConversionProc(src_encoding, dest_encoding);
+	if (!OidIsValid(proc))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_FUNCTION),
+				 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
+						pg_encoding_to_char(src_encoding),
+						pg_encoding_to_char(dest_encoding))));
+
+	/*
+	 * Allocate space for conversion result, being wary of integer overflow.
+	 *
+	 * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
+	 * required space, so it might exceed MaxAllocSize even though the result
+	 * would actually fit.  We do not want to hand back a result string that
+	 * exceeds MaxAllocSize, because callers might not cope gracefully --- but
+	 * if we just allocate more than that, and don't use it, that's fine.
+	 */
+	if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("out of memory"),
+				 errdetail("String of %d bytes is too long for encoding conversion.",
+						   len)));
+
+	/* Worst-case sized output buffer, plus one byte for the terminator. */
+	result = (unsigned char *)
+		MemoryContextAllocHuge(CurrentMemoryContext,
+							   (Size) len * MAX_CONVERSION_GROWTH + 1);
+
+	/* Run the conversion with noError = false; its return value is unused. */
+	(void) OidFunctionCall6(proc,
+							Int32GetDatum(src_encoding),
+							Int32GetDatum(dest_encoding),
+							CStringGetDatum((char *) src),
+							CStringGetDatum((char *) result),
+							Int32GetDatum(len),
+							BoolGetDatum(false));
+
+	/*
+	 * If the result is large, it's worth repalloc'ing to release any extra
+	 * space we asked for.  The cutoff here is somewhat arbitrary, but we
+	 * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
+	 */
+	if (len > 1000000)
+	{
+		/* The output length is found via strlen(), i.e. up to the first NUL. */
+		Size		resultlen = strlen((char *) result);
+
+		if (resultlen >= MaxAllocSize)
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("out of memory"),
+					 errdetail("String of %d bytes is too long for encoding conversion.",
+							   len)));
+
+		result = (unsigned char *) repalloc(result, resultlen + 1);
+	}
+
+	return result;
+}
+
+/*
+ * Convert src string to another encoding, writing into a caller's buffer.
+ *
+ * Unlike the other conversion entry points, nothing is palloc'd here: the
+ * caller has already resolved the conversion function (for instance with
+ * FindDefaultConversionProc()) and supplies the output area.
+ *
+ * proc           - OID of the conversion function to invoke
+ * src_encoding   - encoding to convert from
+ * dest_encoding  - encoding to convert to
+ * src, srclen    - input buffer and its length in bytes
+ * dest, destlen  - destination buffer and its size in bytes
+ * noError        - passed through to the conversion function
+ *
+ * The output is null-terminated.
+ *
+ * The result is the int32 value returned by the conversion function
+ * (presumably the number of source bytes consumed --- confirm against the
+ * conversion function contract).
+ *
+ * If destlen < srclen * MAX_CONVERSION_GROWTH + 1, the converted output
+ * wouldn't necessarily fit in the output buffer, so only a prefix of the
+ * input is handed to the conversion function.
+ *
+ * TODO: The conversion function interface is not great.  Firstly, it
+ * would be nice to pass through the destination buffer size to the
+ * conversion function, so that if you pass a shorter destination buffer, it
+ * could still continue to fill up the whole buffer.  Currently, we have to
+ * assume worst case expansion and stop the conversion short, even if there
+ * is in fact space left in the destination buffer.  Secondly, it would be
+ * nice to return the number of bytes written to the caller, to avoid a call
+ * to strlen().
+ */
+int
+pg_do_encoding_conversion_buf(Oid proc,
+							  int src_encoding,
+							  int dest_encoding,
+							  unsigned char *src, int srclen,
+							  unsigned char *dest, int destlen,
+							  bool noError)
+{
+	Size		max_srclen;
+	Datum		converted;
+
+	/*
+	 * Work out the input size at which worst-case growth reaches the
+	 * destination size (less the terminator), and clamp srclen to it.
+	 */
+	max_srclen = (destlen - 1) / (Size) MAX_CONVERSION_GROWTH;
+	if ((Size) srclen >= max_srclen)
+		srclen = max_srclen;
+
+	converted = OidFunctionCall6(proc,
+								 Int32GetDatum(src_encoding),
+								 Int32GetDatum(dest_encoding),
+								 CStringGetDatum((char *) src),
+								 CStringGetDatum((char *) dest),
+								 Int32GetDatum(srclen),
+								 BoolGetDatum(noError));
+
+	return DatumGetInt32(converted);
+}
+
+/*
+ * SQL-callable: convert a string from the database encoding into the
+ * encoding named by the second argument.
+ *
+ * BYTEA convert_to(TEXT string, NAME encoding_name) */
+Datum
+pg_convert_to(PG_FUNCTION_ARGS)
+{
+	Datum		text_arg = PG_GETARG_DATUM(0);
+	Datum		target_name = PG_GETARG_DATUM(1);
+	Datum		db_name;
+
+	/* The source side is always the database encoding. */
+	db_name = DirectFunctionCall1(namein,
+								  CStringGetDatum(DatabaseEncoding->name));
+
+	/*
+	 * pg_convert wants a bytea first argument, but we hand it our text
+	 * argument as-is: both are varlena types and so share the same layout.
+	 */
+	PG_RETURN_DATUM(DirectFunctionCall3(pg_convert, text_arg,
+										db_name, target_name));
+}
+
+/*
+ * SQL-callable: convert a string from the encoding named by the second
+ * argument into the database encoding.
+ *
+ * TEXT convert_from(BYTEA string, NAME encoding_name) */
+Datum
+pg_convert_from(PG_FUNCTION_ARGS)
+{
+	Datum		bytea_arg = PG_GETARG_DATUM(0);
+	Datum		source_name = PG_GETARG_DATUM(1);
+	Datum		db_name;
+
+	/* The destination side is always the database encoding. */
+	db_name = DirectFunctionCall1(namein,
+								  CStringGetDatum(DatabaseEncoding->name));
+
+	/*
+	 * pg_convert hands back a bytea, which we return as text: both are
+	 * varlena types with the same layout.  Not every bytea is valid text,
+	 * but this one is, because pg_convert was asked to produce data in the
+	 * current database encoding.
+	 */
+	PG_RETURN_DATUM(DirectFunctionCall3(pg_convert, bytea_arg,
+										source_name, db_name));
+}
+
+/*
+ * SQL-callable: convert a string between two arbitrary, named encodings.
+ *
+ * The input is first verified to be valid in the source encoding.  Errors
+ * are raised for unknown encoding names, invalid input, and anything
+ * pg_do_encoding_conversion() rejects.
+ *
+ * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
+ */
+Datum
+pg_convert(PG_FUNCTION_ARGS)
+{
+	bytea	   *input = PG_GETARG_BYTEA_PP(0);
+	char	   *src_name = NameStr(*PG_GETARG_NAME(1));
+	char	   *dest_name = NameStr(*PG_GETARG_NAME(2));
+	int			src_enc = pg_char_to_encoding(src_name);
+	int			dest_enc = pg_char_to_encoding(dest_name);
+	const char *in_data;
+	char	   *out_data;
+	bytea	   *out;
+	int			nbytes;
+	bool		converted;
+
+	/* Reject unknown encoding names, source first. */
+	if (src_enc < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid source encoding name \"%s\"",
+						src_name)));
+	if (dest_enc < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid destination encoding name \"%s\"",
+						dest_name)));
+
+	/* The source bytes must be valid in the claimed source encoding. */
+	nbytes = VARSIZE_ANY_EXHDR(input);
+	in_data = VARDATA_ANY(input);
+	(void) pg_verify_mbstr(src_enc, in_data, nbytes, false);
+
+	/* Do the conversion; we get the input pointer back if none was needed. */
+	out_data = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, in_data),
+												  nbytes,
+												  src_enc,
+												  dest_enc);
+	converted = (out_data != in_data);
+
+	/* A real conversion may have changed the byte length. */
+	if (converted)
+		nbytes = strlen(out_data);
+
+	/* Wrap the bytes up as a bytea value. */
+	out = (bytea *) palloc(nbytes + VARHDRSZ);
+	SET_VARSIZE(out, nbytes + VARHDRSZ);
+	memcpy(VARDATA(out), out_data, nbytes);
+
+	if (converted)
+		pfree(out_data);
+
+	/* free memory if allocated by the toaster */
+	PG_FREE_IF_COPY(input, 0);
+
+	PG_RETURN_BYTEA_P(out);
+}
+
+/*
+ * SQL-callable: count the characters in a byte string interpreted as text
+ * in the named encoding.  An error is raised if the encoding name is
+ * unknown or the data is not valid in that encoding.
+ *
+ * INT4 length (BYTEA string, NAME src_encoding_name)
+ */
+Datum
+length_in_encoding(PG_FUNCTION_ARGS)
+{
+	bytea	   *input = PG_GETARG_BYTEA_PP(0);
+	char	   *enc_name = NameStr(*PG_GETARG_NAME(1));
+	int			enc = pg_char_to_encoding(enc_name);
+	const char *data;
+	int			nbytes;
+
+	if (enc < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid encoding name \"%s\"",
+						enc_name)));
+
+	nbytes = VARSIZE_ANY_EXHDR(input);
+	data = VARDATA_ANY(input);
+
+	/* Verification and character counting happen in one pass. */
+	PG_RETURN_INT32(pg_verify_mbstr_len(enc, data, nbytes, false));
+}
+
+/*
+ * SQL-callable: report the maximum byte length of a character in the given
+ * encoding, or NULL if the encoding number is not valid.
+ *
+ * Note encoding is specified numerically, not by name as above.
+ */
+Datum
+pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
+{
+	int			encoding = PG_GETARG_INT32(0);
+
+	/* Unknown encoding numbers yield SQL NULL rather than an error. */
+	if (!PG_VALID_ENCODING(encoding))
+		PG_RETURN_NULL();
+
+	PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
+}
+
+/*
+ * Convert a string from the current client encoding to the server encoding.
+ * This simply defers to pg_any_to_server() with the client encoding.
+ *
+ * See the notes about string conversion functions at the top of this file.
+ */
+char *
+pg_client_to_server(const char *s, int len)
+{
+	int			client_enc = ClientEncoding->encoding;
+
+	return pg_any_to_server(s, len, client_enc);
+}
+
+/*
+ * Convert any encoding to server encoding.
+ *
+ * s, len    - input buffer and its length in bytes
+ * encoding  - encoding the input is claimed to be in
+ *
+ * Returns s itself (with const cast away) when no conversion is performed,
+ * otherwise whatever the conversion routine called below returns.
+ *
+ * See the notes about string conversion functions at the top of this file.
+ *
+ * Unlike the other string conversion functions, this will apply validation
+ * even if encoding == DatabaseEncoding->encoding.  This is because this is
+ * used to process data coming in from outside the database, and we never
+ * want to just assume validity.
+ */
+char *
+pg_any_to_server(const char *s, int len, int encoding)
+{
+	if (len <= 0)
+		return unconstify(char *, s);	/* empty string is always valid */
+
+	if (encoding == DatabaseEncoding->encoding ||
+		encoding == PG_SQL_ASCII)
+	{
+		/*
+		 * No conversion is needed, but we must still validate the data.
+		 * Note that validation is against the database encoding in both
+		 * cases.
+		 */
+		(void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
+		return unconstify(char *, s);
+	}
+
+	if (DatabaseEncoding->encoding == PG_SQL_ASCII)
+	{
+		/*
+		 * No conversion is possible, but we must still validate the data,
+		 * because the client-side code might have done string escaping using
+		 * the selected client_encoding.  If the client encoding is ASCII-safe
+		 * then we just do a straight validation under that encoding.  For an
+		 * ASCII-unsafe encoding we have a problem: we dare not pass such data
+		 * to the parser but we have no way to convert it.  We compromise by
+		 * rejecting the data if it contains any non-ASCII characters.
+		 */
+		if (PG_VALID_BE_ENCODING(encoding))
+			(void) pg_verify_mbstr(encoding, s, len, false);
+		else
+		{
+			int			i;
+
+			/* Reject embedded NUL bytes as well as any high-bit-set byte. */
+			for (i = 0; i < len; i++)
+			{
+				if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
+					ereport(ERROR,
+							(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+							 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
+									pg_enc2name_tbl[PG_SQL_ASCII].name,
+									(unsigned char) s[i])));
+			}
+		}
+		return unconstify(char *, s);
+	}
+
+	/* Fast path if we can use cached conversion function */
+	if (encoding == ClientEncoding->encoding)
+		return perform_default_encoding_conversion(s, len, true);
+
+	/* General case ... will not work outside transactions */
+	return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
+											  len,
+											  encoding,
+											  DatabaseEncoding->encoding);
+}
+
+/*
+ * Convert a string from the server encoding to the current client encoding.
+ * This simply defers to pg_server_to_any() with the client encoding.
+ *
+ * See the notes about string conversion functions at the top of this file.
+ */
+char *
+pg_server_to_client(const char *s, int len)
+{
+	int			client_enc = ClientEncoding->encoding;
+
+	return pg_server_to_any(s, len, client_enc);
+}
+
+/*
+ * Convert a string from the server encoding to an arbitrary encoding.
+ *
+ * Returns s itself (with const cast away) when no conversion is performed,
+ * otherwise whatever the conversion routine called below returns.
+ *
+ * See the notes about string conversion functions at the top of this file.
+ */
+char *
+pg_server_to_any(const char *s, int len, int encoding)
+{
+	char	   *unchanged = unconstify(char *, s);
+
+	/*
+	 * Nothing to do for an empty string (always valid), or when the target
+	 * is the server encoding or SQL_ASCII (data is assumed valid).
+	 */
+	if (len <= 0 ||
+		encoding == DatabaseEncoding->encoding ||
+		encoding == PG_SQL_ASCII)
+		return unchanged;
+
+	if (DatabaseEncoding->encoding == PG_SQL_ASCII)
+	{
+		/* No conversion is possible, but we must validate the result */
+		(void) pg_verify_mbstr(encoding, s, len, false);
+		return unchanged;
+	}
+
+	if (encoding != ClientEncoding->encoding)
+	{
+		/* General case ... will not work outside transactions */
+		return (char *) pg_do_encoding_conversion((unsigned char *) unchanged,
+												  len,
+												  DatabaseEncoding->encoding,
+												  encoding);
+	}
+
+	/* Fast path: the cached conversion function applies */
+	return perform_default_encoding_conversion(s, len, false);
+}
+
+/*
+ *	Perform default encoding conversion using cached FmgrInfo. Since
+ *	this function does not access database at all, it is safe to call
+ *	outside transactions.  If the conversion has not been set up by
+ *	SetClientEncoding(), no conversion is performed.
+ *
+ *	src, len             - input buffer and its length in bytes
+ *	is_client_to_server  - true to convert client -> server encoding,
+ *						   false to convert server -> client encoding
+ *
+ *	Returns src itself (with const cast away) when no cached conversion
+ *	function is available; otherwise a buffer newly allocated in
+ *	CurrentMemoryContext.
+ */
+static char *
+perform_default_encoding_conversion(const char *src, int len,
+									bool is_client_to_server)
+{
+	char	   *result;
+	int			src_encoding,
+				dest_encoding;
+	FmgrInfo   *flinfo;
+
+	/* Select the direction-specific encodings and cached function. */
+	if (is_client_to_server)
+	{
+		src_encoding = ClientEncoding->encoding;
+		dest_encoding = DatabaseEncoding->encoding;
+		flinfo = ToServerConvProc;
+	}
+	else
+	{
+		src_encoding = DatabaseEncoding->encoding;
+		dest_encoding = ClientEncoding->encoding;
+		flinfo = ToClientConvProc;
+	}
+
+	/* No cached conversion function: hand the input back untouched. */
+	if (flinfo == NULL)
+		return unconstify(char *, src);
+
+	/*
+	 * Allocate space for conversion result, being wary of integer overflow.
+	 * See comments in pg_do_encoding_conversion.
+	 */
+	if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("out of memory"),
+				 errdetail("String of %d bytes is too long for encoding conversion.",
+						   len)));
+
+	/* Worst-case sized output buffer, plus one byte for the terminator. */
+	result = (char *)
+		MemoryContextAllocHuge(CurrentMemoryContext,
+							   (Size) len * MAX_CONVERSION_GROWTH + 1);
+
+	/* Run the conversion with noError = false; its return value is unused. */
+	FunctionCall6(flinfo,
+				  Int32GetDatum(src_encoding),
+				  Int32GetDatum(dest_encoding),
+				  CStringGetDatum(src),
+				  CStringGetDatum(result),
+				  Int32GetDatum(len),
+				  BoolGetDatum(false));
+
+	/*
+	 * Release extra space if there might be a lot --- see comments in
+	 * pg_do_encoding_conversion.
+	 */
+	if (len > 1000000)
+	{
+		Size		resultlen = strlen(result);
+
+		if (resultlen >= MaxAllocSize)
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("out of memory"),
+					 errdetail("String of %d bytes is too long for encoding conversion.",
+							   len)));
+
+		result = (char *) repalloc(result, resultlen + 1);
+	}
+
+	return result;
+}
+
+/*
+ * Convert a single Unicode code point into a string in the server encoding.
+ *
+ * The code point given by "c" is converted and stored at *s, which must
+ * have at least MAX_UNICODE_EQUIVALENT_STRING+1 bytes available.
+ * The output will have a trailing '\0'.  Throws error if the conversion
+ * cannot be performed.
+ *
+ * Note that this relies on having previously looked up any required
+ * conversion function.  That's partly for speed but mostly because the parser
+ * may call this outside any transaction, or in an aborted transaction.
+ *
+ * Three paths are taken, in order: ASCII code points are stored directly;
+ * under a UTF-8 server encoding the code point is merely re-encoded as
+ * UTF-8; otherwise the cached Utf8ToServerConvProc is invoked.
+ */
+void
+pg_unicode_to_server(pg_wchar c, unsigned char *s)
+{
+	unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
+	int			c_as_utf8_len;
+	int			server_encoding;
+
+	/*
+	 * Complain if invalid Unicode code point.  The choice of errcode here is
+	 * debatable, but really our caller should have checked this anyway.
+	 */
+	if (!is_valid_unicode_codepoint(c))
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("invalid Unicode code point")));
+
+	/* Otherwise, if it's in ASCII range, conversion is trivial */
+	if (c <= 0x7F)
+	{
+		s[0] = (unsigned char) c;
+		s[1] = '\0';
+		return;
+	}
+
+	/* If the server encoding is UTF-8, we just need to reformat the code */
+	server_encoding = GetDatabaseEncoding();
+	if (server_encoding == PG_UTF8)
+	{
+		unicode_to_utf8(c, s);
+		/* terminate right after the character just written */
+		s[pg_utf_mblen(s)] = '\0';
+		return;
+	}
+
+	/* For all other cases, we must have a conversion function available */
+	if (Utf8ToServerConvProc == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("conversion between %s and %s is not supported",
+						pg_enc2name_tbl[PG_UTF8].name,
+						GetDatabaseEncodingName())));
+
+	/* Construct UTF-8 source string */
+	unicode_to_utf8(c, c_as_utf8);
+	c_as_utf8_len = pg_utf_mblen(c_as_utf8);
+	c_as_utf8[c_as_utf8_len] = '\0';
+
+	/* Convert, or throw error if we can't */
+	FunctionCall6(Utf8ToServerConvProc,
+				  Int32GetDatum(PG_UTF8),
+				  Int32GetDatum(server_encoding),
+				  CStringGetDatum((char *) c_as_utf8),
+				  CStringGetDatum((char *) s),
+				  Int32GetDatum(c_as_utf8_len),
+				  BoolGetDatum(false));
+}
+
+/*
+ * Convert a single Unicode code point into a string in the server encoding,
+ * reporting failure through the return value instead of raising an error.
+ *
+ * Behaves like pg_unicode_to_server() otherwise: the result is stored at *s
+ * with a trailing '\0'.  Returns true on success, false if the code point is
+ * invalid, no conversion function is available, or the conversion did not
+ * consume the whole character.
+ */
+bool
+pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
+{
+	unsigned char utf8_buf[MAX_MULTIBYTE_CHAR_LEN + 1];
+	int			utf8_len;
+	int			server_encoding;
+	Datum		consumed;
+
+	/* An invalid code point can never be converted */
+	if (!is_valid_unicode_codepoint(c))
+		return false;
+
+	/* ASCII needs no conversion at all */
+	if (c <= 0x7F)
+	{
+		s[0] = (unsigned char) c;
+		s[1] = '\0';
+		return true;
+	}
+
+	/* Under a UTF-8 server encoding, emitting the UTF-8 form is enough */
+	server_encoding = GetDatabaseEncoding();
+	if (server_encoding == PG_UTF8)
+	{
+		unicode_to_utf8(c, s);
+		s[pg_utf_mblen(s)] = '\0';
+		return true;
+	}
+
+	/* Anything else requires the cached UTF8-to-server conversion function */
+	if (Utf8ToServerConvProc == NULL)
+		return false;
+
+	/* Build the null-terminated UTF-8 form of the character */
+	unicode_to_utf8(c, utf8_buf);
+	utf8_len = pg_utf_mblen(utf8_buf);
+	utf8_buf[utf8_len] = '\0';
+
+	/* Run the conversion in no-error mode */
+	consumed = FunctionCall6(Utf8ToServerConvProc,
+							 Int32GetDatum(PG_UTF8),
+							 Int32GetDatum(server_encoding),
+							 CStringGetDatum((char *) utf8_buf),
+							 CStringGetDatum((char *) s),
+							 Int32GetDatum(utf8_len),
+							 BoolGetDatum(true));
+
+	/* Success means every input byte was consumed */
+	return DatumGetInt32(consumed) == utf8_len;
+}
+
+
+/*
+ * Convert a null-terminated multibyte string in the database encoding to a
+ * pg_wchar string; returns the value reported by the encoding's converter.
+ */
+int
+pg_mb2wchar(const char *from, pg_wchar *to)
+{
+	const unsigned char *src = (const unsigned char *) from;
+	int			nbytes = strlen(from);
+
+	return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len(src, to, nbytes);
+}
+
+/*
+ * Convert the first len bytes of a multibyte string in the database encoding
+ * to a pg_wchar string; returns the value reported by the encoding's
+ * converter.
+ */
+int
+pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
+{
+	const unsigned char *src = (const unsigned char *) from;
+
+	return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len(src, to, len);
+}
+
+/*
+ * Like pg_mb2wchar_with_len(), but for an explicitly specified encoding
+ * rather than the database encoding.
+ */
+int
+pg_encoding_mb2wchar_with_len(int encoding,
+							  const char *from, pg_wchar *to, int len)
+{
+	const unsigned char *src = (const unsigned char *) from;
+
+	return pg_wchar_table[encoding].mb2wchar_with_len(src, to, len);
+}
+
+/*
+ * Convert a zero-terminated pg_wchar string to a multibyte string in the
+ * database encoding; returns the value reported by the encoding's converter.
+ */
+int
+pg_wchar2mb(const pg_wchar *from, char *to)
+{
+	unsigned char *dst = (unsigned char *) to;
+	int			nchars = pg_wchar_strlen(from);
+
+	return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, dst, nchars);
+}
+
+/*
+ * Convert the first len elements of a pg_wchar string to a multibyte string
+ * in the database encoding; returns the value reported by the encoding's
+ * converter.
+ */
+int
+pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
+{
+	unsigned char *dst = (unsigned char *) to;
+
+	return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, dst, len);
+}
+
+/*
+ * Like pg_wchar2mb_with_len(), but for an explicitly specified encoding
+ * rather than the database encoding.
+ */
+int
+pg_encoding_wchar2mb_with_len(int encoding,
+							  const pg_wchar *from, char *to, int len)
+{
+	unsigned char *dst = (unsigned char *) to;
+
+	return pg_wchar_table[encoding].wchar2mb_with_len(from, dst, len);
+}
+
+/*
+ * Return the byte length of the multibyte character starting at mbstr,
+ * according to the database encoding.
+ */
+int
+pg_mblen(const char *mbstr)
+{
+	mblen_converter mblen_fn = pg_wchar_table[DatabaseEncoding->encoding].mblen;
+
+	return (*mblen_fn) ((const unsigned char *) mbstr);
+}
+
+/*
+ * Return the display length of the multibyte character starting at mbstr,
+ * according to the database encoding.
+ */
+int
+pg_dsplen(const char *mbstr)
+{
+	const unsigned char *ch = (const unsigned char *) mbstr;
+
+	return pg_wchar_table[DatabaseEncoding->encoding].dsplen(ch);
+}
+
+/*
+ * Return the length, counted in characters, of a null-terminated multibyte
+ * string in the database encoding.
+ */
+int
+pg_mbstrlen(const char *mbstr)
+{
+	int			nchars;
+
+	/* In a single-byte encoding, characters and bytes coincide */
+	if (pg_database_encoding_max_length() == 1)
+		return strlen(mbstr);
+
+	/* Otherwise hop from character to character until the terminator */
+	for (nchars = 0; *mbstr != '\0'; nchars++)
+		mbstr += pg_mblen(mbstr);
+
+	return nchars;
+}
+
+/*
+ * Return the length, counted in characters, of a multibyte string in the
+ * database encoding that is at most "limit" bytes long and need not be
+ * null-terminated.
+ */
+int
+pg_mbstrlen_with_len(const char *mbstr, int limit)
+{
+	int			nchars = 0;
+
+	/* In a single-byte encoding the byte limit is returned as the count */
+	if (pg_database_encoding_max_length() == 1)
+		return limit;
+
+	/* Step over one character at a time until bytes run out or a NUL appears */
+	for (; limit > 0 && *mbstr != '\0'; nchars++)
+	{
+		int			chlen = pg_mblen(mbstr);
+
+		mbstr += chlen;
+		limit -= chlen;
+	}
+
+	return nchars;
+}
+
+/*
+ * Return the byte length of the longest prefix of mbstr (len bytes long,
+ * not necessarily null-terminated) that does not exceed "limit" bytes and
+ * does not split a multibyte character.  The database encoding is assumed.
+ */
+int
+pg_mbcliplen(const char *mbstr, int len, int limit)
+{
+	int			db_encoding = DatabaseEncoding->encoding;
+
+	return pg_encoding_mbcliplen(db_encoding, mbstr, len, limit);
+}
+
+/*
+ * pg_mbcliplen with specified encoding: return the byte length of the
+ * longest prefix of mbstr that fits within "limit" bytes without splitting
+ * a multibyte character of the given encoding.
+ */
+int
+pg_encoding_mbcliplen(int encoding, const char *mbstr,
+					  int len, int limit)
+{
+	mblen_converter char_len;
+	int			kept = 0;
+
+	/* Single-byte encodings cannot split a character; use the simple path */
+	if (pg_encoding_max_length(encoding) == 1)
+		return cliplen(mbstr, len, limit);
+
+	char_len = pg_wchar_table[encoding].mblen;
+
+	while (len > 0 && *mbstr != '\0')
+	{
+		int			chlen = (*char_len) ((const unsigned char *) mbstr);
+		int			next = kept + chlen;
+
+		/* Stop before a character that would overrun the limit */
+		if (next > limit)
+			break;
+		kept = next;
+
+		/* Stop as soon as the limit is met exactly */
+		if (kept == limit)
+			break;
+
+		mbstr += chlen;
+		len -= chlen;
+	}
+
+	return kept;
+}
+
+/*
+ * Like pg_mbcliplen, but "limit" counts characters rather than bytes: return
+ * the byte length of the first "limit" characters of mbstr (or of all of it,
+ * if it is shorter).
+ */
+int
+pg_mbcharcliplen(const char *mbstr, int len, int limit)
+{
+	int			nbytes = 0;
+	int			nth;
+
+	/* In a single-byte encoding, characters and bytes coincide */
+	if (pg_database_encoding_max_length() == 1)
+		return cliplen(mbstr, len, limit);
+
+	/* "nth" is the 1-based ordinal of the character being examined */
+	for (nth = 1; len > 0 && *mbstr != '\0'; nth++)
+	{
+		int			chlen = pg_mblen(mbstr);
+
+		if (nth > limit)
+			break;
+
+		nbytes += chlen;
+		len -= chlen;
+		mbstr += chlen;
+	}
+
+	return nbytes;
+}
+
+/*
+ * mbcliplen for any single-byte encoding: count bytes of str up to the
+ * smaller of len and limit, stopping early at a NUL byte.
+ */
+static int
+cliplen(const char *str, int len, int limit)
+{
+	int			bound = Min(len, limit);
+	int			n;
+
+	for (n = 0; n < bound && str[n] != '\0'; n++)
+		;
+
+	return n;
+}
+
+/*
+ * Record the database (server) encoding.  Raises an error unless the given
+ * encoding number is a valid backend encoding.
+ */
+void
+SetDatabaseEncoding(int encoding)
+{
+	if (!PG_VALID_BE_ENCODING(encoding))
+		elog(ERROR, "invalid database encoding: %d", encoding);
+
+	/* Point at the matching descriptor in the encoding-name table */
+	DatabaseEncoding = &pg_enc2name_tbl[encoding];
+
+	/* The table is expected to be indexed by encoding number */
+	Assert(pg_enc2name_tbl[encoding].encoding == encoding);
+}
+
+/*
+ * Record the encoding that messages are produced in.  Validity is only
+ * asserted, not reported, because some calls happen before we can elog()!
+ */
+void
+SetMessageEncoding(int encoding)
+{
+	Assert(PG_VALID_ENCODING(encoding));
+
+	/* Point at the matching descriptor in the encoding-name table */
+	MessageEncoding = &pg_enc2name_tbl[encoding];
+
+	/* The table is expected to be indexed by encoding number */
+	Assert(pg_enc2name_tbl[encoding].encoding == encoding);
+}
+
+#ifdef ENABLE_NLS
+/*
+ * Issue a single bind_textdomain_codeset() call, mapping a pg_enc value to
+ * the corresponding gettext codeset name.  Returns true on success.  Returns
+ * false when the encoding has no gettext equivalent (e.g. MULE_INTERNAL) or
+ * when gettext itself fails, for instance on out-of-memory; the latter case
+ * is also logged.
+ */
+static bool
+raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
+{
+	bool		elog_ok = (CurrentMemoryContext != NULL);
+	int			i;
+
+	for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
+	{
+		/* Skip entries for other encodings */
+		if (pg_enc2gettext_tbl[i].encoding != encoding)
+			continue;
+
+		if (bind_textdomain_codeset(domainname,
+									pg_enc2gettext_tbl[i].name) != NULL)
+			return true;
+
+		/* Report via elog() only when it is usable; else go to stderr */
+		if (elog_ok)
+			elog(LOG, "bind_textdomain_codeset failed");
+		else
+			write_stderr("bind_textdomain_codeset failed");
+
+		/* Only the first matching entry is tried */
+		break;
+	}
+
+	return false;
+}
+
+/*
+ * Bind a gettext message domain to the codeset corresponding to the database
+ * encoding.  For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
+ * Return the MessageEncoding implied by the new settings.
+ *
+ * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
+ * When that matches the database encoding, we don't need to do anything.  In
+ * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
+ * database encoding, except for the C locale.  (On Windows, we also permit a
+ * discrepancy under the UTF8 encoding.)  For the C locale, explicitly bind
+ * gettext to the right codeset.
+ *
+ * On Windows, gettext defaults to the Windows ANSI code page.  This is a
+ * convenient departure for software that passes the strings to Windows ANSI
+ * APIs, but we don't do that.  Compel gettext to use database encoding or,
+ * failing that, the LC_CTYPE encoding as it would on other platforms.
+ *
+ * This function is called before elog() and palloc() are usable.
+ */
+int
+pg_bind_textdomain_codeset(const char *domainname)
+{
+	bool		elog_ok = (CurrentMemoryContext != NULL);
+	int			encoding = GetDatabaseEncoding();
+	int			new_msgenc;
+
+	/*
+	 * Note the preprocessor structure: off Windows, the bind attempt below
+	 * is guarded by the C/POSIX locale test; on Windows the guard is compiled
+	 * out and the attempt is made unconditionally.
+	 */
+#ifndef WIN32
+	const char *ctype = setlocale(LC_CTYPE, NULL);
+
+	if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
+#endif
+		if (encoding != PG_SQL_ASCII &&
+			raw_pg_bind_textdomain_codeset(domainname, encoding))
+			return encoding;
+
+	/* Fall back to the locale's encoding, or SQL_ASCII if that is unknown */
+	new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
+	if (new_msgenc < 0)
+		new_msgenc = PG_SQL_ASCII;
+
+#ifdef WIN32
+	if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
+		/* On failure, the old message encoding remains valid. */
+		return GetMessageEncoding();
+#endif
+
+	return new_msgenc;
+}
+#endif
+
+/*
+ * Return the numeric identifier of the database encoding.
+ *
+ * The database encoding, also called the server encoding, is the encoding
+ * of data stored in text-like data types.  Affected types include cstring,
+ * text, varchar, name, xml, and json.
+ */
+int
+GetDatabaseEncoding(void)
+{
+	int			db_encoding = DatabaseEncoding->encoding;
+
+	return db_encoding;
+}
+
+/*
+ * Return the name of the database encoding.  The returned string is not a
+ * copy.
+ */
+const char *
+GetDatabaseEncodingName(void)
+{
+	const char *db_encoding_name = DatabaseEncoding->name;
+
+	return db_encoding_name;
+}
+
+/*
+ * SQL-callable: return the database encoding's name as a "name" value.
+ */
+Datum
+getdatabaseencoding(PG_FUNCTION_ARGS)
+{
+	Datum		name_cstring = CStringGetDatum(DatabaseEncoding->name);
+
+	PG_RETURN_DATUM(DirectFunctionCall1(namein, name_cstring));
+}
+
+/*
+ * SQL-callable: return the client encoding's name as a "name" value.
+ */
+Datum
+pg_client_encoding(PG_FUNCTION_ARGS)
+{
+	Datum		name_cstring = CStringGetDatum(ClientEncoding->name);
+
+	PG_RETURN_DATUM(DirectFunctionCall1(namein, name_cstring));
+}
+
+/*
+ * SQL-callable: map an encoding name to its numeric identifier, returning
+ * whatever pg_char_to_encoding() yields for that name.
+ */
+Datum
+PG_char_to_encoding(PG_FUNCTION_ARGS)
+{
+	Name		enc_name = PG_GETARG_NAME(0);
+	int			enc = pg_char_to_encoding(NameStr(*enc_name));
+
+	PG_RETURN_INT32(enc);
+}
+
+/*
+ * SQL-callable: map a numeric encoding identifier to its name, returned as
+ * a "name" value built from whatever pg_encoding_to_char() yields.
+ */
+Datum
+PG_encoding_to_char(PG_FUNCTION_ARGS)
+{
+	int32		enc = PG_GETARG_INT32(0);
+	Datum		name_cstring = CStringGetDatum(pg_encoding_to_char(enc));
+
+	PG_RETURN_DATUM(DirectFunctionCall1(namein, name_cstring));
+}
+
+/*
+ * Return the numeric identifier of the encoding gettext() delivers messages
+ * in.  This often matches the database encoding, but it differs for
+ * SQL_ASCII databases, for processes not attached to a database, and under
+ * a database encoding lacking iconv support (MULE_INTERNAL).
+ */
+int
+GetMessageEncoding(void)
+{
+	int			msg_encoding = MessageEncoding->encoding;
+
+	return msg_encoding;
+}
+
+
+/*
+ * Generic character incrementer function.
+ *
+ * Knowing nothing about the structure of the encoding in use, we bump only
+ * the final byte of the character, one step at a time, until the database
+ * encoding's character verifier accepts the result or the byte reaches 255.
+ * Higher-order bytes are never touched, so runtime does not grow for wider
+ * characters.  (If we did try to do that, we'd need to consider the
+ * likelihood that 255 is not a valid final byte in the encoding.)
+ *
+ * The character is modified in place.  Returns true if a valid successor
+ * was produced, false if the last byte ran out of values.
+ */
+static bool
+pg_generic_charinc(unsigned char *charptr, int len)
+{
+	int			last = len - 1;
+	mbchar_verifier verify_char;
+
+	/* We can just invoke the character verifier directly. */
+	verify_char = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
+
+	while (charptr[last] != 0xFF)
+	{
+		charptr[last]++;
+		if ((*verify_char) (charptr, len) == len)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * UTF-8 character incrementer function.
+ *
+ * For a one-byte character less than 0x7F, we just increment the byte.
+ *
+ * For a multibyte character, every byte but the first must fall between 0x80
+ * and 0xBF; and the first byte must be between 0xC0 and 0xF4.  We increment
+ * the last byte that's not already at its maximum value.  If we can't find a
+ * byte that's less than the maximum allowable value, we simply fail.  We also
+ * need some special-case logic to skip regions used for surrogate pair
+ * handling, as those should not occur in valid UTF-8.
+ *
+ * Note that we don't reset lower-order bytes back to their minimums, since
+ * we can't afford to make an exhaustive search (see make_greater_string).
+ *
+ * charptr points to the character, which is modified in place; length is
+ * its length in bytes.  Returns true if a byte was incremented, false if no
+ * byte could be incremented or the length is not in 1..4.
+ */
+static bool
+pg_utf8_increment(unsigned char *charptr, int length)
+{
+	unsigned char a;
+	unsigned char limit;
+
+	/*
+	 * The cases deliberately fall through from the last byte toward the
+	 * first: each one increments its byte if there is room, and otherwise
+	 * hands over to the next-higher-order byte.
+	 */
+	switch (length)
+	{
+		default:
+			/* reject lengths 5 and 6 for now */
+			return false;
+		case 4:
+			a = charptr[3];
+			if (a < 0xBF)
+			{
+				charptr[3]++;
+				break;
+			}
+			/* FALL THRU */
+		case 3:
+			a = charptr[2];
+			if (a < 0xBF)
+			{
+				charptr[2]++;
+				break;
+			}
+			/* FALL THRU */
+		case 2:
+			a = charptr[1];
+			/* the second byte's upper bound depends on the lead byte */
+			switch (*charptr)
+			{
+				case 0xED:
+					/* stay below the surrogate region */
+					limit = 0x9F;
+					break;
+				case 0xF4:
+					/* stay within the Unicode code point range */
+					limit = 0x8F;
+					break;
+				default:
+					limit = 0xBF;
+					break;
+			}
+			if (a < limit)
+			{
+				charptr[1]++;
+				break;
+			}
+			/* FALL THRU */
+		case 1:
+			a = *charptr;
+			/*
+			 * Refuse to step past the last lead byte of each length class,
+			 * since that would change the character's byte length.
+			 */
+			if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
+				return false;
+			charptr[0]++;
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * EUC-JP character incrementer function.
+ *
+ * If the sequence starts with SS2 (0x8e), it must be a two-byte sequence
+ * representing JIS X 0201 characters with the second byte ranging between
+ * 0xa1 and 0xdf.  We just increment the last byte if it's less than 0xdf,
+ * and otherwise rewrite the whole sequence to 0xa1 0xa1.
+ *
+ * If the sequence starts with SS3 (0x8f), it must be a three-byte sequence
+ * in which the last two bytes range between 0xa1 and 0xfe.  The last byte
+ * is incremented if possible, otherwise the second-to-last byte.
+ *
+ * If the sequence starts with a value other than the above and its MSB
+ * is set, it must be a two-byte sequence representing JIS X 0208 characters
+ * with both bytes ranging between 0xa1 and 0xfe.  The last byte is
+ * incremented if possible, otherwise the second-to-last byte.
+ *
+ * Otherwise, the sequence is a single-byte ASCII character. It is
+ * incremented up to 0x7f.
+ *
+ * In every multibyte case, a byte found below 0xa1 is raised to 0xa1 rather
+ * than incremented.
+ *
+ * charptr points to the character, which is modified in place; length is
+ * its length in bytes.  Returns true if the character was changed, false if
+ * the length does not fit the lead byte or no larger value is available.
+ */
+static bool
+pg_eucjp_increment(unsigned char *charptr, int length)
+{
+	unsigned char c1,
+				c2;
+	int			i;
+
+	c1 = *charptr;
+
+	switch (c1)
+	{
+		case SS2:				/* JIS X 0201 */
+			if (length != 2)
+				return false;
+
+			c2 = charptr[1];
+
+			/* at or past the top of the range: rewrite both bytes */
+			if (c2 >= 0xdf)
+				charptr[0] = charptr[1] = 0xa1;
+			else if (c2 < 0xa1)
+				charptr[1] = 0xa1;
+			else
+				charptr[1]++;
+			break;
+
+		case SS3:				/* JIS X 0212 */
+			if (length != 3)
+				return false;
+
+			/* try the last byte, then the middle one; never the SS3 byte */
+			for (i = 2; i > 0; i--)
+			{
+				c2 = charptr[i];
+				if (c2 < 0xa1)
+				{
+					charptr[i] = 0xa1;
+					return true;
+				}
+				else if (c2 < 0xfe)
+				{
+					charptr[i]++;
+					return true;
+				}
+			}
+
+			/* Out of 3-byte code region */
+			return false;
+
+		default:
+			if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
+			{
+				if (length != 2)
+					return false;
+
+				/* try the second byte, then the first */
+				for (i = 1; i >= 0; i--)
+				{
+					c2 = charptr[i];
+					if (c2 < 0xa1)
+					{
+						charptr[i] = 0xa1;
+						return true;
+					}
+					else if (c2 < 0xfe)
+					{
+						charptr[i]++;
+						return true;
+					}
+				}
+
+				/* Out of 2 byte code region */
+				return false;
+			}
+			else
+			{					/* ASCII, single byte */
+				if (c1 > 0x7e)
+					return false;
+				(*charptr)++;
+			}
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * Return the character incrementer that matches the current database
+ * encoding.
+ *
+ * UTF8 and EUC_JP have dedicated incrementers; every other encoding gets the
+ * generic one.  Eventually it might be best to add a field to
+ * pg_wchar_table[], but for now the encoding is simply tested here.
+ */
+mbcharacter_incrementer
+pg_database_encoding_character_incrementer(void)
+{
+	int			dbenc = GetDatabaseEncoding();
+
+	if (dbenc == PG_UTF8)
+		return pg_utf8_increment;
+
+	if (dbenc == PG_EUC_JP)
+		return pg_eucjp_increment;
+
+	return pg_generic_charinc;
+}
+
+/*
+ * Return the maxmblen entry of pg_wchar_table[] for the current database
+ * encoding, i.e. the maximum length of a character in that encoding.
+ */
+int
+pg_database_encoding_max_length(void)
+{
+	int			dbenc = GetDatabaseEncoding();
+
+	return pg_wchar_table[dbenc].maxmblen;
+}
+
+/*
+ * Check that mbstr (len bytes) is validly encoded in the current database
+ * encoding.  This is pg_verify_mbstr() with the encoding argument filled in
+ * from GetDatabaseEncoding(); see that function for the meaning of noError
+ * and of the result.
+ */
+bool
+pg_verifymbstr(const char *mbstr, int len, bool noError)
+{
+	int			dbenc = GetDatabaseEncoding();
+
+	return pg_verify_mbstr(dbenc, mbstr, len, noError);
+}
+
+/*
+ * Check that mbstr (len bytes) is validly encoded in the specified encoding.
+ *
+ * Returns true when the encoding's mbverifystr routine accepts all len
+ * bytes.  Otherwise returns false if noError is set, or hands the first
+ * unaccepted byte and the remaining length to report_invalid_encoding().
+ */
+bool
+pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
+{
+	const unsigned char *bytes = (const unsigned char *) mbstr;
+	int			valid_bytes;
+
+	Assert(PG_VALID_ENCODING(encoding));
+
+	valid_bytes = pg_wchar_table[encoding].mbverifystr(bytes, len);
+
+	if (valid_bytes == len)
+		return true;
+
+	if (noError)
+		return false;
+
+	report_invalid_encoding(encoding, mbstr + valid_bytes, len - valid_bytes);
+
+	/* same result as the success path, should control ever get here */
+	return true;
+}
+
+/*
+ * Verify mbstr to make sure that it is validly encoded in the specified
+ * encoding.
+ *
+ * mbstr is not necessarily zero terminated; length of mbstr is
+ * specified by len.
+ *
+ * If OK, return length of string in the encoding.
+ * If a problem is found, return -1 when noError is
+ * true; when noError is false, ereport() a descriptive message.
+ *
+ * Note: We cannot use the faster encoding-specific mbverifystr() function
+ * here, because we need to count the number of characters in the string.
+ *
+ * A zero byte inside the first len bytes is rejected in every encoding.
+ *
+ * The error paths below have no explicit return after
+ * report_invalid_encoding(): that function ends in ereport(ERROR) and is
+ * expected not to come back.
+ */
+int
+pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
+{
+	mbchar_verifier mbverifychar;
+	int			mb_len;			/* characters accepted so far */
+
+	Assert(PG_VALID_ENCODING(encoding));
+
+	/*
+	 * In single-byte encodings, we need only reject nulls (\0).
+	 *
+	 * Every byte is one character there, so when no null is found the
+	 * character count is simply len.
+	 */
+	if (pg_encoding_max_length(encoding) <= 1)
+	{
+		const char *nullpos = memchr(mbstr, 0, len);
+
+		if (nullpos == NULL)
+			return len;
+		if (noError)
+			return -1;
+		report_invalid_encoding(encoding, nullpos, 1);
+	}
+
+	/* fetch function pointer just once */
+	mbverifychar = pg_wchar_table[encoding].mbverifychar;
+
+	mb_len = 0;
+
+	while (len > 0)
+	{
+		int			l;			/* result of the per-character verifier */
+
+		/*
+		 * fast path for ASCII-subset characters
+		 *
+		 * A byte without the high bit counts as one character without
+		 * calling the verifier, unless it is a zero byte, which is an error.
+		 */
+		if (!IS_HIGHBIT_SET(*mbstr))
+		{
+			if (*mbstr != '\0')
+			{
+				mb_len++;
+				mbstr++;
+				len--;
+				continue;
+			}
+			if (noError)
+				return -1;
+			report_invalid_encoding(encoding, mbstr, len);
+		}
+
+		l = (*mbverifychar) ((const unsigned char *) mbstr, len);
+
+		if (l < 0)				/* negative result: character is invalid */
+		{
+			if (noError)
+				return -1;
+			report_invalid_encoding(encoding, mbstr, len);
+		}
+
+		mbstr += l;				/* skip the l bytes just verified */
+		len -= l;
+		mb_len++;
+	}
+	return mb_len;
+}
+
+/*
+ * check_encoding_conversion_args: sanity-check the arguments handed to an
+ * encoding conversion function
+ *
+ * src_encoding and dest_encoding are the actual encoding IDs, and len is the
+ * input length.  Each "expected" argument is either the encoding ID the
+ * conversion function is written for, or -1 (any negative value) when the
+ * caller will decide for itself whether the actual ID is acceptable.
+ *
+ * The checks run in a fixed order: source ID validity, source match,
+ * destination ID validity, destination match, and finally len >= 0.
+ *
+ * Note: these errors are not really user-facing, so elog rather than
+ * ereport seems sufficient.  The "expected" IDs are trusted to be valid
+ * encoding IDs; the actual ones are not, which is why each is validated
+ * before being used as an index into pg_enc2name_tbl[].
+ */
+void
+check_encoding_conversion_args(int src_encoding,
+							   int dest_encoding,
+							   int len,
+							   int expected_src_encoding,
+							   int expected_dest_encoding)
+{
+	/* source side */
+	if (!PG_VALID_ENCODING(src_encoding))
+	{
+		elog(ERROR, "invalid source encoding ID: %d", src_encoding);
+	}
+	if (expected_src_encoding >= 0 && src_encoding != expected_src_encoding)
+	{
+		const char *wanted = pg_enc2name_tbl[expected_src_encoding].name;
+		const char *actual = pg_enc2name_tbl[src_encoding].name;
+
+		elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
+			 wanted, actual);
+	}
+
+	/* destination side */
+	if (!PG_VALID_ENCODING(dest_encoding))
+	{
+		elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
+	}
+	if (expected_dest_encoding >= 0 && dest_encoding != expected_dest_encoding)
+	{
+		const char *wanted = pg_enc2name_tbl[expected_dest_encoding].name;
+		const char *actual = pg_enc2name_tbl[dest_encoding].name;
+
+		elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
+			 wanted, actual);
+	}
+
+	/* input length */
+	if (len < 0)
+	{
+		elog(ERROR, "encoding conversion length must not be negative");
+	}
+}
+
+/*
+ * report_invalid_encoding: complain about invalid multibyte character
+ *
+ * note: len is remaining length of string, not length of character;
+ * len must be greater than zero, as we always examine the first byte.
+ */
+void
+report_invalid_encoding(int encoding, const char *mbstr, int len)
+{
+	int			l = pg_encoding_mblen(encoding, mbstr);
+	char		buf[8 * 5 + 1];
+	char	   *p = buf;
+	int			j,
+				jlimit;
+
+	jlimit = Min(l, len);
+	jlimit = Min(jlimit, 8);	/* prevent buffer overrun */
+
+	for (j = 0; j < jlimit; j++)
+	{
+		p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
+		if (j < jlimit - 1)
+			p += sprintf(p, " ");
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+			 errmsg("invalid byte sequence for encoding \"%s\": %s",
+					pg_enc2name_tbl[encoding].name,
+					buf)));
+}
+
+/*
+ * report_untranslatable_char: complain about untranslatable character
+ *
+ * note: len is remaining length of string, not length of character;
+ * len must be greater than zero, as we always examine the first byte.
+ */
+void
+report_untranslatable_char(int src_encoding, int dest_encoding,
+						   const char *mbstr, int len)
+{
+	int			l = pg_encoding_mblen(src_encoding, mbstr);
+	char		buf[8 * 5 + 1];
+	char	   *p = buf;
+	int			j,
+				jlimit;
+
+	jlimit = Min(l, len);
+	jlimit = Min(jlimit, 8);	/* prevent buffer overrun */
+
+	for (j = 0; j < jlimit; j++)
+	{
+		p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
+		if (j < jlimit - 1)
+			p += sprintf(p, " ");
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+			 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
+					buf,
+					pg_enc2name_tbl[src_encoding].name,
+					pg_enc2name_tbl[dest_encoding].name)));
+}
+
+
+#ifdef WIN32
+/*
+ * Convert from MessageEncoding to a palloc'ed, null-terminated utf16
+ * string. The character length is also passed to utf16len if not
+ * null. Returns NULL iff failed. Before MessageEncoding initialization, "str"
+ * should be ASCII-only; this will function as though MessageEncoding is UTF8.
+ *
+ * len is the number of bytes of str to convert.  The result buffer is
+ * allocated with room for len + 1 WCHARs.
+ *
+ * NULL is returned in two situations: the message encoding is SQL_ASCII, or
+ * the input is non-empty but the conversion produced zero WCHARs.  In the
+ * second case the result buffer is pfree'd before returning.
+ */
+WCHAR *
+pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
+{
+	int			msgenc = GetMessageEncoding();
+	WCHAR	   *utf16;
+	int			dstlen;			/* WCHARs written by MultiByteToWideChar */
+	UINT		codepage;
+
+	if (msgenc == PG_SQL_ASCII)
+		/* No conversion is possible, and SQL_ASCII is never utf16. */
+		return NULL;
+
+	codepage = pg_enc2name_tbl[msgenc].codepage;
+
+	/*
+	 * Use MultiByteToWideChar directly if there is a corresponding codepage,
+	 * or double conversion through UTF8 if not.  Double conversion is needed,
+	 * for example, in an ENCODING=LATIN8, LC_CTYPE=C database.
+	 */
+	if (codepage != 0)
+	{
+		utf16 = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
+		dstlen = MultiByteToWideChar(codepage, 0, str, len, utf16, len);
+		utf16[dstlen] = (WCHAR) 0;
+	}
+	else
+	{
+		char	   *utf8;
+
+		/*
+		 * XXX pg_do_encoding_conversion() requires a transaction.  In the
+		 * absence of one, hope for the input to be valid UTF8.
+		 *
+		 * When the conversion hands back a buffer other than str, len is
+		 * recomputed from that buffer with strlen(), and the buffer is
+		 * pfree'd further down once the UTF16 copy has been made.
+		 */
+		if (IsTransactionState())
+		{
+			utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
+													  len,
+													  msgenc,
+													  PG_UTF8);
+			if (utf8 != str)
+				len = strlen(utf8);
+		}
+		else
+			utf8 = (char *) str;
+
+		utf16 = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
+		dstlen = MultiByteToWideChar(CP_UTF8, 0, utf8, len, utf16, len);
+		utf16[dstlen] = (WCHAR) 0;
+
+		if (utf8 != str)
+			pfree(utf8);
+	}
+
+	if (dstlen == 0 && len > 0)
+	{
+		pfree(utf16);
+		return NULL;			/* error */
+	}
+
+	if (utf16len)
+		*utf16len = dstlen;
+	return utf16;
+}
+
+#endif							/* WIN32 */
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/stringinfo_mb.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/stringinfo_mb.c
new file mode 100644
index 00000000000..67a958d72be
--- /dev/null
+++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/stringinfo_mb.c
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * stringinfo_mb.c
+ *		Multibyte encoding-aware additional StringInfo facilities
+ *
+ * This is separate from common/stringinfo.c so that frontend users
+ * of that file need not pull in unnecessary multibyte-encoding support
+ * code.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mb/stringinfo_mb.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "mb/stringinfo_mb.h"
+#include "mb/pg_wchar.h"
+
+
+/*
+ * appendStringInfoStringQuoted
+ *
+ * Append s to str as a single-quoted literal: a quote is written first,
+ * every single quote inside s is doubled, and a closing quote is written
+ * last.
+ *
+ * If maxlen is negative, or not smaller than strlen(s), the whole of s is
+ * used.  Otherwise s is first clipped with pg_mbcliplen() to at most maxlen
+ * bytes, and "..." is inserted before the closing quote to show that the
+ * copy is incomplete.
+ */
+void
+appendStringInfoStringQuoted(StringInfo str, const char *s, int maxlen)
+{
+	char	   *clipped = NULL; /* private truncated copy, if one was made */
+	const char *pending;		/* first byte not yet appended to str */
+	const char *scan;			/* where the next quote search starts */
+	const char *quote;
+	int			slen;
+	bool		truncated;
+
+	Assert(str != NULL);
+
+	slen = strlen(s);
+	truncated = (maxlen >= 0 && maxlen < slen);
+
+	if (truncated)
+	{
+		clipped = pnstrdup(s, pg_mbcliplen(s, slen, maxlen));
+		pending = clipped;
+	}
+	else
+		pending = s;
+
+	scan = pending;
+
+	appendStringInfoCharMacro(str, '\'');
+
+	for (;;)
+	{
+		quote = strchr(scan, '\'');
+		if (quote == NULL)
+			break;
+
+		/* emit everything up to and including this quote */
+		appendBinaryStringInfoNT(str, pending, quote - pending + 1);
+
+		/*
+		 * Leave the quote at the head of the pending text so that it gets
+		 * emitted a second time, but resume searching just after it.
+		 */
+		pending = quote;
+		scan = quote + 1;
+	}
+
+	/* emit the remainder and close the literal */
+	if (truncated)
+		appendStringInfo(str, "%s...'", pending);
+	else
+		appendStringInfo(str, "%s'", pending);
+
+	if (clipped != NULL)
+		pfree(clipped);
+}
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/wstrcmp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/wstrcmp.c
new file mode 100644
index 00000000000..dad3ae023a3
--- /dev/null
+++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/wstrcmp.c
@@ -0,0 +1,47 @@
+/*
+ * src/backend/utils/mb/wstrcmp.c
+ *
+ *-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	  notice, this list of conditions and the following disclaimer in the
+ *	  documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *	  may be used to endorse or promote products derived from this software
+ *	  without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/* can be used in either frontend or backend */
+#include "postgres_fe.h"
+
+#include "mb/pg_wchar.h"
+
+/*
+ * pg_char_and_wchar_strcmp: strcmp()-style comparison of a null-terminated
+ * char string (s1) with a zero-terminated pg_wchar string (s2).
+ *
+ * Returns 0 if both strings match up to and including their terminators,
+ * otherwise the difference between the first mismatching pair (byte of s1
+ * minus element of s2).
+ *
+ * Each byte of s1 is widened through unsigned char before it is compared.
+ * Casting a plain char straight to pg_wchar would sign-extend bytes >= 0x80
+ * where char is signed, making them compare unequal to the matching
+ * pg_wchar value; the return statement below, which already reads the byte
+ * as unsigned char, would then report a difference of zero and the rest of
+ * the strings would never be looked at.  pg_char_and_wchar_strncmp() widens
+ * its bytes the same way.
+ */
+int
+pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2)
+{
+	while ((pg_wchar) ((unsigned char) *s1) == *s2++)
+		if (*s1++ == 0)
+			return 0;
+	return *(const unsigned char *) s1 - *(const pg_wchar *) (s2 - 1);
+}
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/wstrncmp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/wstrncmp.c
new file mode 100644
index 00000000000..ea4823fc6f8
--- /dev/null
+++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/wstrncmp.c
@@ -0,0 +1,77 @@
+/*
+ * src/backend/utils/mb/wstrncmp.c
+ *
+ *
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from FreeBSD 2.2.1-RELEASE software.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *	  notice, this list of conditions and the following disclaimer in the
+ *	  documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *	  may be used to endorse or promote products derived from this software
+ *	  without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/* can be used in either frontend or backend */
+#include "postgres_fe.h"
+
+#include "mb/pg_wchar.h"
+
+/*
+ * pg_wchar_strncmp: strncmp() for zero-terminated pg_wchar strings.
+ *
+ * Compares at most n elements.  Returns 0 if no difference is found before
+ * n elements or a terminating zero have been consumed, otherwise the
+ * difference between the first mismatching pair (s1 element minus s2
+ * element).
+ */
+int
+pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n)
+{
+	size_t		i;
+
+	for (i = 0; i < n; i++)
+	{
+		if (s1[i] != s2[i])
+			return (s1[i] - s2[i]);
+
+		/* equal and zero: both strings end here */
+		if (s1[i] == 0)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * pg_char_and_wchar_strncmp: strncmp()-style comparison of a char string
+ * (s1) with a pg_wchar string (s2).
+ *
+ * Compares at most n elements.  Every byte of s1 is read as unsigned char
+ * and widened to pg_wchar before the comparison.  Returns 0 if no
+ * difference is found before n elements or a terminating zero have been
+ * consumed, otherwise the difference between the first mismatching pair
+ * (widened s1 byte minus s2 element).
+ */
+int
+pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n)
+{
+	size_t		i;
+
+	for (i = 0; i < n; i++)
+	{
+		pg_wchar	wide = (pg_wchar) ((unsigned char) s1[i]);
+
+		if (wide != s2[i])
+			return (wide - s2[i]);
+
+		/* equal and zero: both strings end here */
+		if (s1[i] == 0)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * pg_wchar_strlen: strlen() for pg_wchar strings.
+ *
+ * Returns the number of elements that precede the first zero element of
+ * str.
+ */
+size_t
+pg_wchar_strlen(const pg_wchar *str)
+{
+	size_t		nchars = 0;
+
+	while (str[nchars] != 0)
+		nchars++;
+
+	return nchars;
+}
author	vvvv <[email protected]>	2024-11-07 12:29:36 +0300
committer	vvvv <[email protected]>	2024-11-07 13:49:47 +0300
commit	d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
tree	b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb
parent	13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)