diff options
| author | vvvv <[email protected]> | 2025-10-24 14:59:50 +0300 | 
|---|---|---|
| committer | vvvv <[email protected]> | 2025-10-24 15:29:24 +0300 | 
| commit | 5b0d18921f2a509d8363c40a5ca208dfed026287 (patch) | |
| tree | d1369c696d3a9e9a65b68d9208e198269a48cfbc /yql/essentials/parser/pg_wrapper/postgresql/src/common | |
| parent | e7fbdb6e81ae4a296e710b133de7a2a04b31bbc4 (diff) | |
YQL-20567 upgrade PG up to 16.10 & fix instructions
init
commit_hash:81aba13295273281d19d2d332a48ff1c44977447
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src/common')
4 files changed, 113 insertions, 12 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/jsonapi.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/jsonapi.c index 168001b0f3b..35d0e53b398 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/jsonapi.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/jsonapi.c @@ -721,8 +721,11 @@ json_lex_string(JsonLexContext *lex)  	} while (0)  #define FAIL_AT_CHAR_END(code) \  	do { \ -		lex->token_terminator = \ -			s + pg_encoding_mblen_bounded(lex->input_encoding, s); \ +		ptrdiff_t	remaining = end - s; \ +		int			charlen; \ +		charlen = pg_encoding_mblen_or_incomplete(lex->input_encoding, \ +												  s, remaining); \ +		lex->token_terminator = (charlen <= remaining) ? s + charlen : end; \  		return code; \  	} while (0) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c index 3cf498866a5..e7e909a0c87 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c @@ -21,8 +21,13 @@   */  #ifndef FRONTEND  #include "postgres.h" +#include "utils/memutils.h"  #else  #include "postgres_fe.h" + +/* It's possible we could use a different value for this in frontend code */ +#define MaxAllocSize	((Size) 0x3fffffff) /* 1 gigabyte - 1 */ +  #endif  #include "common/saslprep.h" @@ -1077,6 +1082,8 @@ pg_saslprep(const char *input, char **output)  	input_size = pg_utf8_string_len(input);  	if (input_size < 0)  		return SASLPREP_INVALID_UTF8; +	if (input_size >= MaxAllocSize / sizeof(pg_wchar)) +		goto oom;  	input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));  	if (!input_chars) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/scram-common.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/scram-common.c index 6448564a08c..38c97ef0b4c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/scram-common.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/scram-common.c @@ -74,7 +74,7 @@ scram_SaltedPassword(const char *password,  	memcpy(result, Ui_prev, key_length);  	/* Subsequent iterations */ -	for (i = 2; i <= iterations; i++) +	for (i = 1; i < iterations; i++)  	{  #ifndef FRONTEND  		/* @@ -200,8 +200,7 @@ scram_ServerKey(const uint8 *salted_password,   *   * The password should already have been processed with SASLprep, if necessary!   * - * If iterations is 0, default number of iterations is used.  The result is - * palloc'd or malloc'd, so caller is responsible for freeing it. + * The result is palloc'd or malloc'd, so caller is responsible for freeing it.   *   * On error, returns NULL and sets *errstr to point to a message about the   * error details. diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c index fbac11deb4d..82ea3a4e834 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c @@ -12,11 +12,32 @@   */  #include "c.h" +#include <limits.h> +  #include "mb/pg_wchar.h"  #include "utils/ascii.h"  /* + * In today's multibyte encodings other than UTF8, this two-byte sequence + * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0. + * + * For historical reasons, several verifychar implementations opt to reject + * this pair specifically.  Byte pair range constraints, in encoding + * originator documentation, always excluded this pair.  No core conversion + * could translate it.  However, longstanding verifychar implementations + * accepted any non-NUL byte.  big5_to_euc_tw and big5_to_mic even translate + * pairs not valid per encoding originator documentation.  To avoid tightening + * core or non-core conversions in a security patch, we sought this one pair. + * + * PQescapeString() historically used spaces for BYTE1; many other values + * could suffice for BYTE1. + */ +#define NONUTF8_INVALID_BYTE0 (0x8d) +#define NONUTF8_INVALID_BYTE1 (' ') + + +/*   * Operations on multi-byte encodings are driven by a table of helper   * functions.   * @@ -1526,6 +1547,11 @@ pg_big5_verifychar(const unsigned char *s, int len)  	if (len < l)  		return -1; +	if (l == 2 && +		s[0] == NONUTF8_INVALID_BYTE0 && +		s[1] == NONUTF8_INVALID_BYTE1) +		return -1; +  	while (--l > 0)  	{  		if (*++s == '\0') @@ -1575,6 +1601,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)  	if (len < l)  		return -1; +	if (l == 2 && +		s[0] == NONUTF8_INVALID_BYTE0 && +		s[1] == NONUTF8_INVALID_BYTE1) +		return -1; +  	while (--l > 0)  	{  		if (*++s == '\0') @@ -1624,6 +1655,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)  	if (len < l)  		return -1; +	if (l == 2 && +		s[0] == NONUTF8_INVALID_BYTE0 && +		s[1] == NONUTF8_INVALID_BYTE1) +		return -1; +  	while (--l > 0)  	{  		if (*++s == '\0') @@ -2069,6 +2105,19 @@ pg_utf8_islegal(const unsigned char *source, int length)  /* + * Fills the provided buffer with two bytes such that: + *   pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0 + */ +void +pg_encoding_set_invalid(int encoding, char *dst) +{ +	Assert(pg_encoding_max_length(encoding) > 1); + +	dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0); +	dst[1] = NONUTF8_INVALID_BYTE1; +} + +/*   *-------------------------------------------------------------------   * encoding info table   * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h) @@ -2122,10 +2171,27 @@ const pg_wchar_tbl pg_wchar_table[] = {  /*   * Returns the byte length of a multibyte character.   * - * Caution: when dealing with text that is not certainly valid in the - * specified encoding, the result may exceed the actual remaining - * string length.  Callers that are not prepared to deal with that - * should use pg_encoding_mblen_bounded() instead. + * Choose "mblen" functions based on the input string characteristics. + * pg_encoding_mblen() can be used when ANY of these conditions are met: + * + * - The input string is zero-terminated + * + * - The input string is known to be valid in the encoding (e.g., string + *   converted from database encoding) + * + * - The encoding is not GB18030 (e.g., when only database encodings are + *   passed to 'encoding' parameter) + * + * encoding==GB18030 requires examining up to two bytes to determine character + * length.  Therefore, callers satisfying none of those conditions must use + * pg_encoding_mblen_or_incomplete() instead, as access to mbstr[1] cannot be + * guaranteed to be within allocation bounds. + * + * When dealing with text that is not certainly valid in the specified + * encoding, the result may exceed the actual remaining string length. + * Callers that are not prepared to deal with that should use Min(remaining, + * pg_encoding_mblen_or_incomplete()).  For zero-terminated strings, that and + * pg_encoding_mblen_bounded() are interchangeable.   */  int  pg_encoding_mblen(int encoding, const char *mbstr) @@ -2136,8 +2202,28 @@ pg_encoding_mblen(int encoding, const char *mbstr)  }  /* - * Returns the byte length of a multibyte character; but not more than - * the distance to end of string. + * Returns the byte length of a multibyte character (possibly not + * zero-terminated), or INT_MAX if too few bytes remain to determine a length. + */ +int +pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, +								size_t remaining) +{ +	/* +	 * Define zero remaining as too few, even for single-byte encodings. +	 * pg_gb18030_mblen() reads one or two bytes; single-byte encodings read +	 * zero; others read one. +	 */ +	if (remaining < 1 || +		(encoding == PG_GB18030 && IS_HIGHBIT_SET(*mbstr) && remaining < 2)) +		return INT_MAX; +	return pg_encoding_mblen(encoding, mbstr); +} + +/* + * Returns the byte length of a multibyte character; but not more than the + * distance to the terminating zero byte.  For input that might lack a + * terminating zero, use Min(remaining, pg_encoding_mblen_or_incomplete()).   */  int  pg_encoding_mblen_bounded(int encoding, const char *mbstr) @@ -2190,5 +2276,11 @@ pg_encoding_max_length(int encoding)  {  	Assert(PG_VALID_ENCODING(encoding)); -	return pg_wchar_table[encoding].maxmblen; +	/* +	 * Check for the encoding despite the assert, due to some mingw versions +	 * otherwise issuing bogus warnings. +	 */ +	return PG_VALID_ENCODING(encoding) ? +		pg_wchar_table[encoding].maxmblen : +		pg_wchar_table[PG_SQL_ASCII].maxmblen;  }  | 
