diff options
| author | vvvv <[email protected]> | 2024-11-07 12:29:36 +0300 | 
|---|---|---|
| committer | vvvv <[email protected]> | 2024-11-07 13:49:47 +0300 | 
| commit | d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch) | |
| tree | b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/common | |
| parent | 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff) | |
Moved other yql/essentials libs YQL-19206
init
commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src/common')
46 files changed, 17968 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/archive.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/archive.c new file mode 100644 index 00000000000..641a58ee888 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/archive.c @@ -0,0 +1,60 @@ +/*------------------------------------------------------------------------- + * + * archive.c + *	  Common WAL archive routines + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/archive.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/archive.h" +#include "common/percentrepl.h" + +/* + * BuildRestoreCommand + * + * Builds a restore command to retrieve a file from WAL archives, replacing + * the supported aliases with values supplied by the caller as defined by + * the GUC parameter restore_command: xlogpath for %p, xlogfname for %f and + * lastRestartPointFname for %r. + * + * The result is a palloc'd string for the restore command built.  The + * caller is responsible for freeing it.  If any of the required arguments + * is NULL and that the corresponding alias is found in the command given + * by the caller, then an error is thrown. 
+ */ +char * +BuildRestoreCommand(const char *restoreCommand, +					const char *xlogpath, +					const char *xlogfname, +					const char *lastRestartPointFname) +{ +	char	   *nativePath = NULL; +	char	   *result; + +	if (xlogpath) +	{ +		nativePath = pstrdup(xlogpath); +		make_native_path(nativePath); +	} + +	result = replace_percent_placeholders(restoreCommand, "restore_command", "frp", +										  xlogfname, lastRestartPointFname, nativePath); + +	if (nativePath) +		pfree(nativePath); + +	return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/base64.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/base64.c new file mode 100644 index 00000000000..ec4eb49382c --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/base64.c @@ -0,0 +1,242 @@ +/*------------------------------------------------------------------------- + * + * base64.c + *	  Encoding and decoding routines for base64 without whitespace. + * + * Copyright (c) 2001-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + *	  src/common/base64.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/base64.h" + +/* + * BASE64 + */ + +static const char _base64[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static const int8 b64lookup[128] = { +	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, +	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, +	-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, +	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, +	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, +	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, +}; + +/* + * 
pg_b64_encode + * + * Encode into base64 the given string.  Returns the length of the encoded + * string, and -1 in the event of an error with the result buffer zeroed + * for safety. + */ +int +pg_b64_encode(const char *src, int len, char *dst, int dstlen) +{ +	char	   *p; +	const char *s, +			   *end = src + len; +	int			pos = 2; +	uint32		buf = 0; + +	s = src; +	p = dst; + +	while (s < end) +	{ +		buf |= (unsigned char) *s << (pos << 3); +		pos--; +		s++; + +		/* write it out */ +		if (pos < 0) +		{ +			/* +			 * Leave if there is an overflow in the area allocated for the +			 * encoded string. +			 */ +			if ((p - dst + 4) > dstlen) +				goto error; + +			*p++ = _base64[(buf >> 18) & 0x3f]; +			*p++ = _base64[(buf >> 12) & 0x3f]; +			*p++ = _base64[(buf >> 6) & 0x3f]; +			*p++ = _base64[buf & 0x3f]; + +			pos = 2; +			buf = 0; +		} +	} +	if (pos != 2) +	{ +		/* +		 * Leave if there is an overflow in the area allocated for the encoded +		 * string. +		 */ +		if ((p - dst + 4) > dstlen) +			goto error; + +		*p++ = _base64[(buf >> 18) & 0x3f]; +		*p++ = _base64[(buf >> 12) & 0x3f]; +		*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '='; +		*p++ = '='; +	} + +	Assert((p - dst) <= dstlen); +	return p - dst; + +error: +	memset(dst, 0, dstlen); +	return -1; +} + +/* + * pg_b64_decode + * + * Decode the given base64 string.  Returns the length of the decoded + * string on success, and -1 in the event of an error with the result + * buffer zeroed for safety. 
+ */ +int +pg_b64_decode(const char *src, int len, char *dst, int dstlen) +{ +	const char *srcend = src + len, +			   *s = src; +	char	   *p = dst; +	char		c; +	int			b = 0; +	uint32		buf = 0; +	int			pos = 0, +				end = 0; + +	while (s < srcend) +	{ +		c = *s++; + +		/* Leave if a whitespace is found */ +		if (c == ' ' || c == '\t' || c == '\n' || c == '\r') +			goto error; + +		if (c == '=') +		{ +			/* end sequence */ +			if (!end) +			{ +				if (pos == 2) +					end = 1; +				else if (pos == 3) +					end = 2; +				else +				{ +					/* +					 * Unexpected "=" character found while decoding base64 +					 * sequence. +					 */ +					goto error; +				} +			} +			b = 0; +		} +		else +		{ +			b = -1; +			if (c > 0 && c < 127) +				b = b64lookup[(unsigned char) c]; +			if (b < 0) +			{ +				/* invalid symbol found */ +				goto error; +			} +		} +		/* add it to buffer */ +		buf = (buf << 6) + b; +		pos++; +		if (pos == 4) +		{ +			/* +			 * Leave if there is an overflow in the area allocated for the +			 * decoded string. +			 */ +			if ((p - dst + 1) > dstlen) +				goto error; +			*p++ = (buf >> 16) & 255; + +			if (end == 0 || end > 1) +			{ +				/* overflow check */ +				if ((p - dst + 1) > dstlen) +					goto error; +				*p++ = (buf >> 8) & 255; +			} +			if (end == 0 || end > 2) +			{ +				/* overflow check */ +				if ((p - dst + 1) > dstlen) +					goto error; +				*p++ = buf & 255; +			} +			buf = 0; +			pos = 0; +		} +	} + +	if (pos != 0) +	{ +		/* +		 * base64 end sequence is invalid.  Input data is missing padding, is +		 * truncated or is otherwise corrupted. +		 */ +		goto error; +	} + +	Assert((p - dst) <= dstlen); +	return p - dst; + +error: +	memset(dst, 0, dstlen); +	return -1; +} + +/* + * pg_b64_enc_len + * + * Returns to caller the length of the string if it were encoded with + * base64 based on the length provided by caller.  This is useful to + * estimate how large a buffer allocation needs to be done before doing + * the actual encoding. 
+ */ +int +pg_b64_enc_len(int srclen) +{ +	/* 3 bytes will be converted to 4 */ +	return (srclen + 2) / 3 * 4; +} + +/* + * pg_b64_dec_len + * + * Returns to caller the length of the string if it were to be decoded + * with base64, based on the length given by caller.  This is useful to + * estimate how large a buffer allocation needs to be done before doing + * the actual decoding. + */ +int +pg_b64_dec_len(int srclen) +{ +	return (srclen * 3) >> 2; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/checksum_helper.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/checksum_helper.c new file mode 100644 index 00000000000..21ff8954fd8 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/checksum_helper.c @@ -0,0 +1,232 @@ +/*------------------------------------------------------------------------- + * + * checksum_helper.c + *	  Compute a checksum of any of various types using common routines + * + * Portions Copyright (c) 2016-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *		  src/common/checksum_helper.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/checksum_helper.h" + +/* + * If 'name' is a recognized checksum type, set *type to the corresponding + * constant and return true. Otherwise, set *type to CHECKSUM_TYPE_NONE and + * return false. 
+ */ +bool +pg_checksum_parse_type(char *name, pg_checksum_type *type) +{ +	pg_checksum_type result_type = CHECKSUM_TYPE_NONE; +	bool		result = true; + +	if (pg_strcasecmp(name, "none") == 0) +		result_type = CHECKSUM_TYPE_NONE; +	else if (pg_strcasecmp(name, "crc32c") == 0) +		result_type = CHECKSUM_TYPE_CRC32C; +	else if (pg_strcasecmp(name, "sha224") == 0) +		result_type = CHECKSUM_TYPE_SHA224; +	else if (pg_strcasecmp(name, "sha256") == 0) +		result_type = CHECKSUM_TYPE_SHA256; +	else if (pg_strcasecmp(name, "sha384") == 0) +		result_type = CHECKSUM_TYPE_SHA384; +	else if (pg_strcasecmp(name, "sha512") == 0) +		result_type = CHECKSUM_TYPE_SHA512; +	else +		result = false; + +	*type = result_type; +	return result; +} + +/* + * Get the canonical human-readable name corresponding to a checksum type. + */ +char * +pg_checksum_type_name(pg_checksum_type type) +{ +	switch (type) +	{ +		case CHECKSUM_TYPE_NONE: +			return "NONE"; +		case CHECKSUM_TYPE_CRC32C: +			return "CRC32C"; +		case CHECKSUM_TYPE_SHA224: +			return "SHA224"; +		case CHECKSUM_TYPE_SHA256: +			return "SHA256"; +		case CHECKSUM_TYPE_SHA384: +			return "SHA384"; +		case CHECKSUM_TYPE_SHA512: +			return "SHA512"; +	} + +	Assert(false); +	return "???"; +} + +/* + * Initialize a checksum context for checksums of the given type. + * Returns 0 for a success, -1 for a failure. 
+ */ +int +pg_checksum_init(pg_checksum_context *context, pg_checksum_type type) +{ +	context->type = type; + +	switch (type) +	{ +		case CHECKSUM_TYPE_NONE: +			/* do nothing */ +			break; +		case CHECKSUM_TYPE_CRC32C: +			INIT_CRC32C(context->raw_context.c_crc32c); +			break; +		case CHECKSUM_TYPE_SHA224: +			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA224); +			if (context->raw_context.c_sha2 == NULL) +				return -1; +			if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) +			{ +				pg_cryptohash_free(context->raw_context.c_sha2); +				return -1; +			} +			break; +		case CHECKSUM_TYPE_SHA256: +			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA256); +			if (context->raw_context.c_sha2 == NULL) +				return -1; +			if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) +			{ +				pg_cryptohash_free(context->raw_context.c_sha2); +				return -1; +			} +			break; +		case CHECKSUM_TYPE_SHA384: +			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA384); +			if (context->raw_context.c_sha2 == NULL) +				return -1; +			if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) +			{ +				pg_cryptohash_free(context->raw_context.c_sha2); +				return -1; +			} +			break; +		case CHECKSUM_TYPE_SHA512: +			context->raw_context.c_sha2 = pg_cryptohash_create(PG_SHA512); +			if (context->raw_context.c_sha2 == NULL) +				return -1; +			if (pg_cryptohash_init(context->raw_context.c_sha2) < 0) +			{ +				pg_cryptohash_free(context->raw_context.c_sha2); +				return -1; +			} +			break; +	} + +	return 0; +} + +/* + * Update a checksum context with new data. + * Returns 0 for a success, -1 for a failure. 
+ */ +int +pg_checksum_update(pg_checksum_context *context, const uint8 *input, +				   size_t len) +{ +	switch (context->type) +	{ +		case CHECKSUM_TYPE_NONE: +			/* do nothing */ +			break; +		case CHECKSUM_TYPE_CRC32C: +			COMP_CRC32C(context->raw_context.c_crc32c, input, len); +			break; +		case CHECKSUM_TYPE_SHA224: +		case CHECKSUM_TYPE_SHA256: +		case CHECKSUM_TYPE_SHA384: +		case CHECKSUM_TYPE_SHA512: +			if (pg_cryptohash_update(context->raw_context.c_sha2, input, len) < 0) +				return -1; +			break; +	} + +	return 0; +} + +/* + * Finalize a checksum computation and write the result to an output buffer. + * + * The caller must ensure that the buffer is at least PG_CHECKSUM_MAX_LENGTH + * bytes in length. The return value is the number of bytes actually written, + * or -1 for a failure. + */ +int +pg_checksum_final(pg_checksum_context *context, uint8 *output) +{ +	int			retval = 0; + +	StaticAssertDecl(sizeof(pg_crc32c) <= PG_CHECKSUM_MAX_LENGTH, +					 "CRC-32C digest too big for PG_CHECKSUM_MAX_LENGTH"); +	StaticAssertDecl(PG_SHA224_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, +					 "SHA224 digest too big for PG_CHECKSUM_MAX_LENGTH"); +	StaticAssertDecl(PG_SHA256_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, +					 "SHA256 digest too big for PG_CHECKSUM_MAX_LENGTH"); +	StaticAssertDecl(PG_SHA384_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, +					 "SHA384 digest too big for PG_CHECKSUM_MAX_LENGTH"); +	StaticAssertDecl(PG_SHA512_DIGEST_LENGTH <= PG_CHECKSUM_MAX_LENGTH, +					 "SHA512 digest too big for PG_CHECKSUM_MAX_LENGTH"); + +	switch (context->type) +	{ +		case CHECKSUM_TYPE_NONE: +			break; +		case CHECKSUM_TYPE_CRC32C: +			FIN_CRC32C(context->raw_context.c_crc32c); +			retval = sizeof(pg_crc32c); +			memcpy(output, &context->raw_context.c_crc32c, retval); +			break; +		case CHECKSUM_TYPE_SHA224: +			retval = PG_SHA224_DIGEST_LENGTH; +			if (pg_cryptohash_final(context->raw_context.c_sha2, +									output, retval) < 0) +				return -1; +			
pg_cryptohash_free(context->raw_context.c_sha2); +			break; +		case CHECKSUM_TYPE_SHA256: +			retval = PG_SHA256_DIGEST_LENGTH; +			if (pg_cryptohash_final(context->raw_context.c_sha2, +									output, retval) < 0) +				return -1; +			pg_cryptohash_free(context->raw_context.c_sha2); +			break; +		case CHECKSUM_TYPE_SHA384: +			retval = PG_SHA384_DIGEST_LENGTH; +			if (pg_cryptohash_final(context->raw_context.c_sha2, +									output, retval) < 0) +				return -1; +			pg_cryptohash_free(context->raw_context.c_sha2); +			break; +		case CHECKSUM_TYPE_SHA512: +			retval = PG_SHA512_DIGEST_LENGTH; +			if (pg_cryptohash_final(context->raw_context.c_sha2, +									output, retval) < 0) +				return -1; +			pg_cryptohash_free(context->raw_context.c_sha2); +			break; +	} + +	Assert(retval <= PG_CHECKSUM_MAX_LENGTH); +	return retval; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c new file mode 100644 index 00000000000..47b18b8c600 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c @@ -0,0 +1,476 @@ +/*------------------------------------------------------------------------- + * + * compression.c + * + * Shared code for compression methods and specifications. + * + * A compression specification specifies the parameters that should be used + * when performing compression with a specific algorithm. The simplest + * possible compression specification is an integer, which sets the + * compression level. + * + * Otherwise, a compression specification is a comma-separated list of items, + * each having the form keyword or keyword=value. + * + * Currently, the supported keywords are "level", "long", and "workers". 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *		  src/common/compression.c + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#ifdef USE_ZSTD +#error #include <zstd.h> +#endif +#ifdef HAVE_LIBZ +#include <zlib.h> +#endif + +#include "common/compression.h" + +static int	expect_integer_value(char *keyword, char *value, +								 pg_compress_specification *result); +static bool expect_boolean_value(char *keyword, char *value, +								 pg_compress_specification *result); + +/* + * Look up a compression algorithm by name. Returns true and sets *algorithm + * if the name is recognized. Otherwise returns false. + */ +bool +parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm) +{ +	if (strcmp(name, "none") == 0) +		*algorithm = PG_COMPRESSION_NONE; +	else if (strcmp(name, "gzip") == 0) +		*algorithm = PG_COMPRESSION_GZIP; +	else if (strcmp(name, "lz4") == 0) +		*algorithm = PG_COMPRESSION_LZ4; +	else if (strcmp(name, "zstd") == 0) +		*algorithm = PG_COMPRESSION_ZSTD; +	else +		return false; +	return true; +} + +/* + * Get the human-readable name corresponding to a particular compression + * algorithm. + */ +const char * +get_compress_algorithm_name(pg_compress_algorithm algorithm) +{ +	switch (algorithm) +	{ +		case PG_COMPRESSION_NONE: +			return "none"; +		case PG_COMPRESSION_GZIP: +			return "gzip"; +		case PG_COMPRESSION_LZ4: +			return "lz4"; +		case PG_COMPRESSION_ZSTD: +			return "zstd"; +			/* no default, to provoke compiler warnings if values are added */ +	} +	Assert(false); +	return "???";				/* placate compiler */ +} + +/* + * Parse a compression specification for a specified algorithm. + * + * See the file header comments for a brief description of what a compression + * specification is expected to look like. 
+ * + * On return, all fields of the result object will be initialized. + * In particular, result->parse_error will be NULL if no errors occurred + * during parsing, and will otherwise contain an appropriate error message. + * The caller may free this error message string using pfree, if desired. + * Note, however, even if there's no parse error, the string might not make + * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK. + * + * The compression level is assigned by default if not directly specified + * by the specification. + * + * Use validate_compress_specification() to find out whether a compression + * specification is semantically sensible. + */ +void +parse_compress_specification(pg_compress_algorithm algorithm, char *specification, +							 pg_compress_specification *result) +{ +	int			bare_level; +	char	   *bare_level_endp; + +	/* Initial setup of result object. */ +	result->algorithm = algorithm; +	result->options = 0; +	result->parse_error = NULL; + +	/* +	 * Assign a default level depending on the compression method.  This may +	 * be enforced later. +	 */ +	switch (result->algorithm) +	{ +		case PG_COMPRESSION_NONE: +			result->level = 0; +			break; +		case PG_COMPRESSION_LZ4: +#ifdef USE_LZ4 +			result->level = 0;	/* fast compression mode */ +#else +			result->parse_error = +				psprintf(_("this build does not support compression with %s"), +						 "LZ4"); +#endif +			break; +		case PG_COMPRESSION_ZSTD: +#ifdef USE_ZSTD +			result->level = ZSTD_CLEVEL_DEFAULT; +#else +			result->parse_error = +				psprintf(_("this build does not support compression with %s"), +						 "ZSTD"); +#endif +			break; +		case PG_COMPRESSION_GZIP: +#ifdef HAVE_LIBZ +			result->level = Z_DEFAULT_COMPRESSION; +#else +			result->parse_error = +				psprintf(_("this build does not support compression with %s"), +						 "gzip"); +#endif +			break; +	} + +	/* If there is no specification, we're done already. 
*/ +	if (specification == NULL) +		return; + +	/* As a special case, the specification can be a bare integer. */ +	bare_level = strtol(specification, &bare_level_endp, 10); +	if (specification != bare_level_endp && *bare_level_endp == '\0') +	{ +		result->level = bare_level; +		return; +	} + +	/* Look for comma-separated keyword or keyword=value entries. */ +	while (1) +	{ +		char	   *kwstart; +		char	   *kwend; +		char	   *vstart; +		char	   *vend; +		int			kwlen; +		int			vlen; +		bool		has_value; +		char	   *keyword; +		char	   *value; + +		/* Figure start, end, and length of next keyword and any value. */ +		kwstart = kwend = specification; +		while (*kwend != '\0' && *kwend != ',' && *kwend != '=') +			++kwend; +		kwlen = kwend - kwstart; +		if (*kwend != '=') +		{ +			vstart = vend = NULL; +			vlen = 0; +			has_value = false; +		} +		else +		{ +			vstart = vend = kwend + 1; +			while (*vend != '\0' && *vend != ',') +				++vend; +			vlen = vend - vstart; +			has_value = true; +		} + +		/* Reject empty keyword. */ +		if (kwlen == 0) +		{ +			result->parse_error = +				pstrdup(_("found empty string where a compression option was expected")); +			break; +		} + +		/* Extract keyword and value as separate C strings. */ +		keyword = palloc(kwlen + 1); +		memcpy(keyword, kwstart, kwlen); +		keyword[kwlen] = '\0'; +		if (!has_value) +			value = NULL; +		else +		{ +			value = palloc(vlen + 1); +			memcpy(value, vstart, vlen); +			value[vlen] = '\0'; +		} + +		/* Handle whatever keyword we found. */ +		if (strcmp(keyword, "level") == 0) +		{ +			result->level = expect_integer_value(keyword, value, result); + +			/* +			 * No need to set a flag in "options", there is a default level +			 * set at least thanks to the logic above. 
+			 */ +		} +		else if (strcmp(keyword, "workers") == 0) +		{ +			result->workers = expect_integer_value(keyword, value, result); +			result->options |= PG_COMPRESSION_OPTION_WORKERS; +		} +		else if (strcmp(keyword, "long") == 0) +		{ +			result->long_distance = expect_boolean_value(keyword, value, result); +			result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE; +		} +		else +			result->parse_error = +				psprintf(_("unrecognized compression option: \"%s\""), keyword); + +		/* Release memory, just to be tidy. */ +		pfree(keyword); +		if (value != NULL) +			pfree(value); + +		/* +		 * If we got an error or have reached the end of the string, stop. +		 * +		 * If there is no value, then the end of the keyword might have been +		 * the end of the string. If there is a value, then the end of the +		 * keyword cannot have been the end of the string, but the end of the +		 * value might have been. +		 */ +		if (result->parse_error != NULL || +			(vend == NULL ? *kwend == '\0' : *vend == '\0')) +			break; + +		/* Advance to next entry and loop around. */ +		specification = vend == NULL ? kwend + 1 : vend + 1; +	} +} + +/* + * Parse 'value' as an integer and return the result. + * + * If parsing fails, set result->parse_error to an appropriate message + * and return -1. + */ +static int +expect_integer_value(char *keyword, char *value, pg_compress_specification *result) +{ +	int			ivalue; +	char	   *ivalue_endp; + +	if (value == NULL) +	{ +		result->parse_error = +			psprintf(_("compression option \"%s\" requires a value"), +					 keyword); +		return -1; +	} + +	ivalue = strtol(value, &ivalue_endp, 10); +	if (ivalue_endp == value || *ivalue_endp != '\0') +	{ +		result->parse_error = +			psprintf(_("value for compression option \"%s\" must be an integer"), +					 keyword); +		return -1; +	} +	return ivalue; +} + +/* + * Parse 'value' as a boolean and return the result. + * + * If parsing fails, set result->parse_error to an appropriate message + * and return -1.  
The caller must check result->parse_error to determine if + * the call was successful. + * + * Valid values are: yes, no, on, off, 1, 0. + * + * Inspired by ParseVariableBool(). + */ +static bool +expect_boolean_value(char *keyword, char *value, pg_compress_specification *result) +{ +	if (value == NULL) +		return true; + +	if (pg_strcasecmp(value, "yes") == 0) +		return true; +	if (pg_strcasecmp(value, "on") == 0) +		return true; +	if (pg_strcasecmp(value, "1") == 0) +		return true; + +	if (pg_strcasecmp(value, "no") == 0) +		return false; +	if (pg_strcasecmp(value, "off") == 0) +		return false; +	if (pg_strcasecmp(value, "0") == 0) +		return false; + +	result->parse_error = +		psprintf(_("value for compression option \"%s\" must be a Boolean value"), +				 keyword); +	return false; +} + +/* + * Returns NULL if the compression specification string was syntactically + * valid and semantically sensible.  Otherwise, returns an error message. + * + * Does not test whether this build of PostgreSQL supports the requested + * compression method. + */ +char * +validate_compress_specification(pg_compress_specification *spec) +{ +	int			min_level = 1; +	int			max_level = 1; +	int			default_level = 0; + +	/* If it didn't even parse OK, it's definitely no good. */ +	if (spec->parse_error != NULL) +		return spec->parse_error; + +	/* +	 * Check that the algorithm expects a compression level and it is within +	 * the legal range for the algorithm. 
+	 */ +	switch (spec->algorithm) +	{ +		case PG_COMPRESSION_GZIP: +			max_level = 9; +#ifdef HAVE_LIBZ +			default_level = Z_DEFAULT_COMPRESSION; +#endif +			break; +		case PG_COMPRESSION_LZ4: +			max_level = 12; +			default_level = 0;	/* fast mode */ +			break; +		case PG_COMPRESSION_ZSTD: +#ifdef USE_ZSTD +			max_level = ZSTD_maxCLevel(); +			min_level = ZSTD_minCLevel(); +			default_level = ZSTD_CLEVEL_DEFAULT; +#endif +			break; +		case PG_COMPRESSION_NONE: +			if (spec->level != 0) +				return psprintf(_("compression algorithm \"%s\" does not accept a compression level"), +								get_compress_algorithm_name(spec->algorithm)); +			break; +	} + +	if ((spec->level < min_level || spec->level > max_level) && +		spec->level != default_level) +		return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"), +						get_compress_algorithm_name(spec->algorithm), +						min_level, max_level, default_level); + +	/* +	 * Of the compression algorithms that we currently support, only zstd +	 * allows parallel workers. +	 */ +	if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 && +		(spec->algorithm != PG_COMPRESSION_ZSTD)) +	{ +		return psprintf(_("compression algorithm \"%s\" does not accept a worker count"), +						get_compress_algorithm_name(spec->algorithm)); +	} + +	/* +	 * Of the compression algorithms that we currently support, only zstd +	 * supports long-distance mode. +	 */ +	if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 && +		(spec->algorithm != PG_COMPRESSION_ZSTD)) +	{ +		return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"), +						get_compress_algorithm_name(spec->algorithm)); +	} + +	return NULL; +} + +#ifdef FRONTEND + +/* + * Basic parsing of a value specified through a command-line option, commonly + * -Z/--compress. + * + * The parsing consists of a METHOD:DETAIL string fed later to + * parse_compress_specification().  
This only extracts METHOD and DETAIL. + * If only an integer is found, the method is implied by the value specified. + */ +void +parse_compress_options(const char *option, char **algorithm, char **detail) +{ +	char	   *sep; +	char	   *endp; +	long		result; + +	/* +	 * Check whether the compression specification consists of a bare integer. +	 * +	 * For backward-compatibility, assume "none" if the integer found is zero +	 * and "gzip" otherwise. +	 */ +	result = strtol(option, &endp, 10); +	if (*endp == '\0') +	{ +		if (result == 0) +		{ +			*algorithm = pstrdup("none"); +			*detail = NULL; +		} +		else +		{ +			*algorithm = pstrdup("gzip"); +			*detail = pstrdup(option); +		} +		return; +	} + +	/* +	 * Check whether there is a compression detail following the algorithm +	 * name. +	 */ +	sep = strchr(option, ':'); +	if (sep == NULL) +	{ +		*algorithm = pstrdup(option); +		*detail = NULL; +	} +	else +	{ +		char	   *alg; + +		alg = palloc((sep - option) + 1); +		memcpy(alg, option, sep - option); +		alg[sep - option] = '\0'; + +		*algorithm = alg; +		*detail = pstrdup(sep + 1); +	} +} +#endif							/* FRONTEND */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/config_info.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/config_info.c new file mode 100644 index 00000000000..b4a6d6ca911 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/config_info.c @@ -0,0 +1,204 @@ +/*------------------------------------------------------------------------- + * + * config_info.c + *		Common code for pg_config output + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/config_info.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/config_info.h" + + 
+/* + * get_configdata(const char *my_exec_path, size_t *configdata_len) + * + * Get configure-time constants. The caller is responsible + * for pfreeing the result. + */ +ConfigData * +get_configdata(const char *my_exec_path, size_t *configdata_len) +{ +	ConfigData *configdata; +	char		path[MAXPGPATH]; +	char	   *lastsep; +	int			i = 0; + +	/* Adjust this to match the number of items filled below */ +	*configdata_len = 23; +	configdata = palloc_array(ConfigData, *configdata_len); + +	configdata[i].name = pstrdup("BINDIR"); +	strlcpy(path, my_exec_path, sizeof(path)); +	lastsep = strrchr(path, '/'); +	if (lastsep) +		*lastsep = '\0'; +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("DOCDIR"); +	get_doc_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("HTMLDIR"); +	get_html_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("INCLUDEDIR"); +	get_include_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("PKGINCLUDEDIR"); +	get_pkginclude_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("INCLUDEDIR-SERVER"); +	get_includeserver_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("LIBDIR"); +	get_lib_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("PKGLIBDIR"); +	get_pkglib_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("LOCALEDIR"); +	get_locale_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = 
pstrdup("MANDIR"); +	get_man_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("SHAREDIR"); +	get_share_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("SYSCONFDIR"); +	get_etc_path(my_exec_path, path); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("PGXS"); +	strlcpy(path, "/var/empty/tmp/out/lib", sizeof(path)); +/* commented out to be able to point to nix $out path +	get_pkglib_path(my_exec_path, path); +*/ +	strlcat(path, "/pgxs/src/makefiles/pgxs.mk", sizeof(path)); +	cleanup_path(path); +	configdata[i].setting = pstrdup(path); +	i++; + +	configdata[i].name = pstrdup("CONFIGURE"); +	configdata[i].setting = pstrdup(CONFIGURE_ARGS); +	i++; + +	configdata[i].name = pstrdup("CC"); +#ifdef VAL_CC +	configdata[i].setting = pstrdup(VAL_CC); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	i++; + +	configdata[i].name = pstrdup("CPPFLAGS"); +#ifdef VAL_CPPFLAGS +	configdata[i].setting = pstrdup(VAL_CPPFLAGS); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	i++; + +	configdata[i].name = pstrdup("CFLAGS"); +#ifdef VAL_CFLAGS +	configdata[i].setting = pstrdup(VAL_CFLAGS); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	i++; + +	configdata[i].name = pstrdup("CFLAGS_SL"); +#ifdef VAL_CFLAGS_SL +	configdata[i].setting = pstrdup(VAL_CFLAGS_SL); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	i++; + +	configdata[i].name = pstrdup("LDFLAGS"); +#ifdef VAL_LDFLAGS +	configdata[i].setting = pstrdup(VAL_LDFLAGS); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	i++; + +	configdata[i].name = pstrdup("LDFLAGS_EX"); +#ifdef VAL_LDFLAGS_EX +	configdata[i].setting = pstrdup(VAL_LDFLAGS_EX); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	
i++; + +	configdata[i].name = pstrdup("LDFLAGS_SL"); +#ifdef VAL_LDFLAGS_SL +	configdata[i].setting = pstrdup(VAL_LDFLAGS_SL); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	i++; + +	configdata[i].name = pstrdup("LIBS"); +#ifdef VAL_LIBS +	configdata[i].setting = pstrdup(VAL_LIBS); +#else +	configdata[i].setting = pstrdup(_("not recorded")); +#endif +	i++; + +	configdata[i].name = pstrdup("VERSION"); +	configdata[i].setting = pstrdup("PostgreSQL " PG_VERSION); +	i++; + +	Assert(i == *configdata_len); + +	return configdata; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/controldata_utils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/controldata_utils.c new file mode 100644 index 00000000000..70a64f36471 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/controldata_utils.c @@ -0,0 +1,269 @@ +/*------------------------------------------------------------------------- + * + * controldata_utils.c + *		Common code for control data file output. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/controldata_utils.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +#include "access/xlog_internal.h" +#include "catalog/pg_control.h" +#include "common/controldata_utils.h" +#include "common/file_perm.h" +#ifdef FRONTEND +#error #include "common/logging.h" +#endif +#include "port/pg_crc32c.h" + +#ifndef FRONTEND +#include "pgstat.h" +#include "storage/fd.h" +#endif + +/* + * get_controlfile() + * + * Get controlfile values.  The result is returned as a palloc'd copy of the + * control file data. 
+ * + * crc_ok_p can be used by the caller to see whether the CRC of the control + * file data is correct. + */ +ControlFileData * +get_controlfile(const char *DataDir, bool *crc_ok_p) +{ +	ControlFileData *ControlFile; +	int			fd; +	char		ControlFilePath[MAXPGPATH]; +	pg_crc32c	crc; +	int			r; +#ifdef FRONTEND +	pg_crc32c	last_crc; +	int			retries = 0; +#endif + +	Assert(crc_ok_p); + +	ControlFile = palloc_object(ControlFileData); +	snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir); + +#ifdef FRONTEND +	INIT_CRC32C(last_crc); + +retry: +#endif + +#ifndef FRONTEND +	if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1) +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not open file \"%s\" for reading: %m", +						ControlFilePath))); +#else +	if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1) +		pg_fatal("could not open file \"%s\" for reading: %m", +				 ControlFilePath); +#endif + +	r = read(fd, ControlFile, sizeof(ControlFileData)); +	if (r != sizeof(ControlFileData)) +	{ +		if (r < 0) +#ifndef FRONTEND +			ereport(ERROR, +					(errcode_for_file_access(), +					 errmsg("could not read file \"%s\": %m", ControlFilePath))); +#else +			pg_fatal("could not read file \"%s\": %m", ControlFilePath); +#endif +		else +#ifndef FRONTEND +			ereport(ERROR, +					(errcode(ERRCODE_DATA_CORRUPTED), +					 errmsg("could not read file \"%s\": read %d of %zu", +							ControlFilePath, r, sizeof(ControlFileData)))); +#else +			pg_fatal("could not read file \"%s\": read %d of %zu", +					 ControlFilePath, r, sizeof(ControlFileData)); +#endif +	} + +#ifndef FRONTEND +	if (CloseTransientFile(fd) != 0) +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not close file \"%s\": %m", +						ControlFilePath))); +#else +	if (close(fd) != 0) +		pg_fatal("could not close file \"%s\": %m", ControlFilePath); +#endif + +	/* Check the CRC. 
*/ +	INIT_CRC32C(crc); +	COMP_CRC32C(crc, +				(char *) ControlFile, +				offsetof(ControlFileData, crc)); +	FIN_CRC32C(crc); + +	*crc_ok_p = EQ_CRC32C(crc, ControlFile->crc); + +#ifdef FRONTEND + +	/* +	 * If the server was writing at the same time, it is possible that we read +	 * partially updated contents on some systems.  If the CRC doesn't match, +	 * retry a limited number of times until we compute the same bad CRC twice +	 * in a row with a short sleep in between.  Then the failure is unlikely +	 * to be due to a concurrent write. +	 */ +	if (!*crc_ok_p && +		(retries == 0 || !EQ_CRC32C(crc, last_crc)) && +		retries < 10) +	{ +		retries++; +		last_crc = crc; +		pg_usleep(10000); +		goto retry; +	} +#endif + +	/* Make sure the control file is valid byte order. */ +	if (ControlFile->pg_control_version % 65536 == 0 && +		ControlFile->pg_control_version / 65536 != 0) +#ifndef FRONTEND +		elog(ERROR, _("byte ordering mismatch")); +#else +		pg_log_warning("possible byte ordering mismatch\n" +					   "The byte ordering used to store the pg_control file might not match the one\n" +					   "used by this program.  In that case the results below would be incorrect, and\n" +					   "the PostgreSQL installation would be incompatible with this data directory."); +#endif + +	return ControlFile; +} + +/* + * update_controlfile() + * + * Update controlfile values with the contents given by caller.  The + * contents to write are included in "ControlFile". "do_sync" can be + * optionally used to flush the updated control file.  Note that it is up + * to the caller to properly lock ControlFileLock when calling this + * routine in the backend. 
+ */ +void +update_controlfile(const char *DataDir, +				   ControlFileData *ControlFile, bool do_sync) +{ +	int			fd; +	char		buffer[PG_CONTROL_FILE_SIZE]; +	char		ControlFilePath[MAXPGPATH]; + +	/* Update timestamp  */ +	ControlFile->time = (pg_time_t) time(NULL); + +	/* Recalculate CRC of control file */ +	INIT_CRC32C(ControlFile->crc); +	COMP_CRC32C(ControlFile->crc, +				(char *) ControlFile, +				offsetof(ControlFileData, crc)); +	FIN_CRC32C(ControlFile->crc); + +	/* +	 * Write out PG_CONTROL_FILE_SIZE bytes into pg_control by zero-padding +	 * the excess over sizeof(ControlFileData), to avoid premature EOF related +	 * errors when reading it. +	 */ +	memset(buffer, 0, PG_CONTROL_FILE_SIZE); +	memcpy(buffer, ControlFile, sizeof(ControlFileData)); + +	snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE); + +#ifndef FRONTEND + +	/* +	 * All errors issue a PANIC, so no need to use OpenTransientFile() and to +	 * worry about file descriptor leaks. +	 */ +	if ((fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY)) < 0) +		ereport(PANIC, +				(errcode_for_file_access(), +				 errmsg("could not open file \"%s\": %m", +						ControlFilePath))); +#else +	if ((fd = open(ControlFilePath, O_WRONLY | PG_BINARY, +				   pg_file_create_mode)) == -1) +		pg_fatal("could not open file \"%s\": %m", ControlFilePath); +#endif + +	errno = 0; +#ifndef FRONTEND +	pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); +#endif +	if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE) +	{ +		/* if write didn't set errno, assume problem is no disk space */ +		if (errno == 0) +			errno = ENOSPC; + +#ifndef FRONTEND +		ereport(PANIC, +				(errcode_for_file_access(), +				 errmsg("could not write file \"%s\": %m", +						ControlFilePath))); +#else +		pg_fatal("could not write file \"%s\": %m", ControlFilePath); +#endif +	} +#ifndef FRONTEND +	pgstat_report_wait_end(); +#endif + +	if (do_sync) +	{ +#ifndef FRONTEND +		
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE); +		if (pg_fsync(fd) != 0) +			ereport(PANIC, +					(errcode_for_file_access(), +					 errmsg("could not fsync file \"%s\": %m", +							ControlFilePath))); +		pgstat_report_wait_end(); +#else +		if (fsync(fd) != 0) +			pg_fatal("could not fsync file \"%s\": %m", ControlFilePath); +#endif +	} + +	if (close(fd) != 0) +	{ +#ifndef FRONTEND +		ereport(PANIC, +				(errcode_for_file_access(), +				 errmsg("could not close file \"%s\": %m", +						ControlFilePath))); +#else +		pg_fatal("could not close file \"%s\": %m", ControlFilePath); +#endif +	} +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/cryptohash_openssl.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/cryptohash_openssl.c new file mode 100644 index 00000000000..a654cd4ad40 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/cryptohash_openssl.c @@ -0,0 +1,353 @@ +/*------------------------------------------------------------------------- + * + * cryptohash_openssl.c + *	  Set of wrapper routines on top of OpenSSL to support cryptographic + *	  hash functions. + * + * This should only be used if code is compiled with OpenSSL support. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *		  src/common/cryptohash_openssl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <openssl/err.h> +#include <openssl/evp.h> + +#include "common/cryptohash.h" +#include "common/md5.h" +#include "common/sha1.h" +#include "common/sha2.h" +#ifndef FRONTEND +#include "utils/memutils.h" +#include "utils/resowner.h" +#include "utils/resowner_private.h" +#endif + +/* + * In the backend, use an allocation in TopMemoryContext to count for + * resowner cleanup handling.  In the frontend, use malloc to be able + * to return a failure status back to the caller. + */ +#ifndef FRONTEND +#define ALLOC(size) MemoryContextAlloc(TopMemoryContext, size) +#define FREE(ptr) pfree(ptr) +#else +#define ALLOC(size) malloc(size) +#define FREE(ptr) free(ptr) +#endif + +/* Set of error states */ +typedef enum pg_cryptohash_errno +{ +	PG_CRYPTOHASH_ERROR_NONE = 0, +	PG_CRYPTOHASH_ERROR_DEST_LEN, +	PG_CRYPTOHASH_ERROR_OPENSSL +} pg_cryptohash_errno; + +/* + * Internal pg_cryptohash_ctx structure. + * + * This tracks the resource owner associated to each EVP context data + * for the backend. + */ +struct pg_cryptohash_ctx +{ +	pg_cryptohash_type type; +	pg_cryptohash_errno error; +	const char *errreason; + +	EVP_MD_CTX *evpctx; + +#ifndef FRONTEND +	ResourceOwner resowner; +#endif +}; + +static const char * +SSLerrmessage(unsigned long ecode) +{ +	if (ecode == 0) +		return NULL; + +	/* +	 * This may return NULL, but we would fall back to a default error path if +	 * that were the case. +	 */ +	return ERR_reason_error_string(ecode); +} + +/* + * pg_cryptohash_create + * + * Allocate a hash context.  Returns NULL on failure for an OOM.  The + * backend issues an error, without returning. 
+ */ +pg_cryptohash_ctx * +pg_cryptohash_create(pg_cryptohash_type type) +{ +	pg_cryptohash_ctx *ctx; + +	/* +	 * Make sure that the resource owner has space to remember this reference. +	 * This can error out with "out of memory", so do this before any other +	 * allocation to avoid leaking. +	 */ +#ifndef FRONTEND +	ResourceOwnerEnlargeCryptoHash(CurrentResourceOwner); +#endif + +	ctx = ALLOC(sizeof(pg_cryptohash_ctx)); +	if (ctx == NULL) +		return NULL; +	memset(ctx, 0, sizeof(pg_cryptohash_ctx)); +	ctx->type = type; +	ctx->error = PG_CRYPTOHASH_ERROR_NONE; +	ctx->errreason = NULL; + +	/* +	 * Initialization takes care of assigning the correct type for OpenSSL. +	 * Also ensure that there aren't any unconsumed errors in the queue from +	 * previous runs. +	 */ +	ERR_clear_error(); +	ctx->evpctx = EVP_MD_CTX_create(); + +	if (ctx->evpctx == NULL) +	{ +		explicit_bzero(ctx, sizeof(pg_cryptohash_ctx)); +		FREE(ctx); +#ifndef FRONTEND +		ereport(ERROR, +				(errcode(ERRCODE_OUT_OF_MEMORY), +				 errmsg("out of memory"))); +#else +		return NULL; +#endif +	} + +#ifndef FRONTEND +	ctx->resowner = CurrentResourceOwner; +	ResourceOwnerRememberCryptoHash(CurrentResourceOwner, +									PointerGetDatum(ctx)); +#endif + +	return ctx; +} + +/* + * pg_cryptohash_init + * + * Initialize a hash context.  Returns 0 on success, and -1 on failure. 
+ */ +int +pg_cryptohash_init(pg_cryptohash_ctx *ctx) +{ +	int			status = 0; + +	if (ctx == NULL) +		return -1; + +	switch (ctx->type) +	{ +		case PG_MD5: +			status = EVP_DigestInit_ex(ctx->evpctx, EVP_md5(), NULL); +			break; +		case PG_SHA1: +			status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha1(), NULL); +			break; +		case PG_SHA224: +			status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha224(), NULL); +			break; +		case PG_SHA256: +			status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha256(), NULL); +			break; +		case PG_SHA384: +			status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha384(), NULL); +			break; +		case PG_SHA512: +			status = EVP_DigestInit_ex(ctx->evpctx, EVP_sha512(), NULL); +			break; +	} + +	/* OpenSSL internals return 1 on success, 0 on failure */ +	if (status <= 0) +	{ +		ctx->errreason = SSLerrmessage(ERR_get_error()); +		ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL; + +		/* +		 * The OpenSSL error queue should normally be empty since we've +		 * consumed an error, but cipher initialization can in FIPS-enabled +		 * OpenSSL builds generate two errors so clear the queue here as well. +		 */ +		ERR_clear_error(); +		return -1; +	} +	return 0; +} + +/* + * pg_cryptohash_update + * + * Update a hash context.  Returns 0 on success, and -1 on failure. + */ +int +pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len) +{ +	int			status = 0; + +	if (ctx == NULL) +		return -1; + +	status = EVP_DigestUpdate(ctx->evpctx, data, len); + +	/* OpenSSL internals return 1 on success, 0 on failure */ +	if (status <= 0) +	{ +		ctx->errreason = SSLerrmessage(ERR_get_error()); +		ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL; +		return -1; +	} +	return 0; +} + +/* + * pg_cryptohash_final + * + * Finalize a hash context.  Returns 0 on success, and -1 on failure. 
+ */ +int +pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len) +{ +	int			status = 0; + +	if (ctx == NULL) +		return -1; + +	switch (ctx->type) +	{ +		case PG_MD5: +			if (len < MD5_DIGEST_LENGTH) +			{ +				ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA1: +			if (len < SHA1_DIGEST_LENGTH) +			{ +				ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA224: +			if (len < PG_SHA224_DIGEST_LENGTH) +			{ +				ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA256: +			if (len < PG_SHA256_DIGEST_LENGTH) +			{ +				ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA384: +			if (len < PG_SHA384_DIGEST_LENGTH) +			{ +				ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA512: +			if (len < PG_SHA512_DIGEST_LENGTH) +			{ +				ctx->error = PG_CRYPTOHASH_ERROR_DEST_LEN; +				return -1; +			} +			break; +	} + +	status = EVP_DigestFinal_ex(ctx->evpctx, dest, 0); + +	/* OpenSSL internals return 1 on success, 0 on failure */ +	if (status <= 0) +	{ +		ctx->errreason = SSLerrmessage(ERR_get_error()); +		ctx->error = PG_CRYPTOHASH_ERROR_OPENSSL; +		return -1; +	} +	return 0; +} + +/* + * pg_cryptohash_free + * + * Free a hash context. + */ +void +pg_cryptohash_free(pg_cryptohash_ctx *ctx) +{ +	if (ctx == NULL) +		return; + +	EVP_MD_CTX_destroy(ctx->evpctx); + +#ifndef FRONTEND +	ResourceOwnerForgetCryptoHash(ctx->resowner, +								  PointerGetDatum(ctx)); +#endif + +	explicit_bzero(ctx, sizeof(pg_cryptohash_ctx)); +	FREE(ctx); +} + +/* + * pg_cryptohash_error + * + * Returns a static string providing details about an error that + * happened during a computation. + */ +const char * +pg_cryptohash_error(pg_cryptohash_ctx *ctx) +{ +	/* +	 * This implementation would never fail because of an out-of-memory error, +	 * except when creating the context. 
+	 */ +	if (ctx == NULL) +		return _("out of memory"); + +	/* +	 * If a reason is provided, rely on it, else fallback to any error code +	 * set. +	 */ +	if (ctx->errreason) +		return ctx->errreason; + +	switch (ctx->error) +	{ +		case PG_CRYPTOHASH_ERROR_NONE: +			return _("success"); +		case PG_CRYPTOHASH_ERROR_DEST_LEN: +			return _("destination buffer too small"); +		case PG_CRYPTOHASH_ERROR_OPENSSL: +			return _("OpenSSL failure"); +	} + +	Assert(false);				/* cannot be reached */ +	return _("success"); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s.c new file mode 100644 index 00000000000..614e98192a8 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s.c @@ -0,0 +1,1076 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for double precision. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/d2s.c + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + *     (See accompanying file LICENSE-Apache or copy at + *      http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + *     (See accompanying file LICENSE-Boost or copy at + *      https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
+ * + *--------------------------------------------------------------------------- + */ + +/* + *  Runtime compiler options: + * + *  -DRYU_ONLY_64_BIT_OPS Avoid using uint128 or 64-bit intrinsics. Slower, + *      depending on your compiler. + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/shortest_dec.h" + +/* + * For consistency, we use 128-bit types if and only if the rest of PG also + * does, even though we could use them here without worrying about the + * alignment concerns that apply elsewhere. + */ +#if !defined(HAVE_INT128) && defined(_MSC_VER) \ +	&& !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64) +#define HAS_64_BIT_INTRINSICS +#endif + +#include "ryu_common.h" +#include "digit_table.h" +#include "d2s_full_table.h" +#include "d2s_intrinsics.h" + +#define DOUBLE_MANTISSA_BITS 52 +#define DOUBLE_EXPONENT_BITS 11 +#define DOUBLE_BIAS 1023 + +#define DOUBLE_POW5_INV_BITCOUNT 122 +#define DOUBLE_POW5_BITCOUNT 121 + + +static inline uint32 +pow5Factor(uint64 value) +{ +	uint32		count = 0; + +	for (;;) +	{ +		Assert(value != 0); +		const uint64 q = div5(value); +		const uint32 r = (uint32) (value - 5 * q); + +		if (r != 0) +			break; + +		value = q; +		++count; +	} +	return count; +} + +/*  Returns true if value is divisible by 5^p. */ +static inline bool +multipleOfPowerOf5(const uint64 value, const uint32 p) +{ +	/* +	 * I tried a case distinction on p, but there was no performance +	 * difference. +	 */ +	return pow5Factor(value) >= p; +} + +/*  Returns true if value is divisible by 2^p. */ +static inline bool +multipleOfPowerOf2(const uint64 value, const uint32 p) +{ +	/* return __builtin_ctzll(value) >= p; */ +	return (value & ((UINT64CONST(1) << p) - 1)) == 0; +} + +/* + * We need a 64x128-bit multiplication and a subsequent 128-bit shift. + * + * Multiplication: + * + *    The 64-bit factor is variable and passed in, the 128-bit factor comes + *    from a lookup table. 
We know that the 64-bit factor only has 55 + *    significant bits (i.e., the 9 topmost bits are zeros). The 128-bit + *    factor only has 124 significant bits (i.e., the 4 topmost bits are + *    zeros). + * + * Shift: + * + *    In principle, the multiplication result requires 55 + 124 = 179 bits to + *    represent. However, we then shift this value to the right by j, which is + *    at least j >= 115, so the result is guaranteed to fit into 179 - 115 = + *    64 bits. This means that we only need the topmost 64 significant bits of + *    the 64x128-bit multiplication. + * + * There are several ways to do this: + * + *  1. Best case: the compiler exposes a 128-bit type. + *     We perform two 64x64-bit multiplications, add the higher 64 bits of the + *     lower result to the higher result, and shift by j - 64 bits. + * + *     We explicitly cast from 64-bit to 128-bit, so the compiler can tell + *     that these are only 64-bit inputs, and can map these to the best + *     possible sequence of assembly instructions. x86-64 machines happen to + *     have matching assembly instructions for 64x64-bit multiplications and + *     128-bit shifts. + * + *  2. Second best case: the compiler exposes intrinsics for the x86-64 + *     assembly instructions mentioned in 1. + * + *  3. We only have 64x64 bit instructions that return the lower 64 bits of + *     the result, i.e., we have to use plain C. + * + *     Our inputs are less than the full width, so we have three options: + *     a. Ignore this fact and just implement the intrinsics manually. + *     b. Split both into 31-bit pieces, which guarantees no internal + *        overflow, but requires extra work upfront (unless we change the + *        lookup table). + *     c. Split only the first factor into 31-bit pieces, which also + *        guarantees no internal overflow, but requires extra work since the + *        intermediate results are not perfectly aligned. 
+ */ +#if defined(HAVE_INT128) + +/*  Best case: use 128-bit type. */ +static inline uint64 +mulShift(const uint64 m, const uint64 *const mul, const int32 j) +{ +	const uint128 b0 = ((uint128) m) * mul[0]; +	const uint128 b2 = ((uint128) m) * mul[1]; + +	return (uint64) (((b0 >> 64) + b2) >> (j - 64)); +} + +static inline uint64 +mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j, +			uint64 *const vp, uint64 *const vm, const uint32 mmShift) +{ +	*vp = mulShift(4 * m + 2, mul, j); +	*vm = mulShift(4 * m - 1 - mmShift, mul, j); +	return mulShift(4 * m, mul, j); +} + +#elif defined(HAS_64_BIT_INTRINSICS) + +static inline uint64 +mulShift(const uint64 m, const uint64 *const mul, const int32 j) +{ +	/* m is maximum 55 bits */ +	uint64		high1; + +	/* 128 */ +	const uint64 low1 = umul128(m, mul[1], &high1); + +	/* 64 */ +	uint64		high0; +	uint64		sum; + +	/* 64 */ +	umul128(m, mul[0], &high0); +	/* 0 */ +	sum = high0 + low1; + +	if (sum < high0) +	{ +		++high1; +		/* overflow into high1 */ +	} +	return shiftright128(sum, high1, j - 64); +} + +static inline uint64 +mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j, +			uint64 *const vp, uint64 *const vm, const uint32 mmShift) +{ +	*vp = mulShift(4 * m + 2, mul, j); +	*vm = mulShift(4 * m - 1 - mmShift, mul, j); +	return mulShift(4 * m, mul, j); +} + +#else							/* // !defined(HAVE_INT128) && +								 * !defined(HAS_64_BIT_INTRINSICS) */ + +static inline uint64 +mulShiftAll(uint64 m, const uint64 *const mul, const int32 j, +			uint64 *const vp, uint64 *const vm, const uint32 mmShift) +{ +	m <<= 1;					/* m is maximum 55 bits */ + +	uint64		tmp; +	const uint64 lo = umul128(m, mul[0], &tmp); +	uint64		hi; +	const uint64 mid = tmp + umul128(m, mul[1], &hi); + +	hi += mid < tmp;			/* overflow into hi */ + +	const uint64 lo2 = lo + mul[0]; +	const uint64 mid2 = mid + mul[1] + (lo2 < lo); +	const uint64 hi2 = hi + (mid2 < mid); + +	*vp = shiftright128(mid2, hi2, j - 64 - 1); + +	if (mmShift == 
1) +	{ +		const uint64 lo3 = lo - mul[0]; +		const uint64 mid3 = mid - mul[1] - (lo3 > lo); +		const uint64 hi3 = hi - (mid3 > mid); + +		*vm = shiftright128(mid3, hi3, j - 64 - 1); +	} +	else +	{ +		const uint64 lo3 = lo + lo; +		const uint64 mid3 = mid + mid + (lo3 < lo); +		const uint64 hi3 = hi + hi + (mid3 < mid); +		const uint64 lo4 = lo3 - mul[0]; +		const uint64 mid4 = mid3 - mul[1] - (lo4 > lo3); +		const uint64 hi4 = hi3 - (mid4 > mid3); + +		*vm = shiftright128(mid4, hi4, j - 64); +	} + +	return shiftright128(mid, hi, j - 64 - 1); +} + +#endif							/* // HAS_64_BIT_INTRINSICS */ + +static inline uint32 +decimalLength(const uint64 v) +{ +	/* This is slightly faster than a loop. */ +	/* The average output length is 16.38 digits, so we check high-to-low. */ +	/* Function precondition: v is not an 18, 19, or 20-digit number. */ +	/* (17 digits are sufficient for round-tripping.) */ +	Assert(v < 100000000000000000L); +	if (v >= 10000000000000000L) +	{ +		return 17; +	} +	if (v >= 1000000000000000L) +	{ +		return 16; +	} +	if (v >= 100000000000000L) +	{ +		return 15; +	} +	if (v >= 10000000000000L) +	{ +		return 14; +	} +	if (v >= 1000000000000L) +	{ +		return 13; +	} +	if (v >= 100000000000L) +	{ +		return 12; +	} +	if (v >= 10000000000L) +	{ +		return 11; +	} +	if (v >= 1000000000L) +	{ +		return 10; +	} +	if (v >= 100000000L) +	{ +		return 9; +	} +	if (v >= 10000000L) +	{ +		return 8; +	} +	if (v >= 1000000L) +	{ +		return 7; +	} +	if (v >= 100000L) +	{ +		return 6; +	} +	if (v >= 10000L) +	{ +		return 5; +	} +	if (v >= 1000L) +	{ +		return 4; +	} +	if (v >= 100L) +	{ +		return 3; +	} +	if (v >= 10L) +	{ +		return 2; +	} +	return 1; +} + +/*  A floating decimal representing m * 10^e. 
*/ +typedef struct floating_decimal_64 +{ +	uint64		mantissa; +	int32		exponent; +} floating_decimal_64; + +static inline floating_decimal_64 +d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent) +{ +	int32		e2; +	uint64		m2; + +	if (ieeeExponent == 0) +	{ +		/* We subtract 2 so that the bounds computation has 2 additional bits. */ +		e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; +		m2 = ieeeMantissa; +	} +	else +	{ +		e2 = ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; +		m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa; +	} + +#if STRICTLY_SHORTEST +	const bool	even = (m2 & 1) == 0; +	const bool	acceptBounds = even; +#else +	const bool	acceptBounds = false; +#endif + +	/* Step 2: Determine the interval of legal decimal representations. */ +	const uint64 mv = 4 * m2; + +	/* Implicit bool -> int conversion. True is 1, false is 0. */ +	const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; + +	/* We would compute mp and mm like this: */ +	/* uint64 mp = 4 * m2 + 2; */ +	/* uint64 mm = mv - 1 - mmShift; */ + +	/* Step 3: Convert to a decimal power base using 128-bit arithmetic. */ +	uint64		vr, +				vp, +				vm; +	int32		e10; +	bool		vmIsTrailingZeros = false; +	bool		vrIsTrailingZeros = false; + +	if (e2 >= 0) +	{ +		/* +		 * I tried special-casing q == 0, but there was no effect on +		 * performance. +		 * +		 * This expr is slightly faster than max(0, log10Pow2(e2) - 1). +		 */ +		const uint32 q = log10Pow2(e2) - (e2 > 3); +		const int32 k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q) - 1; +		const int32 i = -e2 + q + k; + +		e10 = q; + +		vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift); + +		if (q <= 21) +		{ +			/* +			 * This should use q <= 22, but I think 21 is also safe. Smaller +			 * values may still be safe, but it's more difficult to reason +			 * about them. +			 * +			 * Only one of mp, mv, and mm can be a multiple of 5, if any. 
+			 */ +			const uint32 mvMod5 = (uint32) (mv - 5 * div5(mv)); + +			if (mvMod5 == 0) +			{ +				vrIsTrailingZeros = multipleOfPowerOf5(mv, q); +			} +			else if (acceptBounds) +			{ +				/*---- +				 * Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q +				 * <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q +				 * <=> true && pow5Factor(mm) >= q, since e2 >= q. +				 *---- +				 */ +				vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q); +			} +			else +			{ +				/* Same as min(e2 + 1, pow5Factor(mp)) >= q. */ +				vp -= multipleOfPowerOf5(mv + 2, q); +			} +		} +	} +	else +	{ +		/* +		 * This expression is slightly faster than max(0, log10Pow5(-e2) - 1). +		 */ +		const uint32 q = log10Pow5(-e2) - (-e2 > 1); +		const int32 i = -e2 - q; +		const int32 k = pow5bits(i) - DOUBLE_POW5_BITCOUNT; +		const int32 j = q - k; + +		e10 = q + e2; + +		vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift); + +		if (q <= 1) +		{ +			/* +			 * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q +			 * trailing 0 bits. +			 */ +			/* mv = 4 * m2, so it always has at least two trailing 0 bits. */ +			vrIsTrailingZeros = true; +			if (acceptBounds) +			{ +				/* +				 * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff +				 * mmShift == 1. +				 */ +				vmIsTrailingZeros = mmShift == 1; +			} +			else +			{ +				/* +				 * mp = mv + 2, so it always has at least one trailing 0 bit. +				 */ +				--vp; +			} +		} +		else if (q < 63) +		{ +			/* TODO(ulfjack):Use a tighter bound here. */ +			/* +			 * We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q - 1 +			 */ +			/* <=> ntz(mv) >= q - 1 && pow5Factor(mv) - e2 >= q - 1 */ +			/* <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) */ +			/* <=> (mv & ((1 << (q - 1)) - 1)) == 0 */ + +			/* +			 * We also need to make sure that the left shift does not +			 * overflow. 
+			 */ +			vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1); +		} +	} + +	/* +	 * Step 4: Find the shortest decimal representation in the interval of +	 * legal representations. +	 */ +	uint32		removed = 0; +	uint8		lastRemovedDigit = 0; +	uint64		output; + +	/* On average, we remove ~2 digits. */ +	if (vmIsTrailingZeros || vrIsTrailingZeros) +	{ +		/* General case, which happens rarely (~0.7%). */ +		for (;;) +		{ +			const uint64 vpDiv10 = div10(vp); +			const uint64 vmDiv10 = div10(vm); + +			if (vpDiv10 <= vmDiv10) +				break; + +			const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10); +			const uint64 vrDiv10 = div10(vr); +			const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10); + +			vmIsTrailingZeros &= vmMod10 == 0; +			vrIsTrailingZeros &= lastRemovedDigit == 0; +			lastRemovedDigit = (uint8) vrMod10; +			vr = vrDiv10; +			vp = vpDiv10; +			vm = vmDiv10; +			++removed; +		} + +		if (vmIsTrailingZeros) +		{ +			for (;;) +			{ +				const uint64 vmDiv10 = div10(vm); +				const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10); + +				if (vmMod10 != 0) +					break; + +				const uint64 vpDiv10 = div10(vp); +				const uint64 vrDiv10 = div10(vr); +				const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10); + +				vrIsTrailingZeros &= lastRemovedDigit == 0; +				lastRemovedDigit = (uint8) vrMod10; +				vr = vrDiv10; +				vp = vpDiv10; +				vm = vmDiv10; +				++removed; +			} +		} + +		if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) +		{ +			/* Round even if the exact number is .....50..0. */ +			lastRemovedDigit = 4; +		} + +		/* +		 * We need to take vr + 1 if vr is outside bounds or we need to round +		 * up. +		 */ +		output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); +	} +	else +	{ +		/* +		 * Specialized for the common case (~99.3%). Percentages below are +		 * relative to this. 
+		 */ +		bool		roundUp = false; +		const uint64 vpDiv100 = div100(vp); +		const uint64 vmDiv100 = div100(vm); + +		if (vpDiv100 > vmDiv100) +		{ +			/* Optimization:remove two digits at a time(~86.2 %). */ +			const uint64 vrDiv100 = div100(vr); +			const uint32 vrMod100 = (uint32) (vr - 100 * vrDiv100); + +			roundUp = vrMod100 >= 50; +			vr = vrDiv100; +			vp = vpDiv100; +			vm = vmDiv100; +			removed += 2; +		} + +		/*---- +		 * Loop iterations below (approximately), without optimization +		 * above: +		 * +		 * 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, +		 * 6+: 0.02% +		 * +		 * Loop iterations below (approximately), with optimization +		 * above: +		 * +		 * 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% +		 *---- +		 */ +		for (;;) +		{ +			const uint64 vpDiv10 = div10(vp); +			const uint64 vmDiv10 = div10(vm); + +			if (vpDiv10 <= vmDiv10) +				break; + +			const uint64 vrDiv10 = div10(vr); +			const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10); + +			roundUp = vrMod10 >= 5; +			vr = vrDiv10; +			vp = vpDiv10; +			vm = vmDiv10; +			++removed; +		} + +		/* +		 * We need to take vr + 1 if vr is outside bounds or we need to round +		 * up. +		 */ +		output = vr + (vr == vm || roundUp); +	} + +	const int32 exp = e10 + removed; + +	floating_decimal_64 fd; + +	fd.exponent = exp; +	fd.mantissa = output; +	return fd; +} + +static inline int +to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result) +{ +	/* Step 5: Print the decimal representation. */ +	int			index = 0; + +	uint64		output = v.mantissa; +	int32		exp = v.exponent; + +	/*---- +	 * On entry, mantissa * 10^exp is the result to be output. +	 * Caller has already done the - sign if needed. +	 * +	 * We want to insert the point somewhere depending on the output length +	 * and exponent, which might mean adding zeros: +	 * +	 *            exp  | format +	 *            1+   |  ddddddddd000000 +	 *            0    |  ddddddddd +	 *  -1 .. 
-len+1   |  dddddddd.d to d.ddddddddd +	 *  -len ...       |  0.ddddddddd to 0.000dddddd +	 */ +	uint32		i = 0; +	int32		nexp = exp + olength; + +	if (nexp <= 0) +	{ +		/* -nexp is number of 0s to add after '.' */ +		Assert(nexp >= -3); +		/* 0.000ddddd */ +		index = 2 - nexp; +		/* won't need more than this many 0s */ +		memcpy(result, "0.000000", 8); +	} +	else if (exp < 0) +	{ +		/* +		 * dddd.dddd; leave space at the start and move the '.' in after +		 */ +		index = 1; +	} +	else +	{ +		/* +		 * We can save some code later by pre-filling with zeros. We know that +		 * there can be no more than 16 output digits in this form, otherwise +		 * we would not choose fixed-point output. +		 */ +		Assert(exp < 16 && exp + olength <= 16); +		memset(result, '0', 16); +	} + +	/* +	 * We prefer 32-bit operations, even on 64-bit platforms. We have at most +	 * 17 digits, and uint32 can store 9 digits. If output doesn't fit into +	 * uint32, we cut off 8 digits, so the rest will fit into uint32. +	 */ +	if ((output >> 32) != 0) +	{ +		/* Expensive 64-bit division. 
*/ +		const uint64 q = div1e8(output); +		uint32		output2 = (uint32) (output - 100000000 * q); +		const uint32 c = output2 % 10000; + +		output = q; +		output2 /= 10000; + +		const uint32 d = output2 % 10000; +		const uint32 c0 = (c % 100) << 1; +		const uint32 c1 = (c / 100) << 1; +		const uint32 d0 = (d % 100) << 1; +		const uint32 d1 = (d / 100) << 1; + +		memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2); +		memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2); +		memcpy(result + index + olength - i - 6, DIGIT_TABLE + d0, 2); +		memcpy(result + index + olength - i - 8, DIGIT_TABLE + d1, 2); +		i += 8; +	} + +	uint32		output2 = (uint32) output; + +	while (output2 >= 10000) +	{ +		const uint32 c = output2 - 10000 * (output2 / 10000); +		const uint32 c0 = (c % 100) << 1; +		const uint32 c1 = (c / 100) << 1; + +		output2 /= 10000; +		memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2); +		memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2); +		i += 4; +	} +	if (output2 >= 100) +	{ +		const uint32 c = (output2 % 100) << 1; + +		output2 /= 100; +		memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2); +		i += 2; +	} +	if (output2 >= 10) +	{ +		const uint32 c = output2 << 1; + +		memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2); +	} +	else +	{ +		result[index] = (char) ('0' + output2); +	} + +	if (index == 1) +	{ +		/* +		 * nexp is 1..15 here, representing the number of digits before the +		 * point. A value of 16 is not possible because we switch to +		 * scientific notation when the display exponent reaches 15. 
+		 */ +		Assert(nexp < 16); +		/* gcc only seems to want to optimize memmove for small 2^n */ +		if (nexp & 8) +		{ +			memmove(result + index - 1, result + index, 8); +			index += 8; +		} +		if (nexp & 4) +		{ +			memmove(result + index - 1, result + index, 4); +			index += 4; +		} +		if (nexp & 2) +		{ +			memmove(result + index - 1, result + index, 2); +			index += 2; +		} +		if (nexp & 1) +		{ +			result[index - 1] = result[index]; +		} +		result[nexp] = '.'; +		index = olength + 1; +	} +	else if (exp >= 0) +	{ +		/* we supplied the trailing zeros earlier, now just set the length. */ +		index = olength + exp; +	} +	else +	{ +		index = olength + (2 - nexp); +	} + +	return index; +} + +static inline int +to_chars(floating_decimal_64 v, const bool sign, char *const result) +{ +	/* Step 5: Print the decimal representation. */ +	int			index = 0; + +	uint64		output = v.mantissa; +	uint32		olength = decimalLength(output); +	int32		exp = v.exponent + olength - 1; + +	if (sign) +	{ +		result[index++] = '-'; +	} + +	/* +	 * The thresholds for fixed-point output are chosen to match printf +	 * defaults. Beware that both the code of to_chars_df and the value of +	 * DOUBLE_SHORTEST_DECIMAL_LEN are sensitive to these thresholds. +	 */ +	if (exp >= -4 && exp < 15) +		return to_chars_df(v, olength, result + index) + sign; + +	/* +	 * If v.exponent is exactly 0, we might have reached here via the small +	 * integer fast path, in which case v.mantissa might contain trailing +	 * (decimal) zeros. For scientific notation we need to move these zeros +	 * into the exponent. (For fixed point this doesn't matter, which is why +	 * we do this here rather than above.) +	 * +	 * Since we already calculated the display exponent (exp) above based on +	 * the old decimal length, that value does not change here. Instead, we +	 * just reduce the display length for each digit removed. 
+	 * +	 * If we didn't get here via the fast path, the raw exponent will not +	 * usually be 0, and there will be no trailing zeros, so we pay no more +	 * than one div10/multiply extra cost. We claw back half of that by +	 * checking for divisibility by 2 before dividing by 10. +	 */ +	if (v.exponent == 0) +	{ +		while ((output & 1) == 0) +		{ +			const uint64 q = div10(output); +			const uint32 r = (uint32) (output - 10 * q); + +			if (r != 0) +				break; +			output = q; +			--olength; +		} +	} + +	/*---- +	 * Print the decimal digits. +	 * +	 * The following code is equivalent to: +	 * +	 * for (uint32 i = 0; i < olength - 1; ++i) { +	 *   const uint32 c = output % 10; output /= 10; +	 *   result[index + olength - i] = (char) ('0' + c); +	 * } +	 * result[index] = '0' + output % 10; +	 *---- +	 */ + +	uint32		i = 0; + +	/* +	 * We prefer 32-bit operations, even on 64-bit platforms. We have at most +	 * 17 digits, and uint32 can store 9 digits. If output doesn't fit into +	 * uint32, we cut off 8 digits, so the rest will fit into uint32. +	 */ +	if ((output >> 32) != 0) +	{ +		/* Expensive 64-bit division. 
*/ +		const uint64 q = div1e8(output); +		uint32		output2 = (uint32) (output - 100000000 * q); + +		output = q; + +		const uint32 c = output2 % 10000; + +		output2 /= 10000; + +		const uint32 d = output2 % 10000; +		const uint32 c0 = (c % 100) << 1; +		const uint32 c1 = (c / 100) << 1; +		const uint32 d0 = (d % 100) << 1; +		const uint32 d1 = (d / 100) << 1; + +		memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2); +		memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2); +		memcpy(result + index + olength - i - 5, DIGIT_TABLE + d0, 2); +		memcpy(result + index + olength - i - 7, DIGIT_TABLE + d1, 2); +		i += 8; +	} + +	uint32		output2 = (uint32) output; + +	while (output2 >= 10000) +	{ +		const uint32 c = output2 - 10000 * (output2 / 10000); + +		output2 /= 10000; + +		const uint32 c0 = (c % 100) << 1; +		const uint32 c1 = (c / 100) << 1; + +		memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2); +		memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2); +		i += 4; +	} +	if (output2 >= 100) +	{ +		const uint32 c = (output2 % 100) << 1; + +		output2 /= 100; +		memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2); +		i += 2; +	} +	if (output2 >= 10) +	{ +		const uint32 c = output2 << 1; + +		/* +		 * We can't use memcpy here: the decimal dot goes between these two +		 * digits. +		 */ +		result[index + olength - i] = DIGIT_TABLE[c + 1]; +		result[index] = DIGIT_TABLE[c]; +	} +	else +	{ +		result[index] = (char) ('0' + output2); +	} + +	/* Print decimal point if needed. */ +	if (olength > 1) +	{ +		result[index + 1] = '.'; +		index += olength + 1; +	} +	else +	{ +		++index; +	} + +	/* Print the exponent. 
*/ +	result[index++] = 'e'; +	if (exp < 0) +	{ +		result[index++] = '-'; +		exp = -exp; +	} +	else +		result[index++] = '+'; + +	if (exp >= 100) +	{ +		const int32 c = exp % 10; + +		memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2); +		result[index + 2] = (char) ('0' + c); +		index += 3; +	} +	else +	{ +		memcpy(result + index, DIGIT_TABLE + 2 * exp, 2); +		index += 2; +	} + +	return index; +} + +static inline bool +d2d_small_int(const uint64 ieeeMantissa, +			  const uint32 ieeeExponent, +			  floating_decimal_64 *v) +{ +	const int32 e2 = (int32) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS; + +	/* +	 * Avoid using multiple "return false;" here since it tends to provoke the +	 * compiler into inlining multiple copies of d2d, which is undesirable. +	 */ + +	if (e2 >= -DOUBLE_MANTISSA_BITS && e2 <= 0) +	{ +		/*---- +		 * Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: +		 *   1 <= f = m2 / 2^-e2 < 2^53. +		 * +		 * Test if the lower -e2 bits of the significand are 0, i.e. whether +		 * the fraction is 0. We can use ieeeMantissa here, since the implied +		 * 1 bit can never be tested by this; the implied 1 can only be part +		 * of a fraction if e2 < -DOUBLE_MANTISSA_BITS which we already +		 * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -53) +		 */ +		const uint64 mask = (UINT64CONST(1) << -e2) - 1; +		const uint64 fraction = ieeeMantissa & mask; + +		if (fraction == 0) +		{ +			/*---- +			 * f is an integer in the range [1, 2^53). +			 * Note: mantissa might contain trailing (decimal) 0's. +			 * Note: since 2^53 < 10^16, there is no need to adjust +			 * decimalLength(). 
+			 */ +			const uint64 m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa; + +			v->mantissa = m2 >> -e2; +			v->exponent = 0; +			return true; +		} +	} + +	return false; +} + +/* + * Store the shortest decimal representation of the given double as an + * UNTERMINATED string in the caller's supplied buffer (which must be at least + * DOUBLE_SHORTEST_DECIMAL_LEN-1 bytes long). + * + * Returns the number of bytes stored. + */ +int +double_to_shortest_decimal_bufn(double f, char *result) +{ +	/* +	 * Step 1: Decode the floating-point number, and unify normalized and +	 * subnormal cases. +	 */ +	const uint64 bits = double_to_bits(f); + +	/* Decode bits into sign, mantissa, and exponent. */ +	const bool	ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0; +	const uint64 ieeeMantissa = bits & ((UINT64CONST(1) << DOUBLE_MANTISSA_BITS) - 1); +	const uint32 ieeeExponent = (uint32) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1)); + +	/* Case distinction; exit early for the easy cases. */ +	if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) +	{ +		return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0)); +	} + +	floating_decimal_64 v; +	const bool	isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v); + +	if (!isSmallInt) +	{ +		v = d2d(ieeeMantissa, ieeeExponent); +	} + +	return to_chars(v, ieeeSign, result); +} + +/* + * Store the shortest decimal representation of the given double as a + * null-terminated string in the caller's supplied buffer (which must be at + * least DOUBLE_SHORTEST_DECIMAL_LEN bytes long). + * + * Returns the string length. + */ +int +double_to_shortest_decimal_buf(double f, char *result) +{ +	const int	index = double_to_shortest_decimal_bufn(f, result); + +	/* Terminate the string. 
*/ +	Assert(index < DOUBLE_SHORTEST_DECIMAL_LEN); +	result[index] = '\0'; +	return index; +} + +/* + * Return the shortest decimal representation as a null-terminated palloc'd + * string (outside the backend, uses malloc() instead). + * + * Caller is responsible for freeing the result. + */ +char * +double_to_shortest_decimal(double f) +{ +	char	   *const result = (char *) palloc(DOUBLE_SHORTEST_DECIMAL_LEN); + +	double_to_shortest_decimal_buf(f, result); +	return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s_full_table.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s_full_table.h new file mode 100644 index 00000000000..23f5e9a45e2 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s_full_table.h @@ -0,0 +1,358 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for double precision. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/d2s_full_table.h + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + *     (See accompanying file LICENSE-Apache or copy at + *      http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + *     (See accompanying file LICENSE-Boost or copy at + *      https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
+ * + *--------------------------------------------------------------------------- + */ + +#ifndef RYU_D2S_FULL_TABLE_H +#define RYU_D2S_FULL_TABLE_H + +/* + * These tables are generated (by the upstream) using PrintDoubleLookupTable + * from the upstream sources at github.com/ulfjack/ryu, and then modified (by + * us) by adding UINT64CONST. + */ +static const uint64 DOUBLE_POW5_INV_SPLIT[292][2] = { +	{UINT64CONST(1), UINT64CONST(288230376151711744)}, {UINT64CONST(3689348814741910324), UINT64CONST(230584300921369395)}, +	{UINT64CONST(2951479051793528259), UINT64CONST(184467440737095516)}, {UINT64CONST(17118578500402463900), UINT64CONST(147573952589676412)}, +	{UINT64CONST(12632330341676300947), UINT64CONST(236118324143482260)}, {UINT64CONST(10105864273341040758), UINT64CONST(188894659314785808)}, +	{UINT64CONST(15463389048156653253), UINT64CONST(151115727451828646)}, {UINT64CONST(17362724847566824558), UINT64CONST(241785163922925834)}, +	{UINT64CONST(17579528692795369969), UINT64CONST(193428131138340667)}, {UINT64CONST(6684925324752475329), UINT64CONST(154742504910672534)}, +	{UINT64CONST(18074578149087781173), UINT64CONST(247588007857076054)}, {UINT64CONST(18149011334012135262), UINT64CONST(198070406285660843)}, +	{UINT64CONST(3451162622983977240), UINT64CONST(158456325028528675)}, {UINT64CONST(5521860196774363583), UINT64CONST(253530120045645880)}, +	{UINT64CONST(4417488157419490867), UINT64CONST(202824096036516704)}, {UINT64CONST(7223339340677503017), UINT64CONST(162259276829213363)}, +	{UINT64CONST(7867994130342094503), UINT64CONST(259614842926741381)}, {UINT64CONST(2605046489531765280), UINT64CONST(207691874341393105)}, +	{UINT64CONST(2084037191625412224), UINT64CONST(166153499473114484)}, {UINT64CONST(10713157136084480204), UINT64CONST(265845599156983174)}, +	{UINT64CONST(12259874523609494487), UINT64CONST(212676479325586539)}, {UINT64CONST(13497248433629505913), UINT64CONST(170141183460469231)}, +	{UINT64CONST(14216899864323388813), 
UINT64CONST(272225893536750770)}, {UINT64CONST(11373519891458711051), UINT64CONST(217780714829400616)}, +	{UINT64CONST(5409467098425058518), UINT64CONST(174224571863520493)}, {UINT64CONST(4965798542738183305), UINT64CONST(278759314981632789)}, +	{UINT64CONST(7661987648932456967), UINT64CONST(223007451985306231)}, {UINT64CONST(2440241304404055250), UINT64CONST(178405961588244985)}, +	{UINT64CONST(3904386087046488400), UINT64CONST(285449538541191976)}, {UINT64CONST(17880904128604832013), UINT64CONST(228359630832953580)}, +	{UINT64CONST(14304723302883865611), UINT64CONST(182687704666362864)}, {UINT64CONST(15133127457049002812), UINT64CONST(146150163733090291)}, +	{UINT64CONST(16834306301794583852), UINT64CONST(233840261972944466)}, {UINT64CONST(9778096226693756759), UINT64CONST(187072209578355573)}, +	{UINT64CONST(15201174610838826053), UINT64CONST(149657767662684458)}, {UINT64CONST(2185786488890659746), UINT64CONST(239452428260295134)}, +	{UINT64CONST(5437978005854438120), UINT64CONST(191561942608236107)}, {UINT64CONST(15418428848909281466), UINT64CONST(153249554086588885)}, +	{UINT64CONST(6222742084545298729), UINT64CONST(245199286538542217)}, {UINT64CONST(16046240111861969953), UINT64CONST(196159429230833773)}, +	{UINT64CONST(1768945645263844993), UINT64CONST(156927543384667019)}, {UINT64CONST(10209010661905972635), UINT64CONST(251084069415467230)}, +	{UINT64CONST(8167208529524778108), UINT64CONST(200867255532373784)}, {UINT64CONST(10223115638361732810), UINT64CONST(160693804425899027)}, +	{UINT64CONST(1599589762411131202), UINT64CONST(257110087081438444)}, {UINT64CONST(4969020624670815285), UINT64CONST(205688069665150755)}, +	{UINT64CONST(3975216499736652228), UINT64CONST(164550455732120604)}, {UINT64CONST(13739044029062464211), UINT64CONST(263280729171392966)}, +	{UINT64CONST(7301886408508061046), UINT64CONST(210624583337114373)}, {UINT64CONST(13220206756290269483), UINT64CONST(168499666669691498)}, +	{UINT64CONST(17462981995322520850), 
UINT64CONST(269599466671506397)}, {UINT64CONST(6591687966774196033), UINT64CONST(215679573337205118)}, +	{UINT64CONST(12652048002903177473), UINT64CONST(172543658669764094)}, {UINT64CONST(9175230360419352987), UINT64CONST(276069853871622551)}, +	{UINT64CONST(3650835473593572067), UINT64CONST(220855883097298041)}, {UINT64CONST(17678063637842498946), UINT64CONST(176684706477838432)}, +	{UINT64CONST(13527506561580357021), UINT64CONST(282695530364541492)}, {UINT64CONST(3443307619780464970), UINT64CONST(226156424291633194)}, +	{UINT64CONST(6443994910566282300), UINT64CONST(180925139433306555)}, {UINT64CONST(5155195928453025840), UINT64CONST(144740111546645244)}, +	{UINT64CONST(15627011115008661990), UINT64CONST(231584178474632390)}, {UINT64CONST(12501608892006929592), UINT64CONST(185267342779705912)}, +	{UINT64CONST(2622589484121723027), UINT64CONST(148213874223764730)}, {UINT64CONST(4196143174594756843), UINT64CONST(237142198758023568)}, +	{UINT64CONST(10735612169159626121), UINT64CONST(189713759006418854)}, {UINT64CONST(12277838550069611220), UINT64CONST(151771007205135083)}, +	{UINT64CONST(15955192865369467629), UINT64CONST(242833611528216133)}, {UINT64CONST(1696107848069843133), UINT64CONST(194266889222572907)}, +	{UINT64CONST(12424932722681605476), UINT64CONST(155413511378058325)}, {UINT64CONST(1433148282581017146), UINT64CONST(248661618204893321)}, +	{UINT64CONST(15903913885032455010), UINT64CONST(198929294563914656)}, {UINT64CONST(9033782293284053685), UINT64CONST(159143435651131725)}, +	{UINT64CONST(14454051669254485895), UINT64CONST(254629497041810760)}, {UINT64CONST(11563241335403588716), UINT64CONST(203703597633448608)}, +	{UINT64CONST(16629290697806691620), UINT64CONST(162962878106758886)}, {UINT64CONST(781423413297334329), UINT64CONST(260740604970814219)}, +	{UINT64CONST(4314487545379777786), UINT64CONST(208592483976651375)}, {UINT64CONST(3451590036303822229), UINT64CONST(166873987181321100)}, +	{UINT64CONST(5522544058086115566), 
UINT64CONST(266998379490113760)}, {UINT64CONST(4418035246468892453), UINT64CONST(213598703592091008)}, +	{UINT64CONST(10913125826658934609), UINT64CONST(170878962873672806)}, {UINT64CONST(10082303693170474728), UINT64CONST(273406340597876490)}, +	{UINT64CONST(8065842954536379782), UINT64CONST(218725072478301192)}, {UINT64CONST(17520720807854834795), UINT64CONST(174980057982640953)}, +	{UINT64CONST(5897060404116273733), UINT64CONST(279968092772225526)}, {UINT64CONST(1028299508551108663), UINT64CONST(223974474217780421)}, +	{UINT64CONST(15580034865808528224), UINT64CONST(179179579374224336)}, {UINT64CONST(17549358155809824511), UINT64CONST(286687326998758938)}, +	{UINT64CONST(2971440080422128639), UINT64CONST(229349861599007151)}, {UINT64CONST(17134547323305344204), UINT64CONST(183479889279205720)}, +	{UINT64CONST(13707637858644275364), UINT64CONST(146783911423364576)}, {UINT64CONST(14553522944347019935), UINT64CONST(234854258277383322)}, +	{UINT64CONST(4264120725993795302), UINT64CONST(187883406621906658)}, {UINT64CONST(10789994210278856888), UINT64CONST(150306725297525326)}, +	{UINT64CONST(9885293106962350374), UINT64CONST(240490760476040522)}, {UINT64CONST(529536856086059653), UINT64CONST(192392608380832418)}, +	{UINT64CONST(7802327114352668369), UINT64CONST(153914086704665934)}, {UINT64CONST(1415676938738538420), UINT64CONST(246262538727465495)}, +	{UINT64CONST(1132541550990830736), UINT64CONST(197010030981972396)}, {UINT64CONST(15663428499760305882), UINT64CONST(157608024785577916)}, +	{UINT64CONST(17682787970132668764), UINT64CONST(252172839656924666)}, {UINT64CONST(10456881561364224688), UINT64CONST(201738271725539733)}, +	{UINT64CONST(15744202878575200397), UINT64CONST(161390617380431786)}, {UINT64CONST(17812026976236499989), UINT64CONST(258224987808690858)}, +	{UINT64CONST(3181575136763469022), UINT64CONST(206579990246952687)}, {UINT64CONST(13613306553636506187), UINT64CONST(165263992197562149)}, +	{UINT64CONST(10713244041592678929), 
UINT64CONST(264422387516099439)}, {UINT64CONST(12259944048016053467), UINT64CONST(211537910012879551)}, +	{UINT64CONST(6118606423670932450), UINT64CONST(169230328010303641)}, {UINT64CONST(2411072648389671274), UINT64CONST(270768524816485826)}, +	{UINT64CONST(16686253377679378312), UINT64CONST(216614819853188660)}, {UINT64CONST(13349002702143502650), UINT64CONST(173291855882550928)}, +	{UINT64CONST(17669055508687693916), UINT64CONST(277266969412081485)}, {UINT64CONST(14135244406950155133), UINT64CONST(221813575529665188)}, +	{UINT64CONST(240149081334393137), UINT64CONST(177450860423732151)}, {UINT64CONST(11452284974360759988), UINT64CONST(283921376677971441)}, +	{UINT64CONST(5472479164746697667), UINT64CONST(227137101342377153)}, {UINT64CONST(11756680961281178780), UINT64CONST(181709681073901722)}, +	{UINT64CONST(2026647139541122378), UINT64CONST(145367744859121378)}, {UINT64CONST(18000030682233437097), UINT64CONST(232588391774594204)}, +	{UINT64CONST(18089373360528660001), UINT64CONST(186070713419675363)}, {UINT64CONST(3403452244197197031), UINT64CONST(148856570735740291)}, +	{UINT64CONST(16513570034941246220), UINT64CONST(238170513177184465)}, {UINT64CONST(13210856027952996976), UINT64CONST(190536410541747572)}, +	{UINT64CONST(3189987192878576934), UINT64CONST(152429128433398058)}, {UINT64CONST(1414630693863812771), UINT64CONST(243886605493436893)}, +	{UINT64CONST(8510402184574870864), UINT64CONST(195109284394749514)}, {UINT64CONST(10497670562401807014), UINT64CONST(156087427515799611)}, +	{UINT64CONST(9417575270359070576), UINT64CONST(249739884025279378)}, {UINT64CONST(14912757845771077107), UINT64CONST(199791907220223502)}, +	{UINT64CONST(4551508647133041040), UINT64CONST(159833525776178802)}, {UINT64CONST(10971762650154775986), UINT64CONST(255733641241886083)}, +	{UINT64CONST(16156107749607641435), UINT64CONST(204586912993508866)}, {UINT64CONST(9235537384944202825), UINT64CONST(163669530394807093)}, +	{UINT64CONST(11087511001168814197), 
UINT64CONST(261871248631691349)}, {UINT64CONST(12559357615676961681), UINT64CONST(209496998905353079)}, +	{UINT64CONST(13736834907283479668), UINT64CONST(167597599124282463)}, {UINT64CONST(18289587036911657145), UINT64CONST(268156158598851941)}, +	{UINT64CONST(10942320814787415393), UINT64CONST(214524926879081553)}, {UINT64CONST(16132554281313752961), UINT64CONST(171619941503265242)}, +	{UINT64CONST(11054691591134363444), UINT64CONST(274591906405224388)}, {UINT64CONST(16222450902391311402), UINT64CONST(219673525124179510)}, +	{UINT64CONST(12977960721913049122), UINT64CONST(175738820099343608)}, {UINT64CONST(17075388340318968271), UINT64CONST(281182112158949773)}, +	{UINT64CONST(2592264228029443648), UINT64CONST(224945689727159819)}, {UINT64CONST(5763160197165465241), UINT64CONST(179956551781727855)}, +	{UINT64CONST(9221056315464744386), UINT64CONST(287930482850764568)}, {UINT64CONST(14755542681855616155), UINT64CONST(230344386280611654)}, +	{UINT64CONST(15493782960226403247), UINT64CONST(184275509024489323)}, {UINT64CONST(1326979923955391628), UINT64CONST(147420407219591459)}, +	{UINT64CONST(9501865507812447252), UINT64CONST(235872651551346334)}, {UINT64CONST(11290841220991868125), UINT64CONST(188698121241077067)}, +	{UINT64CONST(1653975347309673853), UINT64CONST(150958496992861654)}, {UINT64CONST(10025058185179298811), UINT64CONST(241533595188578646)}, +	{UINT64CONST(4330697733401528726), UINT64CONST(193226876150862917)}, {UINT64CONST(14532604630946953951), UINT64CONST(154581500920690333)}, +	{UINT64CONST(1116074521063664381), UINT64CONST(247330401473104534)}, {UINT64CONST(4582208431592841828), UINT64CONST(197864321178483627)}, +	{UINT64CONST(14733813189500004432), UINT64CONST(158291456942786901)}, {UINT64CONST(16195403473716186445), UINT64CONST(253266331108459042)}, +	{UINT64CONST(5577625149489128510), UINT64CONST(202613064886767234)}, {UINT64CONST(8151448934333213131), UINT64CONST(162090451909413787)}, +	{UINT64CONST(16731667109675051333), 
UINT64CONST(259344723055062059)}, {UINT64CONST(17074682502481951390), UINT64CONST(207475778444049647)}, +	{UINT64CONST(6281048372501740465), UINT64CONST(165980622755239718)}, {UINT64CONST(6360328581260874421), UINT64CONST(265568996408383549)}, +	{UINT64CONST(8777611679750609860), UINT64CONST(212455197126706839)}, {UINT64CONST(10711438158542398211), UINT64CONST(169964157701365471)}, +	{UINT64CONST(9759603424184016492), UINT64CONST(271942652322184754)}, {UINT64CONST(11497031554089123517), UINT64CONST(217554121857747803)}, +	{UINT64CONST(16576322872755119460), UINT64CONST(174043297486198242)}, {UINT64CONST(11764721337440549842), UINT64CONST(278469275977917188)}, +	{UINT64CONST(16790474699436260520), UINT64CONST(222775420782333750)}, {UINT64CONST(13432379759549008416), UINT64CONST(178220336625867000)}, +	{UINT64CONST(3045063541568861850), UINT64CONST(285152538601387201)}, {UINT64CONST(17193446092222730773), UINT64CONST(228122030881109760)}, +	{UINT64CONST(13754756873778184618), UINT64CONST(182497624704887808)}, {UINT64CONST(18382503128506368341), UINT64CONST(145998099763910246)}, +	{UINT64CONST(3586563302416817083), UINT64CONST(233596959622256395)}, {UINT64CONST(2869250641933453667), UINT64CONST(186877567697805116)}, +	{UINT64CONST(17052795772514404226), UINT64CONST(149502054158244092)}, {UINT64CONST(12527077977055405469), UINT64CONST(239203286653190548)}, +	{UINT64CONST(17400360011128145022), UINT64CONST(191362629322552438)}, {UINT64CONST(2852241564676785048), UINT64CONST(153090103458041951)}, +	{UINT64CONST(15631632947708587046), UINT64CONST(244944165532867121)}, {UINT64CONST(8815957543424959314), UINT64CONST(195955332426293697)}, +	{UINT64CONST(18120812478965698421), UINT64CONST(156764265941034957)}, {UINT64CONST(14235904707377476180), UINT64CONST(250822825505655932)}, +	{UINT64CONST(4010026136418160298), UINT64CONST(200658260404524746)}, {UINT64CONST(17965416168102169531), UINT64CONST(160526608323619796)}, +	{UINT64CONST(2919224165770098987), 
UINT64CONST(256842573317791675)}, {UINT64CONST(2335379332616079190), UINT64CONST(205474058654233340)}, +	{UINT64CONST(1868303466092863352), UINT64CONST(164379246923386672)}, {UINT64CONST(6678634360490491686), UINT64CONST(263006795077418675)}, +	{UINT64CONST(5342907488392393349), UINT64CONST(210405436061934940)}, {UINT64CONST(4274325990713914679), UINT64CONST(168324348849547952)}, +	{UINT64CONST(10528270399884173809), UINT64CONST(269318958159276723)}, {UINT64CONST(15801313949391159694), UINT64CONST(215455166527421378)}, +	{UINT64CONST(1573004715287196786), UINT64CONST(172364133221937103)}, {UINT64CONST(17274202803427156150), UINT64CONST(275782613155099364)}, +	{UINT64CONST(17508711057483635243), UINT64CONST(220626090524079491)}, {UINT64CONST(10317620031244997871), UINT64CONST(176500872419263593)}, +	{UINT64CONST(12818843235250086271), UINT64CONST(282401395870821749)}, {UINT64CONST(13944423402941979340), UINT64CONST(225921116696657399)}, +	{UINT64CONST(14844887537095493795), UINT64CONST(180736893357325919)}, {UINT64CONST(15565258844418305359), UINT64CONST(144589514685860735)}, +	{UINT64CONST(6457670077359736959), UINT64CONST(231343223497377177)}, {UINT64CONST(16234182506113520537), UINT64CONST(185074578797901741)}, +	{UINT64CONST(9297997190148906106), UINT64CONST(148059663038321393)}, {UINT64CONST(11187446689496339446), UINT64CONST(236895460861314229)}, +	{UINT64CONST(12639306166338981880), UINT64CONST(189516368689051383)}, {UINT64CONST(17490142562555006151), UINT64CONST(151613094951241106)}, +	{UINT64CONST(2158786396894637579), UINT64CONST(242580951921985771)}, {UINT64CONST(16484424376483351356), UINT64CONST(194064761537588616)}, +	{UINT64CONST(9498190686444770762), UINT64CONST(155251809230070893)}, {UINT64CONST(11507756283569722895), UINT64CONST(248402894768113429)}, +	{UINT64CONST(12895553841597688639), UINT64CONST(198722315814490743)}, {UINT64CONST(17695140702761971558), UINT64CONST(158977852651592594)}, +	{UINT64CONST(17244178680193423523), 
UINT64CONST(254364564242548151)}, {UINT64CONST(10105994129412828495), UINT64CONST(203491651394038521)}, +	{UINT64CONST(4395446488788352473), UINT64CONST(162793321115230817)}, {UINT64CONST(10722063196803274280), UINT64CONST(260469313784369307)}, +	{UINT64CONST(1198952927958798777), UINT64CONST(208375451027495446)}, {UINT64CONST(15716557601334680315), UINT64CONST(166700360821996356)}, +	{UINT64CONST(17767794532651667857), UINT64CONST(266720577315194170)}, {UINT64CONST(14214235626121334286), UINT64CONST(213376461852155336)}, +	{UINT64CONST(7682039686155157106), UINT64CONST(170701169481724269)}, {UINT64CONST(1223217053622520399), UINT64CONST(273121871170758831)}, +	{UINT64CONST(15735968901865657612), UINT64CONST(218497496936607064)}, {UINT64CONST(16278123936234436413), UINT64CONST(174797997549285651)}, +	{UINT64CONST(219556594781725998), UINT64CONST(279676796078857043)}, {UINT64CONST(7554342905309201445), UINT64CONST(223741436863085634)}, +	{UINT64CONST(9732823138989271479), UINT64CONST(178993149490468507)}, {UINT64CONST(815121763415193074), UINT64CONST(286389039184749612)}, +	{UINT64CONST(11720143854957885429), UINT64CONST(229111231347799689)}, {UINT64CONST(13065463898708218666), UINT64CONST(183288985078239751)}, +	{UINT64CONST(6763022304224664610), UINT64CONST(146631188062591801)}, {UINT64CONST(3442138057275642729), UINT64CONST(234609900900146882)}, +	{UINT64CONST(13821756890046245153), UINT64CONST(187687920720117505)}, {UINT64CONST(11057405512036996122), UINT64CONST(150150336576094004)}, +	{UINT64CONST(6623802375033462826), UINT64CONST(240240538521750407)}, {UINT64CONST(16367088344252501231), UINT64CONST(192192430817400325)}, +	{UINT64CONST(13093670675402000985), UINT64CONST(153753944653920260)}, {UINT64CONST(2503129006933649959), UINT64CONST(246006311446272417)}, +	{UINT64CONST(13070549649772650937), UINT64CONST(196805049157017933)}, {UINT64CONST(17835137349301941396), UINT64CONST(157444039325614346)}, +	{UINT64CONST(2710778055689733971), 
UINT64CONST(251910462920982955)}, {UINT64CONST(2168622444551787177), UINT64CONST(201528370336786364)}, +	{UINT64CONST(5424246770383340065), UINT64CONST(161222696269429091)}, {UINT64CONST(1300097203129523457), UINT64CONST(257956314031086546)}, +	{UINT64CONST(15797473021471260058), UINT64CONST(206365051224869236)}, {UINT64CONST(8948629602435097724), UINT64CONST(165092040979895389)}, +	{UINT64CONST(3249760919670425388), UINT64CONST(264147265567832623)}, {UINT64CONST(9978506365220160957), UINT64CONST(211317812454266098)}, +	{UINT64CONST(15361502721659949412), UINT64CONST(169054249963412878)}, {UINT64CONST(2442311466204457120), UINT64CONST(270486799941460606)}, +	{UINT64CONST(16711244431931206989), UINT64CONST(216389439953168484)}, {UINT64CONST(17058344360286875914), UINT64CONST(173111551962534787)}, +	{UINT64CONST(12535955717491360170), UINT64CONST(276978483140055660)}, {UINT64CONST(10028764573993088136), UINT64CONST(221582786512044528)}, +	{UINT64CONST(15401709288678291155), UINT64CONST(177266229209635622)}, {UINT64CONST(9885339602917624555), UINT64CONST(283625966735416996)}, +	{UINT64CONST(4218922867592189321), UINT64CONST(226900773388333597)}, {UINT64CONST(14443184738299482427), UINT64CONST(181520618710666877)}, +	{UINT64CONST(4175850161155765295), UINT64CONST(145216494968533502)}, {UINT64CONST(10370709072591134795), UINT64CONST(232346391949653603)}, +	{UINT64CONST(15675264887556728482), UINT64CONST(185877113559722882)}, {UINT64CONST(5161514280561562140), UINT64CONST(148701690847778306)}, +	{UINT64CONST(879725219414678777), UINT64CONST(237922705356445290)}, {UINT64CONST(703780175531743021), UINT64CONST(190338164285156232)}, +	{UINT64CONST(11631070584651125387), UINT64CONST(152270531428124985)}, {UINT64CONST(162968861732249003), UINT64CONST(243632850284999977)}, +	{UINT64CONST(11198421533611530172), UINT64CONST(194906280227999981)}, {UINT64CONST(5269388412147313814), UINT64CONST(155925024182399985)}, +	{UINT64CONST(8431021459435702103), 
UINT64CONST(249480038691839976)}, {UINT64CONST(3055468352806651359), UINT64CONST(199584030953471981)}, +	{UINT64CONST(17201769941212962380), UINT64CONST(159667224762777584)}, {UINT64CONST(16454785461715008838), UINT64CONST(255467559620444135)}, +	{UINT64CONST(13163828369372007071), UINT64CONST(204374047696355308)}, {UINT64CONST(17909760324981426303), UINT64CONST(163499238157084246)}, +	{UINT64CONST(2830174816776909822), UINT64CONST(261598781051334795)}, {UINT64CONST(2264139853421527858), UINT64CONST(209279024841067836)}, +	{UINT64CONST(16568707141704863579), UINT64CONST(167423219872854268)}, {UINT64CONST(4373838538276319787), UINT64CONST(267877151796566830)}, +	{UINT64CONST(3499070830621055830), UINT64CONST(214301721437253464)}, {UINT64CONST(6488605479238754987), UINT64CONST(171441377149802771)}, +	{UINT64CONST(3003071137298187333), UINT64CONST(274306203439684434)}, {UINT64CONST(6091805724580460189), UINT64CONST(219444962751747547)}, +	{UINT64CONST(15941491023890099121), UINT64CONST(175555970201398037)}, {UINT64CONST(10748990379256517301), UINT64CONST(280889552322236860)}, +	{UINT64CONST(8599192303405213841), UINT64CONST(224711641857789488)}, {UINT64CONST(14258051472207991719), UINT64CONST(179769313486231590)} +}; + +static const uint64 DOUBLE_POW5_SPLIT[326][2] = { +	{UINT64CONST(0), UINT64CONST(72057594037927936)}, {UINT64CONST(0), UINT64CONST(90071992547409920)}, +	{UINT64CONST(0), UINT64CONST(112589990684262400)}, {UINT64CONST(0), UINT64CONST(140737488355328000)}, +	{UINT64CONST(0), UINT64CONST(87960930222080000)}, {UINT64CONST(0), UINT64CONST(109951162777600000)}, +	{UINT64CONST(0), UINT64CONST(137438953472000000)}, {UINT64CONST(0), UINT64CONST(85899345920000000)}, +	{UINT64CONST(0), UINT64CONST(107374182400000000)}, {UINT64CONST(0), UINT64CONST(134217728000000000)}, +	{UINT64CONST(0), UINT64CONST(83886080000000000)}, {UINT64CONST(0), UINT64CONST(104857600000000000)}, +	{UINT64CONST(0), UINT64CONST(131072000000000000)}, {UINT64CONST(0), 
UINT64CONST(81920000000000000)}, +	{UINT64CONST(0), UINT64CONST(102400000000000000)}, {UINT64CONST(0), UINT64CONST(128000000000000000)}, +	{UINT64CONST(0), UINT64CONST(80000000000000000)}, {UINT64CONST(0), UINT64CONST(100000000000000000)}, +	{UINT64CONST(0), UINT64CONST(125000000000000000)}, {UINT64CONST(0), UINT64CONST(78125000000000000)}, +	{UINT64CONST(0), UINT64CONST(97656250000000000)}, {UINT64CONST(0), UINT64CONST(122070312500000000)}, +	{UINT64CONST(0), UINT64CONST(76293945312500000)}, {UINT64CONST(0), UINT64CONST(95367431640625000)}, +	{UINT64CONST(0), UINT64CONST(119209289550781250)}, {UINT64CONST(4611686018427387904), UINT64CONST(74505805969238281)}, +	{UINT64CONST(10376293541461622784), UINT64CONST(93132257461547851)}, {UINT64CONST(8358680908399640576), UINT64CONST(116415321826934814)}, +	{UINT64CONST(612489549322387456), UINT64CONST(72759576141834259)}, {UINT64CONST(14600669991935148032), UINT64CONST(90949470177292823)}, +	{UINT64CONST(13639151471491547136), UINT64CONST(113686837721616029)}, {UINT64CONST(3213881284082270208), UINT64CONST(142108547152020037)}, +	{UINT64CONST(4314518811765112832), UINT64CONST(88817841970012523)}, {UINT64CONST(781462496279003136), UINT64CONST(111022302462515654)}, +	{UINT64CONST(10200200157203529728), UINT64CONST(138777878078144567)}, {UINT64CONST(13292654125893287936), UINT64CONST(86736173798840354)}, +	{UINT64CONST(7392445620511834112), UINT64CONST(108420217248550443)}, {UINT64CONST(4628871007212404736), UINT64CONST(135525271560688054)}, +	{UINT64CONST(16728102434789916672), UINT64CONST(84703294725430033)}, {UINT64CONST(7075069988205232128), UINT64CONST(105879118406787542)}, +	{UINT64CONST(18067209522111315968), UINT64CONST(132348898008484427)}, {UINT64CONST(8986162942105878528), UINT64CONST(82718061255302767)}, +	{UINT64CONST(6621017659204960256), UINT64CONST(103397576569128459)}, {UINT64CONST(3664586055578812416), UINT64CONST(129246970711410574)}, +	{UINT64CONST(16125424340018921472), UINT64CONST(80779356694631608)}, 
{UINT64CONST(1710036351314100224), UINT64CONST(100974195868289511)}, +	{UINT64CONST(15972603494424788992), UINT64CONST(126217744835361888)}, {UINT64CONST(9982877184015493120), UINT64CONST(78886090522101180)}, +	{UINT64CONST(12478596480019366400), UINT64CONST(98607613152626475)}, {UINT64CONST(10986559581596820096), UINT64CONST(123259516440783094)}, +	{UINT64CONST(2254913720070624656), UINT64CONST(77037197775489434)}, {UINT64CONST(12042014186943056628), UINT64CONST(96296497219361792)}, +	{UINT64CONST(15052517733678820785), UINT64CONST(120370621524202240)}, {UINT64CONST(9407823583549262990), UINT64CONST(75231638452626400)}, +	{UINT64CONST(11759779479436578738), UINT64CONST(94039548065783000)}, {UINT64CONST(14699724349295723422), UINT64CONST(117549435082228750)}, +	{UINT64CONST(4575641699882439235), UINT64CONST(73468396926392969)}, {UINT64CONST(10331238143280436948), UINT64CONST(91835496157991211)}, +	{UINT64CONST(8302361660673158281), UINT64CONST(114794370197489014)}, {UINT64CONST(1154580038986672043), UINT64CONST(143492962746861268)}, +	{UINT64CONST(9944984561221445835), UINT64CONST(89683101716788292)}, {UINT64CONST(12431230701526807293), UINT64CONST(112103877145985365)}, +	{UINT64CONST(1703980321626345405), UINT64CONST(140129846432481707)}, {UINT64CONST(17205888765512323542), UINT64CONST(87581154020301066)}, +	{UINT64CONST(12283988920035628619), UINT64CONST(109476442525376333)}, {UINT64CONST(1519928094762372062), UINT64CONST(136845553156720417)}, +	{UINT64CONST(12479170105294952299), UINT64CONST(85528470722950260)}, {UINT64CONST(15598962631618690374), UINT64CONST(106910588403687825)}, +	{UINT64CONST(5663645234241199255), UINT64CONST(133638235504609782)}, {UINT64CONST(17374836326682913246), UINT64CONST(83523897190381113)}, +	{UINT64CONST(7883487353071477846), UINT64CONST(104404871487976392)}, {UINT64CONST(9854359191339347308), UINT64CONST(130506089359970490)}, +	{UINT64CONST(10770660513014479971), UINT64CONST(81566305849981556)}, {UINT64CONST(13463325641268099964), 
UINT64CONST(101957882312476945)}, +	{UINT64CONST(2994098996302961243), UINT64CONST(127447352890596182)}, {UINT64CONST(15706369927971514489), UINT64CONST(79654595556622613)}, +	{UINT64CONST(5797904354682229399), UINT64CONST(99568244445778267)}, {UINT64CONST(2635694424925398845), UINT64CONST(124460305557222834)}, +	{UINT64CONST(6258995034005762182), UINT64CONST(77787690973264271)}, {UINT64CONST(3212057774079814824), UINT64CONST(97234613716580339)}, +	{UINT64CONST(17850130272881932242), UINT64CONST(121543267145725423)}, {UINT64CONST(18073860448192289507), UINT64CONST(75964541966078389)}, +	{UINT64CONST(8757267504958198172), UINT64CONST(94955677457597987)}, {UINT64CONST(6334898362770359811), UINT64CONST(118694596821997484)}, +	{UINT64CONST(13182683513586250689), UINT64CONST(74184123013748427)}, {UINT64CONST(11866668373555425458), UINT64CONST(92730153767185534)}, +	{UINT64CONST(5609963430089506015), UINT64CONST(115912692208981918)}, {UINT64CONST(17341285199088104971), UINT64CONST(72445432630613698)}, +	{UINT64CONST(12453234462005355406), UINT64CONST(90556790788267123)}, {UINT64CONST(10954857059079306353), UINT64CONST(113195988485333904)}, +	{UINT64CONST(13693571323849132942), UINT64CONST(141494985606667380)}, {UINT64CONST(17781854114260483896), UINT64CONST(88434366004167112)}, +	{UINT64CONST(3780573569116053255), UINT64CONST(110542957505208891)}, {UINT64CONST(114030942967678664), UINT64CONST(138178696881511114)}, +	{UINT64CONST(4682955357782187069), UINT64CONST(86361685550944446)}, {UINT64CONST(15077066234082509644), UINT64CONST(107952106938680557)}, +	{UINT64CONST(5011274737320973344), UINT64CONST(134940133673350697)}, {UINT64CONST(14661261756894078100), UINT64CONST(84337583545844185)}, +	{UINT64CONST(4491519140835433913), UINT64CONST(105421979432305232)}, {UINT64CONST(5614398926044292391), UINT64CONST(131777474290381540)}, +	{UINT64CONST(12732371365632458552), UINT64CONST(82360921431488462)}, {UINT64CONST(6692092170185797382), UINT64CONST(102951151789360578)}, +	
{UINT64CONST(17588487249587022536), UINT64CONST(128688939736700722)}, {UINT64CONST(15604490549419276989), UINT64CONST(80430587335437951)}, +	{UINT64CONST(14893927168346708332), UINT64CONST(100538234169297439)}, {UINT64CONST(14005722942005997511), UINT64CONST(125672792711621799)}, +	{UINT64CONST(15671105866394830300), UINT64CONST(78545495444763624)}, {UINT64CONST(1142138259283986260), UINT64CONST(98181869305954531)}, +	{UINT64CONST(15262730879387146537), UINT64CONST(122727336632443163)}, {UINT64CONST(7233363790403272633), UINT64CONST(76704585395276977)}, +	{UINT64CONST(13653390756431478696), UINT64CONST(95880731744096221)}, {UINT64CONST(3231680390257184658), UINT64CONST(119850914680120277)}, +	{UINT64CONST(4325643253124434363), UINT64CONST(74906821675075173)}, {UINT64CONST(10018740084832930858), UINT64CONST(93633527093843966)}, +	{UINT64CONST(3300053069186387764), UINT64CONST(117041908867304958)}, {UINT64CONST(15897591223523656064), UINT64CONST(73151193042065598)}, +	{UINT64CONST(10648616992549794273), UINT64CONST(91438991302581998)}, {UINT64CONST(4087399203832467033), UINT64CONST(114298739128227498)}, +	{UINT64CONST(14332621041645359599), UINT64CONST(142873423910284372)}, {UINT64CONST(18181260187883125557), UINT64CONST(89295889943927732)}, +	{UINT64CONST(4279831161144355331), UINT64CONST(111619862429909666)}, {UINT64CONST(14573160988285219972), UINT64CONST(139524828037387082)}, +	{UINT64CONST(13719911636105650386), UINT64CONST(87203017523366926)}, {UINT64CONST(7926517508277287175), UINT64CONST(109003771904208658)}, +	{UINT64CONST(684774848491833161), UINT64CONST(136254714880260823)}, {UINT64CONST(7345513307948477581), UINT64CONST(85159196800163014)}, +	{UINT64CONST(18405263671790372785), UINT64CONST(106448996000203767)}, {UINT64CONST(18394893571310578077), UINT64CONST(133061245000254709)}, +	{UINT64CONST(13802651491282805250), UINT64CONST(83163278125159193)}, {UINT64CONST(3418256308821342851), UINT64CONST(103954097656448992)}, +	{UINT64CONST(4272820386026678563), 
UINT64CONST(129942622070561240)}, {UINT64CONST(2670512741266674102), UINT64CONST(81214138794100775)}, +	{UINT64CONST(17173198981865506339), UINT64CONST(101517673492625968)}, {UINT64CONST(3019754653622331308), UINT64CONST(126897091865782461)}, +	{UINT64CONST(4193189667727651020), UINT64CONST(79310682416114038)}, {UINT64CONST(14464859121514339583), UINT64CONST(99138353020142547)}, +	{UINT64CONST(13469387883465536574), UINT64CONST(123922941275178184)}, {UINT64CONST(8418367427165960359), UINT64CONST(77451838296986365)}, +	{UINT64CONST(15134645302384838353), UINT64CONST(96814797871232956)}, {UINT64CONST(471562554271496325), UINT64CONST(121018497339041196)}, +	{UINT64CONST(9518098633274461011), UINT64CONST(75636560836900747)}, {UINT64CONST(7285937273165688360), UINT64CONST(94545701046125934)}, +	{UINT64CONST(18330793628311886258), UINT64CONST(118182126307657417)}, {UINT64CONST(4539216990053847055), UINT64CONST(73863828942285886)}, +	{UINT64CONST(14897393274422084627), UINT64CONST(92329786177857357)}, {UINT64CONST(4786683537745442072), UINT64CONST(115412232722321697)}, +	{UINT64CONST(14520892257159371055), UINT64CONST(72132645451451060)}, {UINT64CONST(18151115321449213818), UINT64CONST(90165806814313825)}, +	{UINT64CONST(8853836096529353561), UINT64CONST(112707258517892282)}, {UINT64CONST(1843923083806916143), UINT64CONST(140884073147365353)}, +	{UINT64CONST(12681666973447792349), UINT64CONST(88052545717103345)}, {UINT64CONST(2017025661527576725), UINT64CONST(110065682146379182)}, +	{UINT64CONST(11744654113764246714), UINT64CONST(137582102682973977)}, {UINT64CONST(422879793461572340), UINT64CONST(85988814176858736)}, +	{UINT64CONST(528599741826965425), UINT64CONST(107486017721073420)}, {UINT64CONST(660749677283706782), UINT64CONST(134357522151341775)}, +	{UINT64CONST(7330497575943398595), UINT64CONST(83973451344588609)}, {UINT64CONST(13774807988356636147), UINT64CONST(104966814180735761)}, +	{UINT64CONST(3383451930163631472), UINT64CONST(131208517725919702)}, 
{UINT64CONST(15949715511634433382), UINT64CONST(82005323578699813)}, +	{UINT64CONST(6102086334260878016), UINT64CONST(102506654473374767)}, {UINT64CONST(3015921899398709616), UINT64CONST(128133318091718459)}, +	{UINT64CONST(18025852251620051174), UINT64CONST(80083323807324036)}, {UINT64CONST(4085571240815512351), UINT64CONST(100104154759155046)}, +	{UINT64CONST(14330336087874166247), UINT64CONST(125130193448943807)}, {UINT64CONST(15873989082562435760), UINT64CONST(78206370905589879)}, +	{UINT64CONST(15230800334775656796), UINT64CONST(97757963631987349)}, {UINT64CONST(5203442363187407284), UINT64CONST(122197454539984187)}, +	{UINT64CONST(946308467778435600), UINT64CONST(76373409087490117)}, {UINT64CONST(5794571603150432404), UINT64CONST(95466761359362646)}, +	{UINT64CONST(16466586540792816313), UINT64CONST(119333451699203307)}, {UINT64CONST(7985773578781816244), UINT64CONST(74583407312002067)}, +	{UINT64CONST(5370530955049882401), UINT64CONST(93229259140002584)}, {UINT64CONST(6713163693812353001), UINT64CONST(116536573925003230)}, +	{UINT64CONST(18030785363914884337), UINT64CONST(72835358703127018)}, {UINT64CONST(13315109668038829614), UINT64CONST(91044198378908773)}, +	{UINT64CONST(2808829029766373305), UINT64CONST(113805247973635967)}, {UINT64CONST(17346094342490130344), UINT64CONST(142256559967044958)}, +	{UINT64CONST(6229622945628943561), UINT64CONST(88910349979403099)}, {UINT64CONST(3175342663608791547), UINT64CONST(111137937474253874)}, +	{UINT64CONST(13192550366365765242), UINT64CONST(138922421842817342)}, {UINT64CONST(3633657960551215372), UINT64CONST(86826513651760839)}, +	{UINT64CONST(18377130505971182927), UINT64CONST(108533142064701048)}, {UINT64CONST(4524669058754427043), UINT64CONST(135666427580876311)}, +	{UINT64CONST(9745447189362598758), UINT64CONST(84791517238047694)}, {UINT64CONST(2958436949848472639), UINT64CONST(105989396547559618)}, +	{UINT64CONST(12921418224165366607), UINT64CONST(132486745684449522)}, {UINT64CONST(12687572408530742033), 
UINT64CONST(82804216052780951)}, +	{UINT64CONST(11247779492236039638), UINT64CONST(103505270065976189)}, {UINT64CONST(224666310012885835), UINT64CONST(129381587582470237)}, +	{UINT64CONST(2446259452971747599), UINT64CONST(80863492239043898)}, {UINT64CONST(12281196353069460307), UINT64CONST(101079365298804872)}, +	{UINT64CONST(15351495441336825384), UINT64CONST(126349206623506090)}, {UINT64CONST(14206370669262903769), UINT64CONST(78968254139691306)}, +	{UINT64CONST(8534591299723853903), UINT64CONST(98710317674614133)}, {UINT64CONST(15279925143082205283), UINT64CONST(123387897093267666)}, +	{UINT64CONST(14161639232853766206), UINT64CONST(77117435683292291)}, {UINT64CONST(13090363022639819853), UINT64CONST(96396794604115364)}, +	{UINT64CONST(16362953778299774816), UINT64CONST(120495993255144205)}, {UINT64CONST(12532689120651053212), UINT64CONST(75309995784465128)}, +	{UINT64CONST(15665861400813816515), UINT64CONST(94137494730581410)}, {UINT64CONST(10358954714162494836), UINT64CONST(117671868413226763)}, +	{UINT64CONST(4168503687137865320), UINT64CONST(73544917758266727)}, {UINT64CONST(598943590494943747), UINT64CONST(91931147197833409)}, +	{UINT64CONST(5360365506546067587), UINT64CONST(114913933997291761)}, {UINT64CONST(11312142901609972388), UINT64CONST(143642417496614701)}, +	{UINT64CONST(9375932322719926695), UINT64CONST(89776510935384188)}, {UINT64CONST(11719915403399908368), UINT64CONST(112220638669230235)}, +	{UINT64CONST(10038208235822497557), UINT64CONST(140275798336537794)}, {UINT64CONST(10885566165816448877), UINT64CONST(87672373960336121)}, +	{UINT64CONST(18218643725697949000), UINT64CONST(109590467450420151)}, {UINT64CONST(18161618638695048346), UINT64CONST(136988084313025189)}, +	{UINT64CONST(13656854658398099168), UINT64CONST(85617552695640743)}, {UINT64CONST(12459382304570236056), UINT64CONST(107021940869550929)}, +	{UINT64CONST(1739169825430631358), UINT64CONST(133777426086938662)}, {UINT64CONST(14922039196176308311), UINT64CONST(83610891304336663)}, +	
{UINT64CONST(14040862976792997485), UINT64CONST(104513614130420829)}, {UINT64CONST(3716020665709083144), UINT64CONST(130642017663026037)}, +	{UINT64CONST(4628355925281870917), UINT64CONST(81651261039391273)}, {UINT64CONST(10397130925029726550), UINT64CONST(102064076299239091)}, +	{UINT64CONST(8384727637859770284), UINT64CONST(127580095374048864)}, {UINT64CONST(5240454773662356427), UINT64CONST(79737559608780540)}, +	{UINT64CONST(6550568467077945534), UINT64CONST(99671949510975675)}, {UINT64CONST(3576524565420044014), UINT64CONST(124589936888719594)}, +	{UINT64CONST(6847013871814915412), UINT64CONST(77868710555449746)}, {UINT64CONST(17782139376623420074), UINT64CONST(97335888194312182)}, +	{UINT64CONST(13004302183924499284), UINT64CONST(121669860242890228)}, {UINT64CONST(17351060901807587860), UINT64CONST(76043662651806392)}, +	{UINT64CONST(3242082053549933210), UINT64CONST(95054578314757991)}, {UINT64CONST(17887660622219580224), UINT64CONST(118818222893447488)}, +	{UINT64CONST(11179787888887237640), UINT64CONST(74261389308404680)}, {UINT64CONST(13974734861109047050), UINT64CONST(92826736635505850)}, +	{UINT64CONST(8245046539531533005), UINT64CONST(116033420794382313)}, {UINT64CONST(16682369133275677888), UINT64CONST(72520887996488945)}, +	{UINT64CONST(7017903361312433648), UINT64CONST(90651109995611182)}, {UINT64CONST(17995751238495317868), UINT64CONST(113313887494513977)}, +	{UINT64CONST(8659630992836983623), UINT64CONST(141642359368142472)}, {UINT64CONST(5412269370523114764), UINT64CONST(88526474605089045)}, +	{UINT64CONST(11377022731581281359), UINT64CONST(110658093256361306)}, {UINT64CONST(4997906377621825891), UINT64CONST(138322616570451633)}, +	{UINT64CONST(14652906532082110942), UINT64CONST(86451635356532270)}, {UINT64CONST(9092761128247862869), UINT64CONST(108064544195665338)}, +	{UINT64CONST(2142579373455052779), UINT64CONST(135080680244581673)}, {UINT64CONST(12868327154477877747), UINT64CONST(84425425152863545)}, +	{UINT64CONST(2250350887815183471), 
UINT64CONST(105531781441079432)}, {UINT64CONST(2812938609768979339), UINT64CONST(131914726801349290)}, +	{UINT64CONST(6369772649532999991), UINT64CONST(82446704250843306)}, {UINT64CONST(17185587848771025797), UINT64CONST(103058380313554132)}, +	{UINT64CONST(3035240737254230630), UINT64CONST(128822975391942666)}, {UINT64CONST(6508711479211282048), UINT64CONST(80514359619964166)}, +	{UINT64CONST(17359261385868878368), UINT64CONST(100642949524955207)}, {UINT64CONST(17087390713908710056), UINT64CONST(125803686906194009)}, +	{UINT64CONST(3762090168551861929), UINT64CONST(78627304316371256)}, {UINT64CONST(4702612710689827411), UINT64CONST(98284130395464070)}, +	{UINT64CONST(15101637925217060072), UINT64CONST(122855162994330087)}, {UINT64CONST(16356052730901744401), UINT64CONST(76784476871456304)}, +	{UINT64CONST(1998321839917628885), UINT64CONST(95980596089320381)}, {UINT64CONST(7109588318324424010), UINT64CONST(119975745111650476)}, +	{UINT64CONST(13666864735807540814), UINT64CONST(74984840694781547)}, {UINT64CONST(12471894901332038114), UINT64CONST(93731050868476934)}, +	{UINT64CONST(6366496589810271835), UINT64CONST(117163813585596168)}, {UINT64CONST(3979060368631419896), UINT64CONST(73227383490997605)}, +	{UINT64CONST(9585511479216662775), UINT64CONST(91534229363747006)}, {UINT64CONST(2758517312166052660), UINT64CONST(114417786704683758)}, +	{UINT64CONST(12671518677062341634), UINT64CONST(143022233380854697)}, {UINT64CONST(1002170145522881665), UINT64CONST(89388895863034186)}, +	{UINT64CONST(10476084718758377889), UINT64CONST(111736119828792732)}, {UINT64CONST(13095105898447972362), UINT64CONST(139670149785990915)}, +	{UINT64CONST(5878598177316288774), UINT64CONST(87293843616244322)}, {UINT64CONST(16571619758500136775), UINT64CONST(109117304520305402)}, +	{UINT64CONST(11491152661270395161), UINT64CONST(136396630650381753)}, {UINT64CONST(264441385652915120), UINT64CONST(85247894156488596)}, +	{UINT64CONST(330551732066143900), UINT64CONST(106559867695610745)}, 
{UINT64CONST(5024875683510067779), UINT64CONST(133199834619513431)}, +	{UINT64CONST(10058076329834874218), UINT64CONST(83249896637195894)}, {UINT64CONST(3349223375438816964), UINT64CONST(104062370796494868)}, +	{UINT64CONST(4186529219298521205), UINT64CONST(130077963495618585)}, {UINT64CONST(14145795808130045513), UINT64CONST(81298727184761615)}, +	{UINT64CONST(13070558741735168987), UINT64CONST(101623408980952019)}, {UINT64CONST(11726512408741573330), UINT64CONST(127029261226190024)}, +	{UINT64CONST(7329070255463483331), UINT64CONST(79393288266368765)}, {UINT64CONST(13773023837756742068), UINT64CONST(99241610332960956)}, +	{UINT64CONST(17216279797195927585), UINT64CONST(124052012916201195)}, {UINT64CONST(8454331864033760789), UINT64CONST(77532508072625747)}, +	{UINT64CONST(5956228811614813082), UINT64CONST(96915635090782184)}, {UINT64CONST(7445286014518516353), UINT64CONST(121144543863477730)}, +	{UINT64CONST(9264989777501460624), UINT64CONST(75715339914673581)}, {UINT64CONST(16192923240304213684), UINT64CONST(94644174893341976)}, +	{UINT64CONST(1794409976670715490), UINT64CONST(118305218616677471)}, {UINT64CONST(8039035263060279037), UINT64CONST(73940761635423419)}, +	{UINT64CONST(5437108060397960892), UINT64CONST(92425952044279274)}, {UINT64CONST(16019757112352226923), UINT64CONST(115532440055349092)}, +	{UINT64CONST(788976158365366019), UINT64CONST(72207775034593183)}, {UINT64CONST(14821278253238871236), UINT64CONST(90259718793241478)}, +	{UINT64CONST(9303225779693813237), UINT64CONST(112824648491551848)}, {UINT64CONST(11629032224617266546), UINT64CONST(141030810614439810)}, +	{UINT64CONST(11879831158813179495), UINT64CONST(88144256634024881)}, {UINT64CONST(1014730893234310657), UINT64CONST(110180320792531102)}, +	{UINT64CONST(10491785653397664129), UINT64CONST(137725400990663877)}, {UINT64CONST(8863209042587234033), UINT64CONST(86078375619164923)}, +	{UINT64CONST(6467325284806654637), UINT64CONST(107597969523956154)}, {UINT64CONST(17307528642863094104), 
UINT64CONST(134497461904945192)}, +	{UINT64CONST(10817205401789433815), UINT64CONST(84060913690590745)}, {UINT64CONST(18133192770664180173), UINT64CONST(105076142113238431)}, +	{UINT64CONST(18054804944902837312), UINT64CONST(131345177641548039)}, {UINT64CONST(18201782118205355176), UINT64CONST(82090736025967524)}, +	{UINT64CONST(4305483574047142354), UINT64CONST(102613420032459406)}, {UINT64CONST(14605226504413703751), UINT64CONST(128266775040574257)}, +	{UINT64CONST(2210737537617482988), UINT64CONST(80166734400358911)}, {UINT64CONST(16598479977304017447), UINT64CONST(100208418000448638)}, +	{UINT64CONST(11524727934775246001), UINT64CONST(125260522500560798)}, {UINT64CONST(2591268940807140847), UINT64CONST(78287826562850499)}, +	{UINT64CONST(17074144231291089770), UINT64CONST(97859783203563123)}, {UINT64CONST(16730994270686474309), UINT64CONST(122324729004453904)}, +	{UINT64CONST(10456871419179046443), UINT64CONST(76452955627783690)}, {UINT64CONST(3847717237119032246), UINT64CONST(95566194534729613)}, +	{UINT64CONST(9421332564826178211), UINT64CONST(119457743168412016)}, {UINT64CONST(5888332853016361382), UINT64CONST(74661089480257510)}, +	{UINT64CONST(16583788103125227536), UINT64CONST(93326361850321887)}, {UINT64CONST(16118049110479146516), UINT64CONST(116657952312902359)}, +	{UINT64CONST(16991309721690548428), UINT64CONST(72911220195563974)}, {UINT64CONST(12015765115258409727), UINT64CONST(91139025244454968)}, +	{UINT64CONST(15019706394073012159), UINT64CONST(113923781555568710)}, {UINT64CONST(9551260955736489391), UINT64CONST(142404726944460888)}, +	{UINT64CONST(5969538097335305869), UINT64CONST(89002954340288055)}, {UINT64CONST(2850236603241744433), UINT64CONST(111253692925360069)} +}; + +#endif							/* RYU_D2S_FULL_TABLE_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s_intrinsics.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s_intrinsics.h new file mode 100644 index 00000000000..ae0f28dbb2f --- /dev/null +++ 
b/yql/essentials/parser/pg_wrapper/postgresql/src/common/d2s_intrinsics.h @@ -0,0 +1,202 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for double precision. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/d2s_intrinsics.h + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + *     (See accompanying file LICENSE-Apache or copy at + *      http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + *     (See accompanying file LICENSE-Boost or copy at + *      https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ +#ifndef RYU_D2S_INTRINSICS_H +#define RYU_D2S_INTRINSICS_H + +#if defined(HAS_64_BIT_INTRINSICS) + +#include <intrin.h> +/* + * Returns the low 64 bits of the 128-bit product a * b and stores the high + * 64 bits in *productHi, by delegating to the _umul128 intrinsic. + */ +static inline uint64 +umul128(const uint64 a, const uint64 b, uint64 *const productHi) +{ +	return _umul128(a, b, productHi); +} +/* + * Returns the low 64 bits of the 128-bit value hi:lo shifted right by dist + * bits, by delegating to the __shiftright128 intrinsic. + */ +static inline uint64 +shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) +{ +	/* +	 * For the __shiftright128 intrinsic, the shift value is always modulo 64. +	 * In the current implementation of the double-precision version of Ryu, +	 * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0, +	 * the shift value is in the range [49, 58]. Otherwise in the range [2, +	 * 59].) 
Check this here in case a future change requires larger shift +	 * values. In this case this function needs to be adjusted. +	 */ +	Assert(dist < 64); +	return __shiftright128(lo, hi, (unsigned char) dist); +} + +#else							/* defined(HAS_64_BIT_INTRINSICS) */ +/* + * Portable replacement for the intrinsic: builds the 128-bit product a * b + * from four 32x32 -> 64 bit partial products.  Returns the low 64 bits and + * stores the high 64 bits in *productHi.  The intermediate sums mid1 and + * mid2 cannot wrap around, because each adds a value below 2^32 to a product + * of two 32-bit values. + */ +static inline uint64 +umul128(const uint64 a, const uint64 b, uint64 *const productHi) +{ +	/* +	 * The casts here help MSVC to avoid calls to the __allmul library +	 * function. +	 */ +	const uint32 aLo = (uint32) a; +	const uint32 aHi = (uint32) (a >> 32); +	const uint32 bLo = (uint32) b; +	const uint32 bHi = (uint32) (b >> 32); + +	const uint64 b00 = (uint64) aLo * bLo; +	const uint64 b01 = (uint64) aLo * bHi; +	const uint64 b10 = (uint64) aHi * bLo; +	const uint64 b11 = (uint64) aHi * bHi; + +	const uint32 b00Lo = (uint32) b00; +	const uint32 b00Hi = (uint32) (b00 >> 32); + +	const uint64 mid1 = b10 + b00Hi; +	const uint32 mid1Lo = (uint32) (mid1); +	const uint32 mid1Hi = (uint32) (mid1 >> 32); + +	const uint64 mid2 = b01 + mid1Lo; +	const uint32 mid2Lo = (uint32) (mid2); +	const uint32 mid2Hi = (uint32) (mid2 >> 32); + +	const uint64 pHi = b11 + mid1Hi + mid2Hi; +	const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo; + +	*productHi = pHi; +	return pLo; +} +/* + * Returns the low 64 bits of the 128-bit value hi:lo shifted right by dist + * bits.  Without RYU_32_BIT_PLATFORM, dist must be in 1..63: dist == 0 would + * make hi << (64 - dist) a shift by the full width of the type.  With + * RYU_32_BIT_PLATFORM, dist must be in 32..63, so only the upper 32 bits of + * lo contribute to the result. + */ +static inline uint64 +shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) +{ +	/* We don't need to handle the case dist >= 64 here (see above). */ +	Assert(dist < 64); +#if !defined(RYU_32_BIT_PLATFORM) +	Assert(dist > 0); +	return (hi << (64 - dist)) | (lo >> dist); +#else +	/* Avoid a 64-bit shift by taking advantage of the range of shift values. */ +	Assert(dist >= 32); +	return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32)); +#endif +} + +#endif							/* // defined(HAS_64_BIT_INTRINSICS) */ + +#ifdef RYU_32_BIT_PLATFORM + +/*  Returns the high 64 bits of the 128-bit product of a and b. */ +static inline uint64 +umulh(const uint64 a, const uint64 b) +{ +	/* +	 * Reuse the umul128 implementation. 
Optimizers will likely eliminate the +	 * instructions used to compute the low part of the product. +	 */ +	uint64		hi; + +	umul128(a, b, &hi); +	return hi; +} + +/*---- + *  On 32-bit platforms, compilers typically generate calls to library + *  functions for 64-bit divisions, even if the divisor is a constant. + * + *  E.g.: + *  https://bugs.llvm.org/show_bug.cgi?id=37932 + *  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958 + *  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443 + * + *  The functions here perform division-by-constant using multiplications + *  in the same way as 64-bit compilers would do. + * + *  NB: + *  The multipliers and shift values are the ones generated by clang x64 + *  for expressions like x/5, x/10, etc. + *---- + */ +/* Multiply-high-and-shift replacement for x / 5. */ +static inline uint64 +div5(const uint64 x) +{ +	return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2; +} +/* Multiply-high-and-shift replacement for x / 10. */ +static inline uint64 +div10(const uint64 x) +{ +	return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3; +} +/* Replacement for x / 100, computed as (x >> 2) / 25 by multiply-high and shift. */ +static inline uint64 +div100(const uint64 x) +{ +	return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2; +} +/* Multiply-high-and-shift replacement for x / 100000000. */ +static inline uint64 +div1e8(const uint64 x) +{ +	return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26; +} + +#else							/* RYU_32_BIT_PLATFORM */ +/* Not a 32-bit platform: use plain 64-bit division by a constant. */ +static inline uint64 +div5(const uint64 x) +{ +	return x / 5; +} + +static inline uint64 +div10(const uint64 x) +{ +	return x / 10; +} + +static inline uint64 +div100(const uint64 x) +{ +	return x / 100; +} + +static inline uint64 +div1e8(const uint64 x) +{ +	return x / 100000000; +} + +#endif							/* RYU_32_BIT_PLATFORM */ + +#endif							/* RYU_D2S_INTRINSICS_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/digit_table.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/digit_table.h new file mode 100644 index 00000000000..483aa171424 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/digit_table.h @@ -0,0 +1,21 @@ +#ifndef RYU_DIGIT_TABLE_H +#define RYU_DIGIT_TABLE_H + +/* + * A 
table of all two-digit numbers. This is used to speed up decimal digit + * generation by copying pairs of digits into the final output. + */ +static const char DIGIT_TABLE[200] = { +	'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', +	'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', +	'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', +	'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', +	'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', +	'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', +	'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', +	'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', +	'8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', +	'9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' +}; + +#endif							/* RYU_DIGIT_TABLE_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/encnames.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/encnames.c new file mode 100644 index 00000000000..0412a8220ef --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/encnames.c @@ -0,0 +1,598 @@ +/*------------------------------------------------------------------------- + * + * encnames.c + *	  Encoding names and routines for working with them. 
+ * + * Portions Copyright (c) 2001-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/encnames.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include <ctype.h> +#include <unistd.h> + +#include "mb/pg_wchar.h" + + +/* ---------- + * All encoding names, sorted:		 *** A L P H A B E T I C *** + * + * All names must be without irrelevant chars, search routines use + * isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1 + * are always converted to 'iso88591'. All must be lower case. + * + * The table doesn't contain 'cs' aliases (like csISOLatin1). Are they + * needed? + * + * Each pg_encname entry pairs one normalized name with its pg_enc value. + * + * Karel Zak, Aug 2001 + * ---------- + */ +typedef struct pg_encname +{ +	const char *name; +	pg_enc		encoding; +} pg_encname; + +static const pg_encname pg_encname_tbl[] = +{ +	{ +		"abc", PG_WIN1258 +	},							/* alias for WIN1258 */ +	{ +		"alt", PG_WIN866 +	},							/* IBM866 */ +	{ +		"big5", PG_BIG5 +	},							/* Big5; Chinese for Taiwan multibyte set */ +	{ +		"euccn", PG_EUC_CN +	},							/* EUC-CN; Extended Unix Code for simplified +								 * Chinese */ +	{ +		"eucjis2004", PG_EUC_JIS_2004 +	},							/* EUC-JIS-2004; Extended UNIX Code fixed +								 * Width for Japanese, standard JIS X 0213 */ +	{ +		"eucjp", PG_EUC_JP +	},							/* EUC-JP; Extended UNIX Code fixed Width for +								 * Japanese, standard OSF */ +	{ +		"euckr", PG_EUC_KR +	},							/* EUC-KR; Extended Unix Code for Korean, KS +								 * X 1001 standard */ +	{ +		"euctw", PG_EUC_TW +	},							/* EUC-TW; Extended Unix Code for +								 * +								 * traditional Chinese */ +	{ +		"gb18030", PG_GB18030 +	},							/* GB18030;GB18030 */ +	{ +		"gbk", PG_GBK +	},							/* GBK; Chinese Windows CodePage 936 +								 * simplified Chinese */ +	{ +		"iso88591", PG_LATIN1 +	},							/* ISO-8859-1; RFC1345,KXS2 */ +	{ +		"iso885910", PG_LATIN6 +	},							/* ISO-8859-10; RFC1345,KXS2 */ +	{ +		"iso885913", PG_LATIN7 +	},							/* 
ISO-8859-13; RFC1345,KXS2 */ +	{ +		"iso885914", PG_LATIN8 +	},							/* ISO-8859-14; RFC1345,KXS2 */ +	{ +		"iso885915", PG_LATIN9 +	},							/* ISO-8859-15; RFC1345,KXS2 */ +	{ +		"iso885916", PG_LATIN10 +	},							/* ISO-8859-16; RFC1345,KXS2 */ +	{ +		"iso88592", PG_LATIN2 +	},							/* ISO-8859-2; RFC1345,KXS2 */ +	{ +		"iso88593", PG_LATIN3 +	},							/* ISO-8859-3; RFC1345,KXS2 */ +	{ +		"iso88594", PG_LATIN4 +	},							/* ISO-8859-4; RFC1345,KXS2 */ +	{ +		"iso88595", PG_ISO_8859_5 +	},							/* ISO-8859-5; RFC1345,KXS2 */ +	{ +		"iso88596", PG_ISO_8859_6 +	},							/* ISO-8859-6; RFC1345,KXS2 */ +	{ +		"iso88597", PG_ISO_8859_7 +	},							/* ISO-8859-7; RFC1345,KXS2 */ +	{ +		"iso88598", PG_ISO_8859_8 +	},							/* ISO-8859-8; RFC1345,KXS2 */ +	{ +		"iso88599", PG_LATIN5 +	},							/* ISO-8859-9; RFC1345,KXS2 */ +	{ +		"johab", PG_JOHAB +	},							/* JOHAB; Korean (Hangul) character +								 * encoding */ +	{ +		"koi8", PG_KOI8R +	},							/* _dirty_ alias for KOI8-R (backward +								 * compatibility) */ +	{ +		"koi8r", PG_KOI8R +	},							/* KOI8-R; RFC1489 */ +	{ +		"koi8u", PG_KOI8U +	},							/* KOI8-U; RFC2319 */ +	{ +		"latin1", PG_LATIN1 +	},							/* alias for ISO-8859-1 */ +	{ +		"latin10", PG_LATIN10 +	},							/* alias for ISO-8859-16 */ +	{ +		"latin2", PG_LATIN2 +	},							/* alias for ISO-8859-2 */ +	{ +		"latin3", PG_LATIN3 +	},							/* alias for ISO-8859-3 */ +	{ +		"latin4", PG_LATIN4 +	},							/* alias for ISO-8859-4 */ +	{ +		"latin5", PG_LATIN5 +	},							/* alias for ISO-8859-9 */ +	{ +		"latin6", PG_LATIN6 +	},							/* alias for ISO-8859-10 */ +	{ +		"latin7", PG_LATIN7 +	},							/* alias for ISO-8859-13 */ +	{ +		"latin8", PG_LATIN8 +	},							/* alias for ISO-8859-14 */ +	{ +		"latin9", PG_LATIN9 +	},							/* alias for ISO-8859-15 */ +	{ +		"mskanji", PG_SJIS +	},							/* alias for Shift_JIS */ +	{ +		"muleinternal", PG_MULE_INTERNAL +	}, +	{ +		"shiftjis", PG_SJIS +	},							/* Shift_JIS; JIS X 0202-1991 */ + +	{ +		
"shiftjis2004", PG_SHIFT_JIS_2004 +	},							/* SHIFT-JIS-2004; Shift JIS for Japanese, +								 * standard JIS X 0213 */ +	{ +		"sjis", PG_SJIS +	},							/* alias for Shift_JIS */ +	{ +		"sqlascii", PG_SQL_ASCII +	}, +	{ +		"tcvn", PG_WIN1258 +	},							/* alias for WIN1258 */ +	{ +		"tcvn5712", PG_WIN1258 +	},							/* alias for WIN1258 */ +	{ +		"uhc", PG_UHC +	},							/* UHC; Korean Windows CodePage 949 */ +	{ +		"unicode", PG_UTF8 +	},							/* alias for UTF8 */ +	{ +		"utf8", PG_UTF8 +	},							/* alias for UTF8 */ +	{ +		"vscii", PG_WIN1258 +	},							/* alias for WIN1258 */ +	{ +		"win", PG_WIN1251 +	},							/* _dirty_ alias for windows-1251 (backward +								 * compatibility) */ +	{ +		"win1250", PG_WIN1250 +	},							/* alias for Windows-1250 */ +	{ +		"win1251", PG_WIN1251 +	},							/* alias for Windows-1251 */ +	{ +		"win1252", PG_WIN1252 +	},							/* alias for Windows-1252 */ +	{ +		"win1253", PG_WIN1253 +	},							/* alias for Windows-1253 */ +	{ +		"win1254", PG_WIN1254 +	},							/* alias for Windows-1254 */ +	{ +		"win1255", PG_WIN1255 +	},							/* alias for Windows-1255 */ +	{ +		"win1256", PG_WIN1256 +	},							/* alias for Windows-1256 */ +	{ +		"win1257", PG_WIN1257 +	},							/* alias for Windows-1257 */ +	{ +		"win1258", PG_WIN1258 +	},							/* alias for Windows-1258 */ +	{ +		"win866", PG_WIN866 +	},							/* IBM866 */ +	{ +		"win874", PG_WIN874 +	},							/* alias for Windows-874 */ +	{ +		"win932", PG_SJIS +	},							/* alias for Shift_JIS */ +	{ +		"win936", PG_GBK +	},							/* alias for GBK */ +	{ +		"win949", PG_UHC +	},							/* alias for UHC */ +	{ +		"win950", PG_BIG5 +	},							/* alias for BIG5 */ +	{ +		"windows1250", PG_WIN1250 +	},							/* Windows-1250; Microsoft */ +	{ +		"windows1251", PG_WIN1251 +	},							/* Windows-1251; Microsoft */ +	{ +		"windows1252", PG_WIN1252 +	},							/* Windows-1252; Microsoft */ +	{ +		"windows1253", PG_WIN1253 +	},							/* Windows-1253; Microsoft */ +	{ +		"windows1254", PG_WIN1254 +	
},							/* Windows-1254; Microsoft */ +	{ +		"windows1255", PG_WIN1255 +	},							/* Windows-1255; Microsoft */ +	{ +		"windows1256", PG_WIN1256 +	},							/* Windows-1256; Microsoft */ +	{ +		"windows1257", PG_WIN1257 +	},							/* Windows-1257; Microsoft */ +	{ +		"windows1258", PG_WIN1258 +	},							/* Windows-1258; Microsoft */ +	{ +		"windows866", PG_WIN866 +	},							/* IBM866 */ +	{ +		"windows874", PG_WIN874 +	},							/* Windows-874; Microsoft */ +	{ +		"windows932", PG_SJIS +	},							/* alias for Shift_JIS */ +	{ +		"windows936", PG_GBK +	},							/* alias for GBK */ +	{ +		"windows949", PG_UHC +	},							/* alias for UHC */ +	{ +		"windows950", PG_BIG5 +	}							/* alias for BIG5 */ +}; + +/* ---------- + * These are "official" encoding names. + * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h) + * ---------- + */ +#ifndef WIN32 +#define DEF_ENC2NAME(name, codepage) { #name, PG_##name } +#else +#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage } +#endif + +const pg_enc2name pg_enc2name_tbl[] = +{ +	DEF_ENC2NAME(SQL_ASCII, 0), +	DEF_ENC2NAME(EUC_JP, 20932), +	DEF_ENC2NAME(EUC_CN, 20936), +	DEF_ENC2NAME(EUC_KR, 51949), +	DEF_ENC2NAME(EUC_TW, 0), +	DEF_ENC2NAME(EUC_JIS_2004, 20932), +	DEF_ENC2NAME(UTF8, 65001), +	DEF_ENC2NAME(MULE_INTERNAL, 0), +	DEF_ENC2NAME(LATIN1, 28591), +	DEF_ENC2NAME(LATIN2, 28592), +	DEF_ENC2NAME(LATIN3, 28593), +	DEF_ENC2NAME(LATIN4, 28594), +	DEF_ENC2NAME(LATIN5, 28599), +	DEF_ENC2NAME(LATIN6, 0), +	DEF_ENC2NAME(LATIN7, 0), +	DEF_ENC2NAME(LATIN8, 0), +	DEF_ENC2NAME(LATIN9, 28605), +	DEF_ENC2NAME(LATIN10, 0), +	DEF_ENC2NAME(WIN1256, 1256), +	DEF_ENC2NAME(WIN1258, 1258), +	DEF_ENC2NAME(WIN866, 866), +	DEF_ENC2NAME(WIN874, 874), +	DEF_ENC2NAME(KOI8R, 20866), +	DEF_ENC2NAME(WIN1251, 1251), +	DEF_ENC2NAME(WIN1252, 1252), +	DEF_ENC2NAME(ISO_8859_5, 28595), +	DEF_ENC2NAME(ISO_8859_6, 28596), +	DEF_ENC2NAME(ISO_8859_7, 28597), +	DEF_ENC2NAME(ISO_8859_8, 28598), +	DEF_ENC2NAME(WIN1250, 1250), +	
DEF_ENC2NAME(WIN1253, 1253), +	DEF_ENC2NAME(WIN1254, 1254), +	DEF_ENC2NAME(WIN1255, 1255), +	DEF_ENC2NAME(WIN1257, 1257), +	DEF_ENC2NAME(KOI8U, 21866), +	DEF_ENC2NAME(SJIS, 932), +	DEF_ENC2NAME(BIG5, 950), +	DEF_ENC2NAME(GBK, 936), +	DEF_ENC2NAME(UHC, 949), +	DEF_ENC2NAME(GB18030, 54936), +	DEF_ENC2NAME(JOHAB, 0), +	DEF_ENC2NAME(SHIFT_JIS_2004, 932) +}; + +/* ---------- + * These are encoding names for gettext. + * + * This covers all encodings except MULE_INTERNAL, which is alien to gettext. + * ---------- + */ +const pg_enc2gettext pg_enc2gettext_tbl[] = +{ +	{PG_SQL_ASCII, "US-ASCII"}, +	{PG_UTF8, "UTF-8"}, +	{PG_LATIN1, "LATIN1"}, +	{PG_LATIN2, "LATIN2"}, +	{PG_LATIN3, "LATIN3"}, +	{PG_LATIN4, "LATIN4"}, +	{PG_ISO_8859_5, "ISO-8859-5"}, +	{PG_ISO_8859_6, "ISO_8859-6"}, +	{PG_ISO_8859_7, "ISO-8859-7"}, +	{PG_ISO_8859_8, "ISO-8859-8"}, +	{PG_LATIN5, "LATIN5"}, +	{PG_LATIN6, "LATIN6"}, +	{PG_LATIN7, "LATIN7"}, +	{PG_LATIN8, "LATIN8"}, +	{PG_LATIN9, "LATIN-9"}, +	{PG_LATIN10, "LATIN10"}, +	{PG_KOI8R, "KOI8-R"}, +	{PG_KOI8U, "KOI8-U"}, +	{PG_WIN1250, "CP1250"}, +	{PG_WIN1251, "CP1251"}, +	{PG_WIN1252, "CP1252"}, +	{PG_WIN1253, "CP1253"}, +	{PG_WIN1254, "CP1254"}, +	{PG_WIN1255, "CP1255"}, +	{PG_WIN1256, "CP1256"}, +	{PG_WIN1257, "CP1257"}, +	{PG_WIN1258, "CP1258"}, +	{PG_WIN866, "CP866"}, +	{PG_WIN874, "CP874"}, +	{PG_EUC_CN, "EUC-CN"}, +	{PG_EUC_JP, "EUC-JP"}, +	{PG_EUC_KR, "EUC-KR"}, +	{PG_EUC_TW, "EUC-TW"}, +	{PG_EUC_JIS_2004, "EUC-JP"}, +	{PG_SJIS, "SHIFT-JIS"}, +	{PG_BIG5, "BIG5"}, +	{PG_GBK, "GBK"}, +	{PG_UHC, "UHC"}, +	{PG_GB18030, "GB18030"}, +	{PG_JOHAB, "JOHAB"}, +	{PG_SHIFT_JIS_2004, "SHIFT_JISX0213"}, +	{0, NULL} +}; + + +/* + * Table of encoding names for ICU (currently covers backend encodings only) + * + * Reference: <https://ssl.icu-project.org/icu-bin/convexp> + * + * NULL entries are not supported by ICU, or their mapping is unclear. 
+ */ +static const char *const pg_enc2icu_tbl[] = +{ +	NULL,						/* PG_SQL_ASCII */ +	"EUC-JP",					/* PG_EUC_JP */ +	"EUC-CN",					/* PG_EUC_CN */ +	"EUC-KR",					/* PG_EUC_KR */ +	"EUC-TW",					/* PG_EUC_TW */ +	NULL,						/* PG_EUC_JIS_2004 */ +	"UTF-8",					/* PG_UTF8 */ +	NULL,						/* PG_MULE_INTERNAL */ +	"ISO-8859-1",				/* PG_LATIN1 */ +	"ISO-8859-2",				/* PG_LATIN2 */ +	"ISO-8859-3",				/* PG_LATIN3 */ +	"ISO-8859-4",				/* PG_LATIN4 */ +	"ISO-8859-9",				/* PG_LATIN5 */ +	"ISO-8859-10",				/* PG_LATIN6 */ +	"ISO-8859-13",				/* PG_LATIN7 */ +	"ISO-8859-14",				/* PG_LATIN8 */ +	"ISO-8859-15",				/* PG_LATIN9 */ +	NULL,						/* PG_LATIN10 */ +	"CP1256",					/* PG_WIN1256 */ +	"CP1258",					/* PG_WIN1258 */ +	"CP866",					/* PG_WIN866 */ +	NULL,						/* PG_WIN874 */ +	"KOI8-R",					/* PG_KOI8R */ +	"CP1251",					/* PG_WIN1251 */ +	"CP1252",					/* PG_WIN1252 */ +	"ISO-8859-5",				/* PG_ISO_8859_5 */ +	"ISO-8859-6",				/* PG_ISO_8859_6 */ +	"ISO-8859-7",				/* PG_ISO_8859_7 */ +	"ISO-8859-8",				/* PG_ISO_8859_8 */ +	"CP1250",					/* PG_WIN1250 */ +	"CP1253",					/* PG_WIN1253 */ +	"CP1254",					/* PG_WIN1254 */ +	"CP1255",					/* PG_WIN1255 */ +	"CP1257",					/* PG_WIN1257 */ +	"KOI8-U",					/* PG_KOI8U */ +}; + +StaticAssertDecl(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1, +				 "pg_enc2icu_tbl incomplete"); + + +/* + * Is this encoding supported by ICU? 
+ */ +bool +is_encoding_supported_by_icu(int encoding) +{ +	if (!PG_VALID_BE_ENCODING(encoding)) +		return false; +	return (pg_enc2icu_tbl[encoding] != NULL); +} + +/* + * Returns ICU's name for encoding, or NULL if not supported + */ +const char * +get_encoding_name_for_icu(int encoding) +{ +	if (!PG_VALID_BE_ENCODING(encoding)) +		return NULL; +	return pg_enc2icu_tbl[encoding]; +} + + +/* ---------- + * Encoding checks, for error returns -1 else encoding id + * ---------- + */ +int +pg_valid_client_encoding(const char *name) +{ +	int			enc; + +	if ((enc = pg_char_to_encoding(name)) < 0) +		return -1; + +	if (!PG_VALID_FE_ENCODING(enc)) +		return -1; + +	return enc; +} + +int +pg_valid_server_encoding(const char *name) +{ +	int			enc; + +	if ((enc = pg_char_to_encoding(name)) < 0) +		return -1; + +	if (!PG_VALID_BE_ENCODING(enc)) +		return -1; + +	return enc; +} + +int +pg_valid_server_encoding_id(int encoding) +{ +	return PG_VALID_BE_ENCODING(encoding); +} + +/* + * Remove irrelevant chars from encoding name, store at *newkey + * + * (Caller's responsibility to provide a large enough buffer) + */ +static char * +clean_encoding_name(const char *key, char *newkey) +{ +	const char *p; +	char	   *np; + +	for (p = key, np = newkey; *p != '\0'; p++) +	{ +		if (isalnum((unsigned char) *p)) +		{ +			if (*p >= 'A' && *p <= 'Z') +				*np++ = *p + 'a' - 'A'; +			else +				*np++ = *p; +		} +	} +	*np = '\0'; +	return newkey; +} + +/* + * Search encoding by encoding name + * + * Returns encoding ID, or -1 if not recognized + */ +int +pg_char_to_encoding(const char *name) +{ +	unsigned int nel = lengthof(pg_encname_tbl); +	const pg_encname *base = pg_encname_tbl, +			   *last = base + nel - 1, +			   *position; +	int			result; +	char		buff[NAMEDATALEN], +			   *key; + +	if (name == NULL || *name == '\0') +		return -1; + +	if (strlen(name) >= NAMEDATALEN) +		return -1;				/* it's certainly not in the table */ + +	key = clean_encoding_name(name, buff); + +	while (last >= base) +	
{ +		position = base + ((last - base) >> 1); +		result = key[0] - position->name[0]; + +		if (result == 0) +		{ +			result = strcmp(key, position->name); +			if (result == 0) +				return position->encoding; +		} +		if (result < 0) +			last = position - 1; +		else +			base = position + 1; +	} +	return -1; +} + +const char * +pg_encoding_to_char(int encoding) +{ +	if (PG_VALID_ENCODING(encoding)) +	{ +		const pg_enc2name *p = &pg_enc2name_tbl[encoding]; + +		Assert(encoding == p->encoding); +		return p->name; +	} +	return ""; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/exec.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/exec.c new file mode 100644 index 00000000000..d3a967baa4b --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/exec.c @@ -0,0 +1,719 @@ +/*------------------------------------------------------------------------- + * + * exec.c + *		Functions for finding and validating executable files + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/exec.c + * + *------------------------------------------------------------------------- + */ + +/* + * On macOS, "man realpath" avers: + *    Defining _DARWIN_C_SOURCE or _DARWIN_BETTER_REALPATH before including + *    stdlib.h will cause the provided implementation of realpath() to use + *    F_GETPATH from fcntl(2) to discover the path. + * This should be harmless everywhere else. 
+ */ +#define _DARWIN_BETTER_REALPATH + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <signal.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <unistd.h> + +#ifdef EXEC_BACKEND +#if defined(HAVE_SYS_PERSONALITY_H) +#include <sys/personality.h> +#elif defined(HAVE_SYS_PROCCTL_H) +#error #include <sys/procctl.h> +#endif +#endif + +/* Inhibit mingw CRT's auto-globbing of command line arguments */ +#if defined(WIN32) && !defined(_MSC_VER) +extern int	_CRT_glob = 0;		/* 0 turns off globbing; 1 turns it on */ +#endif + +/* + * Hacky solution to allow expressing both frontend and backend error reports + * in one macro call.  First argument of log_error is an errcode() call of + * some sort (ignored if FRONTEND); the rest are errmsg_internal() arguments, + * i.e. message string and any parameters for it. + * + * Caller must provide the gettext wrapper around the message string, if + * appropriate, so that it gets translated in the FRONTEND case; this + * motivates using errmsg_internal() not errmsg().  We handle appending a + * newline, if needed, inside the macro, so that there's only one translatable + * string per call not two. + */ +#ifndef FRONTEND +#define log_error(errcodefn, ...) \ +	ereport(LOG, (errcodefn, errmsg_internal(__VA_ARGS__))) +#else +#define log_error(errcodefn, ...) \ +	(fprintf(stderr, __VA_ARGS__), fputc('\n', stderr)) +#endif + +static int	normalize_exec_path(char *path); +static char *pg_realpath(const char *fname); + +#ifdef WIN32 +static BOOL GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser); +#endif + +/* + * validate_exec -- validate "path" as an executable file + * + * returns 0 if the file is found and no error is encountered. + *		  -1 if the regular file "path" does not exist or cannot be executed. + *		  -2 if the file is otherwise valid but cannot be read. 
+ * in the failure cases, errno is set appropriately + */ +int +validate_exec(const char *path) +{ +	struct stat buf; +	int			is_r; +	int			is_x; + +#ifdef WIN32 +	char		path_exe[MAXPGPATH + sizeof(".exe") - 1]; + +	/* Win32 requires a .exe suffix for stat() */ +	if (strlen(path) < strlen(".exe") || +		pg_strcasecmp(path + strlen(path) - strlen(".exe"), ".exe") != 0) +	{ +		strlcpy(path_exe, path, sizeof(path_exe) - 4); +		strcat(path_exe, ".exe"); +		path = path_exe; +	} +#endif + +	/* +	 * Ensure that the file exists and is a regular file. +	 * +	 * XXX if you have a broken system where stat() looks at the symlink +	 * instead of the underlying file, you lose. +	 */ +	if (stat(path, &buf) < 0) +		return -1; + +	if (!S_ISREG(buf.st_mode)) +	{ +		/* +		 * POSIX offers no errno code that's simply "not a regular file".  If +		 * it's a directory we can use EISDIR.  Otherwise, it's most likely a +		 * device special file, and EPERM (Operation not permitted) isn't too +		 * horribly off base. +		 */ +		errno = S_ISDIR(buf.st_mode) ? EISDIR : EPERM; +		return -1; +	} + +	/* +	 * Ensure that the file is both executable and readable (required for +	 * dynamic loading). +	 */ +#ifndef WIN32 +	is_r = (access(path, R_OK) == 0); +	is_x = (access(path, X_OK) == 0); +	/* access() will set errno if it returns -1 */ +#else +	is_r = buf.st_mode & S_IRUSR; +	is_x = buf.st_mode & S_IXUSR; +	errno = EACCES;				/* appropriate thing if we return nonzero */ +#endif +	return is_x ? (is_r ? 0 : -2) : -1; +} + + +/* + * find_my_exec -- find an absolute path to this program's executable + * + *	argv0 is the name passed on the command line + *	retpath is the output area (must be of size MAXPGPATH) + *	Returns 0 if OK, -1 if error. + * + * The reason we have to work so hard to find an absolute path is that + * on some platforms we can't do dynamic loading unless we know the + * executable's location.  
Also, we need an absolute path not a relative + * path because we may later change working directory.  Finally, we want + * a true path not a symlink location, so that we can locate other files + * that are part of our installation relative to the executable. + */ +int +find_my_exec(const char *argv0, char *retpath) +{ +	char	   *path; + +	/* +	 * If argv0 contains a separator, then PATH wasn't used. +	 */ +	strlcpy(retpath, argv0, MAXPGPATH); +	if (first_dir_separator(retpath) != NULL) +	{ +		if (validate_exec(retpath) == 0) +			return normalize_exec_path(retpath); + +		log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE), +				  _("invalid binary \"%s\": %m"), retpath); +		return -1; +	} + +#ifdef WIN32 +	/* Win32 checks the current directory first for names without slashes */ +	if (validate_exec(retpath) == 0) +		return normalize_exec_path(retpath); +#endif + +	/* +	 * Since no explicit path was supplied, the user must have been relying on +	 * PATH.  We'll search the same PATH. +	 */ +	if ((path = getenv("PATH")) && *path) +	{ +		char	   *startp = NULL, +				   *endp = NULL; + +		do +		{ +			if (!startp) +				startp = path; +			else +				startp = endp + 1; + +			endp = first_path_var_separator(startp); +			if (!endp) +				endp = startp + strlen(startp); /* point to end */ + +			strlcpy(retpath, startp, Min(endp - startp + 1, MAXPGPATH)); + +			join_path_components(retpath, retpath, argv0); +			canonicalize_path(retpath); + +			switch (validate_exec(retpath)) +			{ +				case 0:			/* found ok */ +					return normalize_exec_path(retpath); +				case -1:		/* wasn't even a candidate, keep looking */ +					break; +				case -2:		/* found but disqualified */ +					log_error(errcode(ERRCODE_WRONG_OBJECT_TYPE), +							  _("could not read binary \"%s\": %m"), +							  retpath); +					break; +			} +		} while (*endp); +	} + +	log_error(errcode(ERRCODE_UNDEFINED_FILE), +			  _("could not find a \"%s\" to execute"), argv0); +	return -1; +} + + +/* + * normalize_exec_path - resolve 
symlinks and convert to absolute path + * + * Given a path that refers to an executable, chase through any symlinks + * to find the real file location; then convert that to an absolute path. + * + * On success, replaces the contents of "path" with the absolute path. + * ("path" is assumed to be of size MAXPGPATH.) + * Returns 0 if OK, -1 if error. + */ +static int +normalize_exec_path(char *path) +{ +	/* +	 * We used to do a lot of work ourselves here, but now we just let +	 * realpath(3) do all the heavy lifting. +	 */ +	char	   *abspath = pg_realpath(path); + +	if (abspath == NULL) +	{ +		log_error(errcode_for_file_access(), +				  _("could not resolve path \"%s\" to absolute form: %m"), +				  path); +		return -1; +	} +	strlcpy(path, abspath, MAXPGPATH); +	free(abspath); + +#ifdef WIN32 +	/* On Windows, be sure to convert '\' to '/' */ +	canonicalize_path(path); +#endif + +	return 0; +} + + +/* + * pg_realpath() - realpath(3) with POSIX.1-2008 semantics + * + * This is equivalent to realpath(fname, NULL), in that it returns a + * malloc'd buffer containing the absolute path equivalent to fname. + * On error, returns NULL with errno set. + * + * On Windows, what you get is spelled per platform conventions, + * so you probably want to apply canonicalize_path() to the result. + * + * For now, this is needed only here so mark it static.  If you choose to + * move it into its own file, move the _DARWIN_BETTER_REALPATH #define too! + */ +static char * +pg_realpath(const char *fname) +{ +	char	   *path; + +#ifndef WIN32 +	path = realpath(fname, NULL); +	if (path == NULL && errno == EINVAL) +	{ +		/* +		 * Cope with old-POSIX systems that require a user-provided buffer. +		 * Assume MAXPGPATH is enough room on all such systems. 
+		 */ +		char	   *buf = malloc(MAXPGPATH); + +		if (buf == NULL) +			return NULL;		/* assume errno is set */ +		path = realpath(fname, buf); +		if (path == NULL)		/* don't leak memory */ +		{ +			int			save_errno = errno; + +			free(buf); +			errno = save_errno; +		} +	} +#else							/* WIN32 */ + +	/* +	 * Microsoft is resolutely non-POSIX, but _fullpath() does the same thing. +	 * The documentation claims it reports errors by setting errno, which is a +	 * bit surprising for Microsoft, but we'll believe that until it's proven +	 * wrong.  Clear errno first, though, so we can at least tell if a failure +	 * occurs and doesn't set it. +	 */ +	errno = 0; +	path = _fullpath(NULL, fname, 0); +#endif + +	return path; +} + + +/* + * Find another program in our binary's directory, + * then make sure it is the proper version. + */ +int +find_other_exec(const char *argv0, const char *target, +				const char *versionstr, char *retpath) +{ +	char		cmd[MAXPGPATH]; +	char		line[MAXPGPATH]; + +	if (find_my_exec(argv0, retpath) < 0) +		return -1; + +	/* Trim off program name and keep just directory */ +	*last_dir_separator(retpath) = '\0'; +	canonicalize_path(retpath); + +	/* Now append the other program's name */ +	snprintf(retpath + strlen(retpath), MAXPGPATH - strlen(retpath), +			 "/%s%s", target, EXE); + +	if (validate_exec(retpath) != 0) +		return -1; + +	snprintf(cmd, sizeof(cmd), "\"%s\" -V", retpath); + +	if (!pipe_read_line(cmd, line, sizeof(line))) +		return -1; + +	if (strcmp(line, versionstr) != 0) +		return -2; + +	return 0; +} + + +/* + * Execute a command in a pipe and read the first line from it. 
+ */ +char * +pipe_read_line(char *cmd, char *line, int maxsize) +{ +	FILE	   *pgver; + +	fflush(NULL); + +	errno = 0; +	if ((pgver = popen(cmd, "r")) == NULL) +	{ +		perror("popen failure"); +		return NULL; +	} + +	errno = 0; +	if (fgets(line, maxsize, pgver) == NULL) +	{ +		if (feof(pgver)) +			fprintf(stderr, "no data was returned by command \"%s\"\n", cmd); +		else +			perror("fgets failure"); +		pclose(pgver);			/* no error checking */ +		return NULL; +	} + +	if (pclose_check(pgver)) +		return NULL; + +	return line; +} + + +/* + * pclose() plus useful error reporting + */ +int +pclose_check(FILE *stream) +{ +	int			exitstatus; +	char	   *reason; + +	exitstatus = pclose(stream); + +	if (exitstatus == 0) +		return 0;				/* all is well */ + +	if (exitstatus == -1) +	{ +		/* pclose() itself failed, and hopefully set errno */ +		log_error(errcode(ERRCODE_SYSTEM_ERROR), +				  _("%s() failed: %m"), "pclose"); +	} +	else +	{ +		reason = wait_result_to_str(exitstatus); +		log_error(errcode(ERRCODE_SYSTEM_ERROR), +				  "%s", reason); +		pfree(reason); +	} +	return exitstatus; +} + +/* + *	set_pglocale_pgservice + * + *	Set application-specific locale and service directory + * + *	This function takes the value of argv[0] rather than a full path. + * + * (You may be wondering why this is in exec.c.  It requires this module's + * services and doesn't introduce any new dependencies, so this seems as + * good as anyplace.) + */ +void +set_pglocale_pgservice(const char *argv0, const char *app) +{ +	char		path[MAXPGPATH]; +	char		my_exec_path[MAXPGPATH]; + +	/* don't set LC_ALL in the backend */ +	if (strcmp(app, PG_TEXTDOMAIN("postgres")) != 0) +	{ +		setlocale(LC_ALL, ""); + +		/* +		 * One could make a case for reproducing here PostmasterMain()'s test +		 * for whether the process is multithreaded.  Unlike the postmaster, +		 * no frontend program calls sigprocmask() or otherwise provides for +		 * mutual exclusion between signal handlers.  
While frontends using +		 * fork(), if multithreaded, are formally exposed to undefined +		 * behavior, we have not witnessed a concrete bug.  Therefore, +		 * complaining about multithreading here may be mere pedantry. +		 */ +	} + +	if (find_my_exec(argv0, my_exec_path) < 0) +		return; + +#ifdef ENABLE_NLS +	get_locale_path(my_exec_path, path); +	bindtextdomain(app, path); +	textdomain(app); +	/* set for libpq to use, but don't override existing setting */ +	setenv("PGLOCALEDIR", path, 0); +#endif + +	if (getenv("PGSYSCONFDIR") == NULL) +	{ +		get_etc_path(my_exec_path, path); +		/* set for libpq to use */ +		setenv("PGSYSCONFDIR", path, 0); +	} +} + +#ifdef EXEC_BACKEND +/* + * For the benefit of PostgreSQL developers testing EXEC_BACKEND on Unix + * systems (code paths normally exercised only on Windows), provide a way to + * disable address space layout randomization, if we know how on this platform. + * Otherwise, backends may fail to attach to shared memory at the fixed address + * chosen by the postmaster.  (See also the macOS-specific hack in + * sysv_shmem.c.) + */ +int +pg_disable_aslr(void) +{ +#if defined(HAVE_SYS_PERSONALITY_H) +	return personality(ADDR_NO_RANDOMIZE); +#elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_ASLR_FORCE_DISABLE) +	int			data = PROC_ASLR_FORCE_DISABLE; + +	return procctl(P_PID, 0, PROC_ASLR_CTL, &data); +#else +	errno = ENOSYS; +	return -1; +#endif +} +#endif + +#ifdef WIN32 + +/* + * AddUserToTokenDacl(HANDLE hToken) + * + * This function adds the current user account to the restricted + * token used when we create a restricted process. + * + * This is required because of some security changes in Windows + * that appeared in patches to XP/2K3 and in Vista/2008. + * + * On these machines, the Administrator account is not included in + * the default DACL - you just get Administrators + System. For + * regular users you get User + System. 
Because we strip Administrators + * when we create the restricted token, we are left with only System + * in the DACL which leads to access denied errors for later CreatePipe() + * and CreateProcess() calls when running as Administrator. + * + * This function fixes this problem by modifying the DACL of the + * token the process will use, and explicitly re-adding the current + * user account.  This is still secure because the Administrator account + * inherits its privileges from the Administrators group - it doesn't + * have any of its own. + */ +BOOL +AddUserToTokenDacl(HANDLE hToken) +{ +	int			i; +	ACL_SIZE_INFORMATION asi; +	ACCESS_ALLOWED_ACE *pace; +	DWORD		dwNewAclSize; +	DWORD		dwSize = 0; +	DWORD		dwTokenInfoLength = 0; +	PACL		pacl = NULL; +	PTOKEN_USER pTokenUser = NULL; +	TOKEN_DEFAULT_DACL tddNew; +	TOKEN_DEFAULT_DACL *ptdd = NULL; +	TOKEN_INFORMATION_CLASS tic = TokenDefaultDacl; +	BOOL		ret = FALSE; + +	/* Figure out the buffer size for the DACL info */ +	if (!GetTokenInformation(hToken, tic, (LPVOID) NULL, dwTokenInfoLength, &dwSize)) +	{ +		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) +		{ +			ptdd = (TOKEN_DEFAULT_DACL *) LocalAlloc(LPTR, dwSize); +			if (ptdd == NULL) +			{ +				log_error(errcode(ERRCODE_OUT_OF_MEMORY), +						  _("out of memory")); +				goto cleanup; +			} + +			if (!GetTokenInformation(hToken, tic, (LPVOID) ptdd, dwSize, &dwSize)) +			{ +				log_error(errcode(ERRCODE_SYSTEM_ERROR), +						  "could not get token information: error code %lu", +						  GetLastError()); +				goto cleanup; +			} +		} +		else +		{ +			log_error(errcode(ERRCODE_SYSTEM_ERROR), +					  "could not get token information buffer size: error code %lu", +					  GetLastError()); +			goto cleanup; +		} +	} + +	/* Get the ACL info */ +	if (!GetAclInformation(ptdd->DefaultDacl, (LPVOID) &asi, +						   (DWORD) sizeof(ACL_SIZE_INFORMATION), +						   AclSizeInformation)) +	{ +		log_error(errcode(ERRCODE_SYSTEM_ERROR), +				  "could not get ACL information: 
error code %lu", +				  GetLastError()); +		goto cleanup; +	} + +	/* Get the current user SID */ +	if (!GetTokenUser(hToken, &pTokenUser)) +		goto cleanup;			/* callee printed a message */ + +	/* Figure out the size of the new ACL */ +	dwNewAclSize = asi.AclBytesInUse + sizeof(ACCESS_ALLOWED_ACE) + +		GetLengthSid(pTokenUser->User.Sid) - sizeof(DWORD); + +	/* Allocate the ACL buffer & initialize it */ +	pacl = (PACL) LocalAlloc(LPTR, dwNewAclSize); +	if (pacl == NULL) +	{ +		log_error(errcode(ERRCODE_OUT_OF_MEMORY), +				  _("out of memory")); +		goto cleanup; +	} + +	if (!InitializeAcl(pacl, dwNewAclSize, ACL_REVISION)) +	{ +		log_error(errcode(ERRCODE_SYSTEM_ERROR), +				  "could not initialize ACL: error code %lu", GetLastError()); +		goto cleanup; +	} + +	/* Loop through the existing ACEs, and build the new ACL */ +	for (i = 0; i < (int) asi.AceCount; i++) +	{ +		if (!GetAce(ptdd->DefaultDacl, i, (LPVOID *) &pace)) +		{ +			log_error(errcode(ERRCODE_SYSTEM_ERROR), +					  "could not get ACE: error code %lu", GetLastError()); +			goto cleanup; +		} + +		if (!AddAce(pacl, ACL_REVISION, MAXDWORD, pace, ((PACE_HEADER) pace)->AceSize)) +		{ +			log_error(errcode(ERRCODE_SYSTEM_ERROR), +					  "could not add ACE: error code %lu", GetLastError()); +			goto cleanup; +		} +	} + +	/* Add the new ACE for the current user */ +	if (!AddAccessAllowedAceEx(pacl, ACL_REVISION, OBJECT_INHERIT_ACE, GENERIC_ALL, pTokenUser->User.Sid)) +	{ +		log_error(errcode(ERRCODE_SYSTEM_ERROR), +				  "could not add access allowed ACE: error code %lu", +				  GetLastError()); +		goto cleanup; +	} + +	/* Set the new DACL in the token */ +	tddNew.DefaultDacl = pacl; + +	if (!SetTokenInformation(hToken, tic, (LPVOID) &tddNew, dwNewAclSize)) +	{ +		log_error(errcode(ERRCODE_SYSTEM_ERROR), +				  "could not set token information: error code %lu", +				  GetLastError()); +		goto cleanup; +	} + +	ret = TRUE; + +cleanup: +	if (pTokenUser) +		LocalFree((HLOCAL) pTokenUser); + +	if (pacl) +		
LocalFree((HLOCAL) pacl); + +	if (ptdd) +		LocalFree((HLOCAL) ptdd); + +	return ret; +} + +/* + * GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser) + * + * Get the user's token information from a process token. + * + * The caller of this function is responsible for calling LocalFree() on the + * returned TOKEN_USER memory. + */ +static BOOL +GetTokenUser(HANDLE hToken, PTOKEN_USER *ppTokenUser) +{ +	DWORD		dwLength; + +	*ppTokenUser = NULL; + +	if (!GetTokenInformation(hToken, +							 TokenUser, +							 NULL, +							 0, +							 &dwLength)) +	{ +		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) +		{ +			*ppTokenUser = (PTOKEN_USER) LocalAlloc(LPTR, dwLength); + +			if (*ppTokenUser == NULL) +			{ +				log_error(errcode(ERRCODE_OUT_OF_MEMORY), +						  _("out of memory")); +				return FALSE; +			} +		} +		else +		{ +			log_error(errcode(ERRCODE_SYSTEM_ERROR), +					  "could not get token information buffer size: error code %lu", +					  GetLastError()); +			return FALSE; +		} +	} + +	if (!GetTokenInformation(hToken, +							 TokenUser, +							 *ppTokenUser, +							 dwLength, +							 &dwLength)) +	{ +		LocalFree(*ppTokenUser); +		*ppTokenUser = NULL; + +		log_error(errcode(ERRCODE_SYSTEM_ERROR), +				  "could not get token information: error code %lu", +				  GetLastError()); +		return FALSE; +	} + +	/* Memory in *ppTokenUser is LocalFree():d by the caller */ +	return TRUE; +} + +#endif diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/f2s.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/f2s.c new file mode 100644 index 00000000000..ba08dcb6aa5 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/f2s.c @@ -0,0 +1,803 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for single precision. 
+ * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/f2s.c + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + *     (See accompanying file LICENSE-Apache or copy at + *      http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + *     (See accompanying file LICENSE-Boost or copy at + *      https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/shortest_dec.h" +#include "digit_table.h" +#include "ryu_common.h" + +#define FLOAT_MANTISSA_BITS 23 +#define FLOAT_EXPONENT_BITS 8 +#define FLOAT_BIAS 127 + +/* + * This table is generated (by the upstream) by PrintFloatLookupTable, + * and modified (by us) to add UINT64CONST. 
 */
#define FLOAT_POW5_INV_BITCOUNT 59
/* Indexed by q in mulPow5InvDivPow2(). */
static const uint64 FLOAT_POW5_INV_SPLIT[31] = {
	UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), UINT64CONST(295147905179352826),
	UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), UINT64CONST(302231454903657294), UINT64CONST(483570327845851670),
	UINT64CONST(386856262276681336), UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688),
	UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), UINT64CONST(324518553658426727),
	UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), UINT64CONST(332306998946228969), UINT64CONST(531691198313966350),
	UINT64CONST(425352958651173080), UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234),
	UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), UINT64CONST(356811923176489971),
	UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), UINT64CONST(365375409332725730)
};
#define FLOAT_POW5_BITCOUNT 61
/* Indexed by i in mulPow5divPow2(). */
static const uint64 FLOAT_POW5_SPLIT[47] = {
	UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), UINT64CONST(2251799813685248000),
	UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000),
	UINT64CONST(1717986918400000000), UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000),
	UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), UINT64CONST(2048000000000000000),
	UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000),
	UINT64CONST(1562500000000000000), UINT64CONST(1953125000000000000), UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000),
	UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), UINT64CONST(1862645149230957031),
	UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594),
	UINT64CONST(1421085471520200371), UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675),
	UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), UINT64CONST(1694065894508600678),
	UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187),
	UINT64CONST(1292469707114105741), UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221)
};

/*
 * Returns how many times value can be divided exactly by 5.
 *
 * value must be nonzero (asserted); the loop only ends when a division by 5
 * leaves a remainder.
 */
static inline uint32
pow5Factor(uint32 value)
{
	uint32		count = 0;

	for (;;)
	{
		Assert(value != 0);
		const uint32 q = value / 5;
		const uint32 r = value % 5;

		if (r != 0)
			break;

		value = q;
		++count;
	}
	return count;
}

/*  Returns true if value is divisible by 5^p. */
static inline bool
multipleOfPowerOf5(const uint32 value, const uint32 p)
{
	return pow5Factor(value) >= p;
}

/*  Returns true if value is divisible by 2^p. */
static inline bool
multipleOfPowerOf2(const uint32 value, const uint32 p)
{
	/* return __builtin_ctz(value) >= p; */
	/* Tests that the low p bits of value are all zero. */
	return (value & ((1u << p) - 1)) == 0;
}

/*
 * Returns (m * factor) >> shift, computed from two 32x32->64 bit partial
 * products.  shift must be greater than 32 (asserted).
 *
 * It seems to be slightly faster to avoid uint128_t here, although the
 * generated code for uint128_t looks slightly nicer.
 */
static inline uint32
mulShift(const uint32 m, const uint64 factor, const int32 shift)
{
	/*
	 * The casts here help MSVC to avoid calls to the __allmul library
	 * function.
	 */
	const uint32 factorLo = (uint32) (factor);
	const uint32 factorHi = (uint32) (factor >> 32);
	const uint64 bits0 = (uint64) m * factorLo;
	const uint64 bits1 = (uint64) m * factorHi;

	Assert(shift > 32);

#ifdef RYU_32_BIT_PLATFORM

	/*
	 * On 32-bit platforms we can avoid a 64-bit shift-right since we only
	 * need the upper 32 bits of the result and the shift value is > 32.
	 */
	const uint32 bits0Hi = (uint32) (bits0 >> 32);
	uint32		bits1Lo = (uint32) (bits1);
	uint32		bits1Hi = (uint32) (bits1 >> 32);

	bits1Lo += bits0Hi;
	/* Propagate the carry out of the low-word addition. */
	bits1Hi += (bits1Lo < bits0Hi);

	const int32 s = shift - 32;

	return (bits1Hi << (32 - s)) | (bits1Lo >> s);

#else							/* RYU_32_BIT_PLATFORM */

	const uint64 sum = (bits0 >> 32) + bits1;
	const uint64 shiftedSum = sum >> (shift - 32);

	Assert(shiftedSum <= PG_UINT32_MAX);
	return (uint32) shiftedSum;

#endif							/* RYU_32_BIT_PLATFORM */
}

/* Returns mulShift() of m by the q'th entry of FLOAT_POW5_INV_SPLIT, with shift j. */
static inline uint32
mulPow5InvDivPow2(const uint32 m, const uint32 q, const int32 j)
{
	return mulShift(m, FLOAT_POW5_INV_SPLIT[q], j);
}

/* Returns mulShift() of m by the i'th entry of FLOAT_POW5_SPLIT, with shift j. */
static inline uint32
mulPow5divPow2(const uint32 m, const uint32 i, const int32 j)
{
	return mulShift(m, FLOAT_POW5_SPLIT[i], j);
}

/* Returns the number of decimal digits in v (1..9). */
static inline uint32
decimalLength(const uint32 v)
{
	/* Function precondition: v is not a 10-digit number. */
	/* (9 digits are sufficient for round-tripping.) */
	Assert(v < 1000000000);
	if (v >= 100000000)
	{
		return 9;
	}
	if (v >= 10000000)
	{
		return 8;
	}
	if (v >= 1000000)
	{
		return 7;
	}
	if (v >= 100000)
	{
		return 6;
	}
	if (v >= 10000)
	{
		return 5;
	}
	if (v >= 1000)
	{
		return 4;
	}
	if (v >= 100)
	{
		return 3;
	}
	if (v >= 10)
	{
		return 2;
	}
	return 1;
}

/*  A floating decimal representing m * 10^e.
 */
typedef struct floating_decimal_32
{
	uint32		mantissa;
	int32		exponent;
} floating_decimal_32;

/*
 * Convert the decoded IEEE single-precision fields (fraction bits and biased
 * exponent bits) into a decimal mantissa/exponent pair.
 *
 * Step 1 (decoding the float) is done by the caller; the body below is
 * labelled with the remaining steps.
 */
static inline floating_decimal_32
f2d(const uint32 ieeeMantissa, const uint32 ieeeExponent)
{
	int32		e2;
	uint32		m2;

	if (ieeeExponent == 0)
	{
		/* We subtract 2 so that the bounds computation has 2 additional bits. */
		e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
		m2 = ieeeMantissa;
	}
	else
	{
		e2 = ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
		m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
	}

#if STRICTLY_SHORTEST
	const bool	even = (m2 & 1) == 0;
	const bool	acceptBounds = even;
#else
	const bool	acceptBounds = false;
#endif

	/* Step 2: Determine the interval of legal decimal representations. */
	const uint32 mv = 4 * m2;
	const uint32 mp = 4 * m2 + 2;

	/* Implicit bool -> int conversion. True is 1, false is 0. */
	const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
	const uint32 mm = 4 * m2 - 1 - mmShift;

	/* Step 3: Convert to a decimal power base using 64-bit arithmetic. */
	uint32		vr,
				vp,
				vm;
	int32		e10;
	bool		vmIsTrailingZeros = false;
	bool		vrIsTrailingZeros = false;
	uint8		lastRemovedDigit = 0;

	if (e2 >= 0)
	{
		const uint32 q = log10Pow2(e2);

		e10 = q;

		const int32 k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q) - 1;
		const int32 i = -e2 + q + k;

		vr = mulPow5InvDivPow2(mv, q, i);
		vp = mulPow5InvDivPow2(mp, q, i);
		vm = mulPow5InvDivPow2(mm, q, i);

		if (q != 0 && (vp - 1) / 10 <= vm / 10)
		{
			/*
			 * We need to know one removed digit even if we are not going to
			 * loop below. We could use q = X - 1 above, except that would
			 * require 33 bits for the result, and we've found that 32-bit
			 * arithmetic is faster even on 64-bit machines.
			 */
			const int32 l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1;

			lastRemovedDigit = (uint8) (mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10);
		}
		if (q <= 9)
		{
			/*
			 * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9
			 * seems to be safe as well.
			 *
			 * Only one of mp, mv, and mm can be a multiple of 5, if any.
			 */
			if (mv % 5 == 0)
			{
				vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
			}
			else if (acceptBounds)
			{
				vmIsTrailingZeros = multipleOfPowerOf5(mm, q);
			}
			else
			{
				vp -= multipleOfPowerOf5(mp, q);
			}
		}
	}
	else
	{
		const uint32 q = log10Pow5(-e2);

		e10 = q + e2;

		const int32 i = -e2 - q;
		const int32 k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
		int32		j = q - k;

		vr = mulPow5divPow2(mv, i, j);
		vp = mulPow5divPow2(mp, i, j);
		vm = mulPow5divPow2(mm, i, j);

		if (q != 0 && (vp - 1) / 10 <= vm / 10)
		{
			j = q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
			lastRemovedDigit = (uint8) (mulPow5divPow2(mv, i + 1, j) % 10);
		}
		if (q <= 1)
		{
			/*
			 * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
			 * trailing 0 bits.
			 */
			/* mv = 4 * m2, so it always has at least two trailing 0 bits. */
			vrIsTrailingZeros = true;
			if (acceptBounds)
			{
				/*
				 * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
				 * mmShift == 1.
				 */
				vmIsTrailingZeros = mmShift == 1;
			}
			else
			{
				/*
				 * mp = mv + 2, so it always has at least one trailing 0 bit.
				 */
				--vp;
			}
		}
		else if (q < 31)
		{
			/* TODO(ulfjack):Use a tighter bound here. */
			vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
		}
	}

	/*
	 * Step 4: Find the shortest decimal representation in the interval of
	 * legal representations.
	 */
	uint32		removed = 0;
	uint32		output;

	if (vmIsTrailingZeros || vrIsTrailingZeros)
	{
		/* General case, which happens rarely (~4.0%). */
		while (vp / 10 > vm / 10)
		{
			vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0;
			vrIsTrailingZeros &= lastRemovedDigit == 0;
			lastRemovedDigit = (uint8) (vr % 10);
			vr /= 10;
			vp /= 10;
			vm /= 10;
			++removed;
		}
		if (vmIsTrailingZeros)
		{
			while (vm % 10 == 0)
			{
				vrIsTrailingZeros &= lastRemovedDigit == 0;
				lastRemovedDigit = (uint8) (vr % 10);
				vr /= 10;
				vp /= 10;
				vm /= 10;
				++removed;
			}
		}

		if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
		{
			/* Round even if the exact number is .....50..0. */
			lastRemovedDigit = 4;
		}

		/*
		 * We need to take vr + 1 if vr is outside bounds or we need to round
		 * up.
		 */
		output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
	}
	else
	{
		/*
		 * Specialized for the common case (~96.0%). Percentages below are
		 * relative to this.
		 *
		 * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2:
		 * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
		 */
		while (vp / 10 > vm / 10)
		{
			lastRemovedDigit = (uint8) (vr % 10);
			vr /= 10;
			vp /= 10;
			vm /= 10;
			++removed;
		}

		/*
		 * We need to take vr + 1 if vr is outside bounds or we need to round
		 * up.
		 */
		output = vr + (vr == vm || lastRemovedDigit >= 5);
	}

	const int32 exp = e10 + removed;

	floating_decimal_32 fd;

	fd.exponent = exp;
	fd.mantissa = output;
	return fd;
}

/*
 * Write v in fixed-point notation into result and return the number of bytes
 * written.
 *
 * olength is the decimal digit count of v.mantissa, as computed by the
 * caller.  Any '-' sign has already been written by the caller and is not
 * included in the returned length.
 */
static inline int
to_chars_f(const floating_decimal_32 v, const uint32 olength, char *const result)
{
	/* Step 5: Print the decimal representation. */
	int			index = 0;

	uint32		output = v.mantissa;
	int32		exp = v.exponent;

	/*----
	 * On entry, mantissa * 10^exp is the result to be output.
	 * Caller has already done the - sign if needed.
	 *
	 * We want to insert the point somewhere depending on the output length
	 * and exponent, which might mean adding zeros:
	 *
	 *            exp  | format
	 *            1+   |  ddddddddd000000
	 *            0    |  ddddddddd
	 *  -1 .. -len+1   |  dddddddd.d to d.ddddddddd
	 *  -len ...       |  0.ddddddddd to 0.000dddddd
	 */
	uint32		i = 0;
	int32		nexp = exp + olength;

	if (nexp <= 0)
	{
		/* -nexp is number of 0s to add after '.' */
		Assert(nexp >= -3);
		/* 0.000ddddd */
		index = 2 - nexp;
		/* copy 8 bytes rather than 5 to let compiler optimize */
		memcpy(result, "0.000000", 8);
	}
	else if (exp < 0)
	{
		/*
		 * dddd.dddd; leave space at the start and move the '.' in after
		 */
		index = 1;
	}
	else
	{
		/*
		 * We can save some code later by pre-filling with zeros. We know that
		 * there can be no more than 6 output digits in this form, otherwise
		 * we would not choose fixed-point output. memset 8 rather than 6
		 * bytes to let the compiler optimize it.
		 */
		Assert(exp < 6 && exp + olength <= 6);
		memset(result, '0', 8);
	}

	/* Emit the digits right-to-left: four at a time, then two, then the rest. */
	while (output >= 10000)
	{
		const uint32 c = output - 10000 * (output / 10000);
		const uint32 c0 = (c % 100) << 1;
		const uint32 c1 = (c / 100) << 1;

		output /= 10000;

		memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
		memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
		i += 4;
	}
	if (output >= 100)
	{
		const uint32 c = (output % 100) << 1;

		output /= 100;
		memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
		i += 2;
	}
	if (output >= 10)
	{
		const uint32 c = output << 1;

		memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
	}
	else
	{
		result[index] = (char) ('0' + output);
	}

	if (index == 1)
	{
		/*
		 * nexp is 1..6 here, representing the number of digits before the
		 * point. A value of 7+ is not possible because we switch to
		 * scientific notation when the display exponent reaches 6.
		 */
		Assert(nexp < 7);
		/* gcc only seems to want to optimize memmove for small 2^n */
		if (nexp & 4)
		{
			memmove(result + index - 1, result + index, 4);
			index += 4;
		}
		if (nexp & 2)
		{
			memmove(result + index - 1, result + index, 2);
			index += 2;
		}
		if (nexp & 1)
		{
			result[index - 1] = result[index];
		}
		result[nexp] = '.';
		index = olength + 1;
	}
	else if (exp >= 0)
	{
		/* we supplied the trailing zeros earlier, now just set the length. */
		index = olength + exp;
	}
	else
	{
		index = olength + (2 - nexp);
	}

	return index;
}

/*
 * Write v (preceded by '-' when sign is true) into result and return the
 * number of bytes written.  The output is not null-terminated here.
 *
 * Fixed-point output is delegated to to_chars_f() when the display exponent
 * is in [-4, 6); otherwise scientific notation is produced.
 */
static inline int
to_chars(const floating_decimal_32 v, const bool sign, char *const result)
{
	/* Step 5: Print the decimal representation. */
	int			index = 0;

	uint32		output = v.mantissa;
	uint32		olength = decimalLength(output);
	int32		exp = v.exponent + olength - 1;

	if (sign)
		result[index++] = '-';

	/*
	 * The thresholds for fixed-point output are chosen to match printf
	 * defaults. Beware that both the code of to_chars_f and the value of
	 * FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
	 */
	if (exp >= -4 && exp < 6)
		return to_chars_f(v, olength, result + index) + sign;

	/*
	 * If v.exponent is exactly 0, we might have reached here via the small
	 * integer fast path, in which case v.mantissa might contain trailing
	 * (decimal) zeros. For scientific notation we need to move these zeros
	 * into the exponent. (For fixed point this doesn't matter, which is why
	 * we do this here rather than above.)
	 *
	 * Since we already calculated the display exponent (exp) above based on
	 * the old decimal length, that value does not change here. Instead, we
	 * just reduce the display length for each digit removed.
	 *
	 * If we didn't get here via the fast path, the raw exponent will not
	 * usually be 0, and there will be no trailing zeros, so we pay no more
	 * than one div10/multiply extra cost. We claw back half of that by
	 * checking for divisibility by 2 before dividing by 10.
	 */
	if (v.exponent == 0)
	{
		while ((output & 1) == 0)
		{
			const uint32 q = output / 10;
			const uint32 r = output - 10 * q;

			if (r != 0)
				break;
			output = q;
			--olength;
		}
	}

	/*----
	 * Print the decimal digits.
	 * The following code is equivalent to:
	 *
	 * for (uint32 i = 0; i < olength - 1; ++i) {
	 *   const uint32 c = output % 10; output /= 10;
	 *   result[index + olength - i] = (char) ('0' + c);
	 * }
	 * result[index] = '0' + output % 10;
	 */
	uint32		i = 0;

	while (output >= 10000)
	{
		const uint32 c = output - 10000 * (output / 10000);
		const uint32 c0 = (c % 100) << 1;
		const uint32 c1 = (c / 100) << 1;

		output /= 10000;

		memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
		memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
		i += 4;
	}
	if (output >= 100)
	{
		const uint32 c = (output % 100) << 1;

		output /= 100;
		memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
		i += 2;
	}
	if (output >= 10)
	{
		const uint32 c = output << 1;

		/*
		 * We can't use memcpy here: the decimal dot goes between these two
		 * digits.
		 */
		result[index + olength - i] = DIGIT_TABLE[c + 1];
		result[index] = DIGIT_TABLE[c];
	}
	else
	{
		result[index] = (char) ('0' + output);
	}

	/* Print decimal point if needed. */
	if (olength > 1)
	{
		result[index + 1] = '.';
		index += olength + 1;
	}
	else
	{
		++index;
	}

	/* Print the exponent. */
	result[index++] = 'e';
	if (exp < 0)
	{
		result[index++] = '-';
		exp = -exp;
	}
	else
		result[index++] = '+';

	/* The exponent is always written as exactly two digits. */
	memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
	index += 2;

	return index;
}

/*
 * Fast path for floats whose value is an integer.
 *
 * Returns true and fills *v (with exponent 0) when the value has no
 * fractional bits; otherwise returns false and leaves *v unmodified.
 */
static inline bool
f2d_small_int(const uint32 ieeeMantissa,
			  const uint32 ieeeExponent,
			  floating_decimal_32 *v)
{
	const int32 e2 = (int32) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS;

	/*
	 * Avoid using multiple "return false;" here since it tends to provoke the
	 * compiler into inlining multiple copies of f2d, which is undesirable.
	 */

	if (e2 >= -FLOAT_MANTISSA_BITS && e2 <= 0)
	{
		/*----
		 * Since 2^23 <= m2 < 2^24 and 0 <= -e2 <= 23:
		 *   1 <= f = m2 / 2^-e2 < 2^24.
		 *
		 * Test if the lower -e2 bits of the significand are 0, i.e. whether
		 * the fraction is 0. We can use ieeeMantissa here, since the implied
		 * 1 bit can never be tested by this; the implied 1 can only be part
		 * of a fraction if e2 < -FLOAT_MANTISSA_BITS which we already
		 * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -24)
		 */
		const uint32 mask = (1U << -e2) - 1;
		const uint32 fraction = ieeeMantissa & mask;

		if (fraction == 0)
		{
			/*----
			 * f is an integer in the range [1, 2^24).
			 * Note: mantissa might contain trailing (decimal) 0's.
			 * Note: since 2^24 < 10^9, there is no need to adjust
			 * decimalLength().
			 */
			const uint32 m2 = (1U << FLOAT_MANTISSA_BITS) | ieeeMantissa;

			v->mantissa = m2 >> -e2;
			v->exponent = 0;
			return true;
		}
	}

	return false;
}

/*
 * Store the shortest decimal representation of the given float as an
 * UNTERMINATED string in the caller's supplied buffer (which must be at least
 * FLOAT_SHORTEST_DECIMAL_LEN-1 bytes long).
 *
 * Returns the number of bytes stored.
 */
int
float_to_shortest_decimal_bufn(float f, char *result)
{
	/*
	 * Step 1: Decode the floating-point number, and unify normalized and
	 * subnormal cases.
	 */
	const uint32 bits = float_to_bits(f);

	/* Decode bits into sign, mantissa, and exponent. */
	const bool	ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
	const uint32 ieeeMantissa = bits & ((1u << FLOAT_MANTISSA_BITS) - 1);
	const uint32 ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1);

	/* Case distinction; exit early for the easy cases. */
	/* (all exponent bits set, or both exponent and mantissa zero) */
	if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0))
	{
		return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0));
	}

	floating_decimal_32 v;
	const bool	isSmallInt = f2d_small_int(ieeeMantissa, ieeeExponent, &v);

	/* Fall back to the general conversion when the fast path did not apply. */
	if (!isSmallInt)
	{
		v = f2d(ieeeMantissa, ieeeExponent);
	}

	return to_chars(v, ieeeSign, result);
}

/*
 * Store the shortest decimal representation of the given float as a
 * null-terminated string in the caller's supplied buffer (which must be at
 * least FLOAT_SHORTEST_DECIMAL_LEN bytes long).
 *
 * Returns the string length.
 */
int
float_to_shortest_decimal_buf(float f, char *result)
{
	const int	index = float_to_shortest_decimal_bufn(f, result);

	/* Terminate the string. */
	Assert(index < FLOAT_SHORTEST_DECIMAL_LEN);
	result[index] = '\0';
	return index;
}

/*
 * Return the shortest decimal representation as a null-terminated palloc'd
 * string (outside the backend, uses malloc() instead).
 *
 * Caller is responsible for freeing the result.
+ */ +char * +float_to_shortest_decimal(float f) +{ +	char	   *const result = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN); + +	float_to_shortest_decimal_buf(f, result); +	return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/file_perm.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/file_perm.c new file mode 100644 index 00000000000..5675c5a1144 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/file_perm.c @@ -0,0 +1,91 @@ +/*------------------------------------------------------------------------- + * + * File and directory permission routines + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/file_perm.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/file_perm.h" + +/* Modes for creating directories and files in the data directory */ +__thread int			pg_dir_create_mode = PG_DIR_MODE_OWNER; +__thread int			pg_file_create_mode = PG_FILE_MODE_OWNER; + +/* + * Mode mask to pass to umask().  This is more of a preventative measure since + * all file/directory creates should be performed using the create modes above. + */ +__thread int			pg_mode_mask = PG_MODE_MASK_OWNER; + +/* + * Set create modes and mask to use when writing to PGDATA based on the data + * directory mode passed.  If group read/execute are present in the mode, then + * create modes and mask will be relaxed to allow group read/execute on all + * newly created files and directories. 
+ */ +void +SetDataDirectoryCreatePerm(int dataDirMode) +{ +	/* If the data directory mode has group access */ +	if ((PG_DIR_MODE_GROUP & dataDirMode) == PG_DIR_MODE_GROUP) +	{ +		pg_dir_create_mode = PG_DIR_MODE_GROUP; +		pg_file_create_mode = PG_FILE_MODE_GROUP; +		pg_mode_mask = PG_MODE_MASK_GROUP; +	} +	/* Else use default permissions */ +	else +	{ +		pg_dir_create_mode = PG_DIR_MODE_OWNER; +		pg_file_create_mode = PG_FILE_MODE_OWNER; +		pg_mode_mask = PG_MODE_MASK_OWNER; +	} +} + +#ifdef FRONTEND + +/* + * Get the create modes and mask to use when writing to PGDATA by examining the + * mode of the PGDATA directory and calling SetDataDirectoryCreatePerm(). + * + * Errors are not handled here and should be reported by the application when + * false is returned. + * + * Suppress when on Windows, because there may not be proper support for Unix-y + * file permissions. + */ +bool +GetDataDirectoryCreatePerm(const char *dataDir) +{ +#if !defined(WIN32) && !defined(__CYGWIN__) +	struct stat statBuf; + +	/* +	 * If an error occurs getting the mode then return false.  The caller is +	 * responsible for generating an error, if appropriate, indicating that we +	 * were unable to access the data directory. +	 */ +	if (stat(dataDir, &statBuf) == -1) +		return false; + +	/* Set permissions */ +	SetDataDirectoryCreatePerm(statBuf.st_mode); +	return true; +#else							/* !defined(WIN32) && !defined(__CYGWIN__) */ +	/* +	 * On Windows, we don't have anything to do here since they don't have +	 * Unix-y permissions. 
+	 */ +	return true; +#endif +} + + +#endif							/* FRONTEND */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/file_utils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/file_utils.c new file mode 100644 index 00000000000..ecde2fee2cf --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/file_utils.c @@ -0,0 +1,582 @@ +/*------------------------------------------------------------------------- + * + * File-processing utility routines. + * + * Assorted utility functions to work on files. + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/file_utils.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <dirent.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "common/file_utils.h" +#ifdef FRONTEND +#error #include "common/logging.h" +#endif +#include "port/pg_iovec.h" + +#ifdef FRONTEND + +/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ +#if defined(HAVE_SYNC_FILE_RANGE) +#define PG_FLUSH_DATA_WORKS 1 +#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) +#define PG_FLUSH_DATA_WORKS 1 +#endif + +/* + * pg_xlog has been renamed to pg_wal in version 10. + */ +#define MINIMUM_VERSION_FOR_PG_WAL	100000 + +#ifdef PG_FLUSH_DATA_WORKS +static int	pre_sync_fname(const char *fname, bool isdir); +#endif +static void walkdir(const char *path, +					int (*action) (const char *fname, bool isdir), +					bool process_symlinks); + +/* + * Issue fsync recursively on PGDATA and all its contents. + * + * We fsync regular files and directories wherever they are, but we follow + * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc. 
+ * Other symlinks are presumed to point at files we're not responsible for + * fsyncing, and might not have privileges to write at all. + * + * serverVersion indicates the version of the server to be fsync'd. + */ +void +fsync_pgdata(const char *pg_data, +			 int serverVersion) +{ +	bool		xlog_is_symlink; +	char		pg_wal[MAXPGPATH]; +	char		pg_tblspc[MAXPGPATH]; + +	/* handle renaming of pg_xlog to pg_wal in post-10 clusters */ +	snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data, +			 serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal"); +	snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); + +	/* +	 * If pg_wal is a symlink, we'll need to recurse into it separately, +	 * because the first walkdir below will ignore it. +	 */ +	xlog_is_symlink = false; + +	{ +		struct stat st; + +		if (lstat(pg_wal, &st) < 0) +			pg_log_error("could not stat file \"%s\": %m", pg_wal); +		else if (S_ISLNK(st.st_mode)) +			xlog_is_symlink = true; +	} + +	/* +	 * If possible, hint to the kernel that we're soon going to fsync the data +	 * directory and its contents. +	 */ +#ifdef PG_FLUSH_DATA_WORKS +	walkdir(pg_data, pre_sync_fname, false); +	if (xlog_is_symlink) +		walkdir(pg_wal, pre_sync_fname, false); +	walkdir(pg_tblspc, pre_sync_fname, true); +#endif + +	/* +	 * Now we do the fsync()s in the same order. +	 * +	 * The main call ignores symlinks, so in addition to specially processing +	 * pg_wal if it's a symlink, pg_tblspc has to be visited separately with +	 * process_symlinks = true.  Note that if there are any plain directories +	 * in pg_tblspc, they'll get fsync'd twice.  That's not an expected case +	 * so we don't worry about optimizing it. +	 */ +	walkdir(pg_data, fsync_fname, false); +	if (xlog_is_symlink) +		walkdir(pg_wal, fsync_fname, false); +	walkdir(pg_tblspc, fsync_fname, true); +} + +/* + * Issue fsync recursively on the given directory and all its contents. + * + * This is a convenient wrapper on top of walkdir(). 
+ */ +void +fsync_dir_recurse(const char *dir) +{ +	/* +	 * If possible, hint to the kernel that we're soon going to fsync the data +	 * directory and its contents. +	 */ +#ifdef PG_FLUSH_DATA_WORKS +	walkdir(dir, pre_sync_fname, false); +#endif + +	walkdir(dir, fsync_fname, false); +} + +/* + * walkdir: recursively walk a directory, applying the action to each + * regular file and directory (including the named directory itself). + * + * If process_symlinks is true, the action and recursion are also applied + * to regular files and directories that are pointed to by symlinks in the + * given directory; otherwise symlinks are ignored.  Symlinks are always + * ignored in subdirectories, ie we intentionally don't pass down the + * process_symlinks flag to recursive calls. + * + * Errors are reported but not considered fatal. + * + * See also walkdir in fd.c, which is a backend version of this logic. + */ +static void +walkdir(const char *path, +		int (*action) (const char *fname, bool isdir), +		bool process_symlinks) +{ +	DIR		   *dir; +	struct dirent *de; + +	dir = opendir(path); +	if (dir == NULL) +	{ +		pg_log_error("could not open directory \"%s\": %m", path); +		return; +	} + +	while (errno = 0, (de = readdir(dir)) != NULL) +	{ +		char		subpath[MAXPGPATH * 2]; + +		if (strcmp(de->d_name, ".") == 0 || +			strcmp(de->d_name, "..") == 0) +			continue; + +		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); + +		switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR)) +		{ +			case PGFILETYPE_REG: +				(*action) (subpath, false); +				break; +			case PGFILETYPE_DIR: +				walkdir(subpath, action, false); +				break; +			default: + +				/* +				 * Errors are already reported directly by get_dirent_type(), +				 * and any remaining symlinks and unknown file types are +				 * ignored. 
+				 */ +				break; +		} +	} + +	if (errno) +		pg_log_error("could not read directory \"%s\": %m", path); + +	(void) closedir(dir); + +	/* +	 * It's important to fsync the destination directory itself as individual +	 * file fsyncs don't guarantee that the directory entry for the file is +	 * synced.  Recent versions of ext4 have made the window much wider but +	 * it's been an issue for ext3 and other filesystems in the past. +	 */ +	(*action) (path, true); +} + +/* + * Hint to the OS that it should get ready to fsync() this file. + * + * Ignores errors trying to open unreadable files, and reports other errors + * non-fatally. + */ +#ifdef PG_FLUSH_DATA_WORKS + +static int +pre_sync_fname(const char *fname, bool isdir) +{ +	int			fd; + +	fd = open(fname, O_RDONLY | PG_BINARY, 0); + +	if (fd < 0) +	{ +		if (errno == EACCES || (isdir && errno == EISDIR)) +			return 0; +		pg_log_error("could not open file \"%s\": %m", fname); +		return -1; +	} + +	/* +	 * We do what pg_flush_data() would do in the backend: prefer to use +	 * sync_file_range, but fall back to posix_fadvise.  We ignore errors +	 * because this is only a hint. +	 */ +#if defined(HAVE_SYNC_FILE_RANGE) +	(void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE); +#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) +	(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); +#else +#error PG_FLUSH_DATA_WORKS should not have been defined +#endif + +	(void) close(fd); +	return 0; +} + +#endif							/* PG_FLUSH_DATA_WORKS */ + +/* + * fsync_fname -- Try to fsync a file or directory + * + * Ignores errors trying to open unreadable files, or trying to fsync + * directories on systems where that isn't allowed/required.  All other errors + * are fatal. 
+ */ +int +fsync_fname(const char *fname, bool isdir) +{ +	int			fd; +	int			flags; +	int			returncode; + +	/* +	 * Some OSs require directories to be opened read-only whereas other +	 * systems don't allow us to fsync files opened read-only; so we need both +	 * cases here.  Using O_RDWR will cause us to fail to fsync files that are +	 * not writable by our userid, but we assume that's OK. +	 */ +	flags = PG_BINARY; +	if (!isdir) +		flags |= O_RDWR; +	else +		flags |= O_RDONLY; + +	/* +	 * Open the file, silently ignoring errors about unreadable files (or +	 * unsupported operations, e.g. opening a directory under Windows), and +	 * logging others. +	 */ +	fd = open(fname, flags, 0); +	if (fd < 0) +	{ +		if (errno == EACCES || (isdir && errno == EISDIR)) +			return 0; +		pg_log_error("could not open file \"%s\": %m", fname); +		return -1; +	} + +	returncode = fsync(fd); + +	/* +	 * Some OSes don't allow us to fsync directories at all, so we can ignore +	 * those errors. Anything else needs to be reported. +	 */ +	if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL))) +	{ +		pg_log_error("could not fsync file \"%s\": %m", fname); +		(void) close(fd); +		exit(EXIT_FAILURE); +	} + +	(void) close(fd); +	return 0; +} + +/* + * fsync_parent_path -- fsync the parent path of a file or directory + * + * This is aimed at making file operations persistent on disk in case of + * an OS crash or power failure. + */ +int +fsync_parent_path(const char *fname) +{ +	char		parentpath[MAXPGPATH]; + +	strlcpy(parentpath, fname, MAXPGPATH); +	get_parent_directory(parentpath); + +	/* +	 * get_parent_directory() returns an empty string if the input argument is +	 * just a file name (see comments in path.c), so handle that as being the +	 * current directory. 
+	 */ +	if (strlen(parentpath) == 0) +		strlcpy(parentpath, ".", MAXPGPATH); + +	if (fsync_fname(parentpath, true) != 0) +		return -1; + +	return 0; +} + +/* + * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability + * + * Wrapper around rename, similar to the backend version. + */ +int +durable_rename(const char *oldfile, const char *newfile) +{ +	int			fd; + +	/* +	 * First fsync the old and target path (if it exists), to ensure that they +	 * are properly persistent on disk. Syncing the target file is not +	 * strictly necessary, but it makes it easier to reason about crashes; +	 * because it's then guaranteed that either source or target file exists +	 * after a crash. +	 */ +	if (fsync_fname(oldfile, false) != 0) +		return -1; + +	fd = open(newfile, PG_BINARY | O_RDWR, 0); +	if (fd < 0) +	{ +		if (errno != ENOENT) +		{ +			pg_log_error("could not open file \"%s\": %m", newfile); +			return -1; +		} +	} +	else +	{ +		if (fsync(fd) != 0) +		{ +			pg_log_error("could not fsync file \"%s\": %m", newfile); +			close(fd); +			exit(EXIT_FAILURE); +		} +		close(fd); +	} + +	/* Time to do the real deal... */ +	if (rename(oldfile, newfile) != 0) +	{ +		pg_log_error("could not rename file \"%s\" to \"%s\": %m", +					 oldfile, newfile); +		return -1; +	} + +	/* +	 * To guarantee renaming the file is persistent, fsync the file with its +	 * new name, and its containing directory. +	 */ +	if (fsync_fname(newfile, false) != 0) +		return -1; + +	if (fsync_parent_path(newfile) != 0) +		return -1; + +	return 0; +} + +#endif							/* FRONTEND */ + +/* + * Return the type of a directory entry. + * + * In frontend code, elevel should be a level from logging.h; in backend code + * it should be a level from elog.h. 
+ */ +PGFileType +get_dirent_type(const char *path, +				const struct dirent *de, +				bool look_through_symlinks, +				int elevel) +{ +	PGFileType	result; + +	/* +	 * Some systems tell us the type directly in the dirent struct, but that's +	 * a BSD and Linux extension not required by POSIX.  Even when the +	 * interface is present, sometimes the type is unknown, depending on the +	 * filesystem. +	 */ +#if defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK) +	if (de->d_type == DT_REG) +		result = PGFILETYPE_REG; +	else if (de->d_type == DT_DIR) +		result = PGFILETYPE_DIR; +	else if (de->d_type == DT_LNK && !look_through_symlinks) +		result = PGFILETYPE_LNK; +	else +		result = PGFILETYPE_UNKNOWN; +#else +	result = PGFILETYPE_UNKNOWN; +#endif + +	if (result == PGFILETYPE_UNKNOWN) +	{ +		struct stat fst; +		int			sret; + + +		if (look_through_symlinks) +			sret = stat(path, &fst); +		else +			sret = lstat(path, &fst); + +		if (sret < 0) +		{ +			result = PGFILETYPE_ERROR; +#ifdef FRONTEND +			pg_log_generic(elevel, PG_LOG_PRIMARY, "could not stat file \"%s\": %m", path); +#else +			ereport(elevel, +					(errcode_for_file_access(), +					 errmsg("could not stat file \"%s\": %m", path))); +#endif +		} +		else if (S_ISREG(fst.st_mode)) +			result = PGFILETYPE_REG; +		else if (S_ISDIR(fst.st_mode)) +			result = PGFILETYPE_DIR; +		else if (S_ISLNK(fst.st_mode)) +			result = PGFILETYPE_LNK; +	} + +	return result; +} + +/* + * pg_pwritev_with_retry + * + * Convenience wrapper for pg_pwritev() that retries on partial write.  If an + * error is returned, it is unspecified how much has been written. + */ +ssize_t +pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) +{ +	struct iovec iov_copy[PG_IOV_MAX]; +	ssize_t		sum = 0; +	ssize_t		part; + +	/* We'd better have space to make a copy, in case we need to retry. */ +	if (iovcnt > PG_IOV_MAX) +	{ +		errno = EINVAL; +		return -1; +	} + +	for (;;) +	{ +		/* Write as much as we can. 
*/ +		part = pg_pwritev(fd, iov, iovcnt, offset); +		if (part < 0) +			return -1; + +#ifdef SIMULATE_SHORT_WRITE +		part = Min(part, 4096); +#endif + +		/* Count our progress. */ +		sum += part; +		offset += part; + +		/* Step over iovecs that are done. */ +		while (iovcnt > 0 && iov->iov_len <= part) +		{ +			part -= iov->iov_len; +			++iov; +			--iovcnt; +		} + +		/* Are they all done? */ +		if (iovcnt == 0) +		{ +			/* We don't expect the kernel to write more than requested. */ +			Assert(part == 0); +			break; +		} + +		/* +		 * Move whatever's left to the front of our mutable copy and adjust +		 * the leading iovec. +		 */ +		Assert(iovcnt > 0); +		memmove(iov_copy, iov, sizeof(*iov) * iovcnt); +		Assert(iov->iov_len > part); +		iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part; +		iov_copy[0].iov_len -= part; +		iov = iov_copy; +	} + +	return sum; +} + +/* + * pg_pwrite_zeros + * + * Writes zeros to file worth "size" bytes at "offset" (from the start of the + * file), using vectored I/O. + * + * Returns the total amount of data written.  On failure, a negative value + * is returned with errno set. + */ +ssize_t +pg_pwrite_zeros(int fd, size_t size, off_t offset) +{ +	static const PGIOAlignedBlock zbuffer = {{0}};	/* worth BLCKSZ */ +	void	   *zerobuf_addr = unconstify(PGIOAlignedBlock *, &zbuffer)->data; +	struct iovec iov[PG_IOV_MAX]; +	size_t		remaining_size = size; +	ssize_t		total_written = 0; + +	/* Loop, writing as many blocks as we can for each system call. 
*/ +	while (remaining_size > 0) +	{ +		int			iovcnt = 0; +		ssize_t		written; + +		for (; iovcnt < PG_IOV_MAX && remaining_size > 0; iovcnt++) +		{ +			size_t		this_iov_size; + +			iov[iovcnt].iov_base = zerobuf_addr; + +			if (remaining_size < BLCKSZ) +				this_iov_size = remaining_size; +			else +				this_iov_size = BLCKSZ; + +			iov[iovcnt].iov_len = this_iov_size; +			remaining_size -= this_iov_size; +		} + +		written = pg_pwritev_with_retry(fd, iov, iovcnt, offset); + +		if (written < 0) +			return written; + +		offset += written; +		total_written += written; +	} + +	Assert(total_written == size); + +	return total_written; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/hashfn.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/hashfn.c new file mode 100644 index 00000000000..2490607eea5 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/hashfn.c @@ -0,0 +1,692 @@ +/*------------------------------------------------------------------------- + * + * hashfn.c + *		Generic hashing functions, and hash functions for use in dynahash.c + *		hashtables + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/hashfn.c + * + * NOTES + *	  It is expected that every bit of a hash function's 32-bit result is + *	  as random as every other; failure to ensure this is likely to lead + *	  to poor performance of hash tables.  In most cases a hash + *	  function should use hash_bytes() or its variant hash_bytes_uint32(), + *	  or the wrappers hash_any() and hash_uint32 defined in hashfn.h. 
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "common/hashfn.h"
#include "port/pg_bitutils.h"


/*
 * This hash function was written by Bob Jenkins
 * (bob_jenkins@burtleburtle.net), and superficially adapted
 * for PostgreSQL by Neil Conway. For more information on this
 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
 *
 * In the current code, we have adopted Bob's 2006 update of his hash
 * function to fetch the data a word at a time when it is suitably aligned.
 * This makes for a useful speedup, at the cost of having to maintain
 * four code paths (aligned vs unaligned, and little-endian vs big-endian).
 * It also uses two separate mixing functions mix() and final(), instead
 * of a slower multi-purpose function.
 */

/* Get a bit mask of the bits set in non-uint32 aligned addresses */
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)

/* rot(x,k): rotate the 32-bit value x left by k bits */
#define rot(x,k) pg_rotate_left32(x, k)

/*----------
 * mix -- mix 3 32-bit values reversibly.
 *
 * This is reversible, so any information in (a,b,c) before mix() is
 * still in (a,b,c) after mix().
 *
 * If four pairs of (a,b,c) inputs are run through mix(), or through
 * mix() in reverse, there are at least 32 bits of the output that
 * are sometimes the same for one pair and different for another pair.
 * This was tested for:
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * This does not achieve avalanche.  There are input bits of (a,b,c)
 * that fail to affect some output bits of (a,b,c), especially of a.  The
 * most thoroughly mixed value is c, but it doesn't really even achieve
 * avalanche in c.
 *
 * This allows some parallelism.  Read-after-writes are good at doubling
 * the number of bits affected, so the goal of mixing pulls in the opposite
 * direction from the goal of parallelism.  I did what I could.  Rotates
 * seem to cost as much as shifts on every machine I could lay my hands on,
 * and rotates are much kinder to the top and bottom bits, so I used rotates.
 *
 * Note: the macro modifies its three arguments in place and evaluates each
 * of them several times, so they must be plain lvalues.
 *----------
 */
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);	c += b; \
  b -= a;  b ^= rot(a, 6);	a += c; \
  c -= b;  c ^= rot(b, 8);	b += a; \
  a -= c;  a ^= rot(c,16);	c += b; \
  b -= a;  b ^= rot(a,19);	a += c; \
  c -= b;  c ^= rot(b, 4);	b += a; \
}

/*----------
 * final -- final mixing of 3 32-bit values (a,b,c) into c
 *
 * Pairs of (a,b,c) values differing in only a few bits will usually
 * produce values of c that look totally different.  This was tested for
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * The use of separate functions for mix() and final() allows for a
 * substantial performance increase since final() does not need to
 * do well in reverse, but it does need to affect all output bits.
 * mix(), on the other hand, does not need to affect all output
 * bits (affecting 32 bits is enough).  
The original hash function had + * a single mixing operation that had to satisfy both sets of requirements + * and was slower as a result. + *---------- + */ +#define final(a,b,c) \ +{ \ +  c ^= b; c -= rot(b,14); \ +  a ^= c; a -= rot(c,11); \ +  b ^= a; b -= rot(a,25); \ +  c ^= b; c -= rot(b,16); \ +  a ^= c; a -= rot(c, 4); \ +  b ^= a; b -= rot(a,14); \ +  c ^= b; c -= rot(b,24); \ +} + +/* + * hash_bytes() -- hash a variable-length key into a 32-bit value + *		k		: the key (the unaligned variable-length array of bytes) + *		len		: the length of the key, counting by bytes + * + * Returns a uint32 value.  Every bit of the key affects every bit of + * the return value.  Every 1-bit and 2-bit delta achieves avalanche. + * About 6*len+35 instructions. The best hash table sizes are powers + * of 2.  There is no need to do mod a prime (mod is sooo slow!). + * If you need less than 32 bits, use a bitmask. + * + * This procedure must never throw elog(ERROR); the ResourceOwner code + * relies on this not to fail. + * + * Note: we could easily change this function to return a 64-bit hash value + * by using the final values of both b and c.  b is perhaps a little less + * well mixed than c, however. 
+ */ +uint32 +hash_bytes(const unsigned char *k, int keylen) +{ +	uint32		a, +				b, +				c, +				len; + +	/* Set up the internal state */ +	len = keylen; +	a = b = c = 0x9e3779b9 + len + 3923095; + +	/* If the source pointer is word-aligned, we use word-wide fetches */ +	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) +	{ +		/* Code path for aligned source data */ +		const uint32 *ka = (const uint32 *) k; + +		/* handle most of the key */ +		while (len >= 12) +		{ +			a += ka[0]; +			b += ka[1]; +			c += ka[2]; +			mix(a, b, c); +			ka += 3; +			len -= 12; +		} + +		/* handle the last 11 bytes */ +		k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN +		switch (len) +		{ +			case 11: +				c += ((uint32) k[10] << 8); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 24); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += ka[1]; +				a += ka[0]; +				break; +			case 7: +				b += ((uint32) k[6] << 8); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 16); +				/* fall through */ +			case 5: +				b += ((uint32) k[4] << 24); +				/* fall through */ +			case 4: +				a += ka[0]; +				break; +			case 3: +				a += ((uint32) k[2] << 8); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 16); +				/* fall through */ +			case 1: +				a += ((uint32) k[0] << 24); +				/* case 0: nothing left to add */ +		} +#else							/* !WORDS_BIGENDIAN */ +		switch (len) +		{ +			case 11: +				c += ((uint32) k[10] << 24); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 8); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += ka[1]; +				a += ka[0]; +				break; +			case 7: +				b += ((uint32) k[6] << 16); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 8); +				/* fall through 
*/ +			case 5: +				b += k[4]; +				/* fall through */ +			case 4: +				a += ka[0]; +				break; +			case 3: +				a += ((uint32) k[2] << 16); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 8); +				/* fall through */ +			case 1: +				a += k[0]; +				/* case 0: nothing left to add */ +		} +#endif							/* WORDS_BIGENDIAN */ +	} +	else +	{ +		/* Code path for non-aligned source data */ + +		/* handle most of the key */ +		while (len >= 12) +		{ +#ifdef WORDS_BIGENDIAN +			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); +			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); +			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else							/* !WORDS_BIGENDIAN */ +			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); +			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); +			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif							/* WORDS_BIGENDIAN */ +			mix(a, b, c); +			k += 12; +			len -= 12; +		} + +		/* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN +		switch (len) +		{ +			case 11: +				c += ((uint32) k[10] << 8); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 24); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += k[7]; +				/* fall through */ +			case 7: +				b += ((uint32) k[6] << 8); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 16); +				/* fall through */ +			case 5: +				b += ((uint32) k[4] << 24); +				/* fall through */ +			case 4: +				a += k[3]; +				/* fall through */ +			case 3: +				a += ((uint32) k[2] << 8); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 16); +				/* fall through */ +			case 1: +				a += ((uint32) k[0] << 24); +				/* case 0: 
nothing left to add */ +		} +#else							/* !WORDS_BIGENDIAN */ +		switch (len) +		{ +			case 11: +				c += ((uint32) k[10] << 24); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 8); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += ((uint32) k[7] << 24); +				/* fall through */ +			case 7: +				b += ((uint32) k[6] << 16); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 8); +				/* fall through */ +			case 5: +				b += k[4]; +				/* fall through */ +			case 4: +				a += ((uint32) k[3] << 24); +				/* fall through */ +			case 3: +				a += ((uint32) k[2] << 16); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 8); +				/* fall through */ +			case 1: +				a += k[0]; +				/* case 0: nothing left to add */ +		} +#endif							/* WORDS_BIGENDIAN */ +	} + +	final(a, b, c); + +	/* report the result */ +	return c; +} + +/* + * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed + *		k		: the key (the unaligned variable-length array of bytes) + *		len		: the length of the key, counting by bytes + *		seed	: a 64-bit seed (0 means no seed) + * + * Returns a uint64 value.  Otherwise similar to hash_bytes. + */ +uint64 +hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed) +{ +	uint32		a, +				b, +				c, +				len; + +	/* Set up the internal state */ +	len = keylen; +	a = b = c = 0x9e3779b9 + len + 3923095; + +	/* If the seed is non-zero, use it to perturb the internal state. */ +	if (seed != 0) +	{ +		/* +		 * In essence, the seed is treated as part of the data being hashed, +		 * but for simplicity, we pretend that it's padded with four bytes of +		 * zeroes so that the seed constitutes a 12-byte chunk. 
+		 */ +		a += (uint32) (seed >> 32); +		b += (uint32) seed; +		mix(a, b, c); +	} + +	/* If the source pointer is word-aligned, we use word-wide fetches */ +	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) +	{ +		/* Code path for aligned source data */ +		const uint32 *ka = (const uint32 *) k; + +		/* handle most of the key */ +		while (len >= 12) +		{ +			a += ka[0]; +			b += ka[1]; +			c += ka[2]; +			mix(a, b, c); +			ka += 3; +			len -= 12; +		} + +		/* handle the last 11 bytes */ +		k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN +		switch (len) +		{ +			case 11: +				c += ((uint32) k[10] << 8); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 24); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += ka[1]; +				a += ka[0]; +				break; +			case 7: +				b += ((uint32) k[6] << 8); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 16); +				/* fall through */ +			case 5: +				b += ((uint32) k[4] << 24); +				/* fall through */ +			case 4: +				a += ka[0]; +				break; +			case 3: +				a += ((uint32) k[2] << 8); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 16); +				/* fall through */ +			case 1: +				a += ((uint32) k[0] << 24); +				/* case 0: nothing left to add */ +		} +#else							/* !WORDS_BIGENDIAN */ +		switch (len) +		{ +			case 11: +				c += ((uint32) k[10] << 24); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 8); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += ka[1]; +				a += ka[0]; +				break; +			case 7: +				b += ((uint32) k[6] << 16); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 8); +				/* fall through */ +			case 5: +				b += k[4]; +				/* fall through */ +			case 4: +				a += ka[0]; +				break; +			case 3: +				a 
+= ((uint32) k[2] << 16); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 8); +				/* fall through */ +			case 1: +				a += k[0]; +				/* case 0: nothing left to add */ +		} +#endif							/* WORDS_BIGENDIAN */ +	} +	else +	{ +		/* Code path for non-aligned source data */ + +		/* handle most of the key */ +		while (len >= 12) +		{ +#ifdef WORDS_BIGENDIAN +			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); +			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); +			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else							/* !WORDS_BIGENDIAN */ +			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); +			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); +			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif							/* WORDS_BIGENDIAN */ +			mix(a, b, c); +			k += 12; +			len -= 12; +		} + +		/* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN +		switch (len) +		{ +			case 11: +				c += ((uint32) k[10] << 8); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 24); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += k[7]; +				/* fall through */ +			case 7: +				b += ((uint32) k[6] << 8); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 16); +				/* fall through */ +			case 5: +				b += ((uint32) k[4] << 24); +				/* fall through */ +			case 4: +				a += k[3]; +				/* fall through */ +			case 3: +				a += ((uint32) k[2] << 8); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 16); +				/* fall through */ +			case 1: +				a += ((uint32) k[0] << 24); +				/* case 0: nothing left to add */ +		} +#else							/* !WORDS_BIGENDIAN */ +		switch (len) +		{ +			case 11: +				c += 
((uint32) k[10] << 24); +				/* fall through */ +			case 10: +				c += ((uint32) k[9] << 16); +				/* fall through */ +			case 9: +				c += ((uint32) k[8] << 8); +				/* fall through */ +			case 8: +				/* the lowest byte of c is reserved for the length */ +				b += ((uint32) k[7] << 24); +				/* fall through */ +			case 7: +				b += ((uint32) k[6] << 16); +				/* fall through */ +			case 6: +				b += ((uint32) k[5] << 8); +				/* fall through */ +			case 5: +				b += k[4]; +				/* fall through */ +			case 4: +				a += ((uint32) k[3] << 24); +				/* fall through */ +			case 3: +				a += ((uint32) k[2] << 16); +				/* fall through */ +			case 2: +				a += ((uint32) k[1] << 8); +				/* fall through */ +			case 1: +				a += k[0]; +				/* case 0: nothing left to add */ +		} +#endif							/* WORDS_BIGENDIAN */ +	} + +	final(a, b, c); + +	/* report the result */ +	return ((uint64) b << 32) | c; +} + +/* + * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value + * + * This has the same result as + *		hash_bytes(&k, sizeof(uint32)) + * but is faster and doesn't force the caller to store k into memory. + */ +uint32 +hash_bytes_uint32(uint32 k) +{ +	uint32		a, +				b, +				c; + +	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; +	a += k; + +	final(a, b, c); + +	/* report the result */ +	return c; +} + +/* + * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed + * + * Like hash_bytes_uint32, this is a convenience function. + */ +uint64 +hash_bytes_uint32_extended(uint32 k, uint64 seed) +{ +	uint32		a, +				b, +				c; + +	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; + +	if (seed != 0) +	{ +		a += (uint32) (seed >> 32); +		b += (uint32) seed; +		mix(a, b, c); +	} + +	a += k; + +	final(a, b, c); + +	/* report the result */ +	return ((uint64) b << 32) | c; +} + +/* + * string_hash: hash function for keys that are NUL-terminated strings. + * + * NOTE: this is the default hash function if none is specified. 
+ */ +uint32 +string_hash(const void *key, Size keysize) +{ +	/* +	 * If the string exceeds keysize-1 bytes, we want to hash only that many, +	 * because when it is copied into the hash table it will be truncated at +	 * that length. +	 */ +	Size		s_len = strlen((const char *) key); + +	s_len = Min(s_len, keysize - 1); +	return hash_bytes((const unsigned char *) key, (int) s_len); +} + +/* + * tag_hash: hash function for fixed-size tag values + */ +uint32 +tag_hash(const void *key, Size keysize) +{ +	return hash_bytes((const unsigned char *) key, (int) keysize); +} + +/* + * uint32_hash: hash function for keys that are uint32 or int32 + * + * (tag_hash works for this case too, but is slower) + */ +uint32 +uint32_hash(const void *key, Size keysize) +{ +	Assert(keysize == sizeof(uint32)); +	return hash_bytes_uint32(*((const uint32 *) key)); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/hmac_openssl.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/hmac_openssl.c new file mode 100644 index 00000000000..12be542fa27 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/hmac_openssl.c @@ -0,0 +1,348 @@ +/*------------------------------------------------------------------------- + * + * hmac_openssl.c + *	  Implementation of HMAC with OpenSSL. + * + * This should only be used if code is compiled with OpenSSL support. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *	  src/common/hmac_openssl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + + +#include <openssl/err.h> +#include <openssl/hmac.h> + +#include "common/hmac.h" +#include "common/md5.h" +#include "common/sha1.h" +#include "common/sha2.h" +#ifndef FRONTEND +#include "utils/memutils.h" +#include "utils/resowner.h" +#include "utils/resowner_private.h" +#endif + +/* + * In backend, use an allocation in TopMemoryContext to count for resowner + * cleanup handling if necessary.  For versions of OpenSSL where HMAC_CTX is + * known, just use palloc().  In frontend, use malloc to be able to return + * a failure status back to the caller. + */ +#ifndef FRONTEND +#ifdef HAVE_HMAC_CTX_NEW +#define ALLOC(size) MemoryContextAlloc(TopMemoryContext, size) +#else +#define ALLOC(size) palloc(size) +#endif +#define FREE(ptr) pfree(ptr) +#else							/* FRONTEND */ +#define ALLOC(size) malloc(size) +#define FREE(ptr) free(ptr) +#endif							/* FRONTEND */ + +/* Set of error states */ +typedef enum pg_hmac_errno +{ +	PG_HMAC_ERROR_NONE = 0, +	PG_HMAC_ERROR_DEST_LEN, +	PG_HMAC_ERROR_OPENSSL +} pg_hmac_errno; + +/* Internal pg_hmac_ctx structure */ +struct pg_hmac_ctx +{ +	HMAC_CTX   *hmacctx; +	pg_cryptohash_type type; +	pg_hmac_errno error; +	const char *errreason; + +#ifndef FRONTEND +	ResourceOwner resowner; +#endif +}; + +static const char * +SSLerrmessage(unsigned long ecode) +{ +	if (ecode == 0) +		return NULL; + +	/* +	 * This may return NULL, but we would fall back to a default error path if +	 * that were the case. +	 */ +	return ERR_reason_error_string(ecode); +} + +/* + * pg_hmac_create + * + * Allocate a hash context.  Returns NULL on failure for an OOM.  
The + * backend issues an error, without returning. + */ +pg_hmac_ctx * +pg_hmac_create(pg_cryptohash_type type) +{ +	pg_hmac_ctx *ctx; + +	ctx = ALLOC(sizeof(pg_hmac_ctx)); +	if (ctx == NULL) +		return NULL; +	memset(ctx, 0, sizeof(pg_hmac_ctx)); + +	ctx->type = type; +	ctx->error = PG_HMAC_ERROR_NONE; +	ctx->errreason = NULL; + + +	/* +	 * Initialization takes care of assigning the correct type for OpenSSL. +	 * Also ensure that there aren't any unconsumed errors in the queue from +	 * previous runs. +	 */ +	ERR_clear_error(); +#ifdef HAVE_HMAC_CTX_NEW +#ifndef FRONTEND +	ResourceOwnerEnlargeHMAC(CurrentResourceOwner); +#endif +	ctx->hmacctx = HMAC_CTX_new(); +#else +	ctx->hmacctx = ALLOC(sizeof(HMAC_CTX)); +#endif + +	if (ctx->hmacctx == NULL) +	{ +		explicit_bzero(ctx, sizeof(pg_hmac_ctx)); +		FREE(ctx); +#ifndef FRONTEND +		ereport(ERROR, +				(errcode(ERRCODE_OUT_OF_MEMORY), +				 errmsg("out of memory"))); +#endif +		return NULL; +	} + +#ifdef HAVE_HMAC_CTX_NEW +#ifndef FRONTEND +	ctx->resowner = CurrentResourceOwner; +	ResourceOwnerRememberHMAC(CurrentResourceOwner, PointerGetDatum(ctx)); +#endif +#else +	memset(ctx->hmacctx, 0, sizeof(HMAC_CTX)); +#endif							/* HAVE_HMAC_CTX_NEW */ + +	return ctx; +} + +/* + * pg_hmac_init + * + * Initialize a HMAC context.  Returns 0 on success, -1 on failure. 
+ */ +int +pg_hmac_init(pg_hmac_ctx *ctx, const uint8 *key, size_t len) +{ +	int			status = 0; + +	if (ctx == NULL) +		return -1; + +	switch (ctx->type) +	{ +		case PG_MD5: +			status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_md5(), NULL); +			break; +		case PG_SHA1: +			status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha1(), NULL); +			break; +		case PG_SHA224: +			status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha224(), NULL); +			break; +		case PG_SHA256: +			status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha256(), NULL); +			break; +		case PG_SHA384: +			status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha384(), NULL); +			break; +		case PG_SHA512: +			status = HMAC_Init_ex(ctx->hmacctx, key, len, EVP_sha512(), NULL); +			break; +	} + +	/* OpenSSL internals return 1 on success, 0 on failure */ +	if (status <= 0) +	{ +		ctx->errreason = SSLerrmessage(ERR_get_error()); +		ctx->error = PG_HMAC_ERROR_OPENSSL; +		return -1; +	} + +	return 0; +} + +/* + * pg_hmac_update + * + * Update a HMAC context.  Returns 0 on success, -1 on failure. + */ +int +pg_hmac_update(pg_hmac_ctx *ctx, const uint8 *data, size_t len) +{ +	int			status = 0; + +	if (ctx == NULL) +		return -1; + +	status = HMAC_Update(ctx->hmacctx, data, len); + +	/* OpenSSL internals return 1 on success, 0 on failure */ +	if (status <= 0) +	{ +		ctx->errreason = SSLerrmessage(ERR_get_error()); +		ctx->error = PG_HMAC_ERROR_OPENSSL; +		return -1; +	} +	return 0; +} + +/* + * pg_hmac_final + * + * Finalize a HMAC context.  Returns 0 on success, -1 on failure. 
+ */ +int +pg_hmac_final(pg_hmac_ctx *ctx, uint8 *dest, size_t len) +{ +	int			status = 0; +	uint32		outlen; + +	if (ctx == NULL) +		return -1; + +	switch (ctx->type) +	{ +		case PG_MD5: +			if (len < MD5_DIGEST_LENGTH) +			{ +				ctx->error = PG_HMAC_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA1: +			if (len < SHA1_DIGEST_LENGTH) +			{ +				ctx->error = PG_HMAC_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA224: +			if (len < PG_SHA224_DIGEST_LENGTH) +			{ +				ctx->error = PG_HMAC_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA256: +			if (len < PG_SHA256_DIGEST_LENGTH) +			{ +				ctx->error = PG_HMAC_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA384: +			if (len < PG_SHA384_DIGEST_LENGTH) +			{ +				ctx->error = PG_HMAC_ERROR_DEST_LEN; +				return -1; +			} +			break; +		case PG_SHA512: +			if (len < PG_SHA512_DIGEST_LENGTH) +			{ +				ctx->error = PG_HMAC_ERROR_DEST_LEN; +				return -1; +			} +			break; +	} + +	status = HMAC_Final(ctx->hmacctx, dest, &outlen); + +	/* OpenSSL internals return 1 on success, 0 on failure */ +	if (status <= 0) +	{ +		ctx->errreason = SSLerrmessage(ERR_get_error()); +		ctx->error = PG_HMAC_ERROR_OPENSSL; +		return -1; +	} +	return 0; +} + +/* + * pg_hmac_free + * + * Free a HMAC context. + */ +void +pg_hmac_free(pg_hmac_ctx *ctx) +{ +	if (ctx == NULL) +		return; + +#ifdef HAVE_HMAC_CTX_FREE +	HMAC_CTX_free(ctx->hmacctx); +#ifndef FRONTEND +	ResourceOwnerForgetHMAC(ctx->resowner, PointerGetDatum(ctx)); +#endif +#else +	explicit_bzero(ctx->hmacctx, sizeof(HMAC_CTX)); +	FREE(ctx->hmacctx); +#endif + +	explicit_bzero(ctx, sizeof(pg_hmac_ctx)); +	FREE(ctx); +} + +/* + * pg_hmac_error + * + * Returns a static string providing details about an error that happened + * during a HMAC computation. 
+ */ +const char * +pg_hmac_error(pg_hmac_ctx *ctx) +{ +	if (ctx == NULL) +		return _("out of memory"); + +	/* +	 * If a reason is provided, rely on it, else fallback to any error code +	 * set. +	 */ +	if (ctx->errreason) +		return ctx->errreason; + +	switch (ctx->error) +	{ +		case PG_HMAC_ERROR_NONE: +			return _("success"); +		case PG_HMAC_ERROR_DEST_LEN: +			return _("destination buffer too small"); +		case PG_HMAC_ERROR_OPENSSL: +			return _("OpenSSL failure"); +	} + +	Assert(false);				/* cannot be reached */ +	return _("success"); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/ip.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/ip.c new file mode 100644 index 00000000000..b404c74b275 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/ip.c @@ -0,0 +1,264 @@ +/*------------------------------------------------------------------------- + * + * ip.c + *	  IPv6-aware network access. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/ip.c + * + * This file and the IPV6 implementation were initially provided by + * Nigel Kukard <[email protected]>, Linux Based Systems Design + * http://www.lbsd.net. 
static int	getaddrinfo_unix(const char *path,
							 const struct addrinfo *hintsp,
							 struct addrinfo **result);

static int	getnameinfo_unix(const struct sockaddr_un *sa, int salen,
							 char *node, int nodelen,
							 char *service, int servicelen,
							 int flags);


/*
 *	pg_getaddrinfo_all - get address info for Unix, IPv4 and IPv6 sockets
 *
 * hostname, servname: as for getaddrinfo(); an empty hostname is treated
 *		like NULL.  For AF_UNIX hints, servname is the socket path.
 * hintp: lookup hints.  When hintp->ai_family is AF_UNIX the request is
 *		answered by getaddrinfo_unix(); otherwise, including when hintp is
 *		NULL, it is passed on to the system's getaddrinfo().
 * result: receives the resulting list; always set to NULL first.
 *
 * Returns 0 on success or an EAI_* code.  The list must be released with
 * pg_freeaddrinfo_all(), passing the same ai_family that was used in the
 * hints.
 */
int
pg_getaddrinfo_all(const char *hostname, const char *servname,
				   const struct addrinfo *hintp, struct addrinfo **result)
{
	/* not all versions of getaddrinfo() zero *result on failure */
	*result = NULL;

	/*
	 * getaddrinfo() accepts NULL hints, so don't dereference hintp blindly;
	 * without hints there is no way to ask for a Unix-domain socket, and the
	 * request simply goes to the system resolver.
	 */
	if (hintp != NULL && hintp->ai_family == AF_UNIX)
		return getaddrinfo_unix(servname, hintp, result);

	/* NULL has special meaning to getaddrinfo(). */
	return getaddrinfo((!hostname || hostname[0] == '\0') ? NULL : hostname,
					   servname, hintp, result);
}
/*
 *	pg_freeaddrinfo_all - free addrinfo structures for IPv4, IPv6, or Unix
 *
 * hint_ai_family must be the ai_family of the hint structure the list was
 * requested with.  It tells us whether the list was produced by the
 * system's getaddrinfo() or by our own getaddrinfo_unix(), and the two are
 * released differently.  The ai_family stored in the list itself cannot be
 * trusted for this, since some getaddrinfo() versions might return AF_UNIX
 * addresses on their own.
 */
void
pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
{
	struct addrinfo *next;

	if (hint_ai_family != AF_UNIX)
	{
		/* struct was built by getaddrinfo() */
		if (ai != NULL)
			freeaddrinfo(ai);
		return;
	}

	/* struct was built by getaddrinfo_unix (see pg_getaddrinfo_all) */
	for (; ai != NULL; ai = next)
	{
		/* fetch the link before the node goes away */
		next = ai->ai_next;
		free(ai->ai_addr);
		free(ai);
	}
}


/*
 *	pg_getnameinfo_all - get name info for Unix, IPv4 and IPv6 sockets
 *
 * Differs from standard getnameinfo() in two respects: the address is
 * declared as sockaddr_storage instead of struct sockaddr, and whenever
 * the lookup fails every non-NULL output buffer is filled with "???" so
 * that callers always have something to print.
 *
 * Returns 0 on success, otherwise the failing lookup's return code.
 */
int
pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen,
				   char *node, int nodelen,
				   char *service, int servicelen,
				   int flags)
{
	int			rc;

	if (addr != NULL && addr->ss_family == AF_UNIX)
	{
		rc = getnameinfo_unix((const struct sockaddr_un *) addr, salen,
							  node, nodelen,
							  service, servicelen,
							  flags);
	}
	else
	{
		rc = getnameinfo((const struct sockaddr *) addr, salen,
						 node, nodelen,
						 service, servicelen,
						 flags);
	}

	if (rc == 0)
		return 0;

	/* Lookup failed: hand back placeholders in whatever buffers we got. */
	if (node)
		strlcpy(node, "???", nodelen);
	if (service)
		strlcpy(service, "???", servicelen);

	return rc;
}
/* -------
 *	getaddrinfo_unix - get unix socket info using IPv6-compatible API
 *
 *	path: socket path; a leading '@' requests an abstract socket address.
 *	hintsp: optional hints; NULL means AF_UNIX / SOCK_STREAM.  A zero
 *		ai_socktype is treated as SOCK_STREAM.
 *	result: receives a single calloc'd addrinfo whose ai_addr is a calloc'd
 *		sockaddr_un; set to NULL on any failure.
 *
 *	Returns 0 on success, EAI_FAIL for unusable arguments (NULL or overlong
 *	path, hints for a family other than AF_UNIX), EAI_MEMORY when out of
 *	memory.
 *
 *	Bugs: only one addrinfo is set even though hintsp is NULL or
 *		  ai_socktype is 0
 *		  AI_CANONNAME is not supported.
 * -------
 */
static int
getaddrinfo_unix(const char *path, const struct addrinfo *hintsp,
				 struct addrinfo **result)
{
	struct addrinfo hints = {0};
	struct addrinfo *aip;
	struct sockaddr_un *unp;
	size_t		pathlen;

	*result = NULL;

	/* getaddrinfo() callers may legitimately pass a NULL service name */
	if (path == NULL)
		return EAI_FAIL;

	/* the path, including its terminator, must fit into sun_path */
	pathlen = strlen(path);
	if (pathlen >= sizeof(unp->sun_path))
		return EAI_FAIL;

	if (hintsp == NULL)
	{
		hints.ai_family = AF_UNIX;
		hints.ai_socktype = SOCK_STREAM;
	}
	else
		memcpy(&hints, hintsp, sizeof(hints));

	if (hints.ai_socktype == 0)
		hints.ai_socktype = SOCK_STREAM;

	if (hints.ai_family != AF_UNIX)
	{
		/* shouldn't have been called */
		return EAI_FAIL;
	}

	aip = calloc(1, sizeof(struct addrinfo));
	if (aip == NULL)
		return EAI_MEMORY;

	unp = calloc(1, sizeof(struct sockaddr_un));
	if (unp == NULL)
	{
		free(aip);
		return EAI_MEMORY;
	}

	aip->ai_family = AF_UNIX;
	aip->ai_socktype = hints.ai_socktype;
	aip->ai_protocol = hints.ai_protocol;
	aip->ai_next = NULL;
	aip->ai_canonname = NULL;
	*result = aip;

	unp->sun_family = AF_UNIX;
	aip->ai_addr = (struct sockaddr *) unp;
	aip->ai_addrlen = sizeof(struct sockaddr_un);

	/* length was verified above, so copy exactly path plus terminator */
	memcpy(unp->sun_path, path, pathlen + 1);

	/*
	 * If the supplied path starts with @, replace that with a zero byte for
	 * the internal representation.  In that mode, the entire sun_path is the
	 * address, including trailing zero bytes.  But we set the address length
	 * to only include the length of the original string.  That way the
	 * trailing zero bytes won't show up in any network or socket lists of the
	 * operating system.  This is just a convention, also followed by other
	 * packages.
	 */
	if (path[0] == '@')
	{
		unp->sun_path[0] = '\0';
		aip->ai_addrlen = offsetof(struct sockaddr_un, sun_path) + pathlen;
	}

	return 0;
}
/*
 * getnameinfo_unix - describe a Unix-domain socket address
 *
 * Writes the fixed string "[local]" into node and the socket path into
 * service; either buffer (but not both) may be NULL.  An address whose
 * sun_path starts with a zero byte followed by a nonzero byte is rendered
 * as '@' plus the remainder of the path.
 *
 * Returns 0 on success, EAI_FAIL for invalid arguments, and EAI_MEMORY when
 * an output buffer is too small (or snprintf reports an error).  salen and
 * flags are accepted but not used.
 */
static int
getnameinfo_unix(const struct sockaddr_un *sa, int salen,
				 char *node, int nodelen,
				 char *service, int servicelen,
				 int flags)
{
	int			written;

	/* Invalid arguments. */
	if (sa == NULL || sa->sun_family != AF_UNIX)
		return EAI_FAIL;
	if (node == NULL && service == NULL)
		return EAI_FAIL;

	if (node != NULL)
	{
		written = snprintf(node, nodelen, "%s", "[local]");
		if (written < 0 || written >= nodelen)
			return EAI_MEMORY;
	}

	if (service != NULL)
	{
		/*
		 * Check whether it looks like an abstract socket, but it could also
		 * just be an empty string.
		 */
		int			is_abstract = (sa->sun_path[0] == '\0' &&
								   sa->sun_path[1] != '\0');

		if (is_abstract)
			written = snprintf(service, servicelen, "@%s", sa->sun_path + 1);
		else
			written = snprintf(service, servicelen, "%s", sa->sun_path);

		if (written < 0 || written >= servicelen)
			return EAI_MEMORY;
	}

	return 0;
}
/*
 * Parser context, i.e. what the parser was expecting when it looked at the
 * current token.  The recursive descent routines track this implicitly; it
 * is only passed explicitly to report_parse_error() so that the error code
 * can say what was expected.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END				/* saw the end of a document, expect nothing */
} JsonParseContext;

/* Forward declarations for the lexer and the recursive descent routines. */
static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
												 bool *num_err, int *total_len);
static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem);
static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem);
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);

/*
 * The null action object used for pure validation: every member is NULL, so
 * the parse routines find no callback to invoke and only check syntax.
 */
const JsonSemAction nullSemAction =
{
	NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL
};

/* Recursive Descent parser support routines */

/*
 * lex_peek
 *
 * Return the type of the current look-ahead token without consuming it.
 */
static inline JsonTokenType
lex_peek(JsonLexContext *lex)
{
	return lex->token_type;
}
+ */ +static inline JsonParseErrorType +lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token) +{ +	if (lex_peek(lex) == token) +		return json_lex(lex); +	else +		return report_parse_error(ctx, lex); +} + +/* chars to consider as part of an alphanumeric token */ +#define JSON_ALPHANUMERIC_CHAR(c)  \ +	(((c) >= 'a' && (c) <= 'z') || \ +	 ((c) >= 'A' && (c) <= 'Z') || \ +	 ((c) >= '0' && (c) <= '9') || \ +	 (c) == '_' || \ +	 IS_HIGHBIT_SET(c)) + +/* + * Utility function to check if a string is a valid JSON number. + * + * str is of length len, and need not be null-terminated. + */ +bool +IsValidJsonNumber(const char *str, int len) +{ +	bool		numeric_error; +	int			total_len; +	JsonLexContext dummy_lex; + +	if (len <= 0) +		return false; + +	/* +	 * json_lex_number expects a leading  '-' to have been eaten already. +	 * +	 * having to cast away the constness of str is ugly, but there's not much +	 * easy alternative. +	 */ +	if (*str == '-') +	{ +		dummy_lex.input = unconstify(char *, str) + 1; +		dummy_lex.input_length = len - 1; +	} +	else +	{ +		dummy_lex.input = unconstify(char *, str); +		dummy_lex.input_length = len; +	} + +	json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len); + +	return (!numeric_error) && (total_len == dummy_lex.input_length); +} + +/* + * makeJsonLexContextCstringLen + * + * lex constructor, with or without StringInfo object for de-escaped lexemes. + * + * Without is better as it makes the processing faster, so only make one + * if really required. 
+ */ +JsonLexContext * +makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes) +{ +	JsonLexContext *lex = palloc0(sizeof(JsonLexContext)); + +	lex->input = lex->token_terminator = lex->line_start = json; +	lex->line_number = 1; +	lex->input_length = len; +	lex->input_encoding = encoding; +	if (need_escapes) +		lex->strval = makeStringInfo(); +	return lex; +} + +/* + * pg_parse_json + * + * Publicly visible entry point for the JSON parser. + * + * lex is a lexing context, set up for the json to be processed by calling + * makeJsonLexContext(). sem is a structure of function pointers to semantic + * action routines to be called at appropriate spots during parsing, and a + * pointer to a state object to be passed to those routines. + */ +JsonParseErrorType +pg_parse_json(JsonLexContext *lex, JsonSemAction *sem) +{ +	JsonTokenType tok; +	JsonParseErrorType result; + +	/* get the initial token */ +	result = json_lex(lex); +	if (result != JSON_SUCCESS) +		return result; + +	tok = lex_peek(lex); + +	/* parse by recursive descent */ +	switch (tok) +	{ +		case JSON_TOKEN_OBJECT_START: +			result = parse_object(lex, sem); +			break; +		case JSON_TOKEN_ARRAY_START: +			result = parse_array(lex, sem); +			break; +		default: +			result = parse_scalar(lex, sem);	/* json can be a bare scalar */ +	} + +	if (result == JSON_SUCCESS) +		result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END); + +	return result; +} + +/* + * json_count_array_elements + * + * Returns number of array elements in lex context at start of array token + * until end of array token at same nesting level. + * + * Designed to be called from array_start routines. + */ +JsonParseErrorType +json_count_array_elements(JsonLexContext *lex, int *elements) +{ +	JsonLexContext copylex; +	int			count; +	JsonParseErrorType result; + +	/* +	 * It's safe to do this with a shallow copy because the lexical routines +	 * don't scribble on the input. 
They do scribble on the other pointers +	 * etc, so doing this with a copy makes that safe. +	 */ +	memcpy(©lex, lex, sizeof(JsonLexContext)); +	copylex.strval = NULL;		/* not interested in values here */ +	copylex.lex_level++; + +	count = 0; +	result = lex_expect(JSON_PARSE_ARRAY_START, ©lex, +						JSON_TOKEN_ARRAY_START); +	if (result != JSON_SUCCESS) +		return result; +	if (lex_peek(©lex) != JSON_TOKEN_ARRAY_END) +	{ +		while (1) +		{ +			count++; +			result = parse_array_element(©lex, &nullSemAction); +			if (result != JSON_SUCCESS) +				return result; +			if (copylex.token_type != JSON_TOKEN_COMMA) +				break; +			result = json_lex(©lex); +			if (result != JSON_SUCCESS) +				return result; +		} +	} +	result = lex_expect(JSON_PARSE_ARRAY_NEXT, ©lex, +						JSON_TOKEN_ARRAY_END); +	if (result != JSON_SUCCESS) +		return result; + +	*elements = count; +	return JSON_SUCCESS; +} + +/* + *	Recursive Descent parse routines. There is one for each structural + *	element in a json document: + *	  - scalar (string, number, true, false, null) + *	  - array  ( [ ] ) + *	  - array element + *	  - object ( { } ) + *	  - object field + */ +static inline JsonParseErrorType +parse_scalar(JsonLexContext *lex, JsonSemAction *sem) +{ +	char	   *val = NULL; +	json_scalar_action sfunc = sem->scalar; +	JsonTokenType tok = lex_peek(lex); +	JsonParseErrorType result; + +	/* a scalar must be a string, a number, true, false, or null */ +	if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER && +		tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE && +		tok != JSON_TOKEN_NULL) +		return report_parse_error(JSON_PARSE_VALUE, lex); + +	/* if no semantic function, just consume the token */ +	if (sfunc == NULL) +		return json_lex(lex); + +	/* extract the de-escaped string value, or the raw lexeme */ +	if (lex_peek(lex) == JSON_TOKEN_STRING) +	{ +		if (lex->strval != NULL) +			val = pstrdup(lex->strval->data); +	} +	else +	{ +		int			len = (lex->token_terminator - lex->token_start); + +		val 
= palloc(len + 1); +		memcpy(val, lex->token_start, len); +		val[len] = '\0'; +	} + +	/* consume the token */ +	result = json_lex(lex); +	if (result != JSON_SUCCESS) +		return result; + +	/* invoke the callback */ +	result = (*sfunc) (sem->semstate, val, tok); + +	return result; +} + +static JsonParseErrorType +parse_object_field(JsonLexContext *lex, JsonSemAction *sem) +{ +	/* +	 * An object field is "fieldname" : value where value can be a scalar, +	 * object or array.  Note: in user-facing docs and error messages, we +	 * generally call a field name a "key". +	 */ + +	char	   *fname = NULL;	/* keep compiler quiet */ +	json_ofield_action ostart = sem->object_field_start; +	json_ofield_action oend = sem->object_field_end; +	bool		isnull; +	JsonTokenType tok; +	JsonParseErrorType result; + +	if (lex_peek(lex) != JSON_TOKEN_STRING) +		return report_parse_error(JSON_PARSE_STRING, lex); +	if ((ostart != NULL || oend != NULL) && lex->strval != NULL) +		fname = pstrdup(lex->strval->data); +	result = json_lex(lex); +	if (result != JSON_SUCCESS) +		return result; + +	result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON); +	if (result != JSON_SUCCESS) +		return result; + +	tok = lex_peek(lex); +	isnull = tok == JSON_TOKEN_NULL; + +	if (ostart != NULL) +	{ +		result = (*ostart) (sem->semstate, fname, isnull); +		if (result != JSON_SUCCESS) +			return result; +	} + +	switch (tok) +	{ +		case JSON_TOKEN_OBJECT_START: +			result = parse_object(lex, sem); +			break; +		case JSON_TOKEN_ARRAY_START: +			result = parse_array(lex, sem); +			break; +		default: +			result = parse_scalar(lex, sem); +	} +	if (result != JSON_SUCCESS) +		return result; + +	if (oend != NULL) +	{ +		result = (*oend) (sem->semstate, fname, isnull); +		if (result != JSON_SUCCESS) +			return result; +	} + +	return JSON_SUCCESS; +} + +static JsonParseErrorType +parse_object(JsonLexContext *lex, JsonSemAction *sem) +{ +	/* +	 * an object is a possibly empty sequence of object fields, separated by 
+	 * commas and surrounded by curly braces. +	 */ +	json_struct_action ostart = sem->object_start; +	json_struct_action oend = sem->object_end; +	JsonTokenType tok; +	JsonParseErrorType result; + +#ifndef FRONTEND +	check_stack_depth(); +#endif + +	if (ostart != NULL) +	{ +		result = (*ostart) (sem->semstate); +		if (result != JSON_SUCCESS) +			return result; +	} + +	/* +	 * Data inside an object is at a higher nesting level than the object +	 * itself. Note that we increment this after we call the semantic routine +	 * for the object start and restore it before we call the routine for the +	 * object end. +	 */ +	lex->lex_level++; + +	Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START); +	result = json_lex(lex); +	if (result != JSON_SUCCESS) +		return result; + +	tok = lex_peek(lex); +	switch (tok) +	{ +		case JSON_TOKEN_STRING: +			result = parse_object_field(lex, sem); +			while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA) +			{ +				result = json_lex(lex); +				if (result != JSON_SUCCESS) +					break; +				result = parse_object_field(lex, sem); +			} +			break; +		case JSON_TOKEN_OBJECT_END: +			break; +		default: +			/* case of an invalid initial token inside the object */ +			result = report_parse_error(JSON_PARSE_OBJECT_START, lex); +	} +	if (result != JSON_SUCCESS) +		return result; + +	result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END); +	if (result != JSON_SUCCESS) +		return result; + +	lex->lex_level--; + +	if (oend != NULL) +	{ +		result = (*oend) (sem->semstate); +		if (result != JSON_SUCCESS) +			return result; +	} + +	return JSON_SUCCESS; +} + +static JsonParseErrorType +parse_array_element(JsonLexContext *lex, JsonSemAction *sem) +{ +	json_aelem_action astart = sem->array_element_start; +	json_aelem_action aend = sem->array_element_end; +	JsonTokenType tok = lex_peek(lex); +	JsonParseErrorType result; +	bool		isnull; + +	isnull = tok == JSON_TOKEN_NULL; + +	if (astart != NULL) +	{ +		result = (*astart) 
(sem->semstate, isnull); +		if (result != JSON_SUCCESS) +			return result; +	} + +	/* an array element is any object, array or scalar */ +	switch (tok) +	{ +		case JSON_TOKEN_OBJECT_START: +			result = parse_object(lex, sem); +			break; +		case JSON_TOKEN_ARRAY_START: +			result = parse_array(lex, sem); +			break; +		default: +			result = parse_scalar(lex, sem); +	} + +	if (result != JSON_SUCCESS) +		return result; + +	if (aend != NULL) +	{ +		result = (*aend) (sem->semstate, isnull); +		if (result != JSON_SUCCESS) +			return result; +	} + +	return JSON_SUCCESS; +} + +static JsonParseErrorType +parse_array(JsonLexContext *lex, JsonSemAction *sem) +{ +	/* +	 * an array is a possibly empty sequence of array elements, separated by +	 * commas and surrounded by square brackets. +	 */ +	json_struct_action astart = sem->array_start; +	json_struct_action aend = sem->array_end; +	JsonParseErrorType result; + +#ifndef FRONTEND +	check_stack_depth(); +#endif + +	if (astart != NULL) +	{ +		result = (*astart) (sem->semstate); +		if (result != JSON_SUCCESS) +			return result; +	} + +	/* +	 * Data inside an array is at a higher nesting level than the array +	 * itself. Note that we increment this after we call the semantic routine +	 * for the array start and restore it before we call the routine for the +	 * array end. 
+	 */ +	lex->lex_level++; + +	result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START); +	if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END) +	{ +		result = parse_array_element(lex, sem); + +		while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA) +		{ +			result = json_lex(lex); +			if (result != JSON_SUCCESS) +				break; +			result = parse_array_element(lex, sem); +		} +	} +	if (result != JSON_SUCCESS) +		return result; + +	result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END); +	if (result != JSON_SUCCESS) +		return result; + +	lex->lex_level--; + +	if (aend != NULL) +	{ +		result = (*aend) (sem->semstate); +		if (result != JSON_SUCCESS) +			return result; +	} + +	return JSON_SUCCESS; +} + +/* + * Lex one token from the input stream. + */ +JsonParseErrorType +json_lex(JsonLexContext *lex) +{ +	char	   *s; +	char	   *const end = lex->input + lex->input_length; +	JsonParseErrorType result; + +	/* Skip leading whitespace. */ +	s = lex->token_terminator; +	while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')) +	{ +		if (*s++ == '\n') +		{ +			++lex->line_number; +			lex->line_start = s; +		} +	} +	lex->token_start = s; + +	/* Determine token type. */ +	if (s >= end) +	{ +		lex->token_start = NULL; +		lex->prev_token_terminator = lex->token_terminator; +		lex->token_terminator = s; +		lex->token_type = JSON_TOKEN_END; +	} +	else +	{ +		switch (*s) +		{ +				/* Single-character token, some kind of punctuation mark. 
*/ +			case '{': +				lex->prev_token_terminator = lex->token_terminator; +				lex->token_terminator = s + 1; +				lex->token_type = JSON_TOKEN_OBJECT_START; +				break; +			case '}': +				lex->prev_token_terminator = lex->token_terminator; +				lex->token_terminator = s + 1; +				lex->token_type = JSON_TOKEN_OBJECT_END; +				break; +			case '[': +				lex->prev_token_terminator = lex->token_terminator; +				lex->token_terminator = s + 1; +				lex->token_type = JSON_TOKEN_ARRAY_START; +				break; +			case ']': +				lex->prev_token_terminator = lex->token_terminator; +				lex->token_terminator = s + 1; +				lex->token_type = JSON_TOKEN_ARRAY_END; +				break; +			case ',': +				lex->prev_token_terminator = lex->token_terminator; +				lex->token_terminator = s + 1; +				lex->token_type = JSON_TOKEN_COMMA; +				break; +			case ':': +				lex->prev_token_terminator = lex->token_terminator; +				lex->token_terminator = s + 1; +				lex->token_type = JSON_TOKEN_COLON; +				break; +			case '"': +				/* string */ +				result = json_lex_string(lex); +				if (result != JSON_SUCCESS) +					return result; +				lex->token_type = JSON_TOKEN_STRING; +				break; +			case '-': +				/* Negative number. */ +				result = json_lex_number(lex, s + 1, NULL, NULL); +				if (result != JSON_SUCCESS) +					return result; +				lex->token_type = JSON_TOKEN_NUMBER; +				break; +			case '0': +			case '1': +			case '2': +			case '3': +			case '4': +			case '5': +			case '6': +			case '7': +			case '8': +			case '9': +				/* Positive number. */ +				result = json_lex_number(lex, s, NULL, NULL); +				if (result != JSON_SUCCESS) +					return result; +				lex->token_type = JSON_TOKEN_NUMBER; +				break; +			default: +				{ +					char	   *p; + +					/* +					 * We're not dealing with a string, number, legal +					 * punctuation mark, or end of string.  
The only legal +					 * tokens we might find here are true, false, and null, +					 * but for error reporting purposes we scan until we see a +					 * non-alphanumeric character.  That way, we can report +					 * the whole word as an unexpected token, rather than just +					 * some unintuitive prefix thereof. +					 */ +					for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++) +						 /* skip */ ; + +					/* +					 * We got some sort of unexpected punctuation or an +					 * otherwise unexpected character, so just complain about +					 * that one character. +					 */ +					if (p == s) +					{ +						lex->prev_token_terminator = lex->token_terminator; +						lex->token_terminator = s + 1; +						return JSON_INVALID_TOKEN; +					} + +					/* +					 * We've got a real alphanumeric token here.  If it +					 * happens to be true, false, or null, all is well.  If +					 * not, error out. +					 */ +					lex->prev_token_terminator = lex->token_terminator; +					lex->token_terminator = p; +					if (p - s == 4) +					{ +						if (memcmp(s, "true", 4) == 0) +							lex->token_type = JSON_TOKEN_TRUE; +						else if (memcmp(s, "null", 4) == 0) +							lex->token_type = JSON_TOKEN_NULL; +						else +							return JSON_INVALID_TOKEN; +					} +					else if (p - s == 5 && memcmp(s, "false", 5) == 0) +						lex->token_type = JSON_TOKEN_FALSE; +					else +						return JSON_INVALID_TOKEN; +				} +		}						/* end of switch */ +	} + +	return JSON_SUCCESS; +} + +/* + * The next token in the input stream is known to be a string; lex it. + * + * If lex->strval isn't NULL, fill it with the decoded string. + * Set lex->token_terminator to the end of the decoded input, and in + * success cases, transfer its previous value to lex->prev_token_terminator. + * Return JSON_SUCCESS or an error code. + * + * Note: be careful that all error exits advance lex->token_terminator + * to the point after the character we detected the error on. 
+ */ +static inline JsonParseErrorType +json_lex_string(JsonLexContext *lex) +{ +	char	   *s; +	char	   *const end = lex->input + lex->input_length; +	int			hi_surrogate = -1; + +	/* Convenience macros for error exits */ +#define FAIL_AT_CHAR_START(code) \ +	do { \ +		lex->token_terminator = s; \ +		return code; \ +	} while (0) +#define FAIL_AT_CHAR_END(code) \ +	do { \ +		lex->token_terminator = \ +			s + pg_encoding_mblen_bounded(lex->input_encoding, s); \ +		return code; \ +	} while (0) + +	if (lex->strval != NULL) +		resetStringInfo(lex->strval); + +	Assert(lex->input_length > 0); +	s = lex->token_start; +	for (;;) +	{ +		s++; +		/* Premature end of the string. */ +		if (s >= end) +			FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); +		else if (*s == '"') +			break; +		else if (*s == '\\') +		{ +			/* OK, we have an escape character. */ +			s++; +			if (s >= end) +				FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); +			else if (*s == 'u') +			{ +				int			i; +				int			ch = 0; + +				for (i = 1; i <= 4; i++) +				{ +					s++; +					if (s >= end) +						FAIL_AT_CHAR_START(JSON_INVALID_TOKEN); +					else if (*s >= '0' && *s <= '9') +						ch = (ch * 16) + (*s - '0'); +					else if (*s >= 'a' && *s <= 'f') +						ch = (ch * 16) + (*s - 'a') + 10; +					else if (*s >= 'A' && *s <= 'F') +						ch = (ch * 16) + (*s - 'A') + 10; +					else +						FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT); +				} +				if (lex->strval != NULL) +				{ +					/* +					 * Combine surrogate pairs. 
+					 */ +					if (is_utf16_surrogate_first(ch)) +					{ +						if (hi_surrogate != -1) +							FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE); +						hi_surrogate = ch; +						continue; +					} +					else if (is_utf16_surrogate_second(ch)) +					{ +						if (hi_surrogate == -1) +							FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); +						ch = surrogate_pair_to_codepoint(hi_surrogate, ch); +						hi_surrogate = -1; +					} + +					if (hi_surrogate != -1) +						FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); + +					/* +					 * Reject invalid cases.  We can't have a value above +					 * 0xFFFF here (since we only accepted 4 hex digits +					 * above), so no need to test for out-of-range chars. +					 */ +					if (ch == 0) +					{ +						/* We can't allow this, since our TEXT type doesn't */ +						FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO); +					} + +					/* +					 * Add the represented character to lex->strval.  In the +					 * backend, we can let pg_unicode_to_server_noerror() +					 * handle any required character set conversion; in +					 * frontend, we can only deal with trivial conversions. 
+					 */ +#ifndef FRONTEND +					{ +						char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + +						if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf)) +							FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE); +						appendStringInfoString(lex->strval, cbuf); +					} +#else +					if (lex->input_encoding == PG_UTF8) +					{ +						/* OK, we can map the code point to UTF8 easily */ +						char		utf8str[5]; +						int			utf8len; + +						unicode_to_utf8(ch, (unsigned char *) utf8str); +						utf8len = pg_utf_mblen((unsigned char *) utf8str); +						appendBinaryStringInfo(lex->strval, utf8str, utf8len); +					} +					else if (ch <= 0x007f) +					{ +						/* The ASCII range is the same in all encodings */ +						appendStringInfoChar(lex->strval, (char) ch); +					} +					else +						FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE); +#endif							/* FRONTEND */ +				} +			} +			else if (lex->strval != NULL) +			{ +				if (hi_surrogate != -1) +					FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); + +				switch (*s) +				{ +					case '"': +					case '\\': +					case '/': +						appendStringInfoChar(lex->strval, *s); +						break; +					case 'b': +						appendStringInfoChar(lex->strval, '\b'); +						break; +					case 'f': +						appendStringInfoChar(lex->strval, '\f'); +						break; +					case 'n': +						appendStringInfoChar(lex->strval, '\n'); +						break; +					case 'r': +						appendStringInfoChar(lex->strval, '\r'); +						break; +					case 't': +						appendStringInfoChar(lex->strval, '\t'); +						break; +					default: + +						/* +						 * Not a valid string escape, so signal error.  We +						 * adjust token_start so that just the escape sequence +						 * is reported, not the whole string. 
+						 */ +						lex->token_start = s; +						FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID); +				} +			} +			else if (strchr("\"\\/bfnrt", *s) == NULL) +			{ +				/* +				 * Simpler processing if we're not bothered about de-escaping +				 * +				 * It's very tempting to remove the strchr() call here and +				 * replace it with a switch statement, but testing so far has +				 * shown it's not a performance win. +				 */ +				lex->token_start = s; +				FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID); +			} +		} +		else +		{ +			char	   *p = s; + +			if (hi_surrogate != -1) +				FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); + +			/* +			 * Skip to the first byte that requires special handling, so we +			 * can batch calls to appendBinaryStringInfo. +			 */ +			while (p < end - sizeof(Vector8) && +				   !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) && +				   !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) && +				   !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8))) +				p += sizeof(Vector8); + +			for (; p < end; p++) +			{ +				if (*p == '\\' || *p == '"') +					break; +				else if ((unsigned char) *p <= 31) +				{ +					/* Per RFC4627, these characters MUST be escaped. */ +					/* +					 * Since *p isn't printable, exclude it from the context +					 * string +					 */ +					lex->token_terminator = p; +					return JSON_ESCAPING_REQUIRED; +				} +			} + +			if (lex->strval != NULL) +				appendBinaryStringInfo(lex->strval, s, p - s); + +			/* +			 * s will be incremented at the top of the loop, so set it to just +			 * behind our lookahead position +			 */ +			s = p - 1; +		} +	} + +	if (hi_surrogate != -1) +	{ +		lex->token_terminator = s + 1; +		return JSON_UNICODE_LOW_SURROGATE; +	} + +	/* Hooray, we found the end of the string! */ +	lex->prev_token_terminator = lex->token_terminator; +	lex->token_terminator = s + 1; +	return JSON_SUCCESS; + +#undef FAIL_AT_CHAR_START +#undef FAIL_AT_CHAR_END +} + +/* + * The next token in the input stream is known to be a number; lex it. 
+ * + * In JSON, a number consists of four parts: + * + * (1) An optional minus sign ('-'). + * + * (2) Either a single '0', or a string of one or more digits that does not + *	   begin with a '0'. + * + * (3) An optional decimal part, consisting of a period ('.') followed by + *	   one or more digits.  (Note: While this part can be omitted + *	   completely, it's not OK to have only the decimal point without + *	   any digits afterwards.) + * + * (4) An optional exponent part, consisting of 'e' or 'E', optionally + *	   followed by '+' or '-', followed by one or more digits.  (Note: + *	   As with the decimal part, if 'e' or 'E' is present, it must be + *	   followed by at least one digit.) + * + * The 's' argument to this function points to the ostensible beginning + * of part 2 - i.e. the character after any optional minus sign, or the + * first character of the string if there is none. + * + * If num_err is not NULL, we return an error flag to *num_err rather than + * raising an error for a badly-formed number.  Also, if total_len is not NULL + * the distance from lex->input to the token end+1 is returned to *total_len. + */ +static inline JsonParseErrorType +json_lex_number(JsonLexContext *lex, char *s, +				bool *num_err, int *total_len) +{ +	bool		error = false; +	int			len = s - lex->input; + +	/* Part (1): leading sign indicator. */ +	/* Caller already did this for us; so do nothing. */ + +	/* Part (2): parse main digit string. */ +	if (len < lex->input_length && *s == '0') +	{ +		s++; +		len++; +	} +	else if (len < lex->input_length && *s >= '1' && *s <= '9') +	{ +		do +		{ +			s++; +			len++; +		} while (len < lex->input_length && *s >= '0' && *s <= '9'); +	} +	else +		error = true; + +	/* Part (3): parse optional decimal portion. 
*/ +	if (len < lex->input_length && *s == '.') +	{ +		s++; +		len++; +		if (len == lex->input_length || *s < '0' || *s > '9') +			error = true; +		else +		{ +			do +			{ +				s++; +				len++; +			} while (len < lex->input_length && *s >= '0' && *s <= '9'); +		} +	} + +	/* Part (4): parse optional exponent. */ +	if (len < lex->input_length && (*s == 'e' || *s == 'E')) +	{ +		s++; +		len++; +		if (len < lex->input_length && (*s == '+' || *s == '-')) +		{ +			s++; +			len++; +		} +		if (len == lex->input_length || *s < '0' || *s > '9') +			error = true; +		else +		{ +			do +			{ +				s++; +				len++; +			} while (len < lex->input_length && *s >= '0' && *s <= '9'); +		} +	} + +	/* +	 * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff +	 * here should be considered part of the token for error-reporting +	 * purposes. +	 */ +	for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++) +		error = true; + +	if (total_len != NULL) +		*total_len = len; + +	if (num_err != NULL) +	{ +		/* let the caller handle any error */ +		*num_err = error; +	} +	else +	{ +		/* return token endpoint */ +		lex->prev_token_terminator = lex->token_terminator; +		lex->token_terminator = s; +		/* handle error if any */ +		if (error) +			return JSON_INVALID_TOKEN; +	} + +	return JSON_SUCCESS; +} + +/* + * Report a parse error. + * + * lex->token_start and lex->token_terminator must identify the current token. + */ +static JsonParseErrorType +report_parse_error(JsonParseContext ctx, JsonLexContext *lex) +{ +	/* Handle case where the input ended prematurely. */ +	if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END) +		return JSON_EXPECTED_MORE; + +	/* Otherwise choose the error type based on the parsing context. 
*/ +	switch (ctx) +	{ +		case JSON_PARSE_END: +			return JSON_EXPECTED_END; +		case JSON_PARSE_VALUE: +			return JSON_EXPECTED_JSON; +		case JSON_PARSE_STRING: +			return JSON_EXPECTED_STRING; +		case JSON_PARSE_ARRAY_START: +			return JSON_EXPECTED_ARRAY_FIRST; +		case JSON_PARSE_ARRAY_NEXT: +			return JSON_EXPECTED_ARRAY_NEXT; +		case JSON_PARSE_OBJECT_START: +			return JSON_EXPECTED_OBJECT_FIRST; +		case JSON_PARSE_OBJECT_LABEL: +			return JSON_EXPECTED_COLON; +		case JSON_PARSE_OBJECT_NEXT: +			return JSON_EXPECTED_OBJECT_NEXT; +		case JSON_PARSE_OBJECT_COMMA: +			return JSON_EXPECTED_STRING; +	} + +	/* +	 * We don't use a default: case, so that the compiler will warn about +	 * unhandled enum values. +	 */ +	Assert(false); +	return JSON_SUCCESS;		/* silence stupider compilers */ +} + + +#ifndef FRONTEND +/* + * Extract the current token from a lexing context, for error reporting. + */ +static char * +extract_token(JsonLexContext *lex) +{ +	int			toklen = lex->token_terminator - lex->token_start; +	char	   *token = palloc(toklen + 1); + +	memcpy(token, lex->token_start, toklen); +	token[toklen] = '\0'; +	return token; +} + +/* + * Construct an (already translated) detail message for a JSON error. + * + * Note that the error message generated by this routine may not be + * palloc'd, making it unsafe for frontend code as there is no way to + * know if this can be safely pfree'd or not. 
+ */ +char * +json_errdetail(JsonParseErrorType error, JsonLexContext *lex) +{ +	switch (error) +	{ +		case JSON_SUCCESS: +			/* fall through to the error code after switch */ +			break; +		case JSON_ESCAPING_INVALID: +			return psprintf(_("Escape sequence \"\\%s\" is invalid."), +							extract_token(lex)); +		case JSON_ESCAPING_REQUIRED: +			return psprintf(_("Character with value 0x%02x must be escaped."), +							(unsigned char) *(lex->token_terminator)); +		case JSON_EXPECTED_END: +			return psprintf(_("Expected end of input, but found \"%s\"."), +							extract_token(lex)); +		case JSON_EXPECTED_ARRAY_FIRST: +			return psprintf(_("Expected array element or \"]\", but found \"%s\"."), +							extract_token(lex)); +		case JSON_EXPECTED_ARRAY_NEXT: +			return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."), +							extract_token(lex)); +		case JSON_EXPECTED_COLON: +			return psprintf(_("Expected \":\", but found \"%s\"."), +							extract_token(lex)); +		case JSON_EXPECTED_JSON: +			return psprintf(_("Expected JSON value, but found \"%s\"."), +							extract_token(lex)); +		case JSON_EXPECTED_MORE: +			return _("The input string ended unexpectedly."); +		case JSON_EXPECTED_OBJECT_FIRST: +			return psprintf(_("Expected string or \"}\", but found \"%s\"."), +							extract_token(lex)); +		case JSON_EXPECTED_OBJECT_NEXT: +			return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."), +							extract_token(lex)); +		case JSON_EXPECTED_STRING: +			return psprintf(_("Expected string, but found \"%s\"."), +							extract_token(lex)); +		case JSON_INVALID_TOKEN: +			return psprintf(_("Token \"%s\" is invalid."), +							extract_token(lex)); +		case JSON_UNICODE_CODE_POINT_ZERO: +			return _("\\u0000 cannot be converted to text."); +		case JSON_UNICODE_ESCAPE_FORMAT: +			return _("\"\\u\" must be followed by four hexadecimal digits."); +		case JSON_UNICODE_HIGH_ESCAPE: +			/* note: this case is only reachable in frontend not backend */ +			return _("Unicode 
escape values cannot be used for code point values above 007F when the encoding is not UTF8."); +		case JSON_UNICODE_UNTRANSLATABLE: +			/* note: this case is only reachable in backend not frontend */ +			return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."), +							GetDatabaseEncodingName()); +		case JSON_UNICODE_HIGH_SURROGATE: +			return _("Unicode high surrogate must not follow a high surrogate."); +		case JSON_UNICODE_LOW_SURROGATE: +			return _("Unicode low surrogate must follow a high surrogate."); +		case JSON_SEM_ACTION_FAILED: +			/* fall through to the error code after switch */ +			break; +	} + +	/* +	 * We don't use a default: case, so that the compiler will warn about +	 * unhandled enum values.  But this needs to be here anyway to cover the +	 * possibility of an incorrect input. +	 */ +	elog(ERROR, "unexpected json parse error type: %d", (int) error); +	return NULL; +} +#endif diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/keywords.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/keywords.c new file mode 100644 index 00000000000..b72f0d554ff --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/keywords.c @@ -0,0 +1,48 @@ +/*------------------------------------------------------------------------- + * + * keywords.c + *	  PostgreSQL's list of SQL keywords + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/keywords.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/keywords.h" + + +/* ScanKeywordList lookup data for SQL keywords */ + +#include "kwlist_d.h" + +/* Keyword categories for SQL keywords */ + +#define PG_KEYWORD(kwname, value, category, collabel) category, + +const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = { 
+#include "parser/kwlist.h" +}; + +#undef PG_KEYWORD + +/* Keyword can-be-bare-label flags for SQL keywords */ + +#define PG_KEYWORD(kwname, value, category, collabel) collabel, + +#define BARE_LABEL true +#define AS_LABEL false + +const bool	ScanKeywordBareLabel[SCANKEYWORDS_NUM_KEYWORDS] = { +#include "parser/kwlist.h" +}; + +#undef PG_KEYWORD +#undef BARE_LABEL +#undef AS_LABEL diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/kwlist_d.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/kwlist_d.h new file mode 100644 index 00000000000..e8af2602376 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/kwlist_d.h @@ -0,0 +1,1119 @@ +/*------------------------------------------------------------------------- + * + * kwlist_d.h + *    List of keywords represented as a ScanKeywordList. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES + *  ****************************** + *  *** DO NOT EDIT THIS FILE! 
*** + *  ****************************** + * + *  It has been GENERATED by src/tools/gen_keywordlist.pl + * + *------------------------------------------------------------------------- + */ + +#ifndef KWLIST_D_H +#define KWLIST_D_H + +#include "common/kwlookup.h" + +static const char ScanKeywords_kw_string[] = +	"abort\0" +	"absent\0" +	"absolute\0" +	"access\0" +	"action\0" +	"add\0" +	"admin\0" +	"after\0" +	"aggregate\0" +	"all\0" +	"also\0" +	"alter\0" +	"always\0" +	"analyse\0" +	"analyze\0" +	"and\0" +	"any\0" +	"array\0" +	"as\0" +	"asc\0" +	"asensitive\0" +	"assertion\0" +	"assignment\0" +	"asymmetric\0" +	"at\0" +	"atomic\0" +	"attach\0" +	"attribute\0" +	"authorization\0" +	"backward\0" +	"before\0" +	"begin\0" +	"between\0" +	"bigint\0" +	"binary\0" +	"bit\0" +	"boolean\0" +	"both\0" +	"breadth\0" +	"by\0" +	"cache\0" +	"call\0" +	"called\0" +	"cascade\0" +	"cascaded\0" +	"case\0" +	"cast\0" +	"catalog\0" +	"chain\0" +	"char\0" +	"character\0" +	"characteristics\0" +	"check\0" +	"checkpoint\0" +	"class\0" +	"close\0" +	"cluster\0" +	"coalesce\0" +	"collate\0" +	"collation\0" +	"column\0" +	"columns\0" +	"comment\0" +	"comments\0" +	"commit\0" +	"committed\0" +	"compression\0" +	"concurrently\0" +	"configuration\0" +	"conflict\0" +	"connection\0" +	"constraint\0" +	"constraints\0" +	"content\0" +	"continue\0" +	"conversion\0" +	"copy\0" +	"cost\0" +	"create\0" +	"cross\0" +	"csv\0" +	"cube\0" +	"current\0" +	"current_catalog\0" +	"current_date\0" +	"current_role\0" +	"current_schema\0" +	"current_time\0" +	"current_timestamp\0" +	"current_user\0" +	"cursor\0" +	"cycle\0" +	"data\0" +	"database\0" +	"day\0" +	"deallocate\0" +	"dec\0" +	"decimal\0" +	"declare\0" +	"default\0" +	"defaults\0" +	"deferrable\0" +	"deferred\0" +	"definer\0" +	"delete\0" +	"delimiter\0" +	"delimiters\0" +	"depends\0" +	"depth\0" +	"desc\0" +	"detach\0" +	"dictionary\0" +	"disable\0" +	"discard\0" +	"distinct\0" +	"do\0" +	"document\0" +	"domain\0" +	"double\0" +	"drop\0" +	
"each\0" +	"else\0" +	"enable\0" +	"encoding\0" +	"encrypted\0" +	"end\0" +	"enum\0" +	"escape\0" +	"event\0" +	"except\0" +	"exclude\0" +	"excluding\0" +	"exclusive\0" +	"execute\0" +	"exists\0" +	"explain\0" +	"expression\0" +	"extension\0" +	"external\0" +	"extract\0" +	"false\0" +	"family\0" +	"fetch\0" +	"filter\0" +	"finalize\0" +	"first\0" +	"float\0" +	"following\0" +	"for\0" +	"force\0" +	"foreign\0" +	"format\0" +	"forward\0" +	"freeze\0" +	"from\0" +	"full\0" +	"function\0" +	"functions\0" +	"generated\0" +	"global\0" +	"grant\0" +	"granted\0" +	"greatest\0" +	"group\0" +	"grouping\0" +	"groups\0" +	"handler\0" +	"having\0" +	"header\0" +	"hold\0" +	"hour\0" +	"identity\0" +	"if\0" +	"ilike\0" +	"immediate\0" +	"immutable\0" +	"implicit\0" +	"import\0" +	"in\0" +	"include\0" +	"including\0" +	"increment\0" +	"indent\0" +	"index\0" +	"indexes\0" +	"inherit\0" +	"inherits\0" +	"initially\0" +	"inline\0" +	"inner\0" +	"inout\0" +	"input\0" +	"insensitive\0" +	"insert\0" +	"instead\0" +	"int\0" +	"integer\0" +	"intersect\0" +	"interval\0" +	"into\0" +	"invoker\0" +	"is\0" +	"isnull\0" +	"isolation\0" +	"join\0" +	"json\0" +	"json_array\0" +	"json_arrayagg\0" +	"json_object\0" +	"json_objectagg\0" +	"key\0" +	"keys\0" +	"label\0" +	"language\0" +	"large\0" +	"last\0" +	"lateral\0" +	"leading\0" +	"leakproof\0" +	"least\0" +	"left\0" +	"level\0" +	"like\0" +	"limit\0" +	"listen\0" +	"load\0" +	"local\0" +	"localtime\0" +	"localtimestamp\0" +	"location\0" +	"lock\0" +	"locked\0" +	"logged\0" +	"mapping\0" +	"match\0" +	"matched\0" +	"materialized\0" +	"maxvalue\0" +	"merge\0" +	"method\0" +	"minute\0" +	"minvalue\0" +	"mode\0" +	"month\0" +	"move\0" +	"name\0" +	"names\0" +	"national\0" +	"natural\0" +	"nchar\0" +	"new\0" +	"next\0" +	"nfc\0" +	"nfd\0" +	"nfkc\0" +	"nfkd\0" +	"no\0" +	"none\0" +	"normalize\0" +	"normalized\0" +	"not\0" +	"nothing\0" +	"notify\0" +	"notnull\0" +	"nowait\0" +	"null\0" +	"nullif\0" +	"nulls\0" +	"numeric\0" +	"object\0" +	"of\0" +	
"off\0" +	"offset\0" +	"oids\0" +	"old\0" +	"on\0" +	"only\0" +	"operator\0" +	"option\0" +	"options\0" +	"or\0" +	"order\0" +	"ordinality\0" +	"others\0" +	"out\0" +	"outer\0" +	"over\0" +	"overlaps\0" +	"overlay\0" +	"overriding\0" +	"owned\0" +	"owner\0" +	"parallel\0" +	"parameter\0" +	"parser\0" +	"partial\0" +	"partition\0" +	"passing\0" +	"password\0" +	"placing\0" +	"plans\0" +	"policy\0" +	"position\0" +	"preceding\0" +	"precision\0" +	"prepare\0" +	"prepared\0" +	"preserve\0" +	"primary\0" +	"prior\0" +	"privileges\0" +	"procedural\0" +	"procedure\0" +	"procedures\0" +	"program\0" +	"publication\0" +	"quote\0" +	"range\0" +	"read\0" +	"real\0" +	"reassign\0" +	"recheck\0" +	"recursive\0" +	"ref\0" +	"references\0" +	"referencing\0" +	"refresh\0" +	"reindex\0" +	"relative\0" +	"release\0" +	"rename\0" +	"repeatable\0" +	"replace\0" +	"replica\0" +	"reset\0" +	"restart\0" +	"restrict\0" +	"return\0" +	"returning\0" +	"returns\0" +	"revoke\0" +	"right\0" +	"role\0" +	"rollback\0" +	"rollup\0" +	"routine\0" +	"routines\0" +	"row\0" +	"rows\0" +	"rule\0" +	"savepoint\0" +	"scalar\0" +	"schema\0" +	"schemas\0" +	"scroll\0" +	"search\0" +	"second\0" +	"security\0" +	"select\0" +	"sequence\0" +	"sequences\0" +	"serializable\0" +	"server\0" +	"session\0" +	"session_user\0" +	"set\0" +	"setof\0" +	"sets\0" +	"share\0" +	"show\0" +	"similar\0" +	"simple\0" +	"skip\0" +	"smallint\0" +	"snapshot\0" +	"some\0" +	"sql\0" +	"stable\0" +	"standalone\0" +	"start\0" +	"statement\0" +	"statistics\0" +	"stdin\0" +	"stdout\0" +	"storage\0" +	"stored\0" +	"strict\0" +	"strip\0" +	"subscription\0" +	"substring\0" +	"support\0" +	"symmetric\0" +	"sysid\0" +	"system\0" +	"system_user\0" +	"table\0" +	"tables\0" +	"tablesample\0" +	"tablespace\0" +	"temp\0" +	"template\0" +	"temporary\0" +	"text\0" +	"then\0" +	"ties\0" +	"time\0" +	"timestamp\0" +	"to\0" +	"trailing\0" +	"transaction\0" +	"transform\0" +	"treat\0" +	"trigger\0" +	"trim\0" +	"true\0" +	"truncate\0" +	"trusted\0" +	
"type\0" +	"types\0" +	"uescape\0" +	"unbounded\0" +	"uncommitted\0" +	"unencrypted\0" +	"union\0" +	"unique\0" +	"unknown\0" +	"unlisten\0" +	"unlogged\0" +	"until\0" +	"update\0" +	"user\0" +	"using\0" +	"vacuum\0" +	"valid\0" +	"validate\0" +	"validator\0" +	"value\0" +	"values\0" +	"varchar\0" +	"variadic\0" +	"varying\0" +	"verbose\0" +	"version\0" +	"view\0" +	"views\0" +	"volatile\0" +	"when\0" +	"where\0" +	"whitespace\0" +	"window\0" +	"with\0" +	"within\0" +	"without\0" +	"work\0" +	"wrapper\0" +	"write\0" +	"xml\0" +	"xmlattributes\0" +	"xmlconcat\0" +	"xmlelement\0" +	"xmlexists\0" +	"xmlforest\0" +	"xmlnamespaces\0" +	"xmlparse\0" +	"xmlpi\0" +	"xmlroot\0" +	"xmlserialize\0" +	"xmltable\0" +	"year\0" +	"yes\0" +	"zone"; + +static const uint16 ScanKeywords_kw_offsets[] = { +	0, +	6, +	13, +	22, +	29, +	36, +	40, +	46, +	52, +	62, +	66, +	71, +	77, +	84, +	92, +	100, +	104, +	108, +	114, +	117, +	121, +	132, +	142, +	153, +	164, +	167, +	174, +	181, +	191, +	205, +	214, +	221, +	227, +	235, +	242, +	249, +	253, +	261, +	266, +	274, +	277, +	283, +	288, +	295, +	303, +	312, +	317, +	322, +	330, +	336, +	341, +	351, +	367, +	373, +	384, +	390, +	396, +	404, +	413, +	421, +	431, +	438, +	446, +	454, +	463, +	470, +	480, +	492, +	505, +	519, +	528, +	539, +	550, +	562, +	570, +	579, +	590, +	595, +	600, +	607, +	613, +	617, +	622, +	630, +	646, +	659, +	672, +	687, +	700, +	718, +	731, +	738, +	744, +	749, +	758, +	762, +	773, +	777, +	785, +	793, +	801, +	810, +	821, +	830, +	838, +	845, +	855, +	866, +	874, +	880, +	885, +	892, +	903, +	911, +	919, +	928, +	931, +	940, +	947, +	954, +	959, +	964, +	969, +	976, +	985, +	995, +	999, +	1004, +	1011, +	1017, +	1024, +	1032, +	1042, +	1052, +	1060, +	1067, +	1075, +	1086, +	1096, +	1105, +	1113, +	1119, +	1126, +	1132, +	1139, +	1148, +	1154, +	1160, +	1170, +	1174, +	1180, +	1188, +	1195, +	1203, +	1210, +	1215, +	1220, +	1229, +	1239, +	1249, +	1256, +	1262, +	1270, +	1279, +	1285, +	1294, +	1301, +	1309, +	
1316, +	1323, +	1328, +	1333, +	1342, +	1345, +	1351, +	1361, +	1371, +	1380, +	1387, +	1390, +	1398, +	1408, +	1418, +	1425, +	1431, +	1439, +	1447, +	1456, +	1466, +	1473, +	1479, +	1485, +	1491, +	1503, +	1510, +	1518, +	1522, +	1530, +	1540, +	1549, +	1554, +	1562, +	1565, +	1572, +	1582, +	1587, +	1592, +	1603, +	1617, +	1629, +	1644, +	1648, +	1653, +	1659, +	1668, +	1674, +	1679, +	1687, +	1695, +	1705, +	1711, +	1716, +	1722, +	1727, +	1733, +	1740, +	1745, +	1751, +	1761, +	1776, +	1785, +	1790, +	1797, +	1804, +	1812, +	1818, +	1826, +	1839, +	1848, +	1854, +	1861, +	1868, +	1877, +	1882, +	1888, +	1893, +	1898, +	1904, +	1913, +	1921, +	1927, +	1931, +	1936, +	1940, +	1944, +	1949, +	1954, +	1957, +	1962, +	1972, +	1983, +	1987, +	1995, +	2002, +	2010, +	2017, +	2022, +	2029, +	2035, +	2043, +	2050, +	2053, +	2057, +	2064, +	2069, +	2073, +	2076, +	2081, +	2090, +	2097, +	2105, +	2108, +	2114, +	2125, +	2132, +	2136, +	2142, +	2147, +	2156, +	2164, +	2175, +	2181, +	2187, +	2196, +	2206, +	2213, +	2221, +	2231, +	2239, +	2248, +	2256, +	2262, +	2269, +	2278, +	2288, +	2298, +	2306, +	2315, +	2324, +	2332, +	2338, +	2349, +	2360, +	2370, +	2381, +	2389, +	2401, +	2407, +	2413, +	2418, +	2423, +	2432, +	2440, +	2450, +	2454, +	2465, +	2477, +	2485, +	2493, +	2502, +	2510, +	2517, +	2528, +	2536, +	2544, +	2550, +	2558, +	2567, +	2574, +	2584, +	2592, +	2599, +	2605, +	2610, +	2619, +	2626, +	2634, +	2643, +	2647, +	2652, +	2657, +	2667, +	2674, +	2681, +	2689, +	2696, +	2703, +	2710, +	2719, +	2726, +	2735, +	2745, +	2758, +	2765, +	2773, +	2786, +	2790, +	2796, +	2801, +	2807, +	2812, +	2820, +	2827, +	2832, +	2841, +	2850, +	2855, +	2859, +	2866, +	2877, +	2883, +	2893, +	2904, +	2910, +	2917, +	2925, +	2932, +	2939, +	2945, +	2958, +	2968, +	2976, +	2986, +	2992, +	2999, +	3011, +	3017, +	3024, +	3036, +	3047, +	3052, +	3061, +	3071, +	3076, +	3081, +	3086, +	3091, +	3101, +	3104, +	3113, +	3125, +	3135, +	3141, +	3149, +	3154, +	3159, +	3168, +	3176, +	
3181, +	3187, +	3195, +	3205, +	3217, +	3229, +	3235, +	3242, +	3250, +	3259, +	3268, +	3274, +	3281, +	3286, +	3292, +	3299, +	3305, +	3314, +	3324, +	3330, +	3337, +	3345, +	3354, +	3362, +	3370, +	3378, +	3383, +	3389, +	3398, +	3403, +	3409, +	3420, +	3427, +	3432, +	3439, +	3447, +	3452, +	3460, +	3466, +	3470, +	3484, +	3494, +	3505, +	3515, +	3525, +	3539, +	3548, +	3554, +	3562, +	3575, +	3584, +	3589, +	3593, +}; + +#define SCANKEYWORDS_NUM_KEYWORDS 471 + +static int +ScanKeywords_hash_func(const void *key, size_t keylen) +{ +	static const int16 h[943] = { +		543,   -186,  201,   0,     32767, 32767, 32767, 32767, +		221,   -207,  32767, 0,     135,   283,   32767, 454, +		14,    79,    32767, 32767, 77,    32767, 102,   160, +		0,     32767, 151,   32767, 30,    392,   -322,  452, +		32767, 0,     32767, 0,     0,     32767, 32767, 32767, +		234,   32767, 0,     32767, 0,     631,   32767, 368, +		80,    0,     0,     -115,  32767, 285,   32767, 423, +		0,     32767, 155,   229,   32767, 126,   291,   165, +		-22,   400,   327,   32767, 32767, 32767, 32767, -399, +		0,     406,   32767, 210,   1102,  -203,  32767, 32767, +		32767, -944,  0,     -188,  32767, 32767, 0,     347, +		32767, 0,     559,   316,   133,   32767, 202,   32767, +		305,   0,     32767, -94,   32767, 0,     32767, -222, +		32767, 138,   32767, -52,   32767, 32767, 279,   69, +		-136,  0,     32767, 32767, 189,   32767, 32767, 88, +		0,     32767, 32767, 274,   32767, 514,   769,   248, +		32767, 32767, 32767, 32767, 32767, 32767, 0,     81, +		8,     -29,   32767, 32767, 32767, -174,  258,   0, +		465,   211,   32767, 0,     -229,  32767, -191,  32767, +		1263,  48,    32767, 343,   0,     58,    0,     32767, +		32767, 855,   0,     415,   0,     -217,  32767, 1195, +		32767, 32767, 166,   32767, 42,    262,   -736,  0, +		32767, 32767, 418,   178,   122,   32767, 46,    32767, +		32767, 32767, 229,   443,   32767, 32767, 250,   32767, +		-300,  0,     32767, 1153,  32767, 108,   
32767, -462, +		266,   32767, 478,   -220,  235,   32767, 32767, -127, +		32767, 32767, 32767, 427,   -231,  156,   32767, 0, +		0,     148,   -218,  142,   73,    420,   32767, 32767, +		523,   32767, -36,   32767, 32767, 467,   844,   -415, +		32767, 32767, -148,  179,   361,   32767, 151,   0, +		0,     32767, 145,   32767, 248,   110,   29,    125, +		282,   32767, -36,   43,    32767, 1125,  32767, 530, +		251,   519,   191,   0,     32767, -34,   -502,  313, +		462,   845,   32767, 32767, -255,  412,   32767, 78, +		0,     32767, 444,   161,   0,     32767, 308,   32767, +		-273,  400,   32767, 296,   32767, 32767, 72,    32767, +		32767, 34,    32767, 364,   151,   -63,   4,     229, +		0,     -276,  32767, 32767, 32767, 32767, -406,  32767, +		203,   32767, 140,   187,   160,   32767, 286,   0, +		32767, 32767, -88,   0,     100,   -361,  32767, 9, +		0,     -456,  32767, -37,   -404,  32767, -969,  32767, +		371,   95,    0,     703,   -31,   263,   373,   -745, +		507,   14,    32767, -159,  0,     32767, 47,    299, +		-126,  0,     32767, 83,    32767, 32767, 420,   236, +		32767, 32767, 0,     310,   89,    233,   32767, 93, +		32767, 0,     816,   60,    301,   211,   193,   0, +		452,   -107,  -403,  -242,  353,   18,    32767, 32767, +		32767, 243,   104,   32767, 32767, 32767, -305,  32767, +		-1048, 54,    0,     383,   32767, 32767, 32767, 226, +		319,   0,     32767, 32767, 32767, -130,  537,   32767, +		0,     -206,  240,   696,   121,   32767, 180,   164, +		32767, 390,   185,   32767, 220,   545,   29,    32767, +		0,     32767, 32767, 1120,  -163,  32767, 32767, 32767, +		-368,  136,   445,   171,   233,   32767, 73,    32767, +		92,    32767, 0,     32767, 0,     208,   354,   32767, +		54,    32767, 32767, -246,  -93,   389,   32767, 32767, +		32767, 32767, 50,    32767, 32767, 308,   32767, -278, +		0,     32767, 32767, -1172, 32767, 8,     32767, 0, +		32767, 341,   304,   242,   -174,  -92,   76,    419, +		32767, 87,    32767, -262,  
32767, 32767, 32767, 109, +		200,   0,     32767, 0,     85,    530,   32767, -316, +		32767, 0,     -286,  32767, 193,   268,   32767, 32767, +		278,   32767, 32767, 155,   445,   95,    -310,  32767, +		207,   -56,   32767, 32767, 0,     -127,  232,   -283, +		103,   32767, 1,     0,     32767, 32767, -485,  350, +		79,    -56,   -354,  32767, 121,   24,    81,    20, +		325,   40,    248,   32767, 32767, 32767, 358,   32767, +		-56,   32767, 0,     174,   -28,   -301,  -92,   32767, +		114,   295,   32767, 363,   -355,  32767, 290,   0, +		32767, 32767, 32767, 122,   55,    -142,  32767, 50, +		32767, 32767, 152,   571,   1397,  0,     472,   -448, +		185,   140,   228,   435,   0,     32767, 32767, 414, +		32767, 379,   92,    185,   23,    299,   32767, 32767, +		0,     32767, 32767, 32767, 306,   439,   -198,  219, +		340,   32767, 416,   0,     -123,  377,   32767, 32767, +		0,     32767, 670,   -670,  339,   32767, 32767, 32767, +		0,     -256,  70,    514,   331,   0,     302,   469, +		0,     370,   32767, 32767, 42,    255,   212,   0, +		322,   277,   32767, -163,  32767, 216,   32767, 32767, +		0,     32767, 190,   32767, 32767, 0,     32767, 0, +		-409,  1366,  32767, 32767, 32767, 193,   32767, 325, +		32767, 0,     142,   466,   32767, 32767, 32767, 113, +		32767, 32767, 62,    0,     -62,   113,   -90,   34, +		-256,  32767, 32767, -936,  32767, 32767, 32767, 0, +		-64,   0,     -34,   451,   290,   108,   32767, 276, +		842,   0,     556,   -153,  32767, 412,   -168,  32767, +		32767, 1331,  407,   234,   -60,   115,   457,   -73, +		502,   772,   32767, 33,    404,   -925,  32767, 32767, +		421,   -123,  32767, 32767, 32767, 0,     0,     32767, +		32767, 32767, 429,   0,     3,     769,   -81,   306, +		64,    32767, 192,   96,    0,     63,    44,    32767, +		32767, 32767, 32767, 0,     284,   32767, 575,   32767, +		32767, 12,    32767, 516,   116,   32767, 32767, 150, +		442,   134,   32767, 198,   -45,   249,   40,    373, +		32767, 0,     
32767, 32767, 0,     0,     352,   32767, +		117,   32767, 426,   0,     0,     32767, 32767, 32767, +		32767, -92,   32767, -442,  32767, 269,   32767, 32767, +		32767, 429,   32767, 0,     32767, 0,     143,   32767, +		508,   -66,   32767, 280,   32767, 39,    162,   32767, +		32767, 0,     32767, 31,    32767, 32767, 32767, 0, +		32767, 257,   -90,   -249,  224,   272,   32767, 32767, +		313,   -467,  214,   0,     -85,   32767, 48,    0, +		32767, -336,  202,   0,     447,   90,    264,   32767, +		32767, 0,     101,   32767, 32767, 32767, 0,     32767, +		32767, 227,   -1093, 32767, 0,     32767, 27,    174, +		32767, 7,     32767, -621,  146,   32767, 32767, 32767, +		854,   0,     32767, 161,   0,     137,   32767, 32767, +		32767, 32767, 0,     391,   219,   276,   32767, 168, +		32767, 32767, 0,     32767, 32767, 32767, 1,     -4, +		32767, 0,     293,   0,     374,   256,   0,     0, +		32767, 355,   212,   404,   0,     186,   32767, 0, +		359,   32767, 32767, 172,   32767, 32767, -131,  0, +		402,   0,     56,    32767, 462,   389,   82,    0, +		32767, 0,     32767, 0,     32767, 32767, 32767, 32767, +		106,   425,   -160,  31,    32767, 55,    0,     0, +		32767, 32767, 430,   1224,  179,   -179,  0,     397, +		32767, 0,     0,     0,     -60,   47,    32767, 396, +		32767, 326,   383,   369,   32767, 368,   32767 +	}; + +	const unsigned char *k = (const unsigned char *) key; +	uint32		a = 0; +	uint32		b = 0; + +	while (keylen--) +	{ +		unsigned char c = *k++ | 0x20; + +		a = a * 257 + c; +		b = b * 31 + c; +	} +	return h[a % 943] + h[b % 943]; +} + +const ScanKeywordList ScanKeywords = { +	ScanKeywords_kw_string, +	ScanKeywords_kw_offsets, +	ScanKeywords_hash_func, +	SCANKEYWORDS_NUM_KEYWORDS, +	17 +}; + +#endif							/* KWLIST_D_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/kwlookup.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/kwlookup.c new file mode 100644 index 00000000000..7e49825c7be --- /dev/null +++ 
b/yql/essentials/parser/pg_wrapper/postgresql/src/common/kwlookup.c @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * kwlookup.c + *	  Key word lookup for PostgreSQL + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/kwlookup.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/kwlookup.h" + + +/* + * ScanKeywordLookup - see if a given word is a keyword + * + * The list of keywords to be matched against is passed as a ScanKeywordList. + * + * Returns the keyword number (0..N-1) of the keyword, or -1 if no match. + * Callers typically use the keyword number to index into information + * arrays, but that is no concern of this code. + * + * The match is done case-insensitively.  Note that we deliberately use a + * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', + * even if we are in a locale where tolower() would produce more or different + * translations.  This is to conform to the SQL99 spec, which says that + * keywords are to be matched in this way even though non-keyword identifiers + * receive a different case-normalization mapping. + */ +int +ScanKeywordLookup(const char *str, +				  const ScanKeywordList *keywords) +{ +	size_t		len; +	int			h; +	const char *kw; + +	/* +	 * Reject immediately if too long to be any keyword.  This saves useless +	 * hashing and downcasing work on long strings. +	 */ +	len = strlen(str); +	if (len > keywords->max_kw_len) +		return -1; + +	/* +	 * Compute the hash function.  We assume it was generated to produce +	 * case-insensitive results.  Since it's a perfect hash, we need only +	 * match to the specific keyword it identifies. 
+	 */ +	h = keywords->hash(str, len); + +	/* An out-of-range result implies no match */ +	if (h < 0 || h >= keywords->num_keywords) +		return -1; + +	/* +	 * Compare character-by-character to see if we have a match, applying an +	 * ASCII-only downcasing to the input characters.  We must not use +	 * tolower() since it may produce the wrong translation in some locales +	 * (eg, Turkish). +	 */ +	kw = GetScanKeyword(h, keywords); +	while (*str != '\0') +	{ +		char		ch = *str++; + +		if (ch >= 'A' && ch <= 'Z') +			ch += 'a' - 'A'; +		if (ch != *kw++) +			return -1; +	} +	if (*kw != '\0') +		return -1; + +	/* Success! */ +	return h; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/link-canary.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/link-canary.c new file mode 100644 index 00000000000..f84331a9a47 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/link-canary.c @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------- + * link-canary.c + *	  Detect whether src/common functions came from frontend or backend. + * + * Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/link-canary.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/link-canary.h" + +/* + * This function just reports whether this file was compiled for frontend + * or backend environment.  We need this because in some systems, mainly + * ELF-based platforms, it is possible for a shlib (such as libpq) loaded + * into the backend to call a backend function named XYZ in preference to + * the shlib's own function XYZ.  That's bad if the two functions don't + * act identically.  This exact situation comes up for many functions in + * src/common and src/port, where the same function names exist in both + * libpq and the backend but they don't act quite identically.  
To verify + * that appropriate measures have been taken to prevent incorrect symbol + * resolution, libpq should test that this function returns true. + */ +bool +pg_link_canary_is_frontend(void) +{ +#ifdef FRONTEND +	return true; +#else +	return false; +#endif +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/md5_common.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/md5_common.c new file mode 100644 index 00000000000..79f14ae23b9 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/md5_common.c @@ -0,0 +1,172 @@ +/*------------------------------------------------------------------------- + * + * md5_common.c + *	  Routines shared between all MD5 implementations used for encrypted + *	  passwords. + * + * Sverre H. Huseby <[email protected]> + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *	  src/common/md5_common.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/cryptohash.h" +#include "common/md5.h" + +static void +bytesToHex(uint8 b[16], char *s) +{ +	static __thread const char *hex = "0123456789abcdef"; +	int			q, +				w; + +	for (q = 0, w = 0; q < 16; q++) +	{ +		s[w++] = hex[(b[q] >> 4) & 0x0F]; +		s[w++] = hex[b[q] & 0x0F]; +	} +	s[w] = '\0'; +} + +/* + *	pg_md5_hash + * + *	Calculates the MD5 sum of the bytes in a buffer. + * + *	SYNOPSIS	  #include "md5.h" + *				  int pg_md5_hash(const void *buff, size_t len, char *hexsum) + * + *	INPUT		  buff	  the buffer containing the bytes that you want + *						  the MD5 sum of. + *				  len	  number of bytes in the buffer. + * + *	OUTPUT		  hexsum  the MD5 sum as a '\0'-terminated string of + *						  hexadecimal digits.  an MD5 sum is 16 bytes long. 
+ *						  each byte is represented by two hexadecimal + *						  characters.  you thus need to provide an array + *						  of 33 characters, including the trailing '\0'. + * + *				  errstr  filled with a constant-string error message + *						  on failure return; NULL on success. + * + *	RETURNS		  false on failure (out of memory for internal buffers + *				  or MD5 computation failure) or true on success. + * + *	STANDARDS	  MD5 is described in RFC 1321. + * + *	AUTHOR		  Sverre H. Huseby <[email protected]> + * + */ + +bool +pg_md5_hash(const void *buff, size_t len, char *hexsum, const char **errstr) +{ +	uint8		sum[MD5_DIGEST_LENGTH]; +	pg_cryptohash_ctx *ctx; + +	*errstr = NULL; +	ctx = pg_cryptohash_create(PG_MD5); +	if (ctx == NULL) +	{ +		*errstr = pg_cryptohash_error(NULL);	/* returns OOM */ +		return false; +	} + +	if (pg_cryptohash_init(ctx) < 0 || +		pg_cryptohash_update(ctx, buff, len) < 0 || +		pg_cryptohash_final(ctx, sum, sizeof(sum)) < 0) +	{ +		*errstr = pg_cryptohash_error(ctx); +		pg_cryptohash_free(ctx); +		return false; +	} + +	bytesToHex(sum, hexsum); +	pg_cryptohash_free(ctx); +	return true; +} + +/* + * pg_md5_binary + * + * As above, except that the MD5 digest is returned as a binary string + * (of size MD5_DIGEST_LENGTH) rather than being converted to ASCII hex. 
+ */ +bool +pg_md5_binary(const void *buff, size_t len, void *outbuf, const char **errstr) +{ +	pg_cryptohash_ctx *ctx; + +	*errstr = NULL; +	ctx = pg_cryptohash_create(PG_MD5); +	if (ctx == NULL) +	{ +		*errstr = pg_cryptohash_error(NULL);	/* returns OOM */ +		return false; +	} + +	if (pg_cryptohash_init(ctx) < 0 || +		pg_cryptohash_update(ctx, buff, len) < 0 || +		pg_cryptohash_final(ctx, outbuf, MD5_DIGEST_LENGTH) < 0) +	{ +		*errstr = pg_cryptohash_error(ctx); +		pg_cryptohash_free(ctx); +		return false; +	} + +	pg_cryptohash_free(ctx); +	return true; +} + + +/* + * Computes MD5 checksum of "passwd" (a null-terminated string) followed + * by "salt" (which need not be null-terminated). + * + * Output format is "md5" followed by a 32-hex-digit MD5 checksum. + * Hence, the output buffer "buf" must be at least 36 bytes long. + * + * Returns true if okay, false on error with *errstr providing some + * error context. + */ +bool +pg_md5_encrypt(const char *passwd, const char *salt, size_t salt_len, +			   char *buf, const char **errstr) +{ +	size_t		passwd_len = strlen(passwd); + +	/* +1 here is just to avoid risk of unportable malloc(0) */ +	char	   *crypt_buf = malloc(passwd_len + salt_len + 1); +	bool		ret; + +	if (!crypt_buf) +	{ +		*errstr = _("out of memory"); +		return false; +	} + +	/* +	 * Place salt at the end because it may be known by users trying to crack +	 * the MD5 output. 
+	 */ +	memcpy(crypt_buf, passwd, passwd_len); +	memcpy(crypt_buf + passwd_len, salt, salt_len); + +	strcpy(buf, "md5"); +	ret = pg_md5_hash(crypt_buf, passwd_len + salt_len, buf + 3, errstr); + +	free(crypt_buf); + +	return ret; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/md5_int.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/md5_int.h new file mode 100644 index 00000000000..63fd3956f82 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/md5_int.h @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * md5_int.h + *	  Internal headers for fallback implementation of MD5 + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *		  src/common/md5_int.h + * + *------------------------------------------------------------------------- + */ + +/*	   $KAME: md5.h,v 1.3 2000/02/22 14:01:18 itojun Exp $	   */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *	  notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *	  notice, this list of conditions and the following disclaimer in the + *	  documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + *	  may be used to endorse or promote products derived from this software + *	  without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef PG_MD5_INT_H +#define PG_MD5_INT_H + +#include "common/md5.h" + +#define MD5_BUFLEN 64 + +/* Context data for MD5 */ +typedef struct +{ +	union +	{ +		uint32		md5_state32[4]; +		uint8		md5_state8[16]; +	}			md5_st; + +#define md5_sta     md5_st.md5_state32[0] +#define md5_stb     md5_st.md5_state32[1] +#define md5_stc     md5_st.md5_state32[2] +#define md5_std     md5_st.md5_state32[3] +#define md5_st8     md5_st.md5_state8 + +	union +	{ +		uint64		md5_count64; +		uint8		md5_count8[8]; +	}			md5_count; +#define md5_n   md5_count.md5_count64 +#define md5_n8  md5_count.md5_count8 + +	unsigned int md5_i; +	uint8		md5_buf[MD5_BUFLEN]; +} pg_md5_ctx; + +/* Interface routines for MD5 */ +extern void pg_md5_init(pg_md5_ctx *ctx); +extern void pg_md5_update(pg_md5_ctx *ctx, const uint8 *data, size_t len); +extern void pg_md5_final(pg_md5_ctx *ctx, uint8 *dest); + +#endif							/* PG_MD5_INT_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/percentrepl.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/percentrepl.c new file mode 100644 index 00000000000..655aea6428d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/percentrepl.c 
@@ -0,0 +1,137 @@ +/*------------------------------------------------------------------------- + * + * percentrepl.c + *	  Common routines to replace percent placeholders in strings + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/percentrepl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#error #include "common/logging.h" +#endif + +#include "common/percentrepl.h" +#include "lib/stringinfo.h" + +/* + * replace_percent_placeholders + * + * Replace percent-letter placeholders in input string with the supplied + * values.  For example, to replace %f with foo and %b with bar, call + * + * replace_percent_placeholders(instr, "param_name", "bf", bar, foo); + * + * The return value is palloc'd. + * + * "%%" is replaced by a single "%". + * + * This throws an error for an unsupported placeholder or a "%" at the end of + * the input string. + * + * A value may be NULL.  If the corresponding placeholder is found in the + * input string, it will be treated as if an unsupported placeholder was used. + * This allows callers to share a "letters" specification but vary the + * actually supported placeholders at run time. + * + * This function is meant for cases where all the values are readily + * available or cheap to compute and most invocations will use most values + * (for example for archive_command).  Also, it requires that all values are + * strings.  It won't be a good match for things like log prefixes or prompts + * that use a mix of data types and any invocation will only use a few of the + * possible values. + * + * param_name is the name of the underlying GUC parameter, for error + * reporting.  At the moment, this function is only used for GUC parameters. 
+ * If other kinds of uses were added, the error reporting would need to be + * revised. + */ +char * +replace_percent_placeholders(const char *instr, const char *param_name, const char *letters,...) +{ +	StringInfoData result; + +	initStringInfo(&result); + +	for (const char *sp = instr; *sp; sp++) +	{ +		if (*sp == '%') +		{ +			if (sp[1] == '%') +			{ +				/* Convert %% to a single % */ +				sp++; +				appendStringInfoChar(&result, *sp); +			} +			else if (sp[1] == '\0') +			{ +				/* Incomplete escape sequence, expected a character afterward */ +#ifdef FRONTEND +				pg_log_error("invalid value for parameter \"%s\": \"%s\"", param_name, instr); +				pg_log_error_detail("String ends unexpectedly after escape character \"%%\"."); +				exit(1); +#else +				ereport(ERROR, +						errcode(ERRCODE_INVALID_PARAMETER_VALUE), +						errmsg("invalid value for parameter \"%s\": \"%s\"", param_name, instr), +						errdetail("String ends unexpectedly after escape character \"%%\".")); +#endif +			} +			else +			{ +				/* Look up placeholder character */ +				bool		found = false; +				va_list		ap; + +				sp++; + +				va_start(ap, letters); +				for (const char *lp = letters; *lp; lp++) +				{ +					char	   *val = va_arg(ap, char *); + +					if (*sp == *lp) +					{ +						if (val) +						{ +							appendStringInfoString(&result, val); +							found = true; +						} +						/* If val is NULL, we will report an error. 
*/ +						break; +					} +				} +				va_end(ap); +				if (!found) +				{ +					/* Unknown placeholder */ +#ifdef FRONTEND +					pg_log_error("invalid value for parameter \"%s\": \"%s\"", param_name, instr); +					pg_log_error_detail("String contains unexpected placeholder \"%%%c\".", *sp); +					exit(1); +#else +					ereport(ERROR, +							errcode(ERRCODE_INVALID_PARAMETER_VALUE), +							errmsg("invalid value for parameter \"%s\": \"%s\"", param_name, instr), +							errdetail("String contains unexpected placeholder \"%%%c\".", *sp)); +#endif +				} +			} +		} +		else +		{ +			appendStringInfoChar(&result, *sp); +		} +	} + +	return result.data; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_get_line.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_get_line.c new file mode 100644 index 00000000000..3cdf0908d2a --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_get_line.c @@ -0,0 +1,180 @@ +/*------------------------------------------------------------------------- + * + * pg_get_line.c + *	  fgets() with an expansible result buffer + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/pg_get_line.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <setjmp.h> + +#include "common/string.h" +#include "lib/stringinfo.h" + + +/* + * pg_get_line() + * + * This is meant to be equivalent to fgets(), except that instead of + * reading into a caller-supplied, fixed-size buffer, it reads into + * a palloc'd (in frontend, really malloc'd) string, which is resized + * as needed to handle indefinitely long input lines.  The caller is + * responsible for pfree'ing the result string when appropriate. 
+ * + * As with fgets(), returns NULL if there is a read error or if no + * characters are available before EOF.  The caller can distinguish + * these cases by checking ferror(stream). + * + * Since this is meant to be equivalent to fgets(), the trailing newline + * (if any) is not stripped.  Callers may wish to apply pg_strip_crlf(). + * + * Note that while I/O errors are reflected back to the caller to be + * dealt with, an OOM condition for the palloc'd buffer will not be; + * there'll be an ereport(ERROR) or exit(1) inside stringinfo.c. + * + * Also note that the palloc'd buffer is usually a lot longer than + * strictly necessary, so it may be inadvisable to use this function + * to collect lots of long-lived data.  A less memory-hungry option + * is to use pg_get_line_buf() or pg_get_line_append() in a loop, + * then pstrdup() each line. + * + * prompt_ctx can optionally be provided to allow this function to be + * canceled via an existing SIGINT signal handler that will longjmp to the + * specified place only when *(prompt_ctx->enabled) is true.  If canceled, + * this function returns NULL, and prompt_ctx->canceled is set to true. + */ +char * +pg_get_line(FILE *stream, PromptInterruptContext *prompt_ctx) +{ +	StringInfoData buf; + +	initStringInfo(&buf); + +	if (!pg_get_line_append(stream, &buf, prompt_ctx)) +	{ +		/* ensure that free() doesn't mess up errno */ +		int			save_errno = errno; + +		pfree(buf.data); +		errno = save_errno; +		return NULL; +	} + +	return buf.data; +} + +/* + * pg_get_line_buf() + * + * This has similar behavior to pg_get_line(), and thence to fgets(), + * except that the collected data is returned in a caller-supplied + * StringInfo buffer.  This is a convenient API for code that just + * wants to read and process one line at a time, without any artificial + * limit on line length. + * + * Returns true if a line was successfully collected (including the + * case of a non-newline-terminated line at EOF).  
Returns false if + * there was an I/O error or no data was available before EOF. + * (Check ferror(stream) to distinguish these cases.) + * + * In the false-result case, buf is reset to empty. + */ +bool +pg_get_line_buf(FILE *stream, StringInfo buf) +{ +	/* We just need to drop any data from the previous call */ +	resetStringInfo(buf); +	return pg_get_line_append(stream, buf, NULL); +} + +/* + * pg_get_line_append() + * + * This has similar behavior to pg_get_line(), and thence to fgets(), + * except that the collected data is appended to whatever is in *buf. + * This is useful in preference to pg_get_line_buf() if the caller wants + * to merge some lines together, e.g. to implement backslash continuation. + * + * Returns true if a line was successfully collected (including the + * case of a non-newline-terminated line at EOF).  Returns false if + * there was an I/O error or no data was available before EOF. + * (Check ferror(stream) to distinguish these cases.) + * + * In the false-result case, the contents of *buf are logically unmodified, + * though it's possible that the buffer has been resized. + * + * prompt_ctx can optionally be provided to allow this function to be + * canceled via an existing SIGINT signal handler that will longjmp to the + * specified place only when *(prompt_ctx->enabled) is true.  If canceled, + * this function returns false, and prompt_ctx->canceled is set to true. 
+ */ +bool +pg_get_line_append(FILE *stream, StringInfo buf, +				   PromptInterruptContext *prompt_ctx) +{ +	int			orig_len = buf->len; + +	if (prompt_ctx && sigsetjmp(*((sigjmp_buf *) prompt_ctx->jmpbuf), 1) != 0) +	{ +		/* Got here with longjmp */ +		prompt_ctx->canceled = true; +		/* Discard any data we collected before detecting error */ +		buf->len = orig_len; +		buf->data[orig_len] = '\0'; +		return false; +	} + +	/* Loop until newline or EOF/error */ +	for (;;) +	{ +		char	   *res; + +		/* Enable longjmp while waiting for input */ +		if (prompt_ctx) +			*(prompt_ctx->enabled) = true; + +		/* Read some data, appending it to whatever we already have */ +		res = fgets(buf->data + buf->len, buf->maxlen - buf->len, stream); + +		/* Disable longjmp again, then break if fgets failed */ +		if (prompt_ctx) +			*(prompt_ctx->enabled) = false; + +		if (res == NULL) +			break; + +		/* Got data, so update buf->len */ +		buf->len += strlen(buf->data + buf->len); + +		/* Done if we have collected a newline */ +		if (buf->len > orig_len && buf->data[buf->len - 1] == '\n') +			return true; + +		/* Make some more room in the buffer, and loop to read more data */ +		enlargeStringInfo(buf, 128); +	} + +	/* Check for I/O errors and EOF */ +	if (ferror(stream) || buf->len == orig_len) +	{ +		/* Discard any data we collected before detecting error */ +		buf->len = orig_len; +		buf->data[orig_len] = '\0'; +		return false; +	} + +	/* No newline at EOF, but we did collect some data */ +	return true; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_lzcompress.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_lzcompress.c new file mode 100644 index 00000000000..20b5938e074 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_lzcompress.c @@ -0,0 +1,876 @@ +/* ---------- + * pg_lzcompress.c - + * + *		This is an implementation of LZ compression for PostgreSQL. 
+ *		It uses a simple history table and generates 2-3 byte tags + *		capable of backward copy information for 3-273 bytes with + *		a max offset of 4095. + * + *		Entry routines: + * + *			int32 + *			pglz_compress(const char *source, int32 slen, char *dest, + *						  const PGLZ_Strategy *strategy); + * + *				source is the input data to be compressed. + * + *				slen is the length of the input data. + * + *				dest is the output area for the compressed result. + *					It must be at least as big as PGLZ_MAX_OUTPUT(slen). + * + *				strategy is a pointer to some information controlling + *					the compression algorithm. If NULL, the compiled + *					in default strategy is used. + * + *				The return value is the number of bytes written in the + *				buffer dest, or -1 if compression fails; in the latter + *				case the contents of dest are undefined. + * + *			int32 + *			pglz_decompress(const char *source, int32 slen, char *dest, + *							int32 rawsize, bool check_complete) + * + *				source is the compressed input. + * + *				slen is the length of the compressed input. + * + *				dest is the area where the uncompressed data will be + *					written to. It is the caller's responsibility to + *					provide enough space. + * + *					The data is written to dest exactly as it was handed + *					to pglz_compress(). No terminating zero byte is added. + * + *				rawsize is the length of the uncompressed data. + * + *				check_complete is a flag to let us know if -1 should be + *					returned in cases where we don't reach the end of the + *					source or dest buffers, or not.  This should be false + *					if the caller is asking for only a partial result and + *					true otherwise. + * + *				The return value is the number of bytes written in the + *				buffer dest, or -1 if decompression fails. + * + *		The decompression algorithm and internal data format: + * + *			It is made with the compressed data itself. 
+ * + *			The data representation is most easily explained by describing + *			the process of decompression. + * + *			If compressed_size == rawsize, then the data + *			is stored uncompressed as plain bytes. Thus, the decompressor + *			simply copies rawsize bytes to the destination. + * + *			Otherwise the first byte tells what to do the next 8 times. + *			We call this the control byte. + * + *			An unset bit in the control byte means that one uncompressed + *			byte follows, which is copied from input to output. + * + *			A set bit in the control byte means that a tag of 2-3 bytes + *			follows. A tag contains information to copy some bytes, that + *			are already in the output buffer, to the current location in + *			the output. Let's call the three tag bytes T1, T2 and T3. The + *			position of the data to copy is coded as an offset from the + *			actual output position. + * + *			The offset is in the upper nibble of T1 and in T2. + *			The length is in the lower nibble of T1. + * + *			So the 16 bits of a 2 byte tag are coded as + * + *				7---T1--0  7---T2--0 + *				OOOO LLLL  OOOO OOOO + * + *			This limits the offset to 1-4095 (12 bits) and the length + *			to 3-18 (4 bits) because 3 is always added to it. To emit + *			a tag of 2 bytes with a length of 2 only saves one control + *			bit. But we lose one byte in the possible length of a tag. + * + *			In the actual implementation, the 2 byte tag's length is + *			limited to 3-17, because the value 0xF in the length nibble + *			has special meaning. It means that the next following + *			byte (T3) has to be added to the length value of 18. That + *			makes total limits of 1-4095 for offset and 3-273 for length. + * + *			Now that we have successfully decoded a tag, we simply copy + *			the output that occurred <offset> bytes back to the current + *			output location in the specified <length>. Thus, a + *			sequence of 200 spaces (think about bpchar fields) could be + *			coded in 4 bytes. 
One literal space and a three byte tag to + *			copy 199 bytes with a -1 offset. Whow - that's a compression + *			rate of 98%! Well, the implementation needs to save the + *			original data size too, so we need another 4 bytes for it + *			and end up with a total compression rate of 96%, what's still + *			worth a Whow. + * + *		The compression algorithm + * + *			The following uses numbers used in the default strategy. + * + *			The compressor works best for attributes of a size between + *			1K and 1M. For smaller items there's not that much chance of + *			redundancy in the character sequence (except for large areas + *			of identical bytes like trailing spaces) and for bigger ones + *			our 4K maximum look-back distance is too small. + * + *			The compressor creates a table for lists of positions. + *			For each input position (except the last 3), a hash key is + *			built from the 4 next input bytes and the position remembered + *			in the appropriate list. Thus, the table points to linked + *			lists of likely to be at least in the first 4 characters + *			matching strings. This is done on the fly while the input + *			is compressed into the output area.  Table entries are only + *			kept for the last 4096 input positions, since we cannot use + *			back-pointers larger than that anyway.  The size of the hash + *			table is chosen based on the size of the input - a larger table + *			has a larger startup cost, as it needs to be initialized to + *			zero, but reduces the number of hash collisions on long inputs. + * + *			For each byte in the input, its hash key (built from this + *			byte and the next 3) is used to find the appropriate list + *			in the table. The lists remember the positions of all bytes + *			that had the same hash key in the past in increasing backward + *			offset order. 
Now for all entries in the used lists, the + *			match length is computed by comparing the characters from the + *			entry's position with the characters from the actual input + *			position. + * + *			The compressor starts with a so called "good_match" of 128. + *			It is a "prefer speed against compression ratio" optimizer. + *			So if the first entry looked at already has 128 or more + *			matching characters, the lookup stops and that position is + *			used for the next tag in the output. + * + *			For each subsequent entry in the history list, the "good_match" + *			is lowered by 10%. So the compressor will be more happy with + *			short matches the further it has to go back in the history. + *			Another "speed against ratio" preference characteristic of + *			the algorithm. + * + *			Thus there are 3 stop conditions for the lookup of matches: + * + *				- a match >= good_match is found + *				- there are no more history entries to look at + *				- the next history entry is already too far back + *				  to be coded into a tag. + * + *			Finally the match algorithm checks that at least a match + *			of 3 or more bytes has been found, because that is the smallest + *			amount of copy information to code into a tag. If so, a tag + *			is emitted and all the input bytes covered by that are just + *			scanned for the history add's, otherwise a literal character + *			is emitted and only its history entry is added. + * + *		Acknowledgments: + * + *			Many thanks to Adisak Pochanayon, whose article about SLZ + *			inspired me to write the PostgreSQL compression this way. 
+ * + *			Jan Wieck + * + * Copyright (c) 1999-2023, PostgreSQL Global Development Group + * + * src/common/pg_lzcompress.c + * ---------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <limits.h> + +#include "common/pg_lzcompress.h" + + +/* ---------- + * Local definitions + * ---------- + */ +#define PGLZ_MAX_HISTORY_LISTS	8192	/* must be power of 2 */ +#define PGLZ_HISTORY_SIZE		4096 +#define PGLZ_MAX_MATCH			273 + + +/* ---------- + * PGLZ_HistEntry - + * + *		Linked list for the backward history lookup + * + * All the entries sharing a hash key are linked in a doubly linked list. + * This makes it easy to remove an entry when it's time to recycle it + * (because it's more than 4K positions old). + * ---------- + */ +typedef struct PGLZ_HistEntry +{ +	struct PGLZ_HistEntry *next;	/* links for my hash key's list */ +	struct PGLZ_HistEntry *prev; +	int			hindex;			/* my current hash key */ +	const char *pos;			/* my input position */ +} PGLZ_HistEntry; + + +/* ---------- + * The provided standard strategies + * ---------- + */ +static const PGLZ_Strategy strategy_default_data = { +	32,							/* Data chunks less than 32 bytes are not +								 * compressed */ +	INT_MAX,					/* No upper limit on what we'll try to +								 * compress */ +	25,							/* Require 25% compression rate, or not worth +								 * it */ +	1024,						/* Give up if no compression in the first 1KB */ +	128,						/* Stop history lookup if a match of 128 bytes +								 * is found */ +	10							/* Lower good match size by 10% at every loop +								 * iteration */ +}; +const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data; + + +static const PGLZ_Strategy strategy_always_data = { +	0,							/* Chunks of any size are compressed */ +	INT_MAX, +	0,							/* It's enough to save one single byte */ +	INT_MAX,					/* Never give up early */ +	128,						/* Stop history lookup if a match of 128 bytes +								 * is found */ +	6							
/* Look harder for a good match */ +}; +const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data; + + +/* ---------- + * Statically allocated work arrays for history + * ---------- + */ +static __thread int16 hist_start[PGLZ_MAX_HISTORY_LISTS]; +static __thread PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; + +/* + * Element 0 in hist_entries is unused, and means 'invalid'. Likewise, + * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'. + */ +#define INVALID_ENTRY			0 +#define INVALID_ENTRY_PTR		(&hist_entries[INVALID_ENTRY]) + +/* ---------- + * pglz_hist_idx - + * + *		Computes the history table slot for the lookup by the next 4 + *		characters in the input. + * + * NB: because we use the next 4 characters, we are not guaranteed to + * find 3-character matches; they very possibly will be in the wrong + * hash list.  This seems an acceptable tradeoff for spreading out the + * hash keys more. + * ---------- + */ +#define pglz_hist_idx(_s,_e, _mask) (										\ +			((((_e) - (_s)) < 4) ? (int) (_s)[0] :							\ +			 (((_s)[0] << 6) ^ ((_s)[1] << 4) ^								\ +			  ((_s)[2] << 2) ^ (_s)[3])) & (_mask)				\ +		) + + +/* ---------- + * pglz_hist_add - + * + *		Adds a new entry to the history table. + * + * If _recycle is true, then we are recycling a previously used entry, + * and must first delink it from its old hashcode's linked list. + * + * NOTE: beware of multiple evaluations of macro's arguments, and note that + * _hn and _recycle are modified in the macro. 
+ * ---------- + */ +#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask)	\ +do {									\ +			int __hindex = pglz_hist_idx((_s),(_e), (_mask));				\ +			int16 *__myhsp = &(_hs)[__hindex];								\ +			PGLZ_HistEntry *__myhe = &(_he)[_hn];							\ +			if (_recycle) {													\ +				if (__myhe->prev == NULL)									\ +					(_hs)[__myhe->hindex] = __myhe->next - (_he);			\ +				else														\ +					__myhe->prev->next = __myhe->next;						\ +				if (__myhe->next != NULL)									\ +					__myhe->next->prev = __myhe->prev;						\ +			}																\ +			__myhe->next = &(_he)[*__myhsp];								\ +			__myhe->prev = NULL;											\ +			__myhe->hindex = __hindex;										\ +			__myhe->pos  = (_s);											\ +			/* If there was an existing entry in this hash slot, link */	\ +			/* this new entry to it. However, the 0th entry in the */		\ +			/* entries table is unused, so we can freely scribble on it. */ \ +			/* So don't bother checking if the slot was used - we'll */		\ +			/* scribble on the unused entry if it was not, but that's */	\ +			/* harmless. Avoiding the branch in this critical path */		\ +			/* speeds this up a little bit. */								\ +			/* if (*__myhsp != INVALID_ENTRY) */							\ +				(_he)[(*__myhsp)].prev = __myhe;							\ +			*__myhsp = _hn;													\ +			if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) {							\ +				(_hn) = 1;													\ +				(_recycle) = true;											\ +			}																\ +} while (0) + + +/* ---------- + * pglz_out_ctrl - + * + *		Outputs the last and allocates a new control byte if needed. 
+ * ---------- + */ +#define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \ +do { \ +	if ((__ctrl & 0xff) == 0)												\ +	{																		\ +		*(__ctrlp) = __ctrlb;												\ +		__ctrlp = (__buf)++;												\ +		__ctrlb = 0;														\ +		__ctrl = 1;															\ +	}																		\ +} while (0) + + +/* ---------- + * pglz_out_literal - + * + *		Outputs a literal byte to the destination buffer including the + *		appropriate control bit. + * ---------- + */ +#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \ +do { \ +	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);								\ +	*(_buf)++ = (unsigned char)(_byte);										\ +	_ctrl <<= 1;															\ +} while (0) + + +/* ---------- + * pglz_out_tag - + * + *		Outputs a backward reference tag of 2-3 bytes (depending on + *		the match length) to the destination buffer including the + *		appropriate control bit. + * ---------- + */ +#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \ +do { \ +	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);								\ +	_ctrlb |= _ctrl;														\ +	_ctrl <<= 1;															\ +	if (_len > 17)															\ +	{																		\ +		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f);		\ +		(_buf)[1] = (unsigned char)(((_off) & 0xff));						\ +		(_buf)[2] = (unsigned char)((_len) - 18);							\ +		(_buf) += 3;														\ +	} else {																\ +		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \ +		(_buf)[1] = (unsigned char)((_off) & 0xff);							\ +		(_buf) += 2;														\ +	}																		\ +} while (0) + + +/* ---------- + * pglz_find_match - + * + *		Lookup the history table if the actual input stream matches + *		another sequence of characters, starting somewhere earlier + *		in the input buffer. 
+ * ---------- + */ +static inline int +pglz_find_match(int16 *hstart, const char *input, const char *end, +				int *lenp, int *offp, int good_match, int good_drop, int mask) +{ +	PGLZ_HistEntry *hent; +	int16		hentno; +	int32		len = 0; +	int32		off = 0; + +	/* +	 * Traverse the linked history list until a good enough match is found. +	 */ +	hentno = hstart[pglz_hist_idx(input, end, mask)]; +	hent = &hist_entries[hentno]; +	while (hent != INVALID_ENTRY_PTR) +	{ +		const char *ip = input; +		const char *hp = hent->pos; +		int32		thisoff; +		int32		thislen; + +		/* +		 * Stop if the offset does not fit into our tag anymore. +		 */ +		thisoff = ip - hp; +		if (thisoff >= 0x0fff) +			break; + +		/* +		 * Determine length of match. A better match must be larger than the +		 * best so far. And if we already have a match of 16 or more bytes, +		 * it's worth the call overhead to use memcmp() to check if this match +		 * is equal for the same size. After that we must fallback to +		 * character by character comparison to know the exact position where +		 * the diff occurred. +		 */ +		thislen = 0; +		if (len >= 16) +		{ +			if (memcmp(ip, hp, len) == 0) +			{ +				thislen = len; +				ip += len; +				hp += len; +				while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) +				{ +					thislen++; +					ip++; +					hp++; +				} +			} +		} +		else +		{ +			while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) +			{ +				thislen++; +				ip++; +				hp++; +			} +		} + +		/* +		 * Remember this match as the best (if it is) +		 */ +		if (thislen > len) +		{ +			len = thislen; +			off = thisoff; +		} + +		/* +		 * Advance to the next history entry +		 */ +		hent = hent->next; + +		/* +		 * Be happy with lesser good matches the more entries we visited. But +		 * no point in doing calculation if we're at end of list. 
+		 */ +		if (hent != INVALID_ENTRY_PTR) +		{ +			if (len >= good_match) +				break; +			good_match -= (good_match * good_drop) / 100; +		} +	} + +	/* +	 * Return match information only if it results at least in one byte +	 * reduction. +	 */ +	if (len > 2) +	{ +		*lenp = len; +		*offp = off; +		return 1; +	} + +	return 0; +} + + +/* ---------- + * pglz_compress - + * + *		Compresses source into dest using strategy. Returns the number of + *		bytes written in buffer dest, or -1 if compression fails. + * ---------- + */ +int32 +pglz_compress(const char *source, int32 slen, char *dest, +			  const PGLZ_Strategy *strategy) +{ +	unsigned char *bp = (unsigned char *) dest; +	unsigned char *bstart = bp; +	int			hist_next = 1; +	bool		hist_recycle = false; +	const char *dp = source; +	const char *dend = source + slen; +	unsigned char ctrl_dummy = 0; +	unsigned char *ctrlp = &ctrl_dummy; +	unsigned char ctrlb = 0; +	unsigned char ctrl = 0; +	bool		found_match = false; +	int32		match_len; +	int32		match_off; +	int32		good_match; +	int32		good_drop; +	int32		result_size; +	int32		result_max; +	int32		need_rate; +	int			hashsz; +	int			mask; + +	/* +	 * Our fallback strategy is the default. +	 */ +	if (strategy == NULL) +		strategy = PGLZ_strategy_default; + +	/* +	 * If the strategy forbids compression (at all or if source chunk size out +	 * of range), fail. +	 */ +	if (strategy->match_size_good <= 0 || +		slen < strategy->min_input_size || +		slen > strategy->max_input_size) +		return -1; + +	/* +	 * Limit the match parameters to the supported range. 
+	 */ +	good_match = strategy->match_size_good; +	if (good_match > PGLZ_MAX_MATCH) +		good_match = PGLZ_MAX_MATCH; +	else if (good_match < 17) +		good_match = 17; + +	good_drop = strategy->match_size_drop; +	if (good_drop < 0) +		good_drop = 0; +	else if (good_drop > 100) +		good_drop = 100; + +	need_rate = strategy->min_comp_rate; +	if (need_rate < 0) +		need_rate = 0; +	else if (need_rate > 99) +		need_rate = 99; + +	/* +	 * Compute the maximum result size allowed by the strategy, namely the +	 * input size minus the minimum wanted compression rate.  This had better +	 * be <= slen, else we might overrun the provided output buffer. +	 */ +	if (slen > (INT_MAX / 100)) +	{ +		/* Approximate to avoid overflow */ +		result_max = (slen / 100) * (100 - need_rate); +	} +	else +		result_max = (slen * (100 - need_rate)) / 100; + +	/* +	 * Experiments suggest that these hash sizes work pretty well. A large +	 * hash table minimizes collision, but has a higher startup cost. For a +	 * small input, the startup cost dominates. The table size must be a power +	 * of two. +	 */ +	if (slen < 128) +		hashsz = 512; +	else if (slen < 256) +		hashsz = 1024; +	else if (slen < 512) +		hashsz = 2048; +	else if (slen < 1024) +		hashsz = 4096; +	else +		hashsz = 8192; +	mask = hashsz - 1; + +	/* +	 * Initialize the history lists to empty.  We do not need to zero the +	 * hist_entries[] array; its entries are initialized as they are used. +	 */ +	memset(hist_start, 0, hashsz * sizeof(int16)); + +	/* +	 * Compress the source directly into the output buffer. +	 */ +	while (dp < dend) +	{ +		/* +		 * If we already exceeded the maximum result size, fail. +		 * +		 * We check once per loop; since the loop body could emit as many as 4 +		 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better +		 * allow 4 slop bytes. 
+		 */ +		if (bp - bstart >= result_max) +			return -1; + +		/* +		 * If we've emitted more than first_success_by bytes without finding +		 * anything compressible at all, fail.  This lets us fall out +		 * reasonably quickly when looking at incompressible input (such as +		 * pre-compressed data). +		 */ +		if (!found_match && bp - bstart >= strategy->first_success_by) +			return -1; + +		/* +		 * Try to find a match in the history +		 */ +		if (pglz_find_match(hist_start, dp, dend, &match_len, +							&match_off, good_match, good_drop, mask)) +		{ +			/* +			 * Create the tag and add history entries for all matched +			 * characters. +			 */ +			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); +			while (match_len--) +			{ +				pglz_hist_add(hist_start, hist_entries, +							  hist_next, hist_recycle, +							  dp, dend, mask); +				dp++;			/* Do not do this ++ in the line above! */ +				/* The macro would do it four times - Jan.  */ +			} +			found_match = true; +		} +		else +		{ +			/* +			 * No match found. Copy one literal byte. +			 */ +			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); +			pglz_hist_add(hist_start, hist_entries, +						  hist_next, hist_recycle, +						  dp, dend, mask); +			dp++;				/* Do not do this ++ in the line above! */ +			/* The macro would do it four times - Jan.  */ +		} +	} + +	/* +	 * Write out the last control byte and check that we haven't overrun the +	 * output size allowed by the strategy. +	 */ +	*ctrlp = ctrlb; +	result_size = bp - bstart; +	if (result_size >= result_max) +		return -1; + +	/* success */ +	return result_size; +} + + +/* ---------- + * pglz_decompress - + * + *		Decompresses source into dest. Returns the number of bytes + *		decompressed into the destination buffer, or -1 if the + *		compressed data is corrupted. + * + *		If check_complete is true, the data is considered corrupted + *		if we don't exactly fill the destination buffer.  
Callers that + *		are extracting a slice typically can't apply this check. + * ---------- + */ +int32 +pglz_decompress(const char *source, int32 slen, char *dest, +				int32 rawsize, bool check_complete) +{ +	const unsigned char *sp; +	const unsigned char *srcend; +	unsigned char *dp; +	unsigned char *destend; + +	sp = (const unsigned char *) source; +	srcend = ((const unsigned char *) source) + slen; +	dp = (unsigned char *) dest; +	destend = dp + rawsize; + +	while (sp < srcend && dp < destend) +	{ +		/* +		 * Read one control byte and process the next 8 items (or as many as +		 * remain in the compressed input). +		 */ +		unsigned char ctrl = *sp++; +		int			ctrlc; + +		for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) +		{ +			if (ctrl & 1) +			{ +				/* +				 * Set control bit means we must read a match tag. The match +				 * is coded with two bytes. First byte uses lower nibble to +				 * code length - 3. Higher nibble contains upper 4 bits of the +				 * offset. The next following byte contains the lower 8 bits +				 * of the offset. If the length is coded as 18, another +				 * extension tag byte tells how much longer the match really +				 * was (0-255). +				 */ +				int32		len; +				int32		off; + +				len = (sp[0] & 0x0f) + 3; +				off = ((sp[0] & 0xf0) << 4) | sp[1]; +				sp += 2; +				if (len == 18) +					len += *sp++; + +				/* +				 * Check for corrupt data: if we fell off the end of the +				 * source, or if we obtained off = 0, or if off is more than +				 * the distance back to the buffer start, we have problems. +				 * (We must check for off = 0, else we risk an infinite loop +				 * below in the face of corrupt data.  Likewise, the upper +				 * limit on off prevents accessing outside the buffer +				 * boundaries.) +				 */ +				if (unlikely(sp > srcend || off == 0 || +							 off > (dp - (unsigned char *) dest))) +					return -1; + +				/* +				 * Don't emit more data than requested. 
+				 */ +				len = Min(len, destend - dp); + +				/* +				 * Now we copy the bytes specified by the tag from OUTPUT to +				 * OUTPUT (copy len bytes from dp - off to dp).  The copied +				 * areas could overlap, so to avoid undefined behavior in +				 * memcpy(), be careful to copy only non-overlapping regions. +				 * +				 * Note that we cannot use memmove() instead, since while its +				 * behavior is well-defined, it's also not what we want. +				 */ +				while (off < len) +				{ +					/* +					 * We can safely copy "off" bytes since that clearly +					 * results in non-overlapping source and destination. +					 */ +					memcpy(dp, dp - off, off); +					len -= off; +					dp += off; + +					/*---------- +					 * This bit is less obvious: we can double "off" after +					 * each such step.  Consider this raw input: +					 *		112341234123412341234 +					 * This will be encoded as 5 literal bytes "11234" and +					 * then a match tag with length 16 and offset 4.  After +					 * memcpy'ing the first 4 bytes, we will have emitted +					 *		112341234 +					 * so we can double "off" to 8, then after the next step +					 * we have emitted +					 *		11234123412341234 +					 * Then we can double "off" again, after which it is more +					 * than the remaining "len" so we fall out of this loop +					 * and finish with a non-overlapping copy of the +					 * remainder.  In general, a match tag with off < len +					 * implies that the decoded data has a repeat length of +					 * "off".  We can handle 1, 2, 4, etc repetitions of the +					 * repeated string per memcpy until we get to a situation +					 * where the final copy step is non-overlapping. +					 * +					 * (Another way to understand this is that we are keeping +					 * the copy source point dp - off the same throughout.) +					 *---------- +					 */ +					off += off; +				} +				memcpy(dp, dp - off, len); +				dp += len; +			} +			else +			{ +				/* +				 * An unset control bit means LITERAL BYTE. 
So we just copy +				 * one from INPUT to OUTPUT. +				 */ +				*dp++ = *sp++; +			} + +			/* +			 * Advance the control bit +			 */ +			ctrl >>= 1; +		} +	} + +	/* +	 * If requested, check we decompressed the right amount. +	 */ +	if (check_complete && (dp != destend || sp != srcend)) +		return -1; + +	/* +	 * That's it. +	 */ +	return (char *) dp - dest; +} + + +/* ---------- + * pglz_maximum_compressed_size - + * + *		Calculate the maximum compressed size for a given amount of raw data. + *		Return the maximum size, or total compressed size if maximum size is + *		larger than total compressed size. + * + * We can't use PGLZ_MAX_OUTPUT for this purpose, because that's used to size + * the compression buffer (and abort the compression). It does not really say + * what's the maximum compressed size for an input of a given length, and it + * may happen that while the whole value is compressible (and thus fits into + * PGLZ_MAX_OUTPUT nicely), the prefix is not compressible at all. + * ---------- + */ +int32 +pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size) +{ +	int64		compressed_size; + +	/* +	 * pglz uses one control bit per byte, so if the entire desired prefix is +	 * represented as literal bytes, we'll need (rawsize * 9) bits.  We care +	 * about bytes though, so be sure to round up not down. +	 * +	 * Use int64 here to prevent overflow during calculation. +	 */ +	compressed_size = ((int64) rawsize * 9 + 7) / 8; + +	/* +	 * The above fails to account for a corner case: we could have compressed +	 * data that starts with N-1 or N-2 literal bytes and then has a match tag +	 * of 2 or 3 bytes.  It's therefore possible that we need to fetch 1 or 2 +	 * more bytes in order to have the whole match tag.  (Match tags earlier +	 * in the compressed data don't cause a problem, since they should +	 * represent more decompressed bytes than they occupy themselves.) 
+	 */ +	compressed_size += 2; + +	/* +	 * Maximum compressed size can't be larger than total compressed size. +	 * (This also ensures that our result fits in int32.) +	 */ +	compressed_size = Min(compressed_size, total_compressed_size); + +	return (int32) compressed_size; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_prng.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_prng.c new file mode 100644 index 00000000000..1b524c91227 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_prng.c @@ -0,0 +1,282 @@ +/*------------------------------------------------------------------------- + * + * Pseudo-Random Number Generator + * + * We use Blackman and Vigna's xoroshiro128** 1.0 algorithm + * to have a small, fast PRNG suitable for generating reasonably + * good-quality 64-bit data.  This should not be considered + * cryptographically strong, however. + * + * About these generators: https://prng.di.unimi.it/ + * See also https://en.wikipedia.org/wiki/List_of_random_number_generators + * + * Copyright (c) 2021-2023, PostgreSQL Global Development Group + * + * src/common/pg_prng.c + * + *------------------------------------------------------------------------- + */ + +#include "c.h" + +#include <math.h> + +#include "common/pg_prng.h" +#include "port/pg_bitutils.h" + +/* X/Open (XSI) requires <math.h> to provide M_PI, but core POSIX does not */ +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + + +/* process-wide state vector */ +__thread pg_prng_state pg_global_prng_state; + + +/* + * 64-bit rotate left + */ +static inline uint64 +rotl(uint64 x, int bits) +{ +	return (x << bits) | (x >> (64 - bits)); +} + +/* + * The basic xoroshiro128** algorithm. + * Generates and returns a 64-bit uniformly distributed number, + * updating the state vector for next time. + * + * Note: the state vector must not be all-zeroes, as that is a fixed point. 
+ */ +static uint64 +xoroshiro128ss(pg_prng_state *state) +{ +	uint64		s0 = state->s0, +				sx = state->s1 ^ s0, +				val = rotl(s0 * 5, 7) * 9; + +	/* update state */ +	state->s0 = rotl(s0, 24) ^ sx ^ (sx << 16); +	state->s1 = rotl(sx, 37); + +	return val; +} + +/* + * We use this generator just to fill the xoroshiro128** state vector + * from a 64-bit seed. + */ +static uint64 +splitmix64(uint64 *state) +{ +	/* state update */ +	uint64		val = (*state += UINT64CONST(0x9E3779B97f4A7C15)); + +	/* value extraction */ +	val = (val ^ (val >> 30)) * UINT64CONST(0xBF58476D1CE4E5B9); +	val = (val ^ (val >> 27)) * UINT64CONST(0x94D049BB133111EB); + +	return val ^ (val >> 31); +} + +/* + * Initialize the PRNG state from a 64-bit integer, + * taking care that we don't produce all-zeroes. + */ +void +pg_prng_seed(pg_prng_state *state, uint64 seed) +{ +	state->s0 = splitmix64(&seed); +	state->s1 = splitmix64(&seed); +	/* Let's just make sure we didn't get all-zeroes */ +	(void) pg_prng_seed_check(state); +} + +/* + * Initialize the PRNG state from a double in the range [-1.0, 1.0], + * taking care that we don't produce all-zeroes. + */ +void +pg_prng_fseed(pg_prng_state *state, double fseed) +{ +	/* Assume there's about 52 mantissa bits; the sign contributes too. */ +	int64		seed = ((double) ((UINT64CONST(1) << 52) - 1)) * fseed; + +	pg_prng_seed(state, (uint64) seed); +} + +/* + * Validate a PRNG seed value. + */ +bool +pg_prng_seed_check(pg_prng_state *state) +{ +	/* +	 * If the seeding mechanism chanced to produce all-zeroes, insert +	 * something nonzero.  Anything would do; use Knuth's LCG parameters. +	 */ +	if (unlikely(state->s0 == 0 && state->s1 == 0)) +	{ +		state->s0 = UINT64CONST(0x5851F42D4C957F2D); +		state->s1 = UINT64CONST(0x14057B7EF767814F); +	} + +	/* As a convenience for the pg_prng_strong_seed macro, return true */ +	return true; +} + +/* + * Select a random uint64 uniformly from the range [0, PG_UINT64_MAX]. 
+ */ +uint64 +pg_prng_uint64(pg_prng_state *state) +{ +	return xoroshiro128ss(state); +} + +/* + * Select a random uint64 uniformly from the range [rmin, rmax]. + * If the range is empty, rmin is always produced. + */ +uint64 +pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax) +{ +	uint64		val; + +	if (likely(rmax > rmin)) +	{ +		/* +		 * Use bitmask rejection method to generate an offset in 0..range. +		 * Each generated val is less than twice "range", so on average we +		 * should not have to iterate more than twice. +		 */ +		uint64		range = rmax - rmin; +		uint32		rshift = 63 - pg_leftmost_one_pos64(range); + +		do +		{ +			val = xoroshiro128ss(state) >> rshift; +		} while (val > range); +	} +	else +		val = 0; + +	return rmin + val; +} + +/* + * Select a random int64 uniformly from the range [PG_INT64_MIN, PG_INT64_MAX]. + */ +int64 +pg_prng_int64(pg_prng_state *state) +{ +	return (int64) xoroshiro128ss(state); +} + +/* + * Select a random int64 uniformly from the range [0, PG_INT64_MAX]. + */ +int64 +pg_prng_int64p(pg_prng_state *state) +{ +	return (int64) (xoroshiro128ss(state) & UINT64CONST(0x7FFFFFFFFFFFFFFF)); +} + +/* + * Select a random uint32 uniformly from the range [0, PG_UINT32_MAX]. + */ +uint32 +pg_prng_uint32(pg_prng_state *state) +{ +	/* +	 * Although xoroshiro128** is not known to have any weaknesses in +	 * randomness of low-order bits, we prefer to use the upper bits of its +	 * result here and below. +	 */ +	uint64		v = xoroshiro128ss(state); + +	return (uint32) (v >> 32); +} + +/* + * Select a random int32 uniformly from the range [PG_INT32_MIN, PG_INT32_MAX]. + */ +int32 +pg_prng_int32(pg_prng_state *state) +{ +	uint64		v = xoroshiro128ss(state); + +	return (int32) (v >> 32); +} + +/* + * Select a random int32 uniformly from the range [0, PG_INT32_MAX]. 
+ */ +int32 +pg_prng_int32p(pg_prng_state *state) +{ +	uint64		v = xoroshiro128ss(state); + +	return (int32) (v >> 33); +} + +/* + * Select a random double uniformly from the range [0.0, 1.0). + * + * Note: if you want a result in the range (0.0, 1.0], the standard way + * to get that is "1.0 - pg_prng_double(state)". + */ +double +pg_prng_double(pg_prng_state *state) +{ +	uint64		v = xoroshiro128ss(state); + +	/* +	 * As above, assume there's 52 mantissa bits in a double.  This result +	 * could round to 1.0 if double's precision is less than that; but we +	 * assume IEEE float arithmetic elsewhere in Postgres, so this seems OK. +	 */ +	return ldexp((double) (v >> (64 - 52)), -52); +} + +/* + * Select a random double from the normal distribution with + * mean = 0.0 and stddev = 1.0. + * + * To get a result from a different normal distribution use + *   STDDEV * pg_prng_double_normal() + MEAN + * + * Uses https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform + */ +double +pg_prng_double_normal(pg_prng_state *state) +{ +	double		u1, +				u2, +				z0; + +	/* +	 * pg_prng_double generates [0, 1), but for the basic version of the +	 * Box-Muller transform the two uniformly distributed random numbers are +	 * expected to be in (0, 1]; in particular we'd better not compute log(0). +	 */ +	u1 = 1.0 - pg_prng_double(state); +	u2 = 1.0 - pg_prng_double(state); + +	/* Apply Box-Muller transform to get one normal-valued output */ +	z0 = sqrt(-2.0 * log(u1)) * sin(2.0 * M_PI * u2); +	return z0; +} + +/* + * Select a random boolean value. 
+ */ +bool +pg_prng_bool(pg_prng_state *state) +{ +	uint64		v = xoroshiro128ss(state); + +	return (bool) (v >> 63); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/pgfnames.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pgfnames.c new file mode 100644 index 00000000000..18cd7e12200 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pgfnames.c @@ -0,0 +1,94 @@ +/*------------------------------------------------------------------------- + * + * pgfnames.c + *	  directory handling functions + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *	  src/common/pgfnames.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <dirent.h> + +#ifndef FRONTEND +#define pg_log_warning(...) elog(WARNING, __VA_ARGS__) +#else +#error #include "common/logging.h" +#endif + +/* + * pgfnames + * + * return a list of the names of objects in the argument directory.  Caller + * must call pgfnames_cleanup later to free the memory allocated by this + * function. 
+ */ +char	  ** +pgfnames(const char *path) +{ +	DIR		   *dir; +	struct dirent *file; +	char	  **filenames; +	int			numnames = 0; +	int			fnsize = 200;	/* enough for many small dbs */ + +	dir = opendir(path); +	if (dir == NULL) +	{ +		pg_log_warning("could not open directory \"%s\": %m", path); +		return NULL; +	} + +	filenames = (char **) palloc(fnsize * sizeof(char *)); + +	while (errno = 0, (file = readdir(dir)) != NULL) +	{ +		if (strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0) +		{ +			if (numnames + 1 >= fnsize) +			{ +				fnsize *= 2; +				filenames = (char **) repalloc(filenames, +											   fnsize * sizeof(char *)); +			} +			filenames[numnames++] = pstrdup(file->d_name); +		} +	} + +	if (errno) +		pg_log_warning("could not read directory \"%s\": %m", path); + +	filenames[numnames] = NULL; + +	if (closedir(dir)) +		pg_log_warning("could not close directory \"%s\": %m", path); + +	return filenames; +} + + +/* + *	pgfnames_cleanup + * + *	deallocate memory used for filenames + */ +void +pgfnames_cleanup(char **filenames) +{ +	char	  **fn; + +	for (fn = filenames; *fn; fn++) +		pfree(*fn); + +	pfree(filenames); +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/protocol_openssl.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/protocol_openssl.c new file mode 100644 index 00000000000..089cbd33cca --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/protocol_openssl.c @@ -0,0 +1,117 @@ +/*------------------------------------------------------------------------- + * + * protocol_openssl.c + *	  OpenSSL functionality shared between frontend and backend + * + * This should only be used if code is compiled with OpenSSL support. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *		  src/common/protocol_openssl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/openssl.h" + +/* + * Replacements for APIs introduced in OpenSSL 1.1.0. + */ +#ifndef SSL_CTX_set_min_proto_version + +/* + * OpenSSL versions that support TLS 1.3 shouldn't get here because they + * already have these functions.  So we don't have to keep updating the below + * code for every new TLS version, and eventually it can go away.  But let's + * just check this to make sure ... + */ +#ifdef TLS1_3_VERSION +#error OpenSSL version mismatch +#endif + +int +SSL_CTX_set_min_proto_version(SSL_CTX *ctx, int version) +{ +	int			ssl_options = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3; + +	if (version > TLS1_VERSION) +		ssl_options |= SSL_OP_NO_TLSv1; + +	/* +	 * Some OpenSSL versions define TLS*_VERSION macros but not the +	 * corresponding SSL_OP_NO_* macro, so in those cases we have to return +	 * unsuccessfully here. +	 */ +#ifdef TLS1_1_VERSION +	if (version > TLS1_1_VERSION) +	{ +#ifdef SSL_OP_NO_TLSv1_1 +		ssl_options |= SSL_OP_NO_TLSv1_1; +#else +		return 0; +#endif +	} +#endif +#ifdef TLS1_2_VERSION +	if (version > TLS1_2_VERSION) +	{ +#ifdef SSL_OP_NO_TLSv1_2 +		ssl_options |= SSL_OP_NO_TLSv1_2; +#else +		return 0; +#endif +	} +#endif + +	SSL_CTX_set_options(ctx, ssl_options); + +	return 1;					/* success */ +} + +int +SSL_CTX_set_max_proto_version(SSL_CTX *ctx, int version) +{ +	int			ssl_options = 0; + +	Assert(version != 0); + +	/* +	 * Some OpenSSL versions define TLS*_VERSION macros but not the +	 * corresponding SSL_OP_NO_* macro, so in those cases we have to return +	 * unsuccessfully here. 
+	 */ +#ifdef TLS1_1_VERSION +	if (version < TLS1_1_VERSION) +	{ +#ifdef SSL_OP_NO_TLSv1_1 +		ssl_options |= SSL_OP_NO_TLSv1_1; +#else +		return 0; +#endif +	} +#endif +#ifdef TLS1_2_VERSION +	if (version < TLS1_2_VERSION) +	{ +#ifdef SSL_OP_NO_TLSv1_2 +		ssl_options |= SSL_OP_NO_TLSv1_2; +#else +		return 0; +#endif +	} +#endif + +	SSL_CTX_set_options(ctx, ssl_options); + +	return 1;					/* success */ +} + +#endif							/* !SSL_CTX_set_min_proto_version */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/psprintf.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/psprintf.c new file mode 100644 index 00000000000..c1d2807cea9 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/psprintf.c @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------- + * + * psprintf.c + *		sprintf into an allocated-on-demand buffer + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/psprintf.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND + +#include "postgres.h" + +#include "utils/memutils.h" + +#else + +#include "postgres_fe.h" + +/* It's possible we could use a different value for this in frontend code */ +#define MaxAllocSize	((Size) 0x3fffffff) /* 1 gigabyte - 1 */ + +#endif + + +/* + * psprintf + * + * Format text data under the control of fmt (an sprintf-style format string) + * and return it in an allocated-on-demand buffer.  The buffer is allocated + * with palloc in the backend, or malloc in frontend builds.  Caller is + * responsible to free the buffer when no longer needed, if appropriate. + * + * Errors are not returned to the caller, but are reported via elog(ERROR) + * in the backend, or printf-to-stderr-and-exit() in frontend builds. 
+ * One should therefore think twice about using this in libpq. + */ +char * +psprintf(const char *fmt,...) +{ +	int			save_errno = errno; +	size_t		len = 128;		/* initial assumption about buffer size */ + +	for (;;) +	{ +		char	   *result; +		va_list		args; +		size_t		newlen; + +		/* +		 * Allocate result buffer.  Note that in frontend this maps to malloc +		 * with exit-on-error. +		 */ +		result = (char *) palloc(len); + +		/* Try to format the data. */ +		errno = save_errno; +		va_start(args, fmt); +		newlen = pvsnprintf(result, len, fmt, args); +		va_end(args); + +		if (newlen < len) +			return result;		/* success */ + +		/* Release buffer and loop around to try again with larger len. */ +		pfree(result); +		len = newlen; +	} +} + +/* + * pvsnprintf + * + * Attempt to format text data under the control of fmt (an sprintf-style + * format string) and insert it into buf (which has length len). + * + * If successful, return the number of bytes emitted, not counting the + * trailing zero byte.  This will always be strictly less than len. + * + * If there's not enough space in buf, return an estimate of the buffer size + * needed to succeed (this *must* be more than the given len, else callers + * might loop infinitely). + * + * Other error cases do not return, but exit via elog(ERROR) or exit(). + * Hence, this shouldn't be used inside libpq. + * + * Caution: callers must be sure to preserve their entry-time errno + * when looping, in case the fmt contains "%m". + * + * Note that the semantics of the return value are not exactly C99's. + * First, we don't promise that the estimated buffer size is exactly right; + * callers must be prepared to loop multiple times to get the right size. + * (Given a C99-compliant vsnprintf, that won't happen, but it is rumored + * that some implementations don't always return the same value ...) 
+ * Second, we return the recommended buffer size, not one less than that; + * this lets overflow concerns be handled here rather than in the callers. + */ +size_t +pvsnprintf(char *buf, size_t len, const char *fmt, va_list args) +{ +	int			nprinted; + +	nprinted = vsnprintf(buf, len, fmt, args); + +	/* We assume failure means the fmt is bogus, hence hard failure is OK */ +	if (unlikely(nprinted < 0)) +	{ +#ifndef FRONTEND +		elog(ERROR, "vsnprintf failed: %m with format string \"%s\"", fmt); +#else +		fprintf(stderr, "vsnprintf failed: %s with format string \"%s\"\n", +				strerror(errno), fmt); +		exit(EXIT_FAILURE); +#endif +	} + +	if ((size_t) nprinted < len) +	{ +		/* Success.  Note nprinted does not include trailing null. */ +		return (size_t) nprinted; +	} + +	/* +	 * We assume a C99-compliant vsnprintf, so believe its estimate of the +	 * required space, and add one for the trailing null.  (If it's wrong, the +	 * logic will still work, but we may loop multiple times.) +	 * +	 * Choke if the required space would exceed MaxAllocSize.  Note we use +	 * this palloc-oriented overflow limit even when in frontend. +	 */ +	if (unlikely((size_t) nprinted > MaxAllocSize - 1)) +	{ +#ifndef FRONTEND +		ereport(ERROR, +				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), +				 errmsg("out of memory"))); +#else +		fprintf(stderr, _("out of memory\n")); +		exit(EXIT_FAILURE); +#endif +	} + +	return nprinted + 1; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/relpath.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/relpath.c new file mode 100644 index 00000000000..87de5f6c960 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/relpath.c @@ -0,0 +1,210 @@ +/*------------------------------------------------------------------------- + * relpath.c + *		Shared frontend/backend code to compute pathnames of relation files + * + * This module also contains some logic associated with fork names. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *	  src/common/relpath.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "catalog/pg_tablespace_d.h" +#include "common/relpath.h" +#include "storage/backendid.h" + + +/* + * Lookup table of fork name by fork number. + * + * If you add a new entry, remember to update the errhint in + * forkname_to_number() below, and update the SGML documentation for + * pg_relation_size(). + */ +const char *const forkNames[] = { +	"main",						/* MAIN_FORKNUM */ +	"fsm",						/* FSM_FORKNUM */ +	"vm",						/* VISIBILITYMAP_FORKNUM */ +	"init"						/* INIT_FORKNUM */ +}; + +StaticAssertDecl(lengthof(forkNames) == (MAX_FORKNUM + 1), +				 "array length mismatch"); + +/* + * forkname_to_number - look up fork number by name + * + * In backend, we throw an error for no match; in frontend, we just + * return InvalidForkNumber. + */ +ForkNumber +forkname_to_number(const char *forkName) +{ +	ForkNumber	forkNum; + +	for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) +		if (strcmp(forkNames[forkNum], forkName) == 0) +			return forkNum; + +#ifndef FRONTEND +	ereport(ERROR, +			(errcode(ERRCODE_INVALID_PARAMETER_VALUE), +			 errmsg("invalid fork name"), +			 errhint("Valid fork names are \"main\", \"fsm\", " +					 "\"vm\", and \"init\"."))); +#endif + +	return InvalidForkNumber; +} + +/* + * forkname_chars + *		We use this to figure out whether a filename could be a relation + *		fork (as opposed to an oddly named stray file that somehow ended + *		up in the database directory).  If the passed string begins with + *		a fork name (other than the main fork name), we return its length, + *		and set *fork (if not NULL) to the fork number.  If not, we return 0. 
+ * + * Note that the present coding assumes that there are no fork names which + * are prefixes of other fork names. + */ +int +forkname_chars(const char *str, ForkNumber *fork) +{ +	ForkNumber	forkNum; + +	for (forkNum = 1; forkNum <= MAX_FORKNUM; forkNum++) +	{ +		int			len = strlen(forkNames[forkNum]); + +		if (strncmp(forkNames[forkNum], str, len) == 0) +		{ +			if (fork) +				*fork = forkNum; +			return len; +		} +	} +	if (fork) +		*fork = InvalidForkNumber; +	return 0; +} + + +/* + * GetDatabasePath - construct path to a database directory + * + * Result is a palloc'd string. + * + * XXX this must agree with GetRelationPath()! + */ +char * +GetDatabasePath(Oid dbOid, Oid spcOid) +{ +	if (spcOid == GLOBALTABLESPACE_OID) +	{ +		/* Shared system relations live in {datadir}/global */ +		Assert(dbOid == 0); +		return pstrdup("global"); +	} +	else if (spcOid == DEFAULTTABLESPACE_OID) +	{ +		/* The default tablespace is {datadir}/base */ +		return psprintf("base/%u", dbOid); +	} +	else +	{ +		/* All other tablespaces are accessed via symlinks */ +		return psprintf("pg_tblspc/%u/%s/%u", +						spcOid, TABLESPACE_VERSION_DIRECTORY, dbOid); +	} +} + +/* + * GetRelationPath - construct path to a relation's file + * + * Result is a palloc'd string. + * + * Note: ideally, backendId would be declared as type BackendId, but relpath.h + * would have to include a backend-only header to do that; doesn't seem worth + * the trouble considering BackendId is just int anyway. 
+ */ +char * +GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber, +				int backendId, ForkNumber forkNumber) +{ +	char	   *path; + +	if (spcOid == GLOBALTABLESPACE_OID) +	{ +		/* Shared system relations live in {datadir}/global */ +		Assert(dbOid == 0); +		Assert(backendId == InvalidBackendId); +		if (forkNumber != MAIN_FORKNUM) +			path = psprintf("global/%u_%s", +							relNumber, forkNames[forkNumber]); +		else +			path = psprintf("global/%u", relNumber); +	} +	else if (spcOid == DEFAULTTABLESPACE_OID) +	{ +		/* The default tablespace is {datadir}/base */ +		if (backendId == InvalidBackendId) +		{ +			if (forkNumber != MAIN_FORKNUM) +				path = psprintf("base/%u/%u_%s", +								dbOid, relNumber, +								forkNames[forkNumber]); +			else +				path = psprintf("base/%u/%u", +								dbOid, relNumber); +		} +		else +		{ +			if (forkNumber != MAIN_FORKNUM) +				path = psprintf("base/%u/t%d_%u_%s", +								dbOid, backendId, relNumber, +								forkNames[forkNumber]); +			else +				path = psprintf("base/%u/t%d_%u", +								dbOid, backendId, relNumber); +		} +	} +	else +	{ +		/* All other tablespaces are accessed via symlinks */ +		if (backendId == InvalidBackendId) +		{ +			if (forkNumber != MAIN_FORKNUM) +				path = psprintf("pg_tblspc/%u/%s/%u/%u_%s", +								spcOid, TABLESPACE_VERSION_DIRECTORY, +								dbOid, relNumber, +								forkNames[forkNumber]); +			else +				path = psprintf("pg_tblspc/%u/%s/%u/%u", +								spcOid, TABLESPACE_VERSION_DIRECTORY, +								dbOid, relNumber); +		} +		else +		{ +			if (forkNumber != MAIN_FORKNUM) +				path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s", +								spcOid, TABLESPACE_VERSION_DIRECTORY, +								dbOid, backendId, relNumber, +								forkNames[forkNumber]); +			else +				path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u", +								spcOid, TABLESPACE_VERSION_DIRECTORY, +								dbOid, backendId, relNumber); +		} +	} +	return path; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/rmtree.c 
b/yql/essentials/parser/pg_wrapper/postgresql/src/common/rmtree.c new file mode 100644 index 00000000000..78c5e93ad99 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/rmtree.c @@ -0,0 +1,130 @@ +/*------------------------------------------------------------------------- + * + * rmtree.c + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *	  src/common/rmtree.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <unistd.h> +#include <sys/stat.h> + +#include "common/file_utils.h" + +#ifndef FRONTEND +#include "storage/fd.h" +#define pg_log_warning(...) elog(WARNING, __VA_ARGS__) +#define LOG_LEVEL WARNING +#define OPENDIR(x) AllocateDir(x) +#define CLOSEDIR(x) FreeDir(x) +#else +#error #include "common/logging.h" +#define LOG_LEVEL PG_LOG_WARNING +#define OPENDIR(x) opendir(x) +#define CLOSEDIR(x) closedir(x) +#endif + +/* + *	rmtree + * + *	Delete a directory tree recursively. + *	Assumes path points to a valid directory. + *	Deletes everything under path. + *	If rmtopdir is true deletes the directory too. + *	Returns true if successful, false if there was any problem. + *	(The details of the problem are reported already, so caller + *	doesn't really have to say anything more, but most do.) 
+ */ +bool +rmtree(const char *path, bool rmtopdir) +{ +	char		pathbuf[MAXPGPATH]; +	DIR		   *dir; +	struct dirent *de; +	bool		result = true; +	size_t		dirnames_size = 0; +	size_t		dirnames_capacity = 8; +	char	  **dirnames = palloc(sizeof(char *) * dirnames_capacity); + +	dir = OPENDIR(path); +	if (dir == NULL) +	{ +		pg_log_warning("could not open directory \"%s\": %m", path); +		return false; +	} + +	while (errno = 0, (de = readdir(dir))) +	{ +		if (strcmp(de->d_name, ".") == 0 || +			strcmp(de->d_name, "..") == 0) +			continue; +		snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name); +		switch (get_dirent_type(pathbuf, de, false, LOG_LEVEL)) +		{ +			case PGFILETYPE_ERROR: +				/* already logged, press on */ +				break; +			case PGFILETYPE_DIR: + +				/* +				 * Defer recursion until after we've closed this directory, to +				 * avoid using more than one file descriptor at a time. +				 */ +				if (dirnames_size == dirnames_capacity) +				{ +					dirnames = repalloc(dirnames, +										sizeof(char *) * dirnames_capacity * 2); +					dirnames_capacity *= 2; +				} +				dirnames[dirnames_size++] = pstrdup(pathbuf); +				break; +			default: +				if (unlink(pathbuf) != 0 && errno != ENOENT) +				{ +					pg_log_warning("could not remove file \"%s\": %m", pathbuf); +					result = false; +				} +				break; +		} +	} + +	if (errno != 0) +	{ +		pg_log_warning("could not read directory \"%s\": %m", path); +		result = false; +	} + +	CLOSEDIR(dir); + +	/* Now recurse into the subdirectories we found. 
*/ +	for (size_t i = 0; i < dirnames_size; ++i) +	{ +		if (!rmtree(dirnames[i], true)) +			result = false; +		pfree(dirnames[i]); +	} + +	if (rmtopdir) +	{ +		if (rmdir(path) != 0) +		{ +			pg_log_warning("could not remove directory \"%s\": %m", path); +			result = false; +		} +	} + +	pfree(dirnames); + +	return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/ryu_common.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/ryu_common.h new file mode 100644 index 00000000000..ad850acf62f --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/ryu_common.h @@ -0,0 +1,133 @@ +/*--------------------------------------------------------------------------- + * + * Common routines for Ryu floating-point output. + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/ryu_common.h + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + *     (See accompanying file LICENSE-Apache or copy at + *      http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + *     (See accompanying file LICENSE-Boost or copy at + *      https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ +#ifndef RYU_COMMON_H +#define RYU_COMMON_H + +/* + * Upstream Ryu's output is always the shortest possible. 
But we adjust that + * slightly to improve portability: we avoid outputting the exact midpoint + * value between two representable floats, since that relies on the reader + * getting the round-to-even rule correct, which seems to be the common + * failure mode. + * + * Defining this to 1 would restore the upstream behavior. + */ +#define STRICTLY_SHORTEST 0 + +#if SIZEOF_SIZE_T < 8 +#define RYU_32_BIT_PLATFORM +#endif + +/*  Returns e == 0 ? 1 : ceil(log_2(5^e)). */ +static inline uint32 +pow5bits(const int32 e) +{ +	/* +	 * This approximation works up to the point that the multiplication +	 * overflows at e = 3529. +	 * +	 * If the multiplication were done in 64 bits, it would fail at 5^4004 +	 * which is just greater than 2^9297. +	 */ +	Assert(e >= 0); +	Assert(e <= 3528); +	return ((((uint32) e) * 1217359) >> 19) + 1; +} + +/*  Returns floor(log_10(2^e)). */ +static inline int32 +log10Pow2(const int32 e) +{ +	/* +	 * The first value this approximation fails for is 2^1651 which is just +	 * greater than 10^297. +	 */ +	Assert(e >= 0); +	Assert(e <= 1650); +	return (int32) ((((uint32) e) * 78913) >> 18); +} + +/*  Returns floor(log_10(5^e)). */ +static inline int32 +log10Pow5(const int32 e) +{ +	/* +	 * The first value this approximation fails for is 5^2621 which is just +	 * greater than 10^1832. 
+	 */ +	Assert(e >= 0); +	Assert(e <= 2620); +	return (int32) ((((uint32) e) * 732923) >> 20); +} + +static inline int +copy_special_str(char *const result, const bool sign, const bool exponent, const bool mantissa) +{ +	if (mantissa) +	{ +		memcpy(result, "NaN", 3); +		return 3; +	} +	if (sign) +	{ +		result[0] = '-'; +	} +	if (exponent) +	{ +		memcpy(result + sign, "Infinity", 8); +		return sign + 8; +	} +	result[sign] = '0'; +	return sign + 1; +} + +static inline uint32 +float_to_bits(const float f) +{ +	uint32		bits = 0; + +	memcpy(&bits, &f, sizeof(float)); +	return bits; +} + +static inline uint64 +double_to_bits(const double d) +{ +	uint64		bits = 0; + +	memcpy(&bits, &d, sizeof(double)); +	return bits; +} + +#endif							/* RYU_COMMON_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c new file mode 100644 index 00000000000..3cf498866a5 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c @@ -0,0 +1,1245 @@ +/*------------------------------------------------------------------------- + * saslprep.c + *		SASLprep normalization, for SCRAM authentication + * + * The SASLprep algorithm is used to process a user-supplied password into + * canonical form.  
For more details, see:
 *
 * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
 *	  http://www.ietf.org/rfc/rfc3454.txt
 *
 * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
 *	  http://www.ietf.org/rfc/rfc4013.txt
 *
 *
 * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/common/saslprep.c
 *
 *-------------------------------------------------------------------------
 */
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include "common/saslprep.h"
#include "common/string.h"
#include "common/unicode_norm.h"
#include "mb/pg_wchar.h"

/*
 * In backend, we will use palloc/pfree.  In frontend, use malloc, and
 * return SASLPREP_OOM on out-of-memory.
 */
#ifndef FRONTEND
#define STRDUP(s) pstrdup(s)
#define ALLOC(size) palloc(size)
#define FREE(size) pfree(size)
#else
#define STRDUP(s) strdup(s)
#define ALLOC(size) malloc(size)
#define FREE(size) free(size)
#endif

/* Prototypes for local functions */
static int	codepoint_range_cmp(const void *a, const void *b);
static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
static int	pg_utf8_string_len(const char *source);

/*
 * Stringprep Mapping Tables.
 *
 * The stringprep specification includes a number of tables of Unicode
 * codepoints, used in different parts of the algorithm.  They are below,
 * as arrays of codepoint ranges.  Each range is a pair of codepoints,
 * for the first and last codepoint included in the range (inclusive!).
 */

/*
 * C.1.2 Non-ASCII space characters
 *
 * These are all mapped to the ASCII space character (U+0020).
 */
static const pg_wchar non_ascii_space_ranges[] =
{
	0x00A0, 0x00A0,
	0x1680, 0x1680,
	0x2000, 0x200B,
	0x202F, 0x202F,
	0x205F, 0x205F,
	0x3000, 0x3000
};

/*
 * B.1 Commonly mapped to nothing
 *
 * If any of these appear in the input, they are removed.
 */
static const pg_wchar commonly_mapped_to_nothing_ranges[] =
{
	0x00AD, 0x00AD,
	0x034F, 0x034F,
	0x1806, 0x1806,
	0x180B, 0x180D,
	0x200B, 0x200D,
	0x2060, 0x2060,
	0xFE00, 0xFE0F,
	0xFEFF, 0xFEFF
};

/*
 * prohibited_output_ranges is a union of all the characters from
 * the following tables:
 *
 * C.1.2 Non-ASCII space characters
 * C.2.1 ASCII control characters
 * C.2.2 Non-ASCII control characters
 * C.3 Private Use characters
 * C.4 Non-character code points
 * C.5 Surrogate code points
 * C.6 Inappropriate for plain text characters
 * C.7 Inappropriate for canonical representation characters
 * C.8 Change display properties or deprecated characters
 * C.9 Tagging characters
 *
 * These are the tables that are listed as "prohibited output"
 * characters in the SASLprep profile.
 *
 * The comment after each code range indicates which source table
 * the code came from.  Note that there is some overlap in the source
 * tables, so one code might originate from multiple source tables.
 * Adjacent ranges have also been merged together, to save space.
+ */ +static const pg_wchar prohibited_output_ranges[] = +{ +	0x0000, 0x001F,				/* C.2.1 */ +	0x007F, 0x00A0,				/* C.1.2, C.2.1, C.2.2 */ +	0x0340, 0x0341,				/* C.8 */ +	0x06DD, 0x06DD,				/* C.2.2 */ +	0x070F, 0x070F,				/* C.2.2 */ +	0x1680, 0x1680,				/* C.1.2 */ +	0x180E, 0x180E,				/* C.2.2 */ +	0x2000, 0x200F,				/* C.1.2, C.2.2, C.8 */ +	0x2028, 0x202F,				/* C.1.2, C.2.2, C.8 */ +	0x205F, 0x2063,				/* C.1.2, C.2.2 */ +	0x206A, 0x206F,				/* C.2.2, C.8 */ +	0x2FF0, 0x2FFB,				/* C.7 */ +	0x3000, 0x3000,				/* C.1.2 */ +	0xD800, 0xF8FF,				/* C.3, C.5 */ +	0xFDD0, 0xFDEF,				/* C.4 */ +	0xFEFF, 0xFEFF,				/* C.2.2 */ +	0xFFF9, 0xFFFF,				/* C.2.2, C.4, C.6 */ +	0x1D173, 0x1D17A,			/* C.2.2 */ +	0x1FFFE, 0x1FFFF,			/* C.4 */ +	0x2FFFE, 0x2FFFF,			/* C.4 */ +	0x3FFFE, 0x3FFFF,			/* C.4 */ +	0x4FFFE, 0x4FFFF,			/* C.4 */ +	0x5FFFE, 0x5FFFF,			/* C.4 */ +	0x6FFFE, 0x6FFFF,			/* C.4 */ +	0x7FFFE, 0x7FFFF,			/* C.4 */ +	0x8FFFE, 0x8FFFF,			/* C.4 */ +	0x9FFFE, 0x9FFFF,			/* C.4 */ +	0xAFFFE, 0xAFFFF,			/* C.4 */ +	0xBFFFE, 0xBFFFF,			/* C.4 */ +	0xCFFFE, 0xCFFFF,			/* C.4 */ +	0xDFFFE, 0xDFFFF,			/* C.4 */ +	0xE0001, 0xE0001,			/* C.9 */ +	0xE0020, 0xE007F,			/* C.9 */ +	0xEFFFE, 0xEFFFF,			/* C.4 */ +	0xF0000, 0xFFFFF,			/* C.3, C.4 */ +	0x100000, 0x10FFFF			/* C.3, C.4 */ +}; + +/* A.1 Unassigned code points in Unicode 3.2 */ +static const pg_wchar unassigned_codepoint_ranges[] = +{ +	0x0221, 0x0221, +	0x0234, 0x024F, +	0x02AE, 0x02AF, +	0x02EF, 0x02FF, +	0x0350, 0x035F, +	0x0370, 0x0373, +	0x0376, 0x0379, +	0x037B, 0x037D, +	0x037F, 0x0383, +	0x038B, 0x038B, +	0x038D, 0x038D, +	0x03A2, 0x03A2, +	0x03CF, 0x03CF, +	0x03F7, 0x03FF, +	0x0487, 0x0487, +	0x04CF, 0x04CF, +	0x04F6, 0x04F7, +	0x04FA, 0x04FF, +	0x0510, 0x0530, +	0x0557, 0x0558, +	0x0560, 0x0560, +	0x0588, 0x0588, +	0x058B, 0x0590, +	0x05A2, 0x05A2, +	0x05BA, 0x05BA, +	0x05C5, 0x05CF, +	0x05EB, 0x05EF, +	0x05F5, 0x060B, +	0x060D, 0x061A, +	0x061C, 0x061E, +	0x0620, 0x0620, +	0x063B, 0x063F, +	0x0656, 
0x065F, +	0x06EE, 0x06EF, +	0x06FF, 0x06FF, +	0x070E, 0x070E, +	0x072D, 0x072F, +	0x074B, 0x077F, +	0x07B2, 0x0900, +	0x0904, 0x0904, +	0x093A, 0x093B, +	0x094E, 0x094F, +	0x0955, 0x0957, +	0x0971, 0x0980, +	0x0984, 0x0984, +	0x098D, 0x098E, +	0x0991, 0x0992, +	0x09A9, 0x09A9, +	0x09B1, 0x09B1, +	0x09B3, 0x09B5, +	0x09BA, 0x09BB, +	0x09BD, 0x09BD, +	0x09C5, 0x09C6, +	0x09C9, 0x09CA, +	0x09CE, 0x09D6, +	0x09D8, 0x09DB, +	0x09DE, 0x09DE, +	0x09E4, 0x09E5, +	0x09FB, 0x0A01, +	0x0A03, 0x0A04, +	0x0A0B, 0x0A0E, +	0x0A11, 0x0A12, +	0x0A29, 0x0A29, +	0x0A31, 0x0A31, +	0x0A34, 0x0A34, +	0x0A37, 0x0A37, +	0x0A3A, 0x0A3B, +	0x0A3D, 0x0A3D, +	0x0A43, 0x0A46, +	0x0A49, 0x0A4A, +	0x0A4E, 0x0A58, +	0x0A5D, 0x0A5D, +	0x0A5F, 0x0A65, +	0x0A75, 0x0A80, +	0x0A84, 0x0A84, +	0x0A8C, 0x0A8C, +	0x0A8E, 0x0A8E, +	0x0A92, 0x0A92, +	0x0AA9, 0x0AA9, +	0x0AB1, 0x0AB1, +	0x0AB4, 0x0AB4, +	0x0ABA, 0x0ABB, +	0x0AC6, 0x0AC6, +	0x0ACA, 0x0ACA, +	0x0ACE, 0x0ACF, +	0x0AD1, 0x0ADF, +	0x0AE1, 0x0AE5, +	0x0AF0, 0x0B00, +	0x0B04, 0x0B04, +	0x0B0D, 0x0B0E, +	0x0B11, 0x0B12, +	0x0B29, 0x0B29, +	0x0B31, 0x0B31, +	0x0B34, 0x0B35, +	0x0B3A, 0x0B3B, +	0x0B44, 0x0B46, +	0x0B49, 0x0B4A, +	0x0B4E, 0x0B55, +	0x0B58, 0x0B5B, +	0x0B5E, 0x0B5E, +	0x0B62, 0x0B65, +	0x0B71, 0x0B81, +	0x0B84, 0x0B84, +	0x0B8B, 0x0B8D, +	0x0B91, 0x0B91, +	0x0B96, 0x0B98, +	0x0B9B, 0x0B9B, +	0x0B9D, 0x0B9D, +	0x0BA0, 0x0BA2, +	0x0BA5, 0x0BA7, +	0x0BAB, 0x0BAD, +	0x0BB6, 0x0BB6, +	0x0BBA, 0x0BBD, +	0x0BC3, 0x0BC5, +	0x0BC9, 0x0BC9, +	0x0BCE, 0x0BD6, +	0x0BD8, 0x0BE6, +	0x0BF3, 0x0C00, +	0x0C04, 0x0C04, +	0x0C0D, 0x0C0D, +	0x0C11, 0x0C11, +	0x0C29, 0x0C29, +	0x0C34, 0x0C34, +	0x0C3A, 0x0C3D, +	0x0C45, 0x0C45, +	0x0C49, 0x0C49, +	0x0C4E, 0x0C54, +	0x0C57, 0x0C5F, +	0x0C62, 0x0C65, +	0x0C70, 0x0C81, +	0x0C84, 0x0C84, +	0x0C8D, 0x0C8D, +	0x0C91, 0x0C91, +	0x0CA9, 0x0CA9, +	0x0CB4, 0x0CB4, +	0x0CBA, 0x0CBD, +	0x0CC5, 0x0CC5, +	0x0CC9, 0x0CC9, +	0x0CCE, 0x0CD4, +	0x0CD7, 0x0CDD, +	0x0CDF, 0x0CDF, +	0x0CE2, 0x0CE5, +	0x0CF0, 0x0D01, +	0x0D04, 
0x0D04, +	0x0D0D, 0x0D0D, +	0x0D11, 0x0D11, +	0x0D29, 0x0D29, +	0x0D3A, 0x0D3D, +	0x0D44, 0x0D45, +	0x0D49, 0x0D49, +	0x0D4E, 0x0D56, +	0x0D58, 0x0D5F, +	0x0D62, 0x0D65, +	0x0D70, 0x0D81, +	0x0D84, 0x0D84, +	0x0D97, 0x0D99, +	0x0DB2, 0x0DB2, +	0x0DBC, 0x0DBC, +	0x0DBE, 0x0DBF, +	0x0DC7, 0x0DC9, +	0x0DCB, 0x0DCE, +	0x0DD5, 0x0DD5, +	0x0DD7, 0x0DD7, +	0x0DE0, 0x0DF1, +	0x0DF5, 0x0E00, +	0x0E3B, 0x0E3E, +	0x0E5C, 0x0E80, +	0x0E83, 0x0E83, +	0x0E85, 0x0E86, +	0x0E89, 0x0E89, +	0x0E8B, 0x0E8C, +	0x0E8E, 0x0E93, +	0x0E98, 0x0E98, +	0x0EA0, 0x0EA0, +	0x0EA4, 0x0EA4, +	0x0EA6, 0x0EA6, +	0x0EA8, 0x0EA9, +	0x0EAC, 0x0EAC, +	0x0EBA, 0x0EBA, +	0x0EBE, 0x0EBF, +	0x0EC5, 0x0EC5, +	0x0EC7, 0x0EC7, +	0x0ECE, 0x0ECF, +	0x0EDA, 0x0EDB, +	0x0EDE, 0x0EFF, +	0x0F48, 0x0F48, +	0x0F6B, 0x0F70, +	0x0F8C, 0x0F8F, +	0x0F98, 0x0F98, +	0x0FBD, 0x0FBD, +	0x0FCD, 0x0FCE, +	0x0FD0, 0x0FFF, +	0x1022, 0x1022, +	0x1028, 0x1028, +	0x102B, 0x102B, +	0x1033, 0x1035, +	0x103A, 0x103F, +	0x105A, 0x109F, +	0x10C6, 0x10CF, +	0x10F9, 0x10FA, +	0x10FC, 0x10FF, +	0x115A, 0x115E, +	0x11A3, 0x11A7, +	0x11FA, 0x11FF, +	0x1207, 0x1207, +	0x1247, 0x1247, +	0x1249, 0x1249, +	0x124E, 0x124F, +	0x1257, 0x1257, +	0x1259, 0x1259, +	0x125E, 0x125F, +	0x1287, 0x1287, +	0x1289, 0x1289, +	0x128E, 0x128F, +	0x12AF, 0x12AF, +	0x12B1, 0x12B1, +	0x12B6, 0x12B7, +	0x12BF, 0x12BF, +	0x12C1, 0x12C1, +	0x12C6, 0x12C7, +	0x12CF, 0x12CF, +	0x12D7, 0x12D7, +	0x12EF, 0x12EF, +	0x130F, 0x130F, +	0x1311, 0x1311, +	0x1316, 0x1317, +	0x131F, 0x131F, +	0x1347, 0x1347, +	0x135B, 0x1360, +	0x137D, 0x139F, +	0x13F5, 0x1400, +	0x1677, 0x167F, +	0x169D, 0x169F, +	0x16F1, 0x16FF, +	0x170D, 0x170D, +	0x1715, 0x171F, +	0x1737, 0x173F, +	0x1754, 0x175F, +	0x176D, 0x176D, +	0x1771, 0x1771, +	0x1774, 0x177F, +	0x17DD, 0x17DF, +	0x17EA, 0x17FF, +	0x180F, 0x180F, +	0x181A, 0x181F, +	0x1878, 0x187F, +	0x18AA, 0x1DFF, +	0x1E9C, 0x1E9F, +	0x1EFA, 0x1EFF, +	0x1F16, 0x1F17, +	0x1F1E, 0x1F1F, +	0x1F46, 0x1F47, +	0x1F4E, 0x1F4F, +	0x1F58, 0x1F58, +	0x1F5A, 
0x1F5A, +	0x1F5C, 0x1F5C, +	0x1F5E, 0x1F5E, +	0x1F7E, 0x1F7F, +	0x1FB5, 0x1FB5, +	0x1FC5, 0x1FC5, +	0x1FD4, 0x1FD5, +	0x1FDC, 0x1FDC, +	0x1FF0, 0x1FF1, +	0x1FF5, 0x1FF5, +	0x1FFF, 0x1FFF, +	0x2053, 0x2056, +	0x2058, 0x205E, +	0x2064, 0x2069, +	0x2072, 0x2073, +	0x208F, 0x209F, +	0x20B2, 0x20CF, +	0x20EB, 0x20FF, +	0x213B, 0x213C, +	0x214C, 0x2152, +	0x2184, 0x218F, +	0x23CF, 0x23FF, +	0x2427, 0x243F, +	0x244B, 0x245F, +	0x24FF, 0x24FF, +	0x2614, 0x2615, +	0x2618, 0x2618, +	0x267E, 0x267F, +	0x268A, 0x2700, +	0x2705, 0x2705, +	0x270A, 0x270B, +	0x2728, 0x2728, +	0x274C, 0x274C, +	0x274E, 0x274E, +	0x2753, 0x2755, +	0x2757, 0x2757, +	0x275F, 0x2760, +	0x2795, 0x2797, +	0x27B0, 0x27B0, +	0x27BF, 0x27CF, +	0x27EC, 0x27EF, +	0x2B00, 0x2E7F, +	0x2E9A, 0x2E9A, +	0x2EF4, 0x2EFF, +	0x2FD6, 0x2FEF, +	0x2FFC, 0x2FFF, +	0x3040, 0x3040, +	0x3097, 0x3098, +	0x3100, 0x3104, +	0x312D, 0x3130, +	0x318F, 0x318F, +	0x31B8, 0x31EF, +	0x321D, 0x321F, +	0x3244, 0x3250, +	0x327C, 0x327E, +	0x32CC, 0x32CF, +	0x32FF, 0x32FF, +	0x3377, 0x337A, +	0x33DE, 0x33DF, +	0x33FF, 0x33FF, +	0x4DB6, 0x4DFF, +	0x9FA6, 0x9FFF, +	0xA48D, 0xA48F, +	0xA4C7, 0xABFF, +	0xD7A4, 0xD7FF, +	0xFA2E, 0xFA2F, +	0xFA6B, 0xFAFF, +	0xFB07, 0xFB12, +	0xFB18, 0xFB1C, +	0xFB37, 0xFB37, +	0xFB3D, 0xFB3D, +	0xFB3F, 0xFB3F, +	0xFB42, 0xFB42, +	0xFB45, 0xFB45, +	0xFBB2, 0xFBD2, +	0xFD40, 0xFD4F, +	0xFD90, 0xFD91, +	0xFDC8, 0xFDCF, +	0xFDFD, 0xFDFF, +	0xFE10, 0xFE1F, +	0xFE24, 0xFE2F, +	0xFE47, 0xFE48, +	0xFE53, 0xFE53, +	0xFE67, 0xFE67, +	0xFE6C, 0xFE6F, +	0xFE75, 0xFE75, +	0xFEFD, 0xFEFE, +	0xFF00, 0xFF00, +	0xFFBF, 0xFFC1, +	0xFFC8, 0xFFC9, +	0xFFD0, 0xFFD1, +	0xFFD8, 0xFFD9, +	0xFFDD, 0xFFDF, +	0xFFE7, 0xFFE7, +	0xFFEF, 0xFFF8, +	0x10000, 0x102FF, +	0x1031F, 0x1031F, +	0x10324, 0x1032F, +	0x1034B, 0x103FF, +	0x10426, 0x10427, +	0x1044E, 0x1CFFF, +	0x1D0F6, 0x1D0FF, +	0x1D127, 0x1D129, +	0x1D1DE, 0x1D3FF, +	0x1D455, 0x1D455, +	0x1D49D, 0x1D49D, +	0x1D4A0, 0x1D4A1, +	0x1D4A3, 0x1D4A4, +	0x1D4A7, 0x1D4A8, +	0x1D4AD, 0x1D4AD, 
+	0x1D4BA, 0x1D4BA, +	0x1D4BC, 0x1D4BC, +	0x1D4C1, 0x1D4C1, +	0x1D4C4, 0x1D4C4, +	0x1D506, 0x1D506, +	0x1D50B, 0x1D50C, +	0x1D515, 0x1D515, +	0x1D51D, 0x1D51D, +	0x1D53A, 0x1D53A, +	0x1D53F, 0x1D53F, +	0x1D545, 0x1D545, +	0x1D547, 0x1D549, +	0x1D551, 0x1D551, +	0x1D6A4, 0x1D6A7, +	0x1D7CA, 0x1D7CD, +	0x1D800, 0x1FFFD, +	0x2A6D7, 0x2F7FF, +	0x2FA1E, 0x2FFFD, +	0x30000, 0x3FFFD, +	0x40000, 0x4FFFD, +	0x50000, 0x5FFFD, +	0x60000, 0x6FFFD, +	0x70000, 0x7FFFD, +	0x80000, 0x8FFFD, +	0x90000, 0x9FFFD, +	0xA0000, 0xAFFFD, +	0xB0000, 0xBFFFD, +	0xC0000, 0xCFFFD, +	0xD0000, 0xDFFFD, +	0xE0000, 0xE0000, +	0xE0002, 0xE001F, +	0xE0080, 0xEFFFD +}; + +/* D.1 Characters with bidirectional property "R" or "AL" */ +static const pg_wchar RandALCat_codepoint_ranges[] = +{ +	0x05BE, 0x05BE, +	0x05C0, 0x05C0, +	0x05C3, 0x05C3, +	0x05D0, 0x05EA, +	0x05F0, 0x05F4, +	0x061B, 0x061B, +	0x061F, 0x061F, +	0x0621, 0x063A, +	0x0640, 0x064A, +	0x066D, 0x066F, +	0x0671, 0x06D5, +	0x06DD, 0x06DD, +	0x06E5, 0x06E6, +	0x06FA, 0x06FE, +	0x0700, 0x070D, +	0x0710, 0x0710, +	0x0712, 0x072C, +	0x0780, 0x07A5, +	0x07B1, 0x07B1, +	0x200F, 0x200F, +	0xFB1D, 0xFB1D, +	0xFB1F, 0xFB28, +	0xFB2A, 0xFB36, +	0xFB38, 0xFB3C, +	0xFB3E, 0xFB3E, +	0xFB40, 0xFB41, +	0xFB43, 0xFB44, +	0xFB46, 0xFBB1, +	0xFBD3, 0xFD3D, +	0xFD50, 0xFD8F, +	0xFD92, 0xFDC7, +	0xFDF0, 0xFDFC, +	0xFE70, 0xFE74, +	0xFE76, 0xFEFC +}; + +/* D.2 Characters with bidirectional property "L" */ +static const pg_wchar LCat_codepoint_ranges[] = +{ +	0x0041, 0x005A, +	0x0061, 0x007A, +	0x00AA, 0x00AA, +	0x00B5, 0x00B5, +	0x00BA, 0x00BA, +	0x00C0, 0x00D6, +	0x00D8, 0x00F6, +	0x00F8, 0x0220, +	0x0222, 0x0233, +	0x0250, 0x02AD, +	0x02B0, 0x02B8, +	0x02BB, 0x02C1, +	0x02D0, 0x02D1, +	0x02E0, 0x02E4, +	0x02EE, 0x02EE, +	0x037A, 0x037A, +	0x0386, 0x0386, +	0x0388, 0x038A, +	0x038C, 0x038C, +	0x038E, 0x03A1, +	0x03A3, 0x03CE, +	0x03D0, 0x03F5, +	0x0400, 0x0482, +	0x048A, 0x04CE, +	0x04D0, 0x04F5, +	0x04F8, 0x04F9, +	0x0500, 0x050F, +	0x0531, 0x0556, +	
0x0559, 0x055F, +	0x0561, 0x0587, +	0x0589, 0x0589, +	0x0903, 0x0903, +	0x0905, 0x0939, +	0x093D, 0x0940, +	0x0949, 0x094C, +	0x0950, 0x0950, +	0x0958, 0x0961, +	0x0964, 0x0970, +	0x0982, 0x0983, +	0x0985, 0x098C, +	0x098F, 0x0990, +	0x0993, 0x09A8, +	0x09AA, 0x09B0, +	0x09B2, 0x09B2, +	0x09B6, 0x09B9, +	0x09BE, 0x09C0, +	0x09C7, 0x09C8, +	0x09CB, 0x09CC, +	0x09D7, 0x09D7, +	0x09DC, 0x09DD, +	0x09DF, 0x09E1, +	0x09E6, 0x09F1, +	0x09F4, 0x09FA, +	0x0A05, 0x0A0A, +	0x0A0F, 0x0A10, +	0x0A13, 0x0A28, +	0x0A2A, 0x0A30, +	0x0A32, 0x0A33, +	0x0A35, 0x0A36, +	0x0A38, 0x0A39, +	0x0A3E, 0x0A40, +	0x0A59, 0x0A5C, +	0x0A5E, 0x0A5E, +	0x0A66, 0x0A6F, +	0x0A72, 0x0A74, +	0x0A83, 0x0A83, +	0x0A85, 0x0A8B, +	0x0A8D, 0x0A8D, +	0x0A8F, 0x0A91, +	0x0A93, 0x0AA8, +	0x0AAA, 0x0AB0, +	0x0AB2, 0x0AB3, +	0x0AB5, 0x0AB9, +	0x0ABD, 0x0AC0, +	0x0AC9, 0x0AC9, +	0x0ACB, 0x0ACC, +	0x0AD0, 0x0AD0, +	0x0AE0, 0x0AE0, +	0x0AE6, 0x0AEF, +	0x0B02, 0x0B03, +	0x0B05, 0x0B0C, +	0x0B0F, 0x0B10, +	0x0B13, 0x0B28, +	0x0B2A, 0x0B30, +	0x0B32, 0x0B33, +	0x0B36, 0x0B39, +	0x0B3D, 0x0B3E, +	0x0B40, 0x0B40, +	0x0B47, 0x0B48, +	0x0B4B, 0x0B4C, +	0x0B57, 0x0B57, +	0x0B5C, 0x0B5D, +	0x0B5F, 0x0B61, +	0x0B66, 0x0B70, +	0x0B83, 0x0B83, +	0x0B85, 0x0B8A, +	0x0B8E, 0x0B90, +	0x0B92, 0x0B95, +	0x0B99, 0x0B9A, +	0x0B9C, 0x0B9C, +	0x0B9E, 0x0B9F, +	0x0BA3, 0x0BA4, +	0x0BA8, 0x0BAA, +	0x0BAE, 0x0BB5, +	0x0BB7, 0x0BB9, +	0x0BBE, 0x0BBF, +	0x0BC1, 0x0BC2, +	0x0BC6, 0x0BC8, +	0x0BCA, 0x0BCC, +	0x0BD7, 0x0BD7, +	0x0BE7, 0x0BF2, +	0x0C01, 0x0C03, +	0x0C05, 0x0C0C, +	0x0C0E, 0x0C10, +	0x0C12, 0x0C28, +	0x0C2A, 0x0C33, +	0x0C35, 0x0C39, +	0x0C41, 0x0C44, +	0x0C60, 0x0C61, +	0x0C66, 0x0C6F, +	0x0C82, 0x0C83, +	0x0C85, 0x0C8C, +	0x0C8E, 0x0C90, +	0x0C92, 0x0CA8, +	0x0CAA, 0x0CB3, +	0x0CB5, 0x0CB9, +	0x0CBE, 0x0CBE, +	0x0CC0, 0x0CC4, +	0x0CC7, 0x0CC8, +	0x0CCA, 0x0CCB, +	0x0CD5, 0x0CD6, +	0x0CDE, 0x0CDE, +	0x0CE0, 0x0CE1, +	0x0CE6, 0x0CEF, +	0x0D02, 0x0D03, +	0x0D05, 0x0D0C, +	0x0D0E, 0x0D10, +	0x0D12, 0x0D28, +	0x0D2A, 0x0D39, +	
0x0D3E, 0x0D40, +	0x0D46, 0x0D48, +	0x0D4A, 0x0D4C, +	0x0D57, 0x0D57, +	0x0D60, 0x0D61, +	0x0D66, 0x0D6F, +	0x0D82, 0x0D83, +	0x0D85, 0x0D96, +	0x0D9A, 0x0DB1, +	0x0DB3, 0x0DBB, +	0x0DBD, 0x0DBD, +	0x0DC0, 0x0DC6, +	0x0DCF, 0x0DD1, +	0x0DD8, 0x0DDF, +	0x0DF2, 0x0DF4, +	0x0E01, 0x0E30, +	0x0E32, 0x0E33, +	0x0E40, 0x0E46, +	0x0E4F, 0x0E5B, +	0x0E81, 0x0E82, +	0x0E84, 0x0E84, +	0x0E87, 0x0E88, +	0x0E8A, 0x0E8A, +	0x0E8D, 0x0E8D, +	0x0E94, 0x0E97, +	0x0E99, 0x0E9F, +	0x0EA1, 0x0EA3, +	0x0EA5, 0x0EA5, +	0x0EA7, 0x0EA7, +	0x0EAA, 0x0EAB, +	0x0EAD, 0x0EB0, +	0x0EB2, 0x0EB3, +	0x0EBD, 0x0EBD, +	0x0EC0, 0x0EC4, +	0x0EC6, 0x0EC6, +	0x0ED0, 0x0ED9, +	0x0EDC, 0x0EDD, +	0x0F00, 0x0F17, +	0x0F1A, 0x0F34, +	0x0F36, 0x0F36, +	0x0F38, 0x0F38, +	0x0F3E, 0x0F47, +	0x0F49, 0x0F6A, +	0x0F7F, 0x0F7F, +	0x0F85, 0x0F85, +	0x0F88, 0x0F8B, +	0x0FBE, 0x0FC5, +	0x0FC7, 0x0FCC, +	0x0FCF, 0x0FCF, +	0x1000, 0x1021, +	0x1023, 0x1027, +	0x1029, 0x102A, +	0x102C, 0x102C, +	0x1031, 0x1031, +	0x1038, 0x1038, +	0x1040, 0x1057, +	0x10A0, 0x10C5, +	0x10D0, 0x10F8, +	0x10FB, 0x10FB, +	0x1100, 0x1159, +	0x115F, 0x11A2, +	0x11A8, 0x11F9, +	0x1200, 0x1206, +	0x1208, 0x1246, +	0x1248, 0x1248, +	0x124A, 0x124D, +	0x1250, 0x1256, +	0x1258, 0x1258, +	0x125A, 0x125D, +	0x1260, 0x1286, +	0x1288, 0x1288, +	0x128A, 0x128D, +	0x1290, 0x12AE, +	0x12B0, 0x12B0, +	0x12B2, 0x12B5, +	0x12B8, 0x12BE, +	0x12C0, 0x12C0, +	0x12C2, 0x12C5, +	0x12C8, 0x12CE, +	0x12D0, 0x12D6, +	0x12D8, 0x12EE, +	0x12F0, 0x130E, +	0x1310, 0x1310, +	0x1312, 0x1315, +	0x1318, 0x131E, +	0x1320, 0x1346, +	0x1348, 0x135A, +	0x1361, 0x137C, +	0x13A0, 0x13F4, +	0x1401, 0x1676, +	0x1681, 0x169A, +	0x16A0, 0x16F0, +	0x1700, 0x170C, +	0x170E, 0x1711, +	0x1720, 0x1731, +	0x1735, 0x1736, +	0x1740, 0x1751, +	0x1760, 0x176C, +	0x176E, 0x1770, +	0x1780, 0x17B6, +	0x17BE, 0x17C5, +	0x17C7, 0x17C8, +	0x17D4, 0x17DA, +	0x17DC, 0x17DC, +	0x17E0, 0x17E9, +	0x1810, 0x1819, +	0x1820, 0x1877, +	0x1880, 0x18A8, +	0x1E00, 0x1E9B, +	0x1EA0, 0x1EF9, +	0x1F00, 0x1F15, +	
0x1F18, 0x1F1D, +	0x1F20, 0x1F45, +	0x1F48, 0x1F4D, +	0x1F50, 0x1F57, +	0x1F59, 0x1F59, +	0x1F5B, 0x1F5B, +	0x1F5D, 0x1F5D, +	0x1F5F, 0x1F7D, +	0x1F80, 0x1FB4, +	0x1FB6, 0x1FBC, +	0x1FBE, 0x1FBE, +	0x1FC2, 0x1FC4, +	0x1FC6, 0x1FCC, +	0x1FD0, 0x1FD3, +	0x1FD6, 0x1FDB, +	0x1FE0, 0x1FEC, +	0x1FF2, 0x1FF4, +	0x1FF6, 0x1FFC, +	0x200E, 0x200E, +	0x2071, 0x2071, +	0x207F, 0x207F, +	0x2102, 0x2102, +	0x2107, 0x2107, +	0x210A, 0x2113, +	0x2115, 0x2115, +	0x2119, 0x211D, +	0x2124, 0x2124, +	0x2126, 0x2126, +	0x2128, 0x2128, +	0x212A, 0x212D, +	0x212F, 0x2131, +	0x2133, 0x2139, +	0x213D, 0x213F, +	0x2145, 0x2149, +	0x2160, 0x2183, +	0x2336, 0x237A, +	0x2395, 0x2395, +	0x249C, 0x24E9, +	0x3005, 0x3007, +	0x3021, 0x3029, +	0x3031, 0x3035, +	0x3038, 0x303C, +	0x3041, 0x3096, +	0x309D, 0x309F, +	0x30A1, 0x30FA, +	0x30FC, 0x30FF, +	0x3105, 0x312C, +	0x3131, 0x318E, +	0x3190, 0x31B7, +	0x31F0, 0x321C, +	0x3220, 0x3243, +	0x3260, 0x327B, +	0x327F, 0x32B0, +	0x32C0, 0x32CB, +	0x32D0, 0x32FE, +	0x3300, 0x3376, +	0x337B, 0x33DD, +	0x33E0, 0x33FE, +	0x3400, 0x4DB5, +	0x4E00, 0x9FA5, +	0xA000, 0xA48C, +	0xAC00, 0xD7A3, +	0xD800, 0xFA2D, +	0xFA30, 0xFA6A, +	0xFB00, 0xFB06, +	0xFB13, 0xFB17, +	0xFF21, 0xFF3A, +	0xFF41, 0xFF5A, +	0xFF66, 0xFFBE, +	0xFFC2, 0xFFC7, +	0xFFCA, 0xFFCF, +	0xFFD2, 0xFFD7, +	0xFFDA, 0xFFDC, +	0x10300, 0x1031E, +	0x10320, 0x10323, +	0x10330, 0x1034A, +	0x10400, 0x10425, +	0x10428, 0x1044D, +	0x1D000, 0x1D0F5, +	0x1D100, 0x1D126, +	0x1D12A, 0x1D166, +	0x1D16A, 0x1D172, +	0x1D183, 0x1D184, +	0x1D18C, 0x1D1A9, +	0x1D1AE, 0x1D1DD, +	0x1D400, 0x1D454, +	0x1D456, 0x1D49C, +	0x1D49E, 0x1D49F, +	0x1D4A2, 0x1D4A2, +	0x1D4A5, 0x1D4A6, +	0x1D4A9, 0x1D4AC, +	0x1D4AE, 0x1D4B9, +	0x1D4BB, 0x1D4BB, +	0x1D4BD, 0x1D4C0, +	0x1D4C2, 0x1D4C3, +	0x1D4C5, 0x1D505, +	0x1D507, 0x1D50A, +	0x1D50D, 0x1D514, +	0x1D516, 0x1D51C, +	0x1D51E, 0x1D539, +	0x1D53B, 0x1D53E, +	0x1D540, 0x1D544, +	0x1D546, 0x1D546, +	0x1D54A, 0x1D550, +	0x1D552, 0x1D6A3, +	0x1D6A8, 0x1D7C9, +	0x20000, 0x2A6D6, +	
0x2F800, 0x2FA1D, +	0xF0000, 0xFFFFD, +	0x100000, 0x10FFFD +}; + +/* End of stringprep tables */ + + +/* Is the given Unicode codepoint in the given table of ranges? */ +#define IS_CODE_IN_TABLE(code, map) is_code_in_table(code, map, lengthof(map)) + +static int +codepoint_range_cmp(const void *a, const void *b) +{ +	const pg_wchar *key = (const pg_wchar *) a; +	const pg_wchar *range = (const pg_wchar *) b; + +	if (*key < range[0]) +		return -1;				/* less than lower bound */ +	if (*key > range[1]) +		return 1;				/* greater than upper bound */ + +	return 0;					/* within range */ +} + +static bool +is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize) +{ +	Assert(mapsize % 2 == 0); + +	if (code < map[0] || code > map[mapsize - 1]) +		return false; + +	if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2, +				codepoint_range_cmp)) +		return true; +	else +		return false; +} + +/* + * Calculate the length in characters of a null-terminated UTF-8 string. + * + * Returns -1 if the input is not valid UTF-8. + */ +static int +pg_utf8_string_len(const char *source) +{ +	const unsigned char *p = (const unsigned char *) source; +	int			l; +	int			num_chars = 0; + +	while (*p) +	{ +		l = pg_utf_mblen(p); + +		if (!pg_utf8_islegal(p, l)) +			return -1; + +		p += l; +		num_chars++; +	} + +	return num_chars; +} + + +/* + * pg_saslprep - Normalize a password with SASLprep. + * + * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL + * supports many encodings, so we don't blindly assume that.  pg_saslprep + * will check if the input looks like valid UTF-8, and returns + * SASLPREP_INVALID_UTF8 if not. + * + * If the string contains prohibited characters (or more precisely, if the + * output string would contain prohibited characters after normalization), + * returns SASLPREP_PROHIBITED. + * + * On success, returns SASLPREP_SUCCESS, and the normalized string in + * *output. 
+ * + * In frontend, the normalized string is malloc'd, and the caller is + * responsible for freeing it.  If an allocation fails, returns + * SASLPREP_OOM.  In backend, the normalized string is palloc'd instead, + * and a failed allocation leads to ereport(ERROR). + */ +pg_saslprep_rc +pg_saslprep(const char *input, char **output) +{ +	pg_wchar   *input_chars = NULL; +	pg_wchar   *output_chars = NULL; +	int			input_size; +	char	   *result; +	int			result_size; +	int			count; +	int			i; +	bool		contains_RandALCat; +	unsigned char *p; +	pg_wchar   *wp; + +	/* Ensure we return *output as NULL on failure */ +	*output = NULL; + +	/* +	 * Quick check if the input is pure ASCII.  An ASCII string requires no +	 * further processing. +	 */ +	if (pg_is_ascii(input)) +	{ +		*output = STRDUP(input); +		if (!(*output)) +			goto oom; +		return SASLPREP_SUCCESS; +	} + +	/* +	 * Convert the input from UTF-8 to an array of Unicode codepoints. +	 * +	 * This also checks that the input is a legal UTF-8 string. +	 */ +	input_size = pg_utf8_string_len(input); +	if (input_size < 0) +		return SASLPREP_INVALID_UTF8; + +	input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar)); +	if (!input_chars) +		goto oom; + +	p = (unsigned char *) input; +	for (i = 0; i < input_size; i++) +	{ +		input_chars[i] = utf8_to_unicode(p); +		p += pg_utf_mblen(p); +	} +	input_chars[i] = (pg_wchar) '\0'; + +	/* +	 * The steps below correspond to the steps listed in [RFC3454], Section +	 * "2. Preparation Overview" +	 */ + +	/* +	 * 1) Map -- For each character in the input, check if it has a mapping +	 * and, if so, replace it with its mapping. 
+	 */ +	count = 0; +	for (i = 0; i < input_size; i++) +	{ +		pg_wchar	code = input_chars[i]; + +		if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges)) +			input_chars[count++] = 0x0020; +		else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges)) +		{ +			/* map to nothing */ +		} +		else +			input_chars[count++] = code; +	} +	input_chars[count] = (pg_wchar) '\0'; +	input_size = count; + +	if (input_size == 0) +		goto prohibited;		/* don't allow empty password */ + +	/* +	 * 2) Normalize -- Normalize the result of step 1 using Unicode +	 * normalization. +	 */ +	output_chars = unicode_normalize(UNICODE_NFKC, input_chars); +	if (!output_chars) +		goto oom; + +	/* +	 * 3) Prohibit -- Check for any characters that are not allowed in the +	 * output.  If any are found, return an error. +	 */ +	for (i = 0; i < input_size; i++) +	{ +		pg_wchar	code = input_chars[i]; + +		if (IS_CODE_IN_TABLE(code, prohibited_output_ranges)) +			goto prohibited; +		if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges)) +			goto prohibited; +	} + +	/* +	 * 4) Check bidi -- Possibly check for right-to-left characters, and if +	 * any are found, make sure that the whole string satisfies the +	 * requirements for bidirectional strings.  If the string does not satisfy +	 * the requirements for bidirectional strings, return an error. +	 * +	 * [RFC3454], Section "6. Bidirectional Characters" explains in more +	 * detail what that means: +	 * +	 * "In any profile that specifies bidirectional character handling, all +	 * three of the following requirements MUST be met: +	 * +	 * 1) The characters in section 5.8 MUST be prohibited. +	 * +	 * 2) If a string contains any RandALCat character, the string MUST NOT +	 * contain any LCat character. +	 * +	 * 3) If a string contains any RandALCat character, a RandALCat character +	 * MUST be the first character of the string, and a RandALCat character +	 * MUST be the last character of the string." 
+	 */ +	contains_RandALCat = false; +	for (i = 0; i < input_size; i++) +	{ +		pg_wchar	code = input_chars[i]; + +		if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges)) +		{ +			contains_RandALCat = true; +			break; +		} +	} + +	if (contains_RandALCat) +	{ +		pg_wchar	first = input_chars[0]; +		pg_wchar	last = input_chars[input_size - 1]; + +		for (i = 0; i < input_size; i++) +		{ +			pg_wchar	code = input_chars[i]; + +			if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges)) +				goto prohibited; +		} + +		if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) || +			!IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges)) +			goto prohibited; +	} + +	/* +	 * Finally, convert the result back to UTF-8. +	 */ +	result_size = 0; +	for (wp = output_chars; *wp; wp++) +	{ +		unsigned char buf[4]; + +		unicode_to_utf8(*wp, buf); +		result_size += pg_utf_mblen(buf); +	} + +	result = ALLOC(result_size + 1); +	if (!result) +		goto oom; + +	/* +	 * There are no error exits below here, so the error exit paths don't need +	 * to worry about possibly freeing "result". 
+	 */ +	p = (unsigned char *) result; +	for (wp = output_chars; *wp; wp++) +	{ +		unicode_to_utf8(*wp, p); +		p += pg_utf_mblen(p); +	} +	Assert((char *) p == result + result_size); +	*p = '\0'; + +	FREE(input_chars); +	FREE(output_chars); + +	*output = result; +	return SASLPREP_SUCCESS; + +prohibited: +	if (input_chars) +		FREE(input_chars); +	if (output_chars) +		FREE(output_chars); + +	return SASLPREP_PROHIBITED; + +oom: +	if (input_chars) +		FREE(input_chars); +	if (output_chars) +		FREE(output_chars); + +	return SASLPREP_OOM; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/scram-common.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/scram-common.c new file mode 100644 index 00000000000..6448564a08c --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/scram-common.c @@ -0,0 +1,330 @@ +/*------------------------------------------------------------------------- + * scram-common.c + *		Shared frontend/backend code for SCRAM authentication + * + * This contains the common low-level functions needed in both frontend and + * backend, for implement the Salted Challenge Response Authentication + * Mechanism (SCRAM), per IETF's RFC 5802. + * + * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/scram-common.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/base64.h" +#include "common/hmac.h" +#include "common/scram-common.h" +#ifndef FRONTEND +#include "miscadmin.h" +#endif +#include "port/pg_bswap.h" + +/* + * Calculate SaltedPassword. + * + * The password should already be normalized by SASLprep.  Returns 0 on + * success, -1 on failure with *errstr pointing to a message about the + * error details. 
+ */ +int +scram_SaltedPassword(const char *password, +					 pg_cryptohash_type hash_type, int key_length, +					 const char *salt, int saltlen, int iterations, +					 uint8 *result, const char **errstr) +{ +	int			password_len = strlen(password); +	uint32		one = pg_hton32(1); +	int			i, +				j; +	uint8		Ui[SCRAM_MAX_KEY_LEN]; +	uint8		Ui_prev[SCRAM_MAX_KEY_LEN]; +	pg_hmac_ctx *hmac_ctx = pg_hmac_create(hash_type); + +	if (hmac_ctx == NULL) +	{ +		*errstr = pg_hmac_error(NULL);	/* returns OOM */ +		return -1; +	} + +	/* +	 * Iterate hash calculation of HMAC entry using given salt.  This is +	 * essentially PBKDF2 (see RFC2898) with HMAC() as the pseudorandom +	 * function. +	 */ + +	/* First iteration */ +	if (pg_hmac_init(hmac_ctx, (uint8 *) password, password_len) < 0 || +		pg_hmac_update(hmac_ctx, (uint8 *) salt, saltlen) < 0 || +		pg_hmac_update(hmac_ctx, (uint8 *) &one, sizeof(uint32)) < 0 || +		pg_hmac_final(hmac_ctx, Ui_prev, key_length) < 0) +	{ +		*errstr = pg_hmac_error(hmac_ctx); +		pg_hmac_free(hmac_ctx); +		return -1; +	} + +	memcpy(result, Ui_prev, key_length); + +	/* Subsequent iterations */ +	for (i = 2; i <= iterations; i++) +	{ +#ifndef FRONTEND +		/* +		 * Make sure that this is interruptible as scram_iterations could be +		 * set to a large value. +		 */ +		CHECK_FOR_INTERRUPTS(); +#endif + +		if (pg_hmac_init(hmac_ctx, (uint8 *) password, password_len) < 0 || +			pg_hmac_update(hmac_ctx, (uint8 *) Ui_prev, key_length) < 0 || +			pg_hmac_final(hmac_ctx, Ui, key_length) < 0) +		{ +			*errstr = pg_hmac_error(hmac_ctx); +			pg_hmac_free(hmac_ctx); +			return -1; +		} + +		for (j = 0; j < key_length; j++) +			result[j] ^= Ui[j]; +		memcpy(Ui_prev, Ui, key_length); +	} + +	pg_hmac_free(hmac_ctx); +	return 0; +} + + +/* + * Calculate hash for a NULL-terminated string. (The NULL terminator is + * not included in the hash).  Returns 0 on success, -1 on failure with *errstr + * pointing to a message about the error details. 
+ */ +int +scram_H(const uint8 *input, pg_cryptohash_type hash_type, int key_length, +		uint8 *result, const char **errstr) +{ +	pg_cryptohash_ctx *ctx; + +	ctx = pg_cryptohash_create(hash_type); +	if (ctx == NULL) +	{ +		*errstr = pg_cryptohash_error(NULL);	/* returns OOM */ +		return -1; +	} + +	if (pg_cryptohash_init(ctx) < 0 || +		pg_cryptohash_update(ctx, input, key_length) < 0 || +		pg_cryptohash_final(ctx, result, key_length) < 0) +	{ +		*errstr = pg_cryptohash_error(ctx); +		pg_cryptohash_free(ctx); +		return -1; +	} + +	pg_cryptohash_free(ctx); +	return 0; +} + +/* + * Calculate ClientKey.  Returns 0 on success, -1 on failure with *errstr + * pointing to a message about the error details. + */ +int +scram_ClientKey(const uint8 *salted_password, +				pg_cryptohash_type hash_type, int key_length, +				uint8 *result, const char **errstr) +{ +	pg_hmac_ctx *ctx = pg_hmac_create(hash_type); + +	if (ctx == NULL) +	{ +		*errstr = pg_hmac_error(NULL);	/* returns OOM */ +		return -1; +	} + +	if (pg_hmac_init(ctx, salted_password, key_length) < 0 || +		pg_hmac_update(ctx, (uint8 *) "Client Key", strlen("Client Key")) < 0 || +		pg_hmac_final(ctx, result, key_length) < 0) +	{ +		*errstr = pg_hmac_error(ctx); +		pg_hmac_free(ctx); +		return -1; +	} + +	pg_hmac_free(ctx); +	return 0; +} + +/* + * Calculate ServerKey.  Returns 0 on success, -1 on failure with *errstr + * pointing to a message about the error details. 
+ */ +int +scram_ServerKey(const uint8 *salted_password, +				pg_cryptohash_type hash_type, int key_length, +				uint8 *result, const char **errstr) +{ +	pg_hmac_ctx *ctx = pg_hmac_create(hash_type); + +	if (ctx == NULL) +	{ +		*errstr = pg_hmac_error(NULL);	/* returns OOM */ +		return -1; +	} + +	if (pg_hmac_init(ctx, salted_password, key_length) < 0 || +		pg_hmac_update(ctx, (uint8 *) "Server Key", strlen("Server Key")) < 0 || +		pg_hmac_final(ctx, result, key_length) < 0) +	{ +		*errstr = pg_hmac_error(ctx); +		pg_hmac_free(ctx); +		return -1; +	} + +	pg_hmac_free(ctx); +	return 0; +} + + +/* + * Construct a SCRAM secret, for storing in pg_authid.rolpassword. + * + * The password should already have been processed with SASLprep, if necessary! + * + * If iterations is 0, default number of iterations is used.  The result is + * palloc'd or malloc'd, so caller is responsible for freeing it. + * + * On error, returns NULL and sets *errstr to point to a message about the + * error details. 
+ */ +char * +scram_build_secret(pg_cryptohash_type hash_type, int key_length, +				   const char *salt, int saltlen, int iterations, +				   const char *password, const char **errstr) +{ +	uint8		salted_password[SCRAM_MAX_KEY_LEN]; +	uint8		stored_key[SCRAM_MAX_KEY_LEN]; +	uint8		server_key[SCRAM_MAX_KEY_LEN]; +	char	   *result; +	char	   *p; +	int			maxlen; +	int			encoded_salt_len; +	int			encoded_stored_len; +	int			encoded_server_len; +	int			encoded_result; + +	/* Only this hash method is supported currently */ +	Assert(hash_type == PG_SHA256); + +	Assert(iterations > 0); + +	/* Calculate StoredKey and ServerKey */ +	if (scram_SaltedPassword(password, hash_type, key_length, +							 salt, saltlen, iterations, +							 salted_password, errstr) < 0 || +		scram_ClientKey(salted_password, hash_type, key_length, +						stored_key, errstr) < 0 || +		scram_H(stored_key, hash_type, key_length, +				stored_key, errstr) < 0 || +		scram_ServerKey(salted_password, hash_type, key_length, +						server_key, errstr) < 0) +	{ +		/* errstr is filled already here */ +#ifdef FRONTEND +		return NULL; +#else +		elog(ERROR, "could not calculate stored key and server key: %s", +			 *errstr); +#endif +	} + +	/*---------- +	 * The format is: +	 * SCRAM-SHA-256$<iteration count>:<salt>$<StoredKey>:<ServerKey> +	 *---------- +	 */ +	encoded_salt_len = pg_b64_enc_len(saltlen); +	encoded_stored_len = pg_b64_enc_len(key_length); +	encoded_server_len = pg_b64_enc_len(key_length); + +	maxlen = strlen("SCRAM-SHA-256") + 1 +		+ 10 + 1				/* iteration count */ +		+ encoded_salt_len + 1	/* Base64-encoded salt */ +		+ encoded_stored_len + 1	/* Base64-encoded StoredKey */ +		+ encoded_server_len + 1;	/* Base64-encoded ServerKey */ + +#ifdef FRONTEND +	result = malloc(maxlen); +	if (!result) +	{ +		*errstr = _("out of memory"); +		return NULL; +	} +#else +	result = palloc(maxlen); +#endif + +	p = result + sprintf(result, "SCRAM-SHA-256$%d:", iterations); + +	/* salt */ +	encoded_result = 
pg_b64_encode(salt, saltlen, p, encoded_salt_len); +	if (encoded_result < 0) +	{ +		*errstr = _("could not encode salt"); +#ifdef FRONTEND +		free(result); +		return NULL; +#else +		elog(ERROR, "%s", *errstr); +#endif +	} +	p += encoded_result; +	*(p++) = '$'; + +	/* stored key */ +	encoded_result = pg_b64_encode((char *) stored_key, key_length, p, +								   encoded_stored_len); +	if (encoded_result < 0) +	{ +		*errstr = _("could not encode stored key"); +#ifdef FRONTEND +		free(result); +		return NULL; +#else +		elog(ERROR, "%s", *errstr); +#endif +	} + +	p += encoded_result; +	*(p++) = ':'; + +	/* server key */ +	encoded_result = pg_b64_encode((char *) server_key, key_length, p, +								   encoded_server_len); +	if (encoded_result < 0) +	{ +		*errstr = _("could not encode server key"); +#ifdef FRONTEND +		free(result); +		return NULL; +#else +		elog(ERROR, "%s", *errstr); +#endif +	} + +	p += encoded_result; +	*(p++) = '\0'; + +	Assert(p - result <= maxlen); + +	return result; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/sha1_int.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/sha1_int.h new file mode 100644 index 00000000000..0ec2c69a063 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/sha1_int.h @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * sha1_int.h + *	  Internal headers for fallback implementation of SHA1 + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *		  src/common/sha1_int.h + * + *------------------------------------------------------------------------- + */ + +/*	   $KAME: sha1.h,v 1.4 2000/02/22 14:01:18 itojun Exp $    */ + +/* + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *	  notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *	  notice, this list of conditions and the following disclaimer in the + *	  documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + *	  may be used to endorse or promote products derived from this software + *	  without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +/* + * FIPS pub 180-1: Secure Hash Algorithm (SHA-1) + * based on: http://www.itl.nist.gov/fipspubs/fip180-1.htm + * implemented by Jun-ichiro itojun Itoh <[email protected]> + */ + +#ifndef PG_SHA1_INT_H +#define PG_SHA1_INT_H + +#include "common/sha1.h" + +typedef struct +{ +	union +	{ +		uint8		b8[20]; +		uint32		b32[5]; +	}			h; +	union +	{ +		uint8		b8[8]; +		uint64		b64[1]; +	}			c; +	union +	{ +		uint8		b8[64]; +		uint32		b32[16]; +	}			m; +	uint8		count; +} pg_sha1_ctx; + +/* Interface routines for SHA1 */ +extern void pg_sha1_init(pg_sha1_ctx *ctx); +extern void pg_sha1_update(pg_sha1_ctx *ctx, const uint8 *data, size_t len); +extern void pg_sha1_final(pg_sha1_ctx *ctx, uint8 *dest); + +#endif							/* PG_SHA1_INT_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/sha2_int.h b/yql/essentials/parser/pg_wrapper/postgresql/src/common/sha2_int.h new file mode 100644 index 00000000000..954e5d78dcc --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/sha2_int.h @@ -0,0 +1,91 @@ +/*------------------------------------------------------------------------- + * + * sha2_int.h + *	  Internal headers for fallback implementation of SHA{224,256,384,512} + * + * Portions Copyright (c) 2016-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *		  src/common/sha2_int.h + * + *------------------------------------------------------------------------- + */ + +/* $OpenBSD: sha2.h,v 1.2 2004/04/28 23:11:57 millert Exp $ */ + +/* + * FILE:	sha2.h + * AUTHOR:	Aaron D. Gifford <[email protected]> + * + * Copyright (c) 2000-2001, Aaron D. Gifford + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *	  notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + *	  notice, this list of conditions and the following disclaimer in the + *	  documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + *	  may be used to endorse or promote products derived from this software + *	  without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $From: sha2.h,v 1.1 2001/11/08 00:02:01 adg Exp adg $ + */ + +#ifndef PG_SHA2_INT_H +#define PG_SHA2_INT_H + +#include "common/sha2.h" + +typedef struct pg_sha256_ctx +{ +	uint32		state[8]; +	uint64		bitcount; +	uint8		buffer[PG_SHA256_BLOCK_LENGTH]; +} pg_sha256_ctx; +typedef struct pg_sha512_ctx +{ +	uint64		state[8]; +	uint64		bitcount[2]; +	uint8		buffer[PG_SHA512_BLOCK_LENGTH]; +} pg_sha512_ctx; +typedef struct pg_sha256_ctx pg_sha224_ctx; +typedef struct pg_sha512_ctx pg_sha384_ctx; + +/* Interface routines for SHA224/256/384/512 */ +extern void pg_sha224_init(pg_sha224_ctx *ctx); +extern void pg_sha224_update(pg_sha224_ctx *ctx, const uint8 *input0, +							 size_t len); +extern void pg_sha224_final(pg_sha224_ctx *ctx, uint8 *dest); + +extern void pg_sha256_init(pg_sha256_ctx *ctx); +extern void pg_sha256_update(pg_sha256_ctx *ctx, const uint8 *input0, +							 size_t len); +extern void pg_sha256_final(pg_sha256_ctx *ctx, uint8 *dest); + +extern void pg_sha384_init(pg_sha384_ctx *ctx); +extern void pg_sha384_update(pg_sha384_ctx *ctx, +							 const uint8 *, size_t len); +extern void pg_sha384_final(pg_sha384_ctx *ctx, uint8 *dest); + +extern void pg_sha512_init(pg_sha512_ctx *ctx); +extern void pg_sha512_update(pg_sha512_ctx *ctx, const uint8 *input0, +							 size_t len); +extern void pg_sha512_final(pg_sha512_ctx *ctx, uint8 *dest); + +#endif							/* PG_SHA2_INT_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/string.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/string.c new file mode 100644 index 00000000000..de974136355 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/string.c @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------- + * + * string.c + *		string handling helpers + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * 
IDENTIFICATION + *	  src/common/string.c + * + *------------------------------------------------------------------------- + */ + + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/string.h" +#include "lib/stringinfo.h" + + +/* + * Returns whether the string `str' has the postfix `end'. + */ +bool +pg_str_endswith(const char *str, const char *end) +{ +	size_t		slen = strlen(str); +	size_t		elen = strlen(end); + +	/* can't be a postfix if longer */ +	if (elen > slen) +		return false; + +	/* compare the end of the strings */ +	str += slen - elen; +	return strcmp(str, end) == 0; +} + + +/* + * strtoint --- just like strtol, but returns int not long + */ +int +strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base) +{ +	long		val; + +	val = strtol(str, endptr, base); +	if (val != (int) val) +		errno = ERANGE; +	return (int) val; +} + + +/* + * pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string + * + * Makes a newly allocated copy of the string passed in, which must be + * '\0'-terminated. In the backend, additional alloc_flags may be provided and + * will be passed as-is to palloc_extended(); in the frontend, alloc_flags is + * ignored and the copy is malloc'd. + * + * This function exists specifically to deal with filtering out + * non-ASCII characters in a few places where the client can provide an almost + * arbitrary string (and it isn't checked to ensure it's a valid username or + * database name or similar) and we don't want to have control characters or other + * things ending up in the log file where server admins might end up with a + * messed up terminal when looking at them. + * + * In general, this function should NOT be used- instead, consider how to handle + * the string without needing to filter out the non-ASCII characters. 
+ * + * Ultimately, we'd like to improve the situation to not require replacing all + * non-ASCII but perform more intelligent filtering which would allow UTF or + * similar, but it's unclear exactly what we should allow, so stick to ASCII only + * for now. + */ +char * +pg_clean_ascii(const char *str, int alloc_flags) +{ +	size_t		dstlen; +	char	   *dst; +	const char *p; +	size_t		i = 0; + +	/* Worst case, each byte can become four bytes, plus a null terminator. */ +	dstlen = strlen(str) * 4 + 1; + +#ifdef FRONTEND +	dst = malloc(dstlen); +#else +	dst = palloc_extended(dstlen, alloc_flags); +#endif + +	if (!dst) +		return NULL; + +	for (p = str; *p != '\0'; p++) +	{ + +		/* Only allow clean ASCII chars in the string */ +		if (*p < 32 || *p > 126) +		{ +			Assert(i < (dstlen - 3)); +			snprintf(&dst[i], dstlen - i, "\\x%02x", (unsigned char) *p); +			i += 4; +		} +		else +		{ +			Assert(i < dstlen); +			dst[i] = *p; +			i++; +		} +	} + +	Assert(i < dstlen); +	dst[i] = '\0'; +	return dst; +} + + +/* + * pg_is_ascii -- Check if string is made only of ASCII characters + */ +bool +pg_is_ascii(const char *str) +{ +	while (*str) +	{ +		if (IS_HIGHBIT_SET(*str)) +			return false; +		str++; +	} +	return true; +} + + +/* + * pg_strip_crlf -- Remove any trailing newline and carriage return + * + * Removes any trailing newline and carriage return characters (\r on + * Windows) in the input string, zero-terminating it. + * + * The passed in string must be zero-terminated.  This function returns + * the new length of the string. 
+ */ +int +pg_strip_crlf(char *str) +{ +	int			len = strlen(str); + +	while (len > 0 && (str[len - 1] == '\n' || +					   str[len - 1] == '\r')) +		str[--len] = '\0'; + +	return len; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/stringinfo.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/stringinfo.c new file mode 100644 index 00000000000..05b22b5c53c --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/stringinfo.c @@ -0,0 +1,343 @@ +/*------------------------------------------------------------------------- + * + * stringinfo.c + * + * StringInfo provides an extensible string data type (currently limited to a + * length of 1GB).  It can be used to buffer either ordinary C strings + * (null-terminated text) or arbitrary binary data.  All storage is allocated + * with palloc() (falling back to malloc in frontend code). + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + *	  src/common/stringinfo.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND + +#include "postgres.h" +#include "utils/memutils.h" + +#else + +#include "postgres_fe.h" + +/* It's possible we could use a different value for this in frontend code */ +#define MaxAllocSize	((Size) 0x3fffffff) /* 1 gigabyte - 1 */ + +#endif + +#include "lib/stringinfo.h" + + +/* + * makeStringInfo + * + * Create an empty 'StringInfoData' & return a pointer to it. + */ +StringInfo +makeStringInfo(void) +{ +	StringInfo	res; + +	res = (StringInfo) palloc(sizeof(StringInfoData)); + +	initStringInfo(res); + +	return res; +} + +/* + * initStringInfo + * + * Initialize a StringInfoData struct (with previously undefined contents) + * to describe an empty string. 
+ */ +void +initStringInfo(StringInfo str) +{ +	int			size = 1024;	/* initial default buffer size */ + +	str->data = (char *) palloc(size); +	str->maxlen = size; +	resetStringInfo(str); +} + +/* + * resetStringInfo + * + * Reset the StringInfo: the data buffer remains valid, but its + * previous content, if any, is cleared. + */ +void +resetStringInfo(StringInfo str) +{ +	str->data[0] = '\0'; +	str->len = 0; +	str->cursor = 0; +} + +/* + * appendStringInfo + * + * Format text data under the control of fmt (an sprintf-style format string) + * and append it to whatever is already in str.  More space is allocated + * to str if necessary.  This is sort of like a combination of sprintf and + * strcat. + */ +void +appendStringInfo(StringInfo str, const char *fmt,...) +{ +	int			save_errno = errno; + +	for (;;) +	{ +		va_list		args; +		int			needed; + +		/* Try to format the data. */ +		errno = save_errno; +		va_start(args, fmt); +		needed = appendStringInfoVA(str, fmt, args); +		va_end(args); + +		if (needed == 0) +			break;				/* success */ + +		/* Increase the buffer size and try again. */ +		enlargeStringInfo(str, needed); +	} +} + +/* + * appendStringInfoVA + * + * Attempt to format text data under the control of fmt (an sprintf-style + * format string) and append it to whatever is already in str.  If successful + * return zero; if not (because there's not enough space), return an estimate + * of the space needed, without modifying str.  Typically the caller should + * pass the return value to enlargeStringInfo() before trying again; see + * appendStringInfo for standard usage pattern. + * + * Caution: callers must be sure to preserve their entry-time errno + * when looping, in case the fmt contains "%m". + * + * XXX This API is ugly, but there seems no alternative given the C spec's + * restrictions on what can portably be done with va_list arguments: you have + * to redo va_start before you can rescan the argument list, and we can't do + * that from here. 
+ */ +int +appendStringInfoVA(StringInfo str, const char *fmt, va_list args) +{ +	int			avail; +	size_t		nprinted; + +	Assert(str != NULL); + +	/* +	 * If there's hardly any space, don't bother trying, just fail to make the +	 * caller enlarge the buffer first.  We have to guess at how much to +	 * enlarge, since we're skipping the formatting work. +	 */ +	avail = str->maxlen - str->len; +	if (avail < 16) +		return 32; + +	nprinted = pvsnprintf(str->data + str->len, (size_t) avail, fmt, args); + +	if (nprinted < (size_t) avail) +	{ +		/* Success.  Note nprinted does not include trailing null. */ +		str->len += (int) nprinted; +		return 0; +	} + +	/* Restore the trailing null so that str is unmodified. */ +	str->data[str->len] = '\0'; + +	/* +	 * Return pvsnprintf's estimate of the space needed.  (Although this is +	 * given as a size_t, we know it will fit in int because it's not more +	 * than MaxAllocSize.) +	 */ +	return (int) nprinted; +} + +/* + * appendStringInfoString + * + * Append a null-terminated string to str. + * Like appendStringInfo(str, "%s", s) but faster. + */ +void +appendStringInfoString(StringInfo str, const char *s) +{ +	appendBinaryStringInfo(str, s, strlen(s)); +} + +/* + * appendStringInfoChar + * + * Append a single byte to str. + * Like appendStringInfo(str, "%c", ch) but much faster. + */ +void +appendStringInfoChar(StringInfo str, char ch) +{ +	/* Make more room if needed */ +	if (str->len + 1 >= str->maxlen) +		enlargeStringInfo(str, 1); + +	/* OK, append the character */ +	str->data[str->len] = ch; +	str->len++; +	str->data[str->len] = '\0'; +} + +/* + * appendStringInfoSpaces + * + * Append the specified number of spaces to a buffer. 
+ */ +void +appendStringInfoSpaces(StringInfo str, int count) +{ +	if (count > 0) +	{ +		/* Make more room if needed */ +		enlargeStringInfo(str, count); + +		/* OK, append the spaces */ +		memset(&str->data[str->len], ' ', count); +		str->len += count; +		str->data[str->len] = '\0'; +	} +} + +/* + * appendBinaryStringInfo + * + * Append arbitrary binary data to a StringInfo, allocating more space + * if necessary. Ensures that a trailing null byte is present. + */ +void +appendBinaryStringInfo(StringInfo str, const void *data, int datalen) +{ +	Assert(str != NULL); + +	/* Make more room if needed */ +	enlargeStringInfo(str, datalen); + +	/* OK, append the data */ +	memcpy(str->data + str->len, data, datalen); +	str->len += datalen; + +	/* +	 * Keep a trailing null in place, even though it's probably useless for +	 * binary data.  (Some callers are dealing with text but call this because +	 * their input isn't null-terminated.) +	 */ +	str->data[str->len] = '\0'; +} + +/* + * appendBinaryStringInfoNT + * + * Append arbitrary binary data to a StringInfo, allocating more space + * if necessary. Does not ensure a trailing null-byte exists. + */ +void +appendBinaryStringInfoNT(StringInfo str, const void *data, int datalen) +{ +	Assert(str != NULL); + +	/* Make more room if needed */ +	enlargeStringInfo(str, datalen); + +	/* OK, append the data */ +	memcpy(str->data + str->len, data, datalen); +	str->len += datalen; +} + +/* + * enlargeStringInfo + * + * Make sure there is enough space for 'needed' more bytes + * ('needed' does not include the terminating null). + * + * External callers usually need not concern themselves with this, since + * all stringinfo.c routines do it automatically.  However, if a caller + * knows that a StringInfo will eventually become X bytes large, it + * can save some palloc overhead by enlarging the buffer before starting + * to store data in it. 
+ * + * NB: In the backend, because we use repalloc() to enlarge the buffer, the + * string buffer will remain allocated in the same memory context that was + * current when initStringInfo was called, even if another context is now + * current.  This is the desired and indeed critical behavior! + */ +void +enlargeStringInfo(StringInfo str, int needed) +{ +	int			newlen; + +	/* +	 * Guard against out-of-range "needed" values.  Without this, we can get +	 * an overflow or infinite loop in the following. +	 */ +	if (needed < 0)				/* should not happen */ +	{ +#ifndef FRONTEND +		elog(ERROR, "invalid string enlargement request size: %d", needed); +#else +		fprintf(stderr, "invalid string enlargement request size: %d\n", needed); +		exit(EXIT_FAILURE); +#endif +	} +	if (((Size) needed) >= (MaxAllocSize - (Size) str->len)) +	{ +#ifndef FRONTEND +		ereport(ERROR, +				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), +				 errmsg("out of memory"), +				 errdetail("Cannot enlarge string buffer containing %d bytes by %d more bytes.", +						   str->len, needed))); +#else +		fprintf(stderr, +				_("out of memory\n\nCannot enlarge string buffer containing %d bytes by %d more bytes.\n"), +				str->len, needed); +		exit(EXIT_FAILURE); +#endif +	} + +	needed += str->len + 1;		/* total space required now */ + +	/* Because of the above test, we now have needed <= MaxAllocSize */ + +	if (needed <= str->maxlen) +		return;					/* got enough space already */ + +	/* +	 * We don't want to allocate just a little more space with each append; +	 * for efficiency, double the buffer size each time it overflows. +	 * Actually, we might need to more than double it if 'needed' is big... +	 */ +	newlen = 2 * str->maxlen; +	while (needed > newlen) +		newlen = 2 * newlen; + +	/* +	 * Clamp to MaxAllocSize in case we went past it.  Note we are assuming +	 * here that MaxAllocSize <= INT_MAX/2, else the above loop could +	 * overflow.  We will still have newlen >= needed. 
+	 */ +	if (newlen > (int) MaxAllocSize) +		newlen = (int) MaxAllocSize; + +	str->data = (char *) repalloc(str->data, newlen); + +	str->maxlen = newlen; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/unicode_norm.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/unicode_norm.c new file mode 100644 index 00000000000..da6728605be --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/unicode_norm.c @@ -0,0 +1,634 @@ +/*------------------------------------------------------------------------- + * unicode_norm.c + *		Normalize a Unicode string + * + * This implements Unicode normalization, per the documentation at + * https://www.unicode.org/reports/tr15/. + * + * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/unicode_norm.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/unicode_norm.h" +#ifndef FRONTEND +#include "common/unicode_norm_hashfunc.h" +#include "common/unicode_normprops_table.h" +#include "port/pg_bswap.h" +#else +#include "common/unicode_norm_table.h" +#endif + +#ifndef FRONTEND +#define ALLOC(size) palloc(size) +#define FREE(size) pfree(size) +#else +#define ALLOC(size) malloc(size) +#define FREE(size) free(size) +#endif + +/* Constants for calculations with Hangul characters */ +#define SBASE		0xAC00		/* U+AC00 */ +#define LBASE		0x1100		/* U+1100 */ +#define VBASE		0x1161		/* U+1161 */ +#define TBASE		0x11A7		/* U+11A7 */ +#define LCOUNT		19 +#define VCOUNT		21 +#define TCOUNT		28 +#define NCOUNT		VCOUNT * TCOUNT +#define SCOUNT		LCOUNT * NCOUNT + +#ifdef FRONTEND +/* comparison routine for bsearch() of decomposition lookup table. 
*/ +static int +conv_compare(const void *p1, const void *p2) +{ +	uint32		v1, +				v2; + +	v1 = *(const uint32 *) p1; +	v2 = ((const pg_unicode_decomposition *) p2)->codepoint; +	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1); +} + +#endif + +/* + * get_code_entry + * + * Get the entry corresponding to code in the decomposition lookup table. + * The backend version of this code uses a perfect hash function for the + * lookup, while the frontend version uses a binary search. + */ +static const pg_unicode_decomposition * +get_code_entry(pg_wchar code) +{ +#ifndef FRONTEND +	int			h; +	uint32		hashkey; +	pg_unicode_decompinfo decompinfo = UnicodeDecompInfo; + +	/* +	 * Compute the hash function. The hash key is the codepoint with the bytes +	 * in network order. +	 */ +	hashkey = pg_hton32(code); +	h = decompinfo.hash(&hashkey); + +	/* An out-of-range result implies no match */ +	if (h < 0 || h >= decompinfo.num_decomps) +		return NULL; + +	/* +	 * Since it's a perfect hash, we need only match to the specific codepoint +	 * it identifies. +	 */ +	if (code != decompinfo.decomps[h].codepoint) +		return NULL; + +	/* Success! */ +	return &decompinfo.decomps[h]; +#else +	return bsearch(&(code), +				   UnicodeDecompMain, +				   lengthof(UnicodeDecompMain), +				   sizeof(pg_unicode_decomposition), +				   conv_compare); +#endif +} + +/* + * Get the combining class of the given codepoint. + */ +static uint8 +get_canonical_class(pg_wchar code) +{ +	const pg_unicode_decomposition *entry = get_code_entry(code); + +	/* +	 * If no entries are found, the character used is either an Hangul +	 * character or a character with a class of 0 and no decompositions. +	 */ +	if (!entry) +		return 0; +	else +		return entry->comb_class; +} + +/* + * Given a decomposition entry looked up earlier, get the decomposed + * characters. + * + * Note: the returned pointer can point to statically allocated buffer, and + * is only valid until next call to this function! 
+ */ +static const pg_wchar * +get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size) +{ +	static __thread pg_wchar x; + +	if (DECOMPOSITION_IS_INLINE(entry)) +	{ +		Assert(DECOMPOSITION_SIZE(entry) == 1); +		x = (pg_wchar) entry->dec_index; +		*dec_size = 1; +		return &x; +	} +	else +	{ +		*dec_size = DECOMPOSITION_SIZE(entry); +		return &UnicodeDecomp_codepoints[entry->dec_index]; +	} +} + +/* + * Calculate how many characters a given character will decompose to. + * + * This needs to recurse, if the character decomposes into characters that + * are, in turn, decomposable. + */ +static int +get_decomposed_size(pg_wchar code, bool compat) +{ +	const pg_unicode_decomposition *entry; +	int			size = 0; +	int			i; +	const uint32 *decomp; +	int			dec_size; + +	/* +	 * Fast path for Hangul characters not stored in tables to save memory as +	 * decomposition is algorithmic. See +	 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details +	 * on the matter. +	 */ +	if (code >= SBASE && code < SBASE + SCOUNT) +	{ +		uint32		tindex, +					sindex; + +		sindex = code - SBASE; +		tindex = sindex % TCOUNT; + +		if (tindex != 0) +			return 3; +		return 2; +	} + +	entry = get_code_entry(code); + +	/* +	 * Just count current code if no other decompositions.  A NULL entry is +	 * equivalent to a character with class 0 and no decompositions. +	 */ +	if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 || +		(!compat && DECOMPOSITION_IS_COMPAT(entry))) +		return 1; + +	/* +	 * If this entry has other decomposition codes look at them as well. First +	 * get its decomposition in the list of tables available. +	 */ +	decomp = get_code_decomposition(entry, &dec_size); +	for (i = 0; i < dec_size; i++) +	{ +		uint32		lcode = decomp[i]; + +		size += get_decomposed_size(lcode, compat); +	} + +	return size; +} + +/* + * Recompose a set of characters. For hangul characters, the calculation + * is algorithmic. 
For others, an inverse lookup at the decomposition + * table is necessary. Returns true if a recomposition can be done, and + * false otherwise. + */ +static bool +recompose_code(uint32 start, uint32 code, uint32 *result) +{ +	/* +	 * Handle Hangul characters algorithmically, per the Unicode spec. +	 * +	 * Check if two current characters are L and V. +	 */ +	if (start >= LBASE && start < LBASE + LCOUNT && +		code >= VBASE && code < VBASE + VCOUNT) +	{ +		/* make syllable of form LV */ +		uint32		lindex = start - LBASE; +		uint32		vindex = code - VBASE; + +		*result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT; +		return true; +	} +	/* Check if two current characters are LV and T */ +	else if (start >= SBASE && start < (SBASE + SCOUNT) && +			 ((start - SBASE) % TCOUNT) == 0 && +			 code >= TBASE && code < (TBASE + TCOUNT)) +	{ +		/* make syllable of form LVT */ +		uint32		tindex = code - TBASE; + +		*result = start + tindex; +		return true; +	} +	else +	{ +		const pg_unicode_decomposition *entry; + +		/* +		 * Do an inverse lookup of the decomposition tables to see if anything +		 * matches. The comparison just needs to be a perfect match on the +		 * sub-table of size two, because the start character has already been +		 * recomposed partially.  This lookup uses a perfect hash function for +		 * the backend code. +		 */ +#ifndef FRONTEND + +		int			h, +					inv_lookup_index; +		uint64		hashkey; +		pg_unicode_recompinfo recompinfo = UnicodeRecompInfo; + +		/* +		 * Compute the hash function. The hash key is formed by concatenating +		 * bytes of the two codepoints in network order. See also +		 * src/common/unicode/generate-unicode_norm_table.pl. 
+		 */ +		hashkey = pg_hton64(((uint64) start << 32) | (uint64) code); +		h = recompinfo.hash(&hashkey); + +		/* An out-of-range result implies no match */ +		if (h < 0 || h >= recompinfo.num_recomps) +			return false; + +		inv_lookup_index = recompinfo.inverse_lookup[h]; +		entry = &UnicodeDecompMain[inv_lookup_index]; + +		if (start == UnicodeDecomp_codepoints[entry->dec_index] && +			code == UnicodeDecomp_codepoints[entry->dec_index + 1]) +		{ +			*result = entry->codepoint; +			return true; +		} + +#else + +		int			i; + +		for (i = 0; i < lengthof(UnicodeDecompMain); i++) +		{ +			entry = &UnicodeDecompMain[i]; + +			if (DECOMPOSITION_SIZE(entry) != 2) +				continue; + +			if (DECOMPOSITION_NO_COMPOSE(entry)) +				continue; + +			if (start == UnicodeDecomp_codepoints[entry->dec_index] && +				code == UnicodeDecomp_codepoints[entry->dec_index + 1]) +			{ +				*result = entry->codepoint; +				return true; +			} +		} +#endif							/* !FRONTEND */ +	} + +	return false; +} + +/* + * Decompose the given code into the array given by caller. The + * decomposition begins at the position given by caller, saving one + * lookup on the decomposition table. The current position needs to be + * updated here to let the caller know from where to continue filling + * in the array result. + */ +static void +decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current) +{ +	const pg_unicode_decomposition *entry; +	int			i; +	const uint32 *decomp; +	int			dec_size; + +	/* +	 * Fast path for Hangul characters not stored in tables to save memory as +	 * decomposition is algorithmic. See +	 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details +	 * on the matter. 
+	 */ +	if (code >= SBASE && code < SBASE + SCOUNT) +	{ +		uint32		l, +					v, +					tindex, +					sindex; +		pg_wchar   *res = *result; + +		sindex = code - SBASE; +		l = LBASE + sindex / (VCOUNT * TCOUNT); +		v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT; +		tindex = sindex % TCOUNT; + +		res[*current] = l; +		(*current)++; +		res[*current] = v; +		(*current)++; + +		if (tindex != 0) +		{ +			res[*current] = TBASE + tindex; +			(*current)++; +		} + +		return; +	} + +	entry = get_code_entry(code); + +	/* +	 * Just fill in with the current decomposition if there are no +	 * decomposition codes to recurse to.  A NULL entry is equivalent to a +	 * character with class 0 and no decompositions, so just leave also in +	 * this case. +	 */ +	if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 || +		(!compat && DECOMPOSITION_IS_COMPAT(entry))) +	{ +		pg_wchar   *res = *result; + +		res[*current] = code; +		(*current)++; +		return; +	} + +	/* +	 * If this entry has other decomposition codes look at them as well. +	 */ +	decomp = get_code_decomposition(entry, &dec_size); +	for (i = 0; i < dec_size; i++) +	{ +		pg_wchar	lcode = (pg_wchar) decomp[i]; + +		/* Leave if no more decompositions */ +		decompose_code(lcode, compat, result, current); +	} +} + +/* + * unicode_normalize - Normalize a Unicode string to the specified form. + * + * The input is a 0-terminated array of codepoints. + * + * In frontend, returns a 0-terminated array of codepoints, allocated with + * malloc. Or NULL if we run out of memory. In backend, the returned + * string is palloc'd instead, and OOM is reported with ereport(). 
+ */ +pg_wchar * +unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input) +{ +	bool		compat = (form == UNICODE_NFKC || form == UNICODE_NFKD); +	bool		recompose = (form == UNICODE_NFC || form == UNICODE_NFKC); +	pg_wchar   *decomp_chars; +	pg_wchar   *recomp_chars; +	int			decomp_size, +				current_size; +	int			count; +	const pg_wchar *p; + +	/* variables for recomposition */ +	int			last_class; +	int			starter_pos; +	int			target_pos; +	uint32		starter_ch; + +	/* First, do character decomposition */ + +	/* +	 * Calculate how many characters long the decomposed version will be. +	 */ +	decomp_size = 0; +	for (p = input; *p; p++) +		decomp_size += get_decomposed_size(*p, compat); + +	decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); +	if (decomp_chars == NULL) +		return NULL; + +	/* +	 * Now fill in each entry recursively. This needs a second pass on the +	 * decomposition table. +	 */ +	current_size = 0; +	for (p = input; *p; p++) +		decompose_code(*p, compat, &decomp_chars, &current_size); +	decomp_chars[decomp_size] = '\0'; +	Assert(decomp_size == current_size); + +	/* Leave if there is nothing to decompose */ +	if (decomp_size == 0) +		return decomp_chars; + +	/* +	 * Now apply canonical ordering. +	 */ +	for (count = 1; count < decomp_size; count++) +	{ +		pg_wchar	prev = decomp_chars[count - 1]; +		pg_wchar	next = decomp_chars[count]; +		pg_wchar	tmp; +		const uint8 prevClass = get_canonical_class(prev); +		const uint8 nextClass = get_canonical_class(next); + +		/* +		 * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html) +		 * annex 4, a sequence of two adjacent characters in a string is an +		 * exchangeable pair if the combining class (from the Unicode +		 * Character Database) for the first character is greater than the +		 * combining class for the second, and the second is not a starter.  A +		 * character is a starter if its combining class is 0. 
+		 */ +		if (prevClass == 0 || nextClass == 0) +			continue; + +		if (prevClass <= nextClass) +			continue; + +		/* exchange can happen */ +		tmp = decomp_chars[count - 1]; +		decomp_chars[count - 1] = decomp_chars[count]; +		decomp_chars[count] = tmp; + +		/* backtrack to check again */ +		if (count > 1) +			count -= 2; +	} + +	if (!recompose) +		return decomp_chars; + +	/* +	 * The last phase of NFC and NFKC is the recomposition of the reordered +	 * Unicode string using combining classes. The recomposed string cannot be +	 * longer than the decomposed one, so make the allocation of the output +	 * string based on that assumption. +	 */ +	recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); +	if (!recomp_chars) +	{ +		FREE(decomp_chars); +		return NULL; +	} + +	last_class = -1;			/* this eliminates a special check */ +	starter_pos = 0; +	target_pos = 1; +	starter_ch = recomp_chars[0] = decomp_chars[0]; + +	for (count = 1; count < decomp_size; count++) +	{ +		pg_wchar	ch = decomp_chars[count]; +		int			ch_class = get_canonical_class(ch); +		pg_wchar	composite; + +		if (last_class < ch_class && +			recompose_code(starter_ch, ch, &composite)) +		{ +			recomp_chars[starter_pos] = composite; +			starter_ch = composite; +		} +		else if (ch_class == 0) +		{ +			starter_pos = target_pos; +			starter_ch = ch; +			last_class = -1; +			recomp_chars[target_pos++] = ch; +		} +		else +		{ +			last_class = ch_class; +			recomp_chars[target_pos++] = ch; +		} +	} +	recomp_chars[target_pos] = (pg_wchar) '\0'; + +	FREE(decomp_chars); + +	return recomp_chars; +} + +/* + * Normalization "quick check" algorithm; see + * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms> + */ + +/* We only need this in the backend. */ +#ifndef FRONTEND + +static const pg_unicode_normprops * +qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo) +{ +	int			h; +	uint32		hashkey; + +	/* +	 * Compute the hash function. 
The hash key is the codepoint with the bytes +	 * in network order. +	 */ +	hashkey = pg_hton32(ch); +	h = norminfo->hash(&hashkey); + +	/* An out-of-range result implies no match */ +	if (h < 0 || h >= norminfo->num_normprops) +		return NULL; + +	/* +	 * Since it's a perfect hash, we need only match to the specific codepoint +	 * it identifies. +	 */ +	if (ch != norminfo->normprops[h].codepoint) +		return NULL; + +	/* Success! */ +	return &norminfo->normprops[h]; +} + +/* + * Look up the normalization quick check character property + */ +static UnicodeNormalizationQC +qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch) +{ +	const pg_unicode_normprops *found = NULL; + +	switch (form) +	{ +		case UNICODE_NFC: +			found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC); +			break; +		case UNICODE_NFKC: +			found = qc_hash_lookup(ch, &UnicodeNormInfo_NFKC_QC); +			break; +		default: +			Assert(false); +			break; +	} + +	if (found) +		return found->quickcheck; +	else +		return UNICODE_NORM_QC_YES; +} + +UnicodeNormalizationQC +unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input) +{ +	uint8		lastCanonicalClass = 0; +	UnicodeNormalizationQC result = UNICODE_NORM_QC_YES; + +	/* +	 * For the "D" forms, we don't run the quickcheck.  We don't include the +	 * lookup tables for those because they are huge, checking for these +	 * particular forms is less common, and running the slow path is faster +	 * for the "D" forms than the "C" forms because you don't need to +	 * recompose, which is slow. 
+	 */ +	if (form == UNICODE_NFD || form == UNICODE_NFKD) +		return UNICODE_NORM_QC_MAYBE; + +	for (const pg_wchar *p = input; *p; p++) +	{ +		pg_wchar	ch = *p; +		uint8		canonicalClass; +		UnicodeNormalizationQC check; + +		canonicalClass = get_canonical_class(ch); +		if (lastCanonicalClass > canonicalClass && canonicalClass != 0) +			return UNICODE_NORM_QC_NO; + +		check = qc_is_allowed(form, ch); +		if (check == UNICODE_NORM_QC_NO) +			return UNICODE_NORM_QC_NO; +		else if (check == UNICODE_NORM_QC_MAYBE) +			result = UNICODE_NORM_QC_MAYBE; + +		lastCanonicalClass = canonicalClass; +	} +	return result; +} + +#endif							/* !FRONTEND */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/username.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/username.c new file mode 100644 index 00000000000..e8ac4c49770 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/username.c @@ -0,0 +1,87 @@ +/*------------------------------------------------------------------------- + * + * username.c + *	  get user name + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + *	  src/common/username.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <pwd.h> +#include <unistd.h> + +#include "common/username.h" + +/* + * Returns the current user name in a static buffer + * On error, returns NULL and sets *errstr to point to a palloc'd message + */ +const char * +get_user_name(char **errstr) +{ +#ifndef WIN32 +	struct passwd *pw; +	uid_t		user_id = geteuid(); + +	*errstr = NULL; + +	errno = 0;					/* clear errno before call */ +	pw = getpwuid(user_id); +	if (!pw) +	{ +		*errstr = psprintf(_("could not look up effective user ID %ld: %s"), +						   (long) user_id, +						   errno ? 
strerror(errno) : _("user does not exist")); +		return NULL; +	} + +	return pw->pw_name; +#else +	/* Microsoft recommends buffer size of UNLEN+1, where UNLEN = 256 */ +	/* "static" variable remains after function exit */ +	static char username[256 + 1]; +	DWORD		len = sizeof(username); + +	*errstr = NULL; + +	if (!GetUserName(username, &len)) +	{ +		*errstr = psprintf(_("user name lookup failure: error code %lu"), +						   GetLastError()); +		return NULL; +	} + +	return username; +#endif +} + + +/* + * Returns the current user name in a static buffer or exits + */ +const char * +get_user_name_or_exit(const char *progname) +{ +	const char *user_name; +	char	   *errstr; + +	user_name = get_user_name(&errstr); + +	if (!user_name) +	{ +		fprintf(stderr, "%s: %s\n", progname, errstr); +		exit(1); +	} +	return user_name; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/wait_error.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wait_error.c new file mode 100644 index 00000000000..a90b745f077 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wait_error.c @@ -0,0 +1,148 @@ +/*------------------------------------------------------------------------- + * + * wait_error.c + *		Convert a wait/waitpid(2) result code to a human-readable string + * + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *	  src/common/wait_error.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <signal.h> +#include <sys/wait.h> + +/* + * Return a human-readable string explaining the reason a child process + * terminated. The argument is a return code returned by wait(2) or + * waitpid(2), which also applies to pclose(3) and system(3). 
The result is a + * translated, palloc'd or malloc'd string. + */ +char * +wait_result_to_str(int exitstatus) +{ +	char		str[512]; + +	/* +	 * To simplify using this after pclose() and system(), handle status -1 +	 * first.  In that case, there is no wait result but some error indicated +	 * by errno. +	 */ +	if (exitstatus == -1) +	{ +		snprintf(str, sizeof(str), "%m"); +	} +	else if (WIFEXITED(exitstatus)) +	{ +		/* +		 * Give more specific error message for some common exit codes that +		 * have a special meaning in shells. +		 */ +		switch (WEXITSTATUS(exitstatus)) +		{ +			case 126: +				snprintf(str, sizeof(str), _("command not executable")); +				break; + +			case 127: +				snprintf(str, sizeof(str), _("command not found")); +				break; + +			default: +				snprintf(str, sizeof(str), +						 _("child process exited with exit code %d"), +						 WEXITSTATUS(exitstatus)); +		} +	} +	else if (WIFSIGNALED(exitstatus)) +	{ +#if defined(WIN32) +		snprintf(str, sizeof(str), +				 _("child process was terminated by exception 0x%X"), +				 WTERMSIG(exitstatus)); +#else +		snprintf(str, sizeof(str), +				 _("child process was terminated by signal %d: %s"), +				 WTERMSIG(exitstatus), pg_strsignal(WTERMSIG(exitstatus))); +#endif +	} +	else +		snprintf(str, sizeof(str), +				 _("child process exited with unrecognized status %d"), +				 exitstatus); + +	return pstrdup(str); +} + +/* + * Return true if a wait(2) result indicates that the child process + * died due to the specified signal. + * + * The reason this is worth having a wrapper function for is that + * there are two cases: the signal might have been received by our + * immediate child process, or there might've been a shell process + * between us and the child that died.  The shell will, per POSIX, + * report the child death using exit code 128 + signal number. + * + * If there is no possibility of an intermediate shell, this function + * need not (and probably should not) be used. 
+ */ +bool +wait_result_is_signal(int exit_status, int signum) +{ +	if (WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum) +		return true; +	if (WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == 128 + signum) +		return true; +	return false; +} + +/* + * Return true if a wait(2) result indicates that the child process + * died due to any signal.  We consider either direct child death + * or a shell report of child process death as matching the condition. + * + * If include_command_not_found is true, also return true for shell + * exit codes indicating "command not found" and the like + * (specifically, exit codes 126 and 127; see above). + */ +bool +wait_result_is_any_signal(int exit_status, bool include_command_not_found) +{ +	if (WIFSIGNALED(exit_status)) +		return true; +	if (WIFEXITED(exit_status) && +		WEXITSTATUS(exit_status) > (include_command_not_found ? 125 : 128)) +		return true; +	return false; +} + +/* + * Return the shell exit code (normally 0 to 255) that corresponds to the + * given wait status.  The argument is a wait status as returned by wait(2) + * or waitpid(2), which also applies to pclose(3) and system(3).  To support + * the latter two cases, we pass through "-1" unchanged. 
+ */ +int +wait_result_to_exit_code(int exit_status) +{ +	if (exit_status == -1) +		return -1;				/* failure of pclose() or system() */ +	if (WIFEXITED(exit_status)) +		return WEXITSTATUS(exit_status); +	if (WIFSIGNALED(exit_status)) +		return 128 + WTERMSIG(exit_status); +	/* On many systems, this is unreachable */ +	return -1; +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c new file mode 100644 index 00000000000..fbac11deb4d --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c @@ -0,0 +1,2194 @@ +/*------------------------------------------------------------------------- + * + * wchar.c + *	  Functions for working with multibyte characters in various encodings. + * + * Portions Copyright (c) 1998-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + *	  src/common/wchar.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "mb/pg_wchar.h" +#include "utils/ascii.h" + + +/* + * Operations on multi-byte encodings are driven by a table of helper + * functions. + * + * To add an encoding support, define mblen(), dsplen(), verifychar() and + * verifystr() for the encoding.  For server-encodings, also define mb2wchar() + * and wchar2mb() conversion functions. + * + * These functions generally assume that their input is validly formed. + * The "verifier" functions, further down in the file, have to be more + * paranoid. + * + * We expect that mblen() does not need to examine more than the first byte + * of the character to discover the correct length.  GB18030 is an exception + * to that rule, though, as it also looks at second byte.  But even that + * behaves in a predictable way, if you only pass the first byte: it will + * treat 4-byte encoded characters as two 2-byte encoded characters, which is + * good enough for all current uses. 
+ * + * Note: for the display output of psql to work properly, the return values + * of the dsplen functions must conform to the Unicode standard. In particular + * the NUL character is zero width and control characters are generally + * width -1. It is recommended that non-ASCII encodings refer their ASCII + * subset to the ASCII routines to ensure consistency. + */ + +/* + * SQL/ASCII + */ +static int +pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		*to++ = *from++; +		len--; +		cnt++; +	} +	*to = 0; +	return cnt; +} + +static int +pg_ascii_mblen(const unsigned char *s) +{ +	return 1; +} + +static int +pg_ascii_dsplen(const unsigned char *s) +{ +	if (*s == '\0') +		return 0; +	if (*s < 0x20 || *s == 0x7f) +		return -1; + +	return 1; +} + +/* + * EUC + */ +static int +pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		if (*from == SS2 && len >= 2)	/* JIS X 0201 (so called "1 byte +										 * KANA") */ +		{ +			from++; +			*to = (SS2 << 8) | *from++; +			len -= 2; +		} +		else if (*from == SS3 && len >= 3)	/* JIS X 0212 KANJI */ +		{ +			from++; +			*to = (SS3 << 16) | (*from++ << 8); +			*to |= *from++; +			len -= 3; +		} +		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */ +		{ +			*to = *from++ << 8; +			*to |= *from++; +			len -= 2; +		} +		else					/* must be ASCII */ +		{ +			*to = *from++; +			len--; +		} +		to++; +		cnt++; +	} +	*to = 0; +	return cnt; +} + +static inline int +pg_euc_mblen(const unsigned char *s) +{ +	int			len; + +	if (*s == SS2) +		len = 2; +	else if (*s == SS3) +		len = 3; +	else if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = 1; +	return len; +} + +static inline int +pg_euc_dsplen(const unsigned char *s) +{ +	int			len; + +	if (*s == SS2) +		len = 2; +	else if (*s == SS3) +		len = 2; +	else if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = 
pg_ascii_dsplen(s); +	return len; +} + +/* + * EUC_JP + */ +static int +pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	return pg_euc2wchar_with_len(from, to, len); +} + +static int +pg_eucjp_mblen(const unsigned char *s) +{ +	return pg_euc_mblen(s); +} + +static int +pg_eucjp_dsplen(const unsigned char *s) +{ +	int			len; + +	if (*s == SS2) +		len = 1; +	else if (*s == SS3) +		len = 2; +	else if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = pg_ascii_dsplen(s); +	return len; +} + +/* + * EUC_KR + */ +static int +pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	return pg_euc2wchar_with_len(from, to, len); +} + +static int +pg_euckr_mblen(const unsigned char *s) +{ +	return pg_euc_mblen(s); +} + +static int +pg_euckr_dsplen(const unsigned char *s) +{ +	return pg_euc_dsplen(s); +} + +/* + * EUC_CN + * + */ +static int +pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		if (*from == SS2 && len >= 3)	/* code set 2 (unused?) */ +		{ +			from++; +			*to = (SS2 << 16) | (*from++ << 8); +			*to |= *from++; +			len -= 3; +		} +		else if (*from == SS3 && len >= 3)	/* code set 3 (unused ?) 
*/ +		{ +			from++; +			*to = (SS3 << 16) | (*from++ << 8); +			*to |= *from++; +			len -= 3; +		} +		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */ +		{ +			*to = *from++ << 8; +			*to |= *from++; +			len -= 2; +		} +		else +		{ +			*to = *from++; +			len--; +		} +		to++; +		cnt++; +	} +	*to = 0; +	return cnt; +} + +static int +pg_euccn_mblen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = 1; +	return len; +} + +static int +pg_euccn_dsplen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = pg_ascii_dsplen(s); +	return len; +} + +/* + * EUC_TW + * + */ +static int +pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		if (*from == SS2 && len >= 4)	/* code set 2 */ +		{ +			from++; +			*to = (((uint32) SS2) << 24) | (*from++ << 16); +			*to |= *from++ << 8; +			*to |= *from++; +			len -= 4; +		} +		else if (*from == SS3 && len >= 3)	/* code set 3 (unused?) */ +		{ +			from++; +			*to = (SS3 << 16) | (*from++ << 8); +			*to |= *from++; +			len -= 3; +		} +		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */ +		{ +			*to = *from++ << 8; +			*to |= *from++; +			len -= 2; +		} +		else +		{ +			*to = *from++; +			len--; +		} +		to++; +		cnt++; +	} +	*to = 0; +	return cnt; +} + +static int +pg_euctw_mblen(const unsigned char *s) +{ +	int			len; + +	if (*s == SS2) +		len = 4; +	else if (*s == SS3) +		len = 3; +	else if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = 1; +	return len; +} + +static int +pg_euctw_dsplen(const unsigned char *s) +{ +	int			len; + +	if (*s == SS2) +		len = 2; +	else if (*s == SS3) +		len = 2; +	else if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = pg_ascii_dsplen(s); +	return len; +} + +/* + * Convert pg_wchar to EUC_* encoding. + * caller must allocate enough space for "to", including a trailing zero! + * len: length of from. 
+ * "from" not necessarily null terminated. + */ +static int +pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		unsigned char c; + +		if ((c = (*from >> 24))) +		{ +			*to++ = c; +			*to++ = (*from >> 16) & 0xff; +			*to++ = (*from >> 8) & 0xff; +			*to++ = *from & 0xff; +			cnt += 4; +		} +		else if ((c = (*from >> 16))) +		{ +			*to++ = c; +			*to++ = (*from >> 8) & 0xff; +			*to++ = *from & 0xff; +			cnt += 3; +		} +		else if ((c = (*from >> 8))) +		{ +			*to++ = c; +			*to++ = *from & 0xff; +			cnt += 2; +		} +		else +		{ +			*to++ = *from; +			cnt++; +		} +		from++; +		len--; +	} +	*to = 0; +	return cnt; +} + + +/* + * JOHAB + */ +static int +pg_johab_mblen(const unsigned char *s) +{ +	return pg_euc_mblen(s); +} + +static int +pg_johab_dsplen(const unsigned char *s) +{ +	return pg_euc_dsplen(s); +} + +/* + * convert UTF8 string to pg_wchar (UCS-4) + * caller must allocate enough space for "to", including a trailing zero! + * len: length of from. + * "from" not necessarily null terminated. 
+ */ +static int +pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	int			cnt = 0; +	uint32		c1, +				c2, +				c3, +				c4; + +	while (len > 0 && *from) +	{ +		if ((*from & 0x80) == 0) +		{ +			*to = *from++; +			len--; +		} +		else if ((*from & 0xe0) == 0xc0) +		{ +			if (len < 2) +				break;			/* drop trailing incomplete char */ +			c1 = *from++ & 0x1f; +			c2 = *from++ & 0x3f; +			*to = (c1 << 6) | c2; +			len -= 2; +		} +		else if ((*from & 0xf0) == 0xe0) +		{ +			if (len < 3) +				break;			/* drop trailing incomplete char */ +			c1 = *from++ & 0x0f; +			c2 = *from++ & 0x3f; +			c3 = *from++ & 0x3f; +			*to = (c1 << 12) | (c2 << 6) | c3; +			len -= 3; +		} +		else if ((*from & 0xf8) == 0xf0) +		{ +			if (len < 4) +				break;			/* drop trailing incomplete char */ +			c1 = *from++ & 0x07; +			c2 = *from++ & 0x3f; +			c3 = *from++ & 0x3f; +			c4 = *from++ & 0x3f; +			*to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4; +			len -= 4; +		} +		else +		{ +			/* treat a bogus char as length 1; not ours to raise error */ +			*to = *from++; +			len--; +		} +		to++; +		cnt++; +	} +	*to = 0; +	return cnt; +} + + +/* + * Map a Unicode code point to UTF-8.  utf8string must have 4 bytes of + * space allocated. + */ +unsigned char * +unicode_to_utf8(pg_wchar c, unsigned char *utf8string) +{ +	if (c <= 0x7F) +	{ +		utf8string[0] = c; +	} +	else if (c <= 0x7FF) +	{ +		utf8string[0] = 0xC0 | ((c >> 6) & 0x1F); +		utf8string[1] = 0x80 | (c & 0x3F); +	} +	else if (c <= 0xFFFF) +	{ +		utf8string[0] = 0xE0 | ((c >> 12) & 0x0F); +		utf8string[1] = 0x80 | ((c >> 6) & 0x3F); +		utf8string[2] = 0x80 | (c & 0x3F); +	} +	else +	{ +		utf8string[0] = 0xF0 | ((c >> 18) & 0x07); +		utf8string[1] = 0x80 | ((c >> 12) & 0x3F); +		utf8string[2] = 0x80 | ((c >> 6) & 0x3F); +		utf8string[3] = 0x80 | (c & 0x3F); +	} + +	return utf8string; +} + +/* + * Trivial conversion from pg_wchar to UTF-8. + * caller should allocate enough space for "to" + * len: length of from. 
+ * "from" not necessarily null terminated. + */ +static int +pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		int			char_len; + +		unicode_to_utf8(*from, to); +		char_len = pg_utf_mblen(to); +		cnt += char_len; +		to += char_len; +		from++; +		len--; +	} +	*to = 0; +	return cnt; +} + +/* + * Return the byte length of a UTF8 character pointed to by s + * + * Note: in the current implementation we do not support UTF8 sequences + * of more than 4 bytes; hence do NOT return a value larger than 4. + * We return "1" for any leading byte that is either flat-out illegal or + * indicates a length larger than we support. + * + * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps + * other places would need to be fixed to change this. + */ +int +pg_utf_mblen(const unsigned char *s) +{ +	int			len; + +	if ((*s & 0x80) == 0) +		len = 1; +	else if ((*s & 0xe0) == 0xc0) +		len = 2; +	else if ((*s & 0xf0) == 0xe0) +		len = 3; +	else if ((*s & 0xf8) == 0xf0) +		len = 4; +#ifdef NOT_USED +	else if ((*s & 0xfc) == 0xf8) +		len = 5; +	else if ((*s & 0xfe) == 0xfc) +		len = 6; +#endif +	else +		len = 1; +	return len; +} + +/* + * This is an implementation of wcwidth() and wcswidth() as defined in + * "The Single UNIX Specification, Version 2, The Open Group, 1997" + * <http://www.unix.org/online.html> + * + * Markus Kuhn -- 2001-09-08 -- public domain + * + * customised for PostgreSQL + * + * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +struct mbinterval +{ +	unsigned int first; +	unsigned int last; +}; + +/* auxiliary function for binary search in interval table */ +static int +mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max) +{ +	int			min = 0; +	int			mid; + +	if (ucs < table[0].first || ucs > table[max].last) +		return 0; +	while (max >= min) +	{ +		mid = (min + max) / 2; +		if (ucs > table[mid].last) +			min = mid + 1; +		else if 
(ucs < table[mid].first) +			max = mid - 1; +		else +			return 1; +	} + +	return 0; +} + + +/* The following functions define the column width of an ISO 10646 + * character as follows: + * + *	  - The null character (U+0000) has a column width of 0. + * + *	  - Other C0/C1 control characters and DEL will lead to a return + *		value of -1. + * + *	  - Non-spacing and enclosing combining characters (general + *		category code Mn, Me or Cf in the Unicode database) have a + *		column width of 0. + * + *	  - Spacing characters in the East Asian Wide (W) or East Asian + *		FullWidth (F) category as defined in Unicode Technical + *		Report #11 have a column width of 2. + * + *	  - All remaining characters (including all printable + *		ISO 8859-1 and WGL4 characters, Unicode control characters, + *		etc.) have a column width of 1. + * + * This implementation assumes that wchar_t characters are encoded + * in ISO 10646. + */ + +static int +ucs_wcwidth(pg_wchar ucs) +{ +#include "common/unicode_nonspacing_table.h" +#include "common/unicode_east_asian_fw_table.h" + +	/* test for 8-bit control characters */ +	if (ucs == 0) +		return 0; + +	if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff) +		return -1; + +	/* +	 * binary search in table of non-spacing characters +	 * +	 * XXX: In the official Unicode sources, it is possible for a character to +	 * be described as both non-spacing and wide at the same time. As of +	 * Unicode 13.0, treating the non-spacing property as the determining +	 * factor for display width leads to the correct behavior, so do that +	 * search first. +	 */ +	if (mbbisearch(ucs, nonspacing, +				   sizeof(nonspacing) / sizeof(struct mbinterval) - 1)) +		return 0; + +	/* binary search in table of wide characters */ +	if (mbbisearch(ucs, east_asian_fw, +				   sizeof(east_asian_fw) / sizeof(struct mbinterval) - 1)) +		return 2; + +	return 1; +} + +/* + * Convert a UTF-8 character to a Unicode code point. 
+ * This is a one-character version of pg_utf2wchar_with_len. + * + * No error checks here, c must point to a long-enough string. + */ +pg_wchar +utf8_to_unicode(const unsigned char *c) +{ +	if ((*c & 0x80) == 0) +		return (pg_wchar) c[0]; +	else if ((*c & 0xe0) == 0xc0) +		return (pg_wchar) (((c[0] & 0x1f) << 6) | +						   (c[1] & 0x3f)); +	else if ((*c & 0xf0) == 0xe0) +		return (pg_wchar) (((c[0] & 0x0f) << 12) | +						   ((c[1] & 0x3f) << 6) | +						   (c[2] & 0x3f)); +	else if ((*c & 0xf8) == 0xf0) +		return (pg_wchar) (((c[0] & 0x07) << 18) | +						   ((c[1] & 0x3f) << 12) | +						   ((c[2] & 0x3f) << 6) | +						   (c[3] & 0x3f)); +	else +		/* that is an invalid code on purpose */ +		return 0xffffffff; +} + +static int +pg_utf_dsplen(const unsigned char *s) +{ +	return ucs_wcwidth(utf8_to_unicode(s)); +} + +/* + * convert mule internal code to pg_wchar + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		if (IS_LC1(*from) && len >= 2) +		{ +			*to = *from++ << 16; +			*to |= *from++; +			len -= 2; +		} +		else if (IS_LCPRV1(*from) && len >= 3) +		{ +			from++; +			*to = *from++ << 16; +			*to |= *from++; +			len -= 3; +		} +		else if (IS_LC2(*from) && len >= 3) +		{ +			*to = *from++ << 16; +			*to |= *from++ << 8; +			*to |= *from++; +			len -= 3; +		} +		else if (IS_LCPRV2(*from) && len >= 4) +		{ +			from++; +			*to = *from++ << 16; +			*to |= *from++ << 8; +			*to |= *from++; +			len -= 4; +		} +		else +		{						/* assume ASCII */ +			*to = (unsigned char) *from++; +			len--; +		} +		to++; +		cnt++; +	} +	*to = 0; +	return cnt; +} + +/* + * convert pg_wchar to mule internal code + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. 
+ */ +static int +pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		unsigned char lb; + +		lb = (*from >> 16) & 0xff; +		if (IS_LC1(lb)) +		{ +			*to++ = lb; +			*to++ = *from & 0xff; +			cnt += 2; +		} +		else if (IS_LC2(lb)) +		{ +			*to++ = lb; +			*to++ = (*from >> 8) & 0xff; +			*to++ = *from & 0xff; +			cnt += 3; +		} +		else if (IS_LCPRV1_A_RANGE(lb)) +		{ +			*to++ = LCPRV1_A; +			*to++ = lb; +			*to++ = *from & 0xff; +			cnt += 3; +		} +		else if (IS_LCPRV1_B_RANGE(lb)) +		{ +			*to++ = LCPRV1_B; +			*to++ = lb; +			*to++ = *from & 0xff; +			cnt += 3; +		} +		else if (IS_LCPRV2_A_RANGE(lb)) +		{ +			*to++ = LCPRV2_A; +			*to++ = lb; +			*to++ = (*from >> 8) & 0xff; +			*to++ = *from & 0xff; +			cnt += 4; +		} +		else if (IS_LCPRV2_B_RANGE(lb)) +		{ +			*to++ = LCPRV2_B; +			*to++ = lb; +			*to++ = (*from >> 8) & 0xff; +			*to++ = *from & 0xff; +			cnt += 4; +		} +		else +		{ +			*to++ = *from & 0xff; +			cnt += 1; +		} +		from++; +		len--; +	} +	*to = 0; +	return cnt; +} + +/* exported for direct use by conv.c */ +int +pg_mule_mblen(const unsigned char *s) +{ +	int			len; + +	if (IS_LC1(*s)) +		len = 2; +	else if (IS_LCPRV1(*s)) +		len = 3; +	else if (IS_LC2(*s)) +		len = 3; +	else if (IS_LCPRV2(*s)) +		len = 4; +	else +		len = 1;				/* assume ASCII */ +	return len; +} + +static int +pg_mule_dsplen(const unsigned char *s) +{ +	int			len; + +	/* +	 * Note: it's not really appropriate to assume that all multibyte charsets +	 * are double-wide on screen.  But this seems an okay approximation for +	 * the MULE charsets we currently support. 
+	 */ + +	if (IS_LC1(*s)) +		len = 1; +	else if (IS_LCPRV1(*s)) +		len = 1; +	else if (IS_LC2(*s)) +		len = 2; +	else if (IS_LCPRV2(*s)) +		len = 2; +	else +		len = 1;				/* assume ASCII */ + +	return len; +} + +/* + * ISO8859-1 + */ +static int +pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		*to++ = *from++; +		len--; +		cnt++; +	} +	*to = 0; +	return cnt; +} + +/* + * Trivial conversion from pg_wchar to single byte encoding. Just ignores + * high bits. + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. + */ +static int +pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len) +{ +	int			cnt = 0; + +	while (len > 0 && *from) +	{ +		*to++ = *from++; +		len--; +		cnt++; +	} +	*to = 0; +	return cnt; +} + +static int +pg_latin1_mblen(const unsigned char *s) +{ +	return 1; +} + +static int +pg_latin1_dsplen(const unsigned char *s) +{ +	return pg_ascii_dsplen(s); +} + +/* + * SJIS + */ +static int +pg_sjis_mblen(const unsigned char *s) +{ +	int			len; + +	if (*s >= 0xa1 && *s <= 0xdf) +		len = 1;				/* 1 byte kana? */ +	else if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* kanji? */ +	else +		len = 1;				/* should be ASCII */ +	return len; +} + +static int +pg_sjis_dsplen(const unsigned char *s) +{ +	int			len; + +	if (*s >= 0xa1 && *s <= 0xdf) +		len = 1;				/* 1 byte kana? */ +	else if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* kanji? */ +	else +		len = pg_ascii_dsplen(s);	/* should be ASCII */ +	return len; +} + +/* + * Big5 + */ +static int +pg_big5_mblen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* kanji? */ +	else +		len = 1;				/* should be ASCII */ +	return len; +} + +static int +pg_big5_dsplen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* kanji? 
*/ +	else +		len = pg_ascii_dsplen(s);	/* should be ASCII */ +	return len; +} + +/* + * GBK + */ +static int +pg_gbk_mblen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* kanji? */ +	else +		len = 1;				/* should be ASCII */ +	return len; +} + +static int +pg_gbk_dsplen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* kanji? */ +	else +		len = pg_ascii_dsplen(s);	/* should be ASCII */ +	return len; +} + +/* + * UHC + */ +static int +pg_uhc_mblen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* 2byte? */ +	else +		len = 1;				/* should be ASCII */ +	return len; +} + +static int +pg_uhc_dsplen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2;				/* 2byte? */ +	else +		len = pg_ascii_dsplen(s);	/* should be ASCII */ +	return len; +} + +/* + * GB18030 + *	Added by Bill Huang <[email protected]>,<[email protected]> + */ + +/* + * Unlike all other mblen() functions, this also looks at the second byte of + * the input.  However, if you only pass the first byte of a multi-byte + * string, and \0 as the second byte, this still works in a predictable way: + * a 4-byte character will be reported as two 2-byte characters.  That's + * enough for all current uses, as a client-only encoding.  It works that + * way, because in any valid 4-byte GB18030-encoded character, the third and + * fourth byte look like a 2-byte encoded character, when looked at + * separately. 
+ */ +static int +pg_gb18030_mblen(const unsigned char *s) +{ +	int			len; + +	if (!IS_HIGHBIT_SET(*s)) +		len = 1;				/* ASCII */ +	else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39) +		len = 4; +	else +		len = 2; +	return len; +} + +static int +pg_gb18030_dsplen(const unsigned char *s) +{ +	int			len; + +	if (IS_HIGHBIT_SET(*s)) +		len = 2; +	else +		len = pg_ascii_dsplen(s);	/* ASCII */ +	return len; +} + +/* + *------------------------------------------------------------------- + * multibyte sequence validators + * + * The verifychar functions accept "s", a pointer to the first byte of a + * string, and "len", the remaining length of the string.  If there is a + * validly encoded character beginning at *s, return its length in bytes; + * else return -1. + * + * The verifystr functions also accept "s", a pointer to a string and "len", + * the length of the string.  They verify the whole string, and return the + * number of input bytes (<= len) that are valid.  In other words, if the + * whole string is valid, verifystr returns "len", otherwise it returns the + * byte offset of the first invalid character.  The verifystr functions must + * test for and reject zeroes in the input. + * + * The verifychar functions can assume that len > 0 and that *s != '\0', but + * they must test for and reject zeroes in any additional bytes of a + * multibyte character.  Note that this definition allows the function for a + * single-byte encoding to be just "return 1". 
+ *------------------------------------------------------------------- + */ +static int +pg_ascii_verifychar(const unsigned char *s, int len) +{ +	return 1; +} + +static int +pg_ascii_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *nullpos = memchr(s, 0, len); + +	if (nullpos == NULL) +		return len; +	else +		return nullpos - s; +} + +#define IS_EUC_RANGE_VALID(c)	((c) >= 0xa1 && (c) <= 0xfe) + +static int +pg_eucjp_verifychar(const unsigned char *s, int len) +{ +	int			l; +	unsigned char c1, +				c2; + +	c1 = *s++; + +	switch (c1) +	{ +		case SS2:				/* JIS X 0201 */ +			l = 2; +			if (l > len) +				return -1; +			c2 = *s++; +			if (c2 < 0xa1 || c2 > 0xdf) +				return -1; +			break; + +		case SS3:				/* JIS X 0212 */ +			l = 3; +			if (l > len) +				return -1; +			c2 = *s++; +			if (!IS_EUC_RANGE_VALID(c2)) +				return -1; +			c2 = *s++; +			if (!IS_EUC_RANGE_VALID(c2)) +				return -1; +			break; + +		default: +			if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */ +			{ +				l = 2; +				if (l > len) +					return -1; +				if (!IS_EUC_RANGE_VALID(c1)) +					return -1; +				c2 = *s++; +				if (!IS_EUC_RANGE_VALID(c2)) +					return -1; +			} +			else +				/* must be ASCII */ +			{ +				l = 1; +			} +			break; +	} + +	return l; +} + +static int +pg_eucjp_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_eucjp_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int +pg_euckr_verifychar(const unsigned char *s, int len) +{ +	int			l; +	unsigned char c1, +				c2; + +	c1 = *s++; + +	if (IS_HIGHBIT_SET(c1)) +	{ +		l = 2; +		if (l > len) +			return -1; +		if (!IS_EUC_RANGE_VALID(c1)) +			return -1; +		c2 = *s++; +		if (!IS_EUC_RANGE_VALID(c2)) +			return -1; +	} +	else +		/* must be 
ASCII */ +	{ +		l = 1; +	} + +	return l; +} + +static int +pg_euckr_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_euckr_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +/* EUC-CN byte sequences are exactly same as EUC-KR */ +#define pg_euccn_verifychar	pg_euckr_verifychar +#define pg_euccn_verifystr	pg_euckr_verifystr + +static int +pg_euctw_verifychar(const unsigned char *s, int len) +{ +	int			l; +	unsigned char c1, +				c2; + +	c1 = *s++; + +	switch (c1) +	{ +		case SS2:				/* CNS 11643 Plane 1-7 */ +			l = 4; +			if (l > len) +				return -1; +			c2 = *s++; +			if (c2 < 0xa1 || c2 > 0xa7) +				return -1; +			c2 = *s++; +			if (!IS_EUC_RANGE_VALID(c2)) +				return -1; +			c2 = *s++; +			if (!IS_EUC_RANGE_VALID(c2)) +				return -1; +			break; + +		case SS3:				/* unused */ +			return -1; + +		default: +			if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */ +			{ +				l = 2; +				if (l > len) +					return -1; +				/* no further range check on c1? 
*/ +				c2 = *s++; +				if (!IS_EUC_RANGE_VALID(c2)) +					return -1; +			} +			else +				/* must be ASCII */ +			{ +				l = 1; +			} +			break; +	} +	return l; +} + +static int +pg_euctw_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_euctw_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int +pg_johab_verifychar(const unsigned char *s, int len) +{ +	int			l, +				mbl; +	unsigned char c; + +	l = mbl = pg_johab_mblen(s); + +	if (len < l) +		return -1; + +	if (!IS_HIGHBIT_SET(*s)) +		return mbl; + +	while (--l > 0) +	{ +		c = *++s; +		if (!IS_EUC_RANGE_VALID(c)) +			return -1; +	} +	return mbl; +} + +static int +pg_johab_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_johab_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int +pg_mule_verifychar(const unsigned char *s, int len) +{ +	int			l, +				mbl; +	unsigned char c; + +	l = mbl = pg_mule_mblen(s); + +	if (len < l) +		return -1; + +	while (--l > 0) +	{ +		c = *++s; +		if (!IS_HIGHBIT_SET(c)) +			return -1; +	} +	return mbl; +} + +static int +pg_mule_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_mule_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int 
+pg_latin1_verifychar(const unsigned char *s, int len) +{ +	return 1; +} + +static int +pg_latin1_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *nullpos = memchr(s, 0, len); + +	if (nullpos == NULL) +		return len; +	else +		return nullpos - s; +} + +static int +pg_sjis_verifychar(const unsigned char *s, int len) +{ +	int			l, +				mbl; +	unsigned char c1, +				c2; + +	l = mbl = pg_sjis_mblen(s); + +	if (len < l) +		return -1; + +	if (l == 1)					/* pg_sjis_mblen already verified it */ +		return mbl; + +	c1 = *s++; +	c2 = *s; +	if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2)) +		return -1; +	return mbl; +} + +static int +pg_sjis_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_sjis_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int +pg_big5_verifychar(const unsigned char *s, int len) +{ +	int			l, +				mbl; + +	l = mbl = pg_big5_mblen(s); + +	if (len < l) +		return -1; + +	while (--l > 0) +	{ +		if (*++s == '\0') +			return -1; +	} + +	return mbl; +} + +static int +pg_big5_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_big5_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int +pg_gbk_verifychar(const unsigned char *s, int len) +{ +	int			l, +				mbl; + +	l = mbl = pg_gbk_mblen(s); + +	if (len < l) +		return -1; + +	while (--l > 0) +	{ +		if (*++s == '\0') +			return -1; +	} + +	return mbl; +} + +static int +pg_gbk_verifystr(const unsigned char *s, int len) +{ +	const unsigned char 
*start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_gbk_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int +pg_uhc_verifychar(const unsigned char *s, int len) +{ +	int			l, +				mbl; + +	l = mbl = pg_uhc_mblen(s); + +	if (len < l) +		return -1; + +	while (--l > 0) +	{ +		if (*++s == '\0') +			return -1; +	} + +	return mbl; +} + +static int +pg_uhc_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_uhc_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +static int +pg_gb18030_verifychar(const unsigned char *s, int len) +{ +	int			l; + +	if (!IS_HIGHBIT_SET(*s)) +		l = 1;					/* ASCII */ +	else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39) +	{ +		/* Should be 4-byte, validate remaining bytes */ +		if (*s >= 0x81 && *s <= 0xfe && +			*(s + 2) >= 0x81 && *(s + 2) <= 0xfe && +			*(s + 3) >= 0x30 && *(s + 3) <= 0x39) +			l = 4; +		else +			l = -1; +	} +	else if (len >= 2 && *s >= 0x81 && *s <= 0xfe) +	{ +		/* Should be 2-byte, validate */ +		if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) || +			(*(s + 1) >= 0x80 && *(s + 1) <= 0xfe)) +			l = 2; +		else +			l = -1; +	} +	else +		l = -1; +	return l; +} + +static int +pg_gb18030_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; + +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_gb18030_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} 
+ +	return s - start; +} + +static int +pg_utf8_verifychar(const unsigned char *s, int len) +{ +	int			l; + +	if ((*s & 0x80) == 0) +	{ +		if (*s == '\0') +			return -1; +		return 1; +	} +	else if ((*s & 0xe0) == 0xc0) +		l = 2; +	else if ((*s & 0xf0) == 0xe0) +		l = 3; +	else if ((*s & 0xf8) == 0xf0) +		l = 4; +	else +		l = 1; + +	if (l > len) +		return -1; + +	if (!pg_utf8_islegal(s, l)) +		return -1; + +	return l; +} + +/* + * The fast path of the UTF-8 verifier uses a deterministic finite automaton + * (DFA) for multibyte characters. In a traditional table-driven DFA, the + * input byte and current state are used to compute an index into an array of + * state transitions. Since the address of the next transition is dependent + * on this computation, there is latency in executing the load instruction, + * and the CPU is not kept busy. + * + * Instead, we use a "shift-based" DFA as described by Per Vognsen: + * + * https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725 + * + * In a shift-based DFA, the input byte is an index into array of integers + * whose bit pattern encodes the state transitions. To compute the next + * state, we simply right-shift the integer by the current state and apply a + * mask. In this scheme, the address of the transition only depends on the + * input byte, so there is better pipelining. 
+ * + * The naming convention for states and transitions was adopted from a UTF-8 + * to UTF-16/32 transcoder, whose table is reproduced below: + * + * https://github.com/BobSteagall/utf_utils/blob/6b7a465265de2f5fa6133d653df0c9bdd73bbcf8/src/utf_utils.cpp + * + * ILL  ASC  CR1  CR2  CR3  L2A  L3A  L3B  L3C  L4A  L4B  L4C CLASS / STATE + * ========================================================================== + * err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B,      | BGN/END + * err, err, err, err, err, err, err, err, err, err, err, err,      | ERR + *                                                                  | + * err, err, END, END, END, err, err, err, err, err, err, err,      | CS1 + * err, err, CS1, CS1, CS1, err, err, err, err, err, err, err,      | CS2 + * err, err, CS2, CS2, CS2, err, err, err, err, err, err, err,      | CS3 + *                                                                  | + * err, err, err, err, CS1, err, err, err, err, err, err, err,      | P3A + * err, err, CS1, CS1, err, err, err, err, err, err, err, err,      | P3B + *                                                                  | + * err, err, err, CS2, CS2, err, err, err, err, err, err, err,      | P4A + * err, err, CS2, err, err, err, err, err, err, err, err, err,      | P4B + * + * In the most straightforward implementation, a shift-based DFA for UTF-8 + * requires 64-bit integers to encode the transitions, but with an SMT solver + * it's possible to find state numbers such that the transitions fit within + * 32-bit integers, as Dougall Johnson demonstrated: + * + * https://gist.github.com/dougallj/166e326de6ad4cf2c94be97a204c025f + * + * This packed representation is the reason for the seemingly odd choice of + * state values below. 
+ */ + +/* Error */ +#define	ERR  0 +/* Begin */ +#define	BGN 11 +/* Continuation states, expect 1/2/3 continuation bytes */ +#define	CS1 16 +#define	CS2  1 +#define	CS3  5 +/* Partial states, where the first continuation byte has a restricted range */ +#define	P3A  6					/* Lead was E0, check for 3-byte overlong */ +#define	P3B 20					/* Lead was ED, check for surrogate */ +#define	P4A 25					/* Lead was F0, check for 4-byte overlong */ +#define	P4B 30					/* Lead was F4, check for too-large */ +/* Begin and End are the same state */ +#define	END BGN + +/* the encoded state transitions for the lookup table */ + +/* ASCII */ +#define ASC (END << BGN) +/* 2-byte lead */ +#define L2A (CS1 << BGN) +/* 3-byte lead */ +#define L3A (P3A << BGN) +#define L3B (CS2 << BGN) +#define L3C (P3B << BGN) +/* 4-byte lead */ +#define L4A (P4A << BGN) +#define L4B (CS3 << BGN) +#define L4C (P4B << BGN) +/* continuation byte */ +#define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B) +#define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A) +#define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A) +/* invalid byte */ +#define ILL ERR + +static const uint32 Utf8Transition[256] = +{ +	/* ASCII */ + +	ILL, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, +	ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, + +	/* continuation bytes */ + +	/* 
80..8F */ +	CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, +	CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, + +	/* 90..9F */ +	CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, +	CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, + +	/* A0..BF */ +	CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, +	CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, +	CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, +	CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, + +	/* leading bytes */ + +	/* C0..DF */ +	ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A, +	L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, +	L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, +	L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, + +	/* E0..EF */ +	L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B, +	L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B, + +	/* F0..FF */ +	L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL, +	ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL +}; + +static void +utf8_advance(const unsigned char *s, uint32 *state, int len) +{ +	/* Note: We deliberately don't check the state's value here. */ +	while (len > 0) +	{ +		/* +		 * It's important that the mask value is 31: In most instruction sets, +		 * a shift by a 32-bit operand is understood to be a shift by its mod +		 * 32, so the compiler should elide the mask operation. +		 */ +		*state = Utf8Transition[*s++] >> (*state & 31); +		len--; +	} + +	*state &= 31; +} + +static int +pg_utf8_verifystr(const unsigned char *s, int len) +{ +	const unsigned char *start = s; +	const int	orig_len = len; +	uint32		state = BGN; + +/* + * With a stride of two vector widths, gcc will unroll the loop. Even if + * the compiler can unroll a longer loop, it's not worth it because we + * must fall back to the byte-wise algorithm if we find any non-ASCII. + */ +#define STRIDE_LENGTH (2 * sizeof(Vector8)) + +	if (len >= STRIDE_LENGTH) +	{ +		while (len >= STRIDE_LENGTH) +		{ +			/* +			 * If the chunk is all ASCII, we can skip the full UTF-8 check, +			 * but we must first check for a non-END state, which means the +			 * previous chunk ended in the middle of a multibyte sequence. 
+			 */ +			if (state != END || !is_valid_ascii(s, STRIDE_LENGTH)) +				utf8_advance(s, &state, STRIDE_LENGTH); + +			s += STRIDE_LENGTH; +			len -= STRIDE_LENGTH; +		} + +		/* The error state persists, so we only need to check for it here. */ +		if (state == ERR) +		{ +			/* +			 * Start over from the beginning with the slow path so we can +			 * count the valid bytes. +			 */ +			len = orig_len; +			s = start; +		} +		else if (state != END) +		{ +			/* +			 * The fast path exited in the middle of a multibyte sequence. +			 * Walk backwards to find the leading byte so that the slow path +			 * can resume checking from there. We must always backtrack at +			 * least one byte, since the current byte could be e.g. an ASCII +			 * byte after a 2-byte lead, which is invalid. +			 */ +			do +			{ +				Assert(s > start); +				s--; +				len++; +				Assert(IS_HIGHBIT_SET(*s)); +			} while (pg_utf_mblen(s) <= 1); +		} +	} + +	/* check remaining bytes */ +	while (len > 0) +	{ +		int			l; + +		/* fast path for ASCII-subset characters */ +		if (!IS_HIGHBIT_SET(*s)) +		{ +			if (*s == '\0') +				break; +			l = 1; +		} +		else +		{ +			l = pg_utf8_verifychar(s, len); +			if (l == -1) +				break; +		} +		s += l; +		len -= l; +	} + +	return s - start; +} + +/* + * Check for validity of a single UTF-8 encoded character + * + * This directly implements the rules in RFC3629.  The bizarre-looking + * restrictions on the second byte are meant to ensure that there isn't + * more than one encoding of a given Unicode character point; that is, + * you may not use a longer-than-necessary byte sequence with high order + * zero bits to represent a character that would fit in fewer bytes. + * To do otherwise is to create security hazards (eg, create an apparent + * non-ASCII character that decodes to plain ASCII). + * + * length is assumed to have been obtained by pg_utf_mblen(), and the + * caller must have checked that that many bytes are present in the buffer. 
+ */ +bool +pg_utf8_islegal(const unsigned char *source, int length) +{ +	unsigned char a; + +	switch (length) +	{ +		default: +			/* reject lengths 5 and 6 for now */ +			return false; +		case 4: +			a = source[3]; +			if (a < 0x80 || a > 0xBF) +				return false; +			/* FALL THRU */ +		case 3: +			a = source[2]; +			if (a < 0x80 || a > 0xBF) +				return false; +			/* FALL THRU */ +		case 2: +			a = source[1]; +			switch (*source) +			{ +				case 0xE0: +					if (a < 0xA0 || a > 0xBF) +						return false; +					break; +				case 0xED: +					if (a < 0x80 || a > 0x9F) +						return false; +					break; +				case 0xF0: +					if (a < 0x90 || a > 0xBF) +						return false; +					break; +				case 0xF4: +					if (a < 0x80 || a > 0x8F) +						return false; +					break; +				default: +					if (a < 0x80 || a > 0xBF) +						return false; +					break; +			} +			/* FALL THRU */ +		case 1: +			a = *source; +			if (a >= 0x80 && a < 0xC2) +				return false; +			if (a > 0xF4) +				return false; +			break; +	} +	return true; +} + + +/* + *------------------------------------------------------------------- + * encoding info table + * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h) + *------------------------------------------------------------------- + */ +const pg_wchar_tbl pg_wchar_table[] = { +	{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1},	/* PG_SQL_ASCII */ +	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},	/* PG_EUC_JP */ +	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2},	/* PG_EUC_CN */ +	{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3},	/* PG_EUC_KR */ +	{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, 
pg_euctw_verifychar, pg_euctw_verifystr, 4},	/* PG_EUC_TW */ +	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},	/* PG_EUC_JIS_2004 */ +	{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4},	/* PG_UTF8 */ +	{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4},	/* PG_MULE_INTERNAL */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN1 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN2 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN3 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN4 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN5 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN6 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN7 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN8 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_LATIN9 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, 
pg_latin1_verifystr, 1},	/* PG_LATIN10 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1256 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1258 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN866 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN874 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_KOI8R */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1251 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1252 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* ISO-8859-5 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* ISO-8859-6 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* ISO-8859-7 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* ISO-8859-8 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1250 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, 
pg_latin1_verifystr, 1},	/* PG_WIN1253 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1254 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1255 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_WIN1257 */ +	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},	/* PG_KOI8U */ +	{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2},	/* PG_SJIS */ +	{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifychar, pg_big5_verifystr, 2},	/* PG_BIG5 */ +	{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifychar, pg_gbk_verifystr, 2},	/* PG_GBK */ +	{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifychar, pg_uhc_verifystr, 2},	/* PG_UHC */ +	{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifychar, pg_gb18030_verifystr, 4},	/* PG_GB18030 */ +	{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifychar, pg_johab_verifystr, 3},	/* PG_JOHAB */ +	{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifychar, pg_sjis_verifystr, 2} /* PG_SHIFT_JIS_2004 */ +}; + +/* + * Returns the byte length of a multibyte character. + * + * Caution: when dealing with text that is not certainly valid in the + * specified encoding, the result may exceed the actual remaining + * string length.  Callers that are not prepared to deal with that + * should use pg_encoding_mblen_bounded() instead. + */ +int +pg_encoding_mblen(int encoding, const char *mbstr) +{ +	return (PG_VALID_ENCODING(encoding) ? 
+			pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) : +			pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr)); +} + +/* + * Returns the byte length of a multibyte character; but not more than + * the distance to end of string. + */ +int +pg_encoding_mblen_bounded(int encoding, const char *mbstr) +{ +	return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr)); +} + +/* + * Returns the display length of a multibyte character. + */ +int +pg_encoding_dsplen(int encoding, const char *mbstr) +{ +	return (PG_VALID_ENCODING(encoding) ? +			pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) : +			pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr)); +} + +/* + * Verify the first multibyte character of the given string. + * Return its byte length if good, -1 if bad.  (See comments above for + * full details of the mbverifychar API.) + */ +int +pg_encoding_verifymbchar(int encoding, const char *mbstr, int len) +{ +	return (PG_VALID_ENCODING(encoding) ? +			pg_wchar_table[encoding].mbverifychar((const unsigned char *) mbstr, len) : +			pg_wchar_table[PG_SQL_ASCII].mbverifychar((const unsigned char *) mbstr, len)); +} + +/* + * Verify that a string is valid for the given encoding. + * Returns the number of input bytes (<= len) that form a valid string. + * (See comments above for full details of the mbverifystr API.) + */ +int +pg_encoding_verifymbstr(int encoding, const char *mbstr, int len) +{ +	return (PG_VALID_ENCODING(encoding) ? +			pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len) : +			pg_wchar_table[PG_SQL_ASCII].mbverifystr((const unsigned char *) mbstr, len)); +} + +/* + * fetch maximum length of a given encoding + */ +int +pg_encoding_max_length(int encoding) +{ +	Assert(PG_VALID_ENCODING(encoding)); + +	return pg_wchar_table[encoding].maxmblen; +}  | 
