diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-02-21 10:09:19 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-02-21 10:19:17 +0300 |
commit | 31031664807af57b64c42818e69cf4fafdcd6b75 (patch) | |
tree | 0c3c5008d7c14d6eefe21d4e6237e22625516282 | |
parent | a42e56882cf1b2f89af4a517a8e25fe3a67cebfc (diff) | |
download | ydb-31031664807af57b64c42818e69cf4fafdcd6b75.tar.gz |
Intermediate changes
21 files changed, 1116 insertions, 309 deletions
diff --git a/contrib/libs/libpq/COPYRIGHT b/contrib/libs/libpq/COPYRIGHT index d33e0f599d..16f9b46a3f 100644 --- a/contrib/libs/libpq/COPYRIGHT +++ b/contrib/libs/libpq/COPYRIGHT @@ -1,7 +1,7 @@ PostgreSQL Database Management System (formerly known as Postgres, then as Postgres95) -Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group +Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group Portions Copyright (c) 1994, The Regents of the University of California diff --git a/contrib/libs/libpq/src/common/scram-common.c b/contrib/libs/libpq/src/common/scram-common.c index ef997ef684..22ad929218 100644 --- a/contrib/libs/libpq/src/common/scram-common.c +++ b/contrib/libs/libpq/src/common/scram-common.c @@ -22,6 +22,9 @@ #include "common/base64.h" #include "common/hmac.h" #include "common/scram-common.h" +#ifndef FRONTEND +#error #include "miscadmin.h" +#endif #include "port/pg_bswap.h" /* @@ -73,6 +76,14 @@ scram_SaltedPassword(const char *password, /* Subsequent iterations */ for (i = 2; i <= iterations; i++) { +#ifndef FRONTEND + /* + * Make sure that this is interruptible as scram_iterations could be + * set to a large value. + */ + CHECK_FOR_INTERRUPTS(); +#endif + if (pg_hmac_init(hmac_ctx, (uint8 *) password, password_len) < 0 || pg_hmac_update(hmac_ctx, (uint8 *) Ui_prev, key_length) < 0 || pg_hmac_final(hmac_ctx, Ui, key_length) < 0) diff --git a/contrib/libs/libpq/src/common/wchar.c b/contrib/libs/libpq/src/common/wchar.c index fb9d9f5c85..fbac11deb4 100644 --- a/contrib/libs/libpq/src/common/wchar.c +++ b/contrib/libs/libpq/src/common/wchar.c @@ -13,6 +13,7 @@ #include "c.h" #include "mb/pg_wchar.h" +#include "utils/ascii.h" /* diff --git a/contrib/libs/libpq/src/include/mb/pg_wchar.h b/contrib/libs/libpq/src/include/mb/pg_wchar.h index 25276b199f..06a56a41bb 100644 --- a/contrib/libs/libpq/src/include/mb/pg_wchar.h +++ b/contrib/libs/libpq/src/include/mb/pg_wchar.h @@ -19,8 +19,6 @@ #ifndef PG_WCHAR_H #define PG_WCHAR_H -#include "port/simd.h" - /* * The pg_wchar type */ @@ -702,71 +700,4 @@ extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p, extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len); #endif - -/* - * Verify a chunk of bytes for valid ASCII. - * - * Returns false if the input contains any zero bytes or bytes with the - * high-bit set. Input len must be a multiple of the chunk size (8 or 16). - */ -static inline bool -is_valid_ascii(const unsigned char *s, int len) -{ - const unsigned char *const s_end = s + len; - Vector8 chunk; - Vector8 highbit_cum = vector8_broadcast(0); -#ifdef USE_NO_SIMD - Vector8 zero_cum = vector8_broadcast(0x80); -#endif - - Assert(len % sizeof(chunk) == 0); - - while (s < s_end) - { - vector8_load(&chunk, s); - - /* Capture any zero bytes in this chunk. */ -#ifdef USE_NO_SIMD - - /* - * First, add 0x7f to each byte. This sets the high bit in each byte, - * unless it was a zero. If any resulting high bits are zero, the - * corresponding high bits in the zero accumulator will be cleared. - * - * If none of the bytes in the chunk had the high bit set, the max - * value each byte can have after the addition is 0x7f + 0x7f = 0xfe, - * and we don't need to worry about carrying over to the next byte. If - * any input bytes did have the high bit set, it doesn't matter - * because we check for those separately. - */ - zero_cum &= (chunk + vector8_broadcast(0x7F)); -#else - - /* - * Set all bits in each lane of the highbit accumulator where input - * bytes are zero. - */ - highbit_cum = vector8_or(highbit_cum, - vector8_eq(chunk, vector8_broadcast(0))); -#endif - - /* Capture all set bits in this chunk. */ - highbit_cum = vector8_or(highbit_cum, chunk); - - s += sizeof(chunk); - } - - /* Check if any high bits in the high bit accumulator got set. */ - if (vector8_is_highbit_set(highbit_cum)) - return false; - -#ifdef USE_NO_SIMD - /* Check if any high bits in the zero accumulator got cleared. */ - if (zero_cum != vector8_broadcast(0x80)) - return false; -#endif - - return true; -} - #endif /* PG_WCHAR_H */ diff --git a/contrib/libs/libpq/src/include/pg_config-linux.h b/contrib/libs/libpq/src/include/pg_config-linux.h index afcf2967c5..b4592fa70a 100644 --- a/contrib/libs/libpq/src/include/pg_config-linux.h +++ b/contrib/libs/libpq/src/include/pg_config-linux.h @@ -32,7 +32,7 @@ #define BLCKSZ 8192 /* Saved arguments from configure */ -#define CONFIGURE_ARGS " '--prefix=/var/empty/postgresql-16.1' '--with-openssl' '--with-libxml' '--with-icu' '--sysconfdir=/etc' '--libdir=$(lib)/lib' '--with-system-tzdata=/var/empty/tzdata-2022f/share/zoneinfo' '--enable-debug' '--with-systemd' '--with-ossp-uuid' '--with-lz4' '--with-gssapi' '--without-gssapi' 'CC=cc' 'CXX=g++' 'PKG_CONFIG=pkg-config' 'PKG_CONFIG_PATH=/var/empty/libxcrypt-4.4.30/lib/pkgconfig:/var/empty/zlib-1.2.13-dev/lib/pkgconfig:/var/empty/ncurses-6.3-p20220507-dev/lib/pkgconfig:/var/empty/openssl-3.0.7-dev/lib/pkgconfig:/var/empty/libxml2-2.10.3-dev/lib/pkgconfig:/var/empty/icu4c-72.1-dev/lib/pkgconfig:/var/empty/lz4-1.9.4-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/share/pkgconfig:/var/empty/libkrb5-1.20-dev/lib/pkgconfig:/var/empty/libossp-uuid-1.6.2/lib/pkgconfig'" +#define CONFIGURE_ARGS " '--prefix=/var/empty/postgresql-16.2' '--with-openssl' '--with-libxml' '--with-icu' '--sysconfdir=/etc' '--libdir=$(lib)/lib' '--with-system-tzdata=/var/empty/tzdata-2022f/share/zoneinfo' '--enable-debug' '--with-systemd' '--with-ossp-uuid' '--with-lz4' '--with-gssapi' '--without-gssapi' 'CC=cc' 'CXX=g++' 'PKG_CONFIG=pkg-config' 'PKG_CONFIG_PATH=/var/empty/libxcrypt-4.4.30/lib/pkgconfig:/var/empty/zlib-1.2.13-dev/lib/pkgconfig:/var/empty/ncurses-6.3-p20220507-dev/lib/pkgconfig:/var/empty/openssl-3.0.7-dev/lib/pkgconfig:/var/empty/libxml2-2.10.3-dev/lib/pkgconfig:/var/empty/icu4c-72.1-dev/lib/pkgconfig:/var/empty/lz4-1.9.4-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/share/pkgconfig:/var/empty/libkrb5-1.20-dev/lib/pkgconfig:/var/empty/libossp-uuid-1.6.2/lib/pkgconfig'" /* Define to the default TCP port number on which the server listens and to which clients will try to connect. This can be overridden at run-time, but @@ -71,9 +71,6 @@ /* Define to 1 if you have the `backtrace_symbols' function. */ #define HAVE_BACKTRACE_SYMBOLS 1 -/* Define to 1 if you have the `BIO_get_data' function. */ -#define HAVE_BIO_GET_DATA 1 - /* Define to 1 if you have the `BIO_meth_new' function. */ #define HAVE_BIO_METH_NEW 1 @@ -607,7 +604,7 @@ #define PACKAGE_NAME "PostgreSQL" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PostgreSQL 16.1" +#define PACKAGE_STRING "PostgreSQL 16.2" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "postgresql" @@ -616,7 +613,7 @@ #define PACKAGE_URL "https://www.postgresql.org/" /* Define to the version of this package. */ -#define PACKAGE_VERSION "16.1" +#define PACKAGE_VERSION "16.2" /* Define to the name of a signed 128-bit integer type. */ #define PG_INT128_TYPE __int128 @@ -635,7 +632,7 @@ #define PG_MAJORVERSION_NUM 16 /* PostgreSQL minor version number */ -#define PG_MINORVERSION_NUM 1 +#define PG_MINORVERSION_NUM 2 /* Define to best printf format archetype, usually gnu_printf if available. */ #define PG_PRINTF_ATTRIBUTE gnu_printf @@ -644,13 +641,13 @@ #define PG_USE_STDBOOL 1 /* PostgreSQL version as a string */ -#define PG_VERSION "16.1" +#define PG_VERSION "16.2" /* PostgreSQL version as a number */ -#define PG_VERSION_NUM 160001 +#define PG_VERSION_NUM 160002 /* A string containing the version number, platform, and C compiler */ -#define PG_VERSION_STR "PostgreSQL 16.1 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 11.3.0, 64-bit" +#define PG_VERSION_STR "PostgreSQL 16.2 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 11.3.0, 64-bit" /* Define to 1 to allow profiling output to be saved separately for each process. */ diff --git a/contrib/libs/libpq/src/include/utils/ascii.h b/contrib/libs/libpq/src/include/utils/ascii.h new file mode 100644 index 0000000000..7df024dad3 --- /dev/null +++ b/contrib/libs/libpq/src/include/utils/ascii.h @@ -0,0 +1,84 @@ +/*----------------------------------------------------------------------- + * ascii.h + * + * Portions Copyright (c) 1999-2023, PostgreSQL Global Development Group + * + * src/include/utils/ascii.h + * + *----------------------------------------------------------------------- + */ + +#ifndef _ASCII_H_ +#define _ASCII_H_ + +#include "port/simd.h" + +extern void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz); + +/* + * Verify a chunk of bytes for valid ASCII. + * + * Returns false if the input contains any zero bytes or bytes with the + * high-bit set. Input len must be a multiple of the chunk size (8 or 16). + */ +static inline bool +is_valid_ascii(const unsigned char *s, int len) +{ + const unsigned char *const s_end = s + len; + Vector8 chunk; + Vector8 highbit_cum = vector8_broadcast(0); +#ifdef USE_NO_SIMD + Vector8 zero_cum = vector8_broadcast(0x80); +#endif + + Assert(len % sizeof(chunk) == 0); + + while (s < s_end) + { + vector8_load(&chunk, s); + + /* Capture any zero bytes in this chunk. */ +#ifdef USE_NO_SIMD + + /* + * First, add 0x7f to each byte. This sets the high bit in each byte, + * unless it was a zero. If any resulting high bits are zero, the + * corresponding high bits in the zero accumulator will be cleared. + * + * If none of the bytes in the chunk had the high bit set, the max + * value each byte can have after the addition is 0x7f + 0x7f = 0xfe, + * and we don't need to worry about carrying over to the next byte. If + * any input bytes did have the high bit set, it doesn't matter + * because we check for those separately. + */ + zero_cum &= (chunk + vector8_broadcast(0x7F)); +#else + + /* + * Set all bits in each lane of the highbit accumulator where input + * bytes are zero. + */ + highbit_cum = vector8_or(highbit_cum, + vector8_eq(chunk, vector8_broadcast(0))); +#endif + + /* Capture all set bits in this chunk. */ + highbit_cum = vector8_or(highbit_cum, chunk); + + s += sizeof(chunk); + } + + /* Check if any high bits in the high bit accumulator got set. */ + if (vector8_is_highbit_set(highbit_cum)) + return false; + +#ifdef USE_NO_SIMD + /* Check if any high bits in the zero accumulator got cleared. */ + if (zero_cum != vector8_broadcast(0x80)) + return false; +#endif + + return true; +} + +#endif /* _ASCII_H_ */ diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c b/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c index 26564955d0..fb11997eff 100644 --- a/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c +++ b/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c @@ -841,6 +841,8 @@ pqSaveWriteError(PGconn *conn) * using whatever is in conn->errorMessage. In any case, clear the async * result storage, and update our notion of how much error text has been * returned to the application. + * + * Note that in no case (not even OOM) do we return NULL. */ PGresult * pqPrepareAsyncResult(PGconn *conn) @@ -2112,19 +2114,12 @@ PQgetResult(PGconn *conn) break; case PGASYNC_READY: - - /* - * For any query type other than simple query protocol, we advance - * the command queue here. This is because for simple query - * protocol we can get the READY state multiple times before the - * command is actually complete, since the command string can - * contain many queries. In simple query protocol, the queue - * advance is done by fe-protocol3 when it receives ReadyForQuery. - */ - if (conn->cmd_queue_head && - conn->cmd_queue_head->queryclass != PGQUERY_SIMPLE) - pqCommandQueueAdvance(conn); res = pqPrepareAsyncResult(conn); + + /* Advance the queue as appropriate */ + pqCommandQueueAdvance(conn, false, + res->resultStatus == PGRES_PIPELINE_SYNC); + if (conn->pipelineStatus != PQ_PIPELINE_OFF) { /* @@ -2144,7 +2139,7 @@ PQgetResult(PGconn *conn) * (In other words: we don't return a NULL after a pipeline * sync.) */ - if (res && res->resultStatus == PGRES_PIPELINE_SYNC) + if (res->resultStatus == PGRES_PIPELINE_SYNC) pqPipelineProcessQueue(conn); } else @@ -3012,18 +3007,44 @@ PQexitPipelineMode(PGconn *conn) /* * pqCommandQueueAdvance - * Remove one query from the command queue, when we receive - * all results from the server that pertain to it. + * Remove one query from the command queue, if appropriate. + * + * If we have received all results corresponding to the head element + * in the command queue, remove it. + * + * In simple query protocol we must not advance the command queue until the + * ReadyForQuery message has been received. This is because in simple mode a + * command can have multiple queries, and we must process result for all of + * them before moving on to the next command. + * + * Another consideration is synchronization during error processing in + * extended query protocol: we refuse to advance the queue past a SYNC queue + * element, unless the result we've received is also a SYNC. In particular + * this protects us from advancing when an error is received at an + * inappropriate moment. */ void -pqCommandQueueAdvance(PGconn *conn) +pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery, bool gotSync) { PGcmdQueueEntry *prevquery; if (conn->cmd_queue_head == NULL) return; - /* delink from queue */ + /* + * If processing a query of simple query protocol, we only advance the + * queue when we receive the ReadyForQuery message for it. + */ + if (conn->cmd_queue_head->queryclass == PGQUERY_SIMPLE && !isReadyForQuery) + return; + + /* + * If we're waiting for a SYNC, don't advance the queue until we get one. + */ + if (conn->cmd_queue_head->queryclass == PGQUERY_SYNC && !gotSync) + return; + + /* delink element from queue */ prevquery = conn->cmd_queue_head; conn->cmd_queue_head = conn->cmd_queue_head->next; @@ -3031,7 +3052,7 @@ pqCommandQueueAdvance(PGconn *conn) if (conn->cmd_queue_head == NULL) conn->cmd_queue_tail = NULL; - /* and make it recyclable */ + /* and make the queue element recyclable */ prevquery->next = NULL; pqRecycleCmdQueueEntry(conn, prevquery); } @@ -3240,6 +3261,14 @@ PQsendFlushRequest(PGconn *conn) return 0; } + /* + * Give the data a push (in pipeline mode, only if we're past the size + * threshold). In nonblock mode, don't complain if we're unable to send + * it all; PQgetResult() will do any additional flushing needed. + */ + if (pqPipelineFlush(conn) < 0) + return 0; + return 1; } diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c b/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c index 32b66d561c..9c4aa7e2c7 100644 --- a/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c +++ b/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c @@ -236,13 +236,8 @@ pqParseInput3(PGconn *conn) } else { - /* - * In simple query protocol, advance the command queue - * (see PQgetResult). - */ - if (conn->cmd_queue_head && - conn->cmd_queue_head->queryclass == PGQUERY_SIMPLE) - pqCommandQueueAdvance(conn); + /* Advance the command queue and set us idle */ + pqCommandQueueAdvance(conn, true, false); conn->asyncStatus = PGASYNC_IDLE; } break; diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c b/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c index 5c220e0e69..8b845994a1 100644 --- a/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c +++ b/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c @@ -207,7 +207,7 @@ rloop: */ goto rloop; case SSL_ERROR_SYSCALL: - if (n < 0) + if (n < 0 && SOCK_ERRNO != 0) { result_errno = SOCK_ERRNO; if (result_errno == EPIPE || @@ -308,7 +308,13 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len) n = 0; break; case SSL_ERROR_SYSCALL: - if (n < 0) + + /* + * If errno is still zero then assume it's a read EOF situation, + * and report EOF. (This seems possible because SSL_write can + * also do reads.) + */ + if (n < 0 && SOCK_ERRNO != 0) { result_errno = SOCK_ERRNO; if (result_errno == EPIPE || result_errno == ECONNRESET) @@ -1523,11 +1529,12 @@ open_client_SSL(PGconn *conn) * was using the system CA pool. For other errors, log * them using the normal SYSCALL logging. */ - if (!save_errno && vcode == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY && + if (save_errno == 0 && + vcode == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY && strcmp(conn->sslrootcert, "system") == 0) libpq_append_conn_error(conn, "SSL error: certificate verify failed: %s", X509_verify_cert_error_string(vcode)); - else if (r == -1) + else if (r == -1 && save_errno != 0) libpq_append_conn_error(conn, "SSL SYSCALL error: %s", SOCK_STRERROR(save_errno, sebuf, sizeof(sebuf))); else @@ -1834,11 +1841,7 @@ PQsslAttribute(PGconn *conn, const char *attribute_name) * to retry; do we need to adopt their logic for that? */ -#ifndef HAVE_BIO_GET_DATA -#define BIO_get_data(bio) (bio->ptr) -#define BIO_set_data(bio, data) (bio->ptr = data) -#endif - +/* protected by ssl_config_mutex */ static BIO_METHOD *my_bio_methods; static int @@ -1846,7 +1849,7 @@ my_sock_read(BIO *h, char *buf, int size) { int res; - res = pqsecure_raw_read((PGconn *) BIO_get_data(h), buf, size); + res = pqsecure_raw_read((PGconn *) BIO_get_app_data(h), buf, size); BIO_clear_retry_flags(h); if (res < 0) { @@ -1876,7 +1879,7 @@ my_sock_write(BIO *h, const char *buf, int size) { int res; - res = pqsecure_raw_write((PGconn *) BIO_get_data(h), buf, size); + res = pqsecure_raw_write((PGconn *) BIO_get_app_data(h), buf, size); BIO_clear_retry_flags(h); if (res < 0) { @@ -1904,6 +1907,15 @@ my_sock_write(BIO *h, const char *buf, int size) static BIO_METHOD * my_BIO_s_socket(void) { + BIO_METHOD *res; + +#ifdef ENABLE_THREAD_SAFETY + if (pthread_mutex_lock(&ssl_config_mutex)) + return NULL; +#endif + + res = my_bio_methods; + if (!my_bio_methods) { BIO_METHOD *biom = (BIO_METHOD *) BIO_s_socket(); @@ -1912,39 +1924,58 @@ my_BIO_s_socket(void) my_bio_index = BIO_get_new_index(); if (my_bio_index == -1) - return NULL; + goto err; my_bio_index |= (BIO_TYPE_DESCRIPTOR | BIO_TYPE_SOURCE_SINK); - my_bio_methods = BIO_meth_new(my_bio_index, "libpq socket"); - if (!my_bio_methods) - return NULL; + res = BIO_meth_new(my_bio_index, "libpq socket"); + if (!res) + goto err; /* * As of this writing, these functions never fail. But check anyway, * like OpenSSL's own examples do. */ - if (!BIO_meth_set_write(my_bio_methods, my_sock_write) || - !BIO_meth_set_read(my_bio_methods, my_sock_read) || - !BIO_meth_set_gets(my_bio_methods, BIO_meth_get_gets(biom)) || - !BIO_meth_set_puts(my_bio_methods, BIO_meth_get_puts(biom)) || - !BIO_meth_set_ctrl(my_bio_methods, BIO_meth_get_ctrl(biom)) || - !BIO_meth_set_create(my_bio_methods, BIO_meth_get_create(biom)) || - !BIO_meth_set_destroy(my_bio_methods, BIO_meth_get_destroy(biom)) || - !BIO_meth_set_callback_ctrl(my_bio_methods, BIO_meth_get_callback_ctrl(biom))) + if (!BIO_meth_set_write(res, my_sock_write) || + !BIO_meth_set_read(res, my_sock_read) || + !BIO_meth_set_gets(res, BIO_meth_get_gets(biom)) || + !BIO_meth_set_puts(res, BIO_meth_get_puts(biom)) || + !BIO_meth_set_ctrl(res, BIO_meth_get_ctrl(biom)) || + !BIO_meth_set_create(res, BIO_meth_get_create(biom)) || + !BIO_meth_set_destroy(res, BIO_meth_get_destroy(biom)) || + !BIO_meth_set_callback_ctrl(res, BIO_meth_get_callback_ctrl(biom))) { - BIO_meth_free(my_bio_methods); - my_bio_methods = NULL; - return NULL; + goto err; } #else - my_bio_methods = malloc(sizeof(BIO_METHOD)); - if (!my_bio_methods) - return NULL; - memcpy(my_bio_methods, biom, sizeof(BIO_METHOD)); - my_bio_methods->bread = my_sock_read; - my_bio_methods->bwrite = my_sock_write; + res = malloc(sizeof(BIO_METHOD)); + if (!res) + goto err; + memcpy(res, biom, sizeof(BIO_METHOD)); + res->bread = my_sock_read; + res->bwrite = my_sock_write; #endif } - return my_bio_methods; + + my_bio_methods = res; + +#ifdef ENABLE_THREAD_SAFETY + pthread_mutex_unlock(&ssl_config_mutex); +#endif + + return res; + +err: +#ifdef HAVE_BIO_METH_NEW + if (res) + BIO_meth_free(res); +#else + if (res) + free(res); +#endif + +#ifdef ENABLE_THREAD_SAFETY + pthread_mutex_unlock(&ssl_config_mutex); +#endif + return NULL; } /* This should exactly match OpenSSL's SSL_set_fd except for using my BIO */ @@ -1967,7 +1998,7 @@ my_SSL_set_fd(PGconn *conn, int fd) SSLerr(SSL_F_SSL_SET_FD, ERR_R_BUF_LIB); goto err; } - BIO_set_data(bio, conn); + BIO_set_app_data(bio, conn); SSL_set_bio(conn->ssl, bio, bio); BIO_set_fd(bio, fd, BIO_NOCLOSE); diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c b/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c index 8069e38142..8444f19f08 100644 --- a/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c +++ b/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c @@ -233,6 +233,8 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len) int result_errno = 0; char sebuf[PG_STRERROR_R_BUFLEN]; + SOCK_ERRNO_SET(0); + n = recv(conn->sock, ptr, len, 0); if (n < 0) @@ -259,6 +261,11 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len) "\tbefore or while processing the request."); break; + case 0: + /* If errno didn't get set, treat it as regular EOF */ + n = 0; + break; + default: libpq_append_conn_error(conn, "could not receive data from server: %s", SOCK_STRERROR(result_errno, diff --git a/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h b/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h index 0045f83cbf..a951f4992b 100644 --- a/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h +++ b/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h @@ -585,8 +585,8 @@ struct pg_conn int gss_SendLength; /* End of data available in gss_SendBuffer */ int gss_SendNext; /* Next index to send a byte from * gss_SendBuffer */ - int gss_SendConsumed; /* Number of *unencrypted* bytes consumed - * for current contents of gss_SendBuffer */ + int gss_SendConsumed; /* Number of source bytes encrypted but + * not yet reported as sent */ char *gss_RecvBuffer; /* Received, encrypted data */ int gss_RecvLength; /* End of data available in gss_RecvBuffer */ char *gss_ResultBuffer; /* Decryption of data in gss_RecvBuffer */ @@ -705,7 +705,8 @@ extern void pqSaveMessageField(PGresult *res, char code, extern void pqSaveParameterStatus(PGconn *conn, const char *name, const char *value); extern int pqRowProcessor(PGconn *conn, const char **errmsgp); -extern void pqCommandQueueAdvance(PGconn *conn); +extern void pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery, + bool gotSync); extern int PQsendQueryContinue(PGconn *conn, const char *query); /* === in fe-protocol3.c === */ diff --git a/contrib/libs/libpq/src/port/pg_config_paths.h b/contrib/libs/libpq/src/port/pg_config_paths.h index 2e2db6a40c..ba4db149b6 100644 --- a/contrib/libs/libpq/src/port/pg_config_paths.h +++ b/contrib/libs/libpq/src/port/pg_config_paths.h @@ -1,12 +1,12 @@ -#define PGBINDIR "/var/empty/postgresql-16.1/bin" -#define PGSHAREDIR "/var/empty/postgresql-16.1/share" +#define PGBINDIR "/var/empty/postgresql-16.2/bin" +#define PGSHAREDIR "/var/empty/postgresql-16.2/share" #define SYSCONFDIR "/etc/postgresql" -#define INCLUDEDIR "/var/empty/postgresql-16.1/include" -#define PKGINCLUDEDIR "/var/empty/postgresql-16.1/include" -#define INCLUDEDIRSERVER "/var/empty/postgresql-16.1/include/server" +#define INCLUDEDIR "/var/empty/postgresql-16.2/include" +#define PKGINCLUDEDIR "/var/empty/postgresql-16.2/include" +#define INCLUDEDIRSERVER "/var/empty/postgresql-16.2/include/server" #define LIBDIR "/var/empty/tmp/out/lib" #define PKGLIBDIR "/var/empty/tmp/out/lib/postgresql" -#define LOCALEDIR "/var/empty/postgresql-16.1/share/locale" -#define DOCDIR "/var/empty/postgresql-16.1/share/doc/" -#define HTMLDIR "/var/empty/postgresql-16.1/share/doc/" -#define MANDIR "/var/empty/postgresql-16.1/share/man" +#define LOCALEDIR "/var/empty/postgresql-16.2/share/locale" +#define DOCDIR "/var/empty/postgresql-16.2/share/doc/" +#define HTMLDIR "/var/empty/postgresql-16.2/share/doc/" +#define MANDIR "/var/empty/postgresql-16.2/share/man" diff --git a/contrib/libs/libpq/ya.make b/contrib/libs/libpq/ya.make index d7acd293f3..66923a8a11 100644 --- a/contrib/libs/libpq/ya.make +++ b/contrib/libs/libpq/ya.make @@ -13,9 +13,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(16.1) +VERSION(16.2) -ORIGINAL_SOURCE(https://github.com/postgres/postgres/archive/REL_16_1.tar.gz) +ORIGINAL_SOURCE(https://github.com/postgres/postgres/archive/REL_16_2.tar.gz) PEERDIR( contrib/libs/libc_compat diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index 01f8ae0a0a..21f727a546 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: hypothesis -Version: 6.98.0 +Version: 6.98.2 Summary: A library for property-based testing Home-page: https://hypothesis.works Author: David R. MacIver and Zac Hatfield-Dodds diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py index bc850254b9..1f56d2ccf3 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py @@ -30,6 +30,7 @@ from typing import ( Set, Tuple, Type, + TypedDict, TypeVar, Union, ) @@ -163,6 +164,8 @@ NASTY_FLOATS.extend([-x for x in NASTY_FLOATS]) FLOAT_INIT_LOGIC_CACHE = LRUReusedCache(4096) +DRAW_STRING_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large" + class Example: """Examples track the hierarchical structure of draws from the byte stream, @@ -794,6 +797,34 @@ global_test_counter = 0 MAX_DEPTH = 100 +class IntegerKWargs(TypedDict): + min_value: Optional[int] + max_value: Optional[int] + weights: Optional[Sequence[float]] + shrink_towards: int + + +class FloatKWargs(TypedDict): + min_value: float + max_value: float + allow_nan: bool + smallest_nonzero_magnitude: float + + +class StringKWargs(TypedDict): + intervals: IntervalSet + min_size: int + max_size: Optional[int] + + +class BytesKWargs(TypedDict): + size: int + + +class BooleanKWargs(TypedDict): + p: float + + class DataObserver: """Observer class for recording the behaviour of a ConjectureData object, primarily used for tracking @@ -810,18 +841,34 @@ class DataObserver: Note that this is called after ``freeze`` has completed. """ - def draw_bits(self, n_bits: int, *, forced: bool, value: int) -> None: - """Called when ``draw_bits`` is called on on the - observed ``ConjectureData``. - * ``n_bits`` is the number of bits drawn. - * ``forced`` is True if the corresponding - draw was forced or ``False`` otherwise. - * ``value`` is the result that ``draw_bits`` returned. - """ - def kill_branch(self) -> None: """Mark this part of the tree as not worth re-exploring.""" + def draw_integer( + self, value: int, *, was_forced: bool, kwargs: IntegerKWargs + ) -> None: + pass + + def draw_float( + self, value: float, *, was_forced: bool, kwargs: FloatKWargs + ) -> None: + pass + + def draw_string( + self, value: str, *, was_forced: bool, kwargs: StringKWargs + ) -> None: + pass + + def draw_bytes( + self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs + ) -> None: + pass + + def draw_boolean( + self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs + ) -> None: + pass + @dataclass_transform() @attr.s(slots=True) @@ -995,7 +1042,7 @@ class PrimitiveProvider: assert min_value is not None assert max_value is not None - sampler = Sampler(weights) + sampler = Sampler(weights, observe=False) gap = max_value - shrink_towards forced_idx = None @@ -1023,7 +1070,7 @@ class PrimitiveProvider: probe = shrink_towards + self._draw_unbounded_integer( forced=None if forced is None else forced - shrink_towards ) - self._cd.stop_example(discard=max_value < probe) + self._cd.stop_example() return probe if max_value is None: @@ -1034,7 +1081,7 @@ class PrimitiveProvider: probe = shrink_towards + self._draw_unbounded_integer( forced=None if forced is None else forced - shrink_towards ) - self._cd.stop_example(discard=probe < min_value) + self._cd.stop_example() return probe return self._draw_bounded_integer( @@ -1091,7 +1138,7 @@ class PrimitiveProvider: assert pos_clamper is not None clamped = pos_clamper(result) if clamped != result and not (math.isnan(result) and allow_nan): - self._cd.stop_example(discard=True) + self._cd.stop_example() self._cd.start_example(DRAW_FLOAT_LABEL) self._draw_float(forced=clamped) result = clamped @@ -1113,7 +1160,7 @@ class PrimitiveProvider: forced: Optional[str] = None, ) -> str: if max_size is None: - max_size = 10**10 # "arbitrarily large" + max_size = DRAW_STRING_DEFAULT_MAX_SIZE assert forced is None or min_size <= len(forced) <= max_size @@ -1129,6 +1176,7 @@ class PrimitiveProvider: max_size=max_size, average_size=average_size, forced=None if forced is None else len(forced), + observe=False, ) while elements.more(): forced_i: Optional[int] = None @@ -1264,7 +1312,7 @@ class PrimitiveProvider: probe = self._cd.draw_bits( bits, forced=None if forced is None else abs(forced - center) ) - self._cd.stop_example(discard=probe > gap) + self._cd.stop_example() if above: result = center + probe @@ -1356,7 +1404,7 @@ class PrimitiveProvider: ] nasty_floats = [f for f in NASTY_FLOATS + boundary_values if permitted(f)] weights = [0.2 * len(nasty_floats)] + [0.8] * len(nasty_floats) - sampler = Sampler(weights) if nasty_floats else None + sampler = Sampler(weights, observe=False) if nasty_floats else None pos_clamper = neg_clamper = None if sign_aware_lte(0.0, max_value): @@ -1465,6 +1513,19 @@ class ConjectureData: ", frozen" if self.frozen else "", ) + # A bit of explanation of the `observe` argument in our draw_* functions. + # + # There are two types of draws: sub-ir and super-ir. For instance, some ir + # nodes use `many`, which in turn calls draw_boolean. But some strategies + # also use many, at the super-ir level. We don't want to write sub-ir draws + # to the DataTree (and consequently use them when computing novel prefixes), + # since they are fully recorded by writing the ir node itself. + # But super-ir draws are not included in the ir node, so we do want to write + # these to the tree. + # + # `observe` formalizes this distinction. The draw will only be written to + # the DataTree if observe is True. + def draw_integer( self, min_value: Optional[int] = None, @@ -1474,6 +1535,7 @@ class ConjectureData: weights: Optional[Sequence[float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, + observe: bool = True, ) -> int: # Validate arguments if weights is not None: @@ -1494,13 +1556,18 @@ class ConjectureData: if forced is not None and max_value is not None: assert forced <= max_value - return self.provider.draw_integer( - min_value=min_value, - max_value=max_value, - weights=weights, - shrink_towards=shrink_towards, - forced=forced, - ) + kwargs: IntegerKWargs = { + "min_value": min_value, + "max_value": max_value, + "weights": weights, + "shrink_towards": shrink_towards, + } + value = self.provider.draw_integer(**kwargs, forced=forced) + if observe: + self.observer.draw_integer( + value, was_forced=forced is not None, kwargs=kwargs + ) + return value def draw_float( self, @@ -1514,6 +1581,7 @@ class ConjectureData: # width: Literal[16, 32, 64] = 64, # exclude_min and exclude_max handled higher up, forced: Optional[float] = None, + observe: bool = True, ) -> float: assert smallest_nonzero_magnitude > 0 assert not math.isnan(min_value) @@ -1523,13 +1591,18 @@ class ConjectureData: assert allow_nan or not math.isnan(forced) assert math.isnan(forced) or min_value <= forced <= max_value - return self.provider.draw_float( - min_value=min_value, - max_value=max_value, - allow_nan=allow_nan, - smallest_nonzero_magnitude=smallest_nonzero_magnitude, - forced=forced, - ) + kwargs: FloatKWargs = { + "min_value": min_value, + "max_value": max_value, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + } + value = self.provider.draw_float(**kwargs, forced=forced) + if observe: + self.observer.draw_float( + value, kwargs=kwargs, was_forced=forced is not None + ) + return value def draw_string( self, @@ -1538,19 +1611,44 @@ class ConjectureData: min_size: int = 0, max_size: Optional[int] = None, forced: Optional[str] = None, + observe: bool = True, ) -> str: assert forced is None or min_size <= len(forced) - return self.provider.draw_string( - intervals, min_size=min_size, max_size=max_size, forced=forced - ) - def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes: + kwargs: StringKWargs = { + "intervals": intervals, + "min_size": min_size, + "max_size": max_size, + } + value = self.provider.draw_string(**kwargs, forced=forced) + if observe: + self.observer.draw_string( + value, kwargs=kwargs, was_forced=forced is not None + ) + return value + + def draw_bytes( + self, + # TODO move to min_size and max_size here. + size: int, + *, + forced: Optional[bytes] = None, + observe: bool = True, + ) -> bytes: assert forced is None or len(forced) == size assert size >= 0 - return self.provider.draw_bytes(size, forced=forced) + kwargs: BytesKWargs = {"size": size} + value = self.provider.draw_bytes(**kwargs, forced=forced) + if observe: + self.observer.draw_bytes( + value, kwargs=kwargs, was_forced=forced is not None + ) + return value - def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool: + def draw_boolean( + self, p: float = 0.5, *, forced: Optional[bool] = None, observe: bool = True + ) -> bool: # Internally, we treat probabilities lower than 1 / 2**64 as # unconditionally false. # @@ -1561,7 +1659,13 @@ class ConjectureData: if forced is False: assert p < (1 - 2 ** (-64)) - return self.provider.draw_boolean(p, forced=forced) + kwargs: BooleanKWargs = {"p": p} + value = self.provider.draw_boolean(**kwargs, forced=forced) + if observe: + self.observer.draw_boolean( + value, kwargs=kwargs, was_forced=forced is not None + ) + return value def as_result(self) -> Union[ConjectureResult, _Overrun]: """Convert the result of running this test into @@ -1735,9 +1839,15 @@ class ConjectureData: self.buffer = bytes(self.buffer) self.observer.conclude_test(self.status, self.interesting_origin) - def choice(self, values: Sequence[T], *, forced: Optional[T] = None) -> T: + def choice( + self, + values: Sequence[T], + *, + forced: Optional[T] = None, + observe: bool = True, + ) -> T: forced_i = None if forced is None else values.index(forced) - i = self.draw_integer(0, len(values) - 1, forced=forced_i) + i = self.draw_integer(0, len(values) - 1, forced=forced_i, observe=observe) return values[i] def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int: @@ -1774,7 +1884,6 @@ class ConjectureData: buf = bytes(buf) result = int_from_bytes(buf) - self.observer.draw_bits(n, forced=forced is not None, value=result) self.__example_record.draw_bits(n, forced) initial = self.index diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py index d82ed3ca67..a9a6e5b196 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py @@ -8,17 +8,38 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +import itertools +import math +from typing import TYPE_CHECKING, List, Literal, Optional, Union + import attr from hypothesis.errors import Flaky, HypothesisException, StopTest +from hypothesis.internal import floats as flt from hypothesis.internal.compat import int_to_bytes from hypothesis.internal.conjecture.data import ( + BooleanKWargs, + BytesKWargs, ConjectureData, DataObserver, + FloatKWargs, + IntegerKWargs, Status, - bits_to_bytes, + StringKWargs, ) -from hypothesis.internal.conjecture.junkdrawer import IntList +from hypothesis.internal.floats import count_between_floats, float_to_int, int_to_float + +if TYPE_CHECKING: + from typing import TypeAlias +else: + TypeAlias = object + +IRType: TypeAlias = Union[int, str, bool, float, bytes] +IRKWargsType: TypeAlias = Union[ + IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs +] +# this would be "IRTypeType", but that's just confusing. +IRLiteralType: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"] class PreviouslyUnseenBehaviour(HypothesisException): @@ -51,12 +72,15 @@ class Branch: """Represents a transition where multiple choices can be made as to what to drawn.""" - bit_length = attr.ib() + kwargs = attr.ib() + ir_type = attr.ib() children = attr.ib(repr=False) @property def max_children(self): - return 1 << self.bit_length + max_children = compute_max_children(self.ir_type, self.kwargs) + assert max_children > 0 + return max_children @attr.s(slots=True, frozen=True) @@ -67,61 +91,276 @@ class Conclusion: interesting_origin = attr.ib() +# The number of max children where, beyond this, it is practically impossible +# for hypothesis to saturate / explore all children nodes in a reasonable time +# frame. We use this to bail out of expensive max children computations early, +# where the numbers involved are so large that we know they will be larger than +# this number. +# +# Note that it's ok for us to underestimate the number of max children of a node +# by using this. We just may think the node is exhausted when in fact it has more +# possible children to be explored. This has the potential to finish generation +# early due to exhausting the entire tree, but that is quite unlikely: (1) the +# number of examples would have to be quite high, and (2) the tree would have to +# contain only one or two nodes, or generate_novel_prefix would simply switch to +# exploring another non-exhausted node. +# +# Also note that we may sometimes compute max children above this value. In other +# words, this is *not* a hard maximum on the computed max children. It's the point +# where further computation is not beneficial - but sometimes doing that computation +# unconditionally is cheaper than estimating against this value. +# +# The one case where this may be detrimental is fuzzing, where the throughput of +# examples is so high that it really may saturate important nodes. We'll cross +# that bridge when we come to it. +MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000 + + +def compute_max_children(ir_type, kwargs): + from hypothesis.internal.conjecture.data import DRAW_STRING_DEFAULT_MAX_SIZE + + if ir_type == "integer": + min_value = kwargs["min_value"] + max_value = kwargs["max_value"] + weights = kwargs["weights"] + + if min_value is None and max_value is None: + # full 128 bit range. + return 2**128 - 1 + if min_value is not None and max_value is not None: + # count between min/max value. + n = max_value - min_value + 1 + # remove any values with a zero probability of being drawn (weight=0). + if weights is not None: + n -= sum(weight == 0 for weight in weights) + return n + + # hard case: only one bound was specified. Here we probe either upwards + # or downwards with our full 128 bit generation, but only half of these + # (plus one for the case of generating zero) result in a probe in the + # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127 + assert (min_value is None) ^ (max_value is None) + return 2**127 + elif ir_type == "boolean": + p = kwargs["p"] + # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice. + if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)): + return 1 + return 2 + elif ir_type == "bytes": + return 2 ** (8 * kwargs["size"]) + elif ir_type == "string": + min_size = kwargs["min_size"] + max_size = kwargs["max_size"] + intervals = kwargs["intervals"] + + if max_size is None: + max_size = DRAW_STRING_DEFAULT_MAX_SIZE + + if len(intervals) == 0: + # Special-case the empty alphabet to avoid an error in math.log(0). + # Only possibility is the empty string. + return 1 + + # We want to estimate if we're going to have more children than + # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially + # extremely expensive pow. We'll check if the number of strings in + # the largest string size alone is enough to put us over this limit. + # We'll also employ a trick of estimating against log, which is cheaper + # than computing a pow. + # + # x = max_size + # y = len(intervals) + # n = MAX_CHILDREN_EFFECTIVELY_INFINITE + # + # x**y > n + # <=> log(x**y) > log(n) + # <=> y * log(x) > log(n) + + # avoid math.log(1) == 0 and incorrectly failing the below estimate, + # even when we definitely are too large. + if len(intervals) == 1: + definitely_too_large = max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE + else: + definitely_too_large = max_size * math.log(len(intervals)) > math.log( + MAX_CHILDREN_EFFECTIVELY_INFINITE + ) + + if definitely_too_large: + return MAX_CHILDREN_EFFECTIVELY_INFINITE + + # number of strings of length k, for each k in [min_size, max_size]. + return sum(len(intervals) ** k for k in range(min_size, max_size + 1)) + + elif ir_type == "float": + return count_between_floats(kwargs["min_value"], kwargs["max_value"]) + + raise NotImplementedError(f"unhandled ir_type {ir_type}") + + +# In theory, this is a strict superset of the functionality of compute_max_children; +# +# assert len(all_children(ir_type, kwargs)) == compute_max_children(ir_type, kwargs) +# +# In practice, we maintain two distinct implementations for efficiency and space +# reasons. If you just need the number of children, it is cheaper to use +# compute_max_children than to reify the list of children (only to immediately +# throw it away). +def all_children(ir_type, kwargs): + if ir_type == "integer": + min_value = kwargs["min_value"] + max_value = kwargs["max_value"] + weights = kwargs["weights"] + # it's a bit annoying (but completely feasible) to implement the cases + # other than "both sides bounded" here. We haven't needed to yet because + # in practice we don't struggle with unbounded integer generation. + assert min_value is not None + assert max_value is not None + + if weights is None: + yield from range(min_value, max_value + 1) + else: + # skip any values with a corresponding weight of 0 (can never be drawn). + for weight, n in zip(weights, range(min_value, max_value + 1)): + if weight == 0: + continue + yield n + + if ir_type == "boolean": + p = kwargs["p"] + if p <= 2 ** (-64): + yield False + elif p >= (1 - 2 ** (-64)): + yield True + else: + yield from [False, True] + if ir_type == "bytes": + size = kwargs["size"] + yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size))) + if ir_type == "string": + min_size = kwargs["min_size"] + max_size = kwargs["max_size"] + intervals = kwargs["intervals"] + + # written unidiomatically in order to handle the case of max_size=inf. + size = min_size + while size <= max_size: + for ords in itertools.product(intervals, repeat=size): + yield "".join(chr(n) for n in ords) + size += 1 + if ir_type == "float": + + def floats_between(a, b): + for n in range(float_to_int(a), float_to_int(b) + 1): + yield int_to_float(n) + + min_value = kwargs["min_value"] + max_value = kwargs["max_value"] + + if flt.is_negative(min_value): + if flt.is_negative(max_value): + # if both are negative, have to invert order + yield from floats_between(max_value, min_value) + else: + yield from floats_between(-0.0, min_value) + yield from floats_between(0.0, max_value) + else: + yield from floats_between(min_value, max_value) + + @attr.s(slots=True) class TreeNode: - """Node in a tree that corresponds to previous interactions with - a ``ConjectureData`` object according to some fixed test function. - - This is functionally a variant patricia trie. - See https://en.wikipedia.org/wiki/Radix_tree for the general idea, - but what this means in particular here is that we have a very deep - but very lightly branching tree and rather than store this as a fully - recursive structure we flatten prefixes and long branches into - lists. This significantly compacts the storage requirements. - - A single ``TreeNode`` corresponds to a previously seen sequence - of calls to ``ConjectureData`` which we have never seen branch, - followed by a ``transition`` which describes what happens next. """ + A node, or collection of directly descended nodes, in a DataTree. + + We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree), + which means that nodes that are the only child of their parent are collapsed + into their parent to save space. + + Conceptually, you can unfold a single TreeNode storing n values in its lists + into a sequence of n nodes, each a child of the last. In other words, + (kwargs[i], values[i], ir_types[i]) corresponds to the single node at index + i. + + Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted + via the radix tree definition), then its lists will be empty and it will + store a `Branch` representing that choce in its `transition`. + + Examples + -------- + + Consider sequentially drawing a boolean, then an integer. + + data.draw_boolean() + data.draw_integer(1, 3) + + If we draw True and then 2, the tree may conceptually look like this. + + ┌──────┐ + │ root │ + └──┬───┘ + ┌──┴───┐ + │ True │ + └──┬───┘ + ┌──┴───┐ + │ 2 │ + └──────┘ + + But since 2 is the only child of True, we will compact these nodes and store + them as a single TreeNode. + + ┌──────┐ + │ root │ + └──┬───┘ + ┌────┴──────┐ + │ [True, 2] │ + └───────────┘ + + If we then draw True and then 3, True will have multiple children and we + can no longer store this compacted representation. We would call split_at(0) + on the [True, 2] node to indicate that we need to add a choice at 0-index + node (True). - # Records the previous sequence of calls to ``data.draw_bits``, - # with the ``n_bits`` argument going in ``bit_lengths`` and the - # values seen in ``values``. These should always have the same - # length. - bit_lengths = attr.ib(factory=IntList) - values = attr.ib(factory=IntList) - - # The indices of of the calls to ``draw_bits`` that we have stored - # where ``forced`` is not None. Stored as None if no indices - # have been forced, purely for space saving reasons (we force - # quite rarely). - __forced = attr.ib(default=None, init=False) - - # What happens next after observing this sequence of calls. - # Either: + ┌──────┐ + │ root │ + └──┬───┘ + ┌──┴───┐ + ┌─┤ True ├─┐ + │ └──────┘ │ + ┌─┴─┐ ┌─┴─┐ + │ 2 │ │ 3 │ + └───┘ └───┘ + """ + + # The kwargs, value, and ir_types of the nodes stored here. These always + # have the same length. The values at index i belong to node i. + kwargs: List[IRKWargsType] = attr.ib(factory=list) + values: List[IRType] = attr.ib(factory=list) + ir_types: List[IRLiteralType] = attr.ib(factory=list) + + # The indices of nodes which had forced values. # - # * ``None``, indicating we don't know yet. - # * A ``Branch`` object indicating that there is a ``draw_bits`` - # call that we have seen take multiple outcomes there. - # * A ``Conclusion`` object indicating that ``conclude_test`` - # was called here. - transition = attr.ib(default=None) - - # A tree node is exhausted if every possible sequence of - # draws below it has been explored. We store this information - # on a field and update it when performing operations that - # could change the answer. + # Stored as None if no indices have been forced, purely for space saving + # reasons (we force quite rarely). + __forced: Optional[set] = attr.ib(default=None, init=False) + + # What happens next after drawing these nodes. (conceptually, "what is the + # child/children of the last node stored here"). # - # A node may start exhausted, e.g. because it it leads - # immediately to a conclusion, but can only go from - # non-exhausted to exhausted when one of its children - # becomes exhausted or it is marked as a conclusion. + # One of: + # - None (we don't know yet) + # - Branch (we have seen multiple possible outcomes here) + # - Conclusion (ConjectureData.conclude_test was called here) + # - Killed (this branch is valid and may even have children, but should not + # be explored when generating novel prefixes) + transition: Union[None, Branch, Conclusion, Killed] = attr.ib(default=None) + + # A tree node is exhausted if every possible sequence of draws below it has + # been explored. We only update this when performing operations that could + # change the answer. # - # Therefore we only need to check whether we need to update - # this field when the node is first created in ``split_at`` - # or when we have walked a path through this node to a - # conclusion in ``TreeRecordingObserver``. - is_exhausted = attr.ib(default=False, init=False) + # See also TreeNode.check_exhausted. + is_exhausted: bool = attr.ib(default=False, init=False) @property def forced(self): @@ -130,17 +369,21 @@ class TreeNode: return self.__forced def mark_forced(self, i): - """Note that the value at index ``i`` was forced.""" + """ + Note that the draw at node i was forced. + """ assert 0 <= i < len(self.values) if self.__forced is None: self.__forced = set() self.__forced.add(i) def split_at(self, i): - """Splits the tree so that it can incorporate - a decision at the ``draw_bits`` call corresponding - to position ``i``, or raises ``Flaky`` if that was - meant to be a forced node.""" + """ + Splits the tree so that it can incorporate a decision at the draw call + corresponding to the node at position i. + + Raises Flaky if node i was forced. + """ if i in self.forced: inconsistent_generation() @@ -150,26 +393,58 @@ class TreeNode: key = self.values[i] child = TreeNode( - bit_lengths=self.bit_lengths[i + 1 :], + ir_types=self.ir_types[i + 1 :], + kwargs=self.kwargs[i + 1 :], values=self.values[i + 1 :], transition=self.transition, ) - self.transition = Branch(bit_length=self.bit_lengths[i], children={key: child}) + self.transition = Branch( + kwargs=self.kwargs[i], ir_type=self.ir_types[i], children={key: child} + ) if self.__forced is not None: child.__forced = {j - i - 1 for j in self.__forced if j > i} self.__forced = {j for j in self.__forced if j < i} child.check_exhausted() + del self.ir_types[i:] del self.values[i:] - del self.bit_lengths[i:] - assert len(self.values) == len(self.bit_lengths) == i + del self.kwargs[i:] + assert len(self.values) == len(self.kwargs) == len(self.ir_types) == i def check_exhausted(self): - """Recalculates ``self.is_exhausted`` if necessary then returns - it.""" + """ + Recalculates is_exhausted if necessary, and then returns it. + + A node is exhausted if: + - Its transition is Conclusion or Killed + - It has the maximum number of children (i.e. we have found all of its + possible children), and all its children are exhausted + + Therefore, we only need to compute this for a node when: + - We first create it in split_at + - We set its transition to either Conclusion or Killed + (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch) + - We exhaust any of its children + """ + if ( + # a node cannot go from is_exhausted -> not is_exhausted. not self.is_exhausted - and len(self.forced) == len(self.values) + # if we don't know what happens after this node, we don't have + # enough information to tell if it's exhausted. and self.transition is not None + # if there are still any nodes left which are the only child of their + # parent (len(self.values) > 0), then this TreeNode must be not + # exhausted, unless all of those nodes were forced. + # + # This is because we maintain an invariant of only adding nodes to + # DataTree which have at least 2 possible values, so we know that if + # they do not have any siblings that we still have more choices to + # discover. + # + # (We actually *do* currently add single-valued nodes to the tree, + # but immediately split them into a transition to avoid falsifying + # this check. this is a bit of a hack.) + and len(self.forced) == len(self.values) ): if isinstance(self.transition, (Conclusion, Killed)): self.is_exhausted = True @@ -181,16 +456,159 @@ class TreeNode: class DataTree: - """Tracks the tree structure of a collection of ConjectureData - objects, for use in ConjectureRunner.""" + """ + A DataTree tracks the structured history of draws in some test function, + across multiple ConjectureData objects. + + This information is used by ConjectureRunner to generate novel prefixes of + this tree (see generate_novel_prefix). A novel prefix is a sequence of draws + which the tree has not seen before, and therefore the ConjectureRunner has + not generated as an input to the test function before. + + DataTree tracks the following: + + - Draws, at the ir level (with some ir_type, e.g. "integer") + - ConjectureData.draw_integer() + - ConjectureData.draw_float() + - ConjectureData.draw_string() + - ConjectureData.draw_boolean() + - ConjectureData.draw_bytes() + - Test conclusions (with some Status, e.g. Status.VALID) + - ConjectureData.conclude_test() + + A DataTree is — surprise — a *tree*. A node in this tree is either a draw with + some value, a test conclusion with some Status, or a special `Killed` value, + which denotes that further draws may exist beyond this node but should not be + considered worth exploring when generating novel prefixes. A node is a leaf + iff it is a conclusion or Killed. + + A branch from node A to node B indicates that we have previously seen some + sequence (a, b) of draws, where a and b are the values in nodes A and B. + Similar intuition holds for conclusion and Killed nodes. + + Examples + -------- + + To see how a DataTree gets built through successive sets of draws, consider + the following code that calls through to some ConjecutreData object `data`. + The first call can be either True or False, and the second call can be any + integer in the range [1, 3]. + + data.draw_boolean() + data.draw_integer(1, 3) + + To start, the corresponding DataTree object is completely empty. + + ┌──────┐ + │ root │ + └──────┘ + + We happen to draw True and then 2 in the above code. The tree tracks this. + (2 also connects to a child Conclusion node with Status.VALID since it's the + final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.) + + ┌──────┐ + │ root │ + └──┬───┘ + ┌──┴───┐ + │ True │ + └──┬───┘ + ┌──┴───┐ + │ 2 │ + └──────┘ + + This is a very boring tree so far! But now we happen to draw False and + then 1. This causes a split in the tree. Remember, DataTree tracks history + over all invocations of a function, not just one. The end goal is to know + what invocations haven't been tried yet, after all. + + ┌──────┐ + ┌───┤ root ├───┐ + │ └──────┘ │ + ┌──┴───┐ ┌─┴─────┐ + │ True │ │ False │ + └──┬───┘ └──┬────┘ + ┌─┴─┐ ┌─┴─┐ + │ 2 │ │ 1 │ + └───┘ └───┘ + + If we were to ask DataTree for a novel prefix at this point, it might + generate any of (True, 1), (True, 3), (False, 2), or (False, 3). + + Note that the novel prefix stops as soon as it generates a novel node. For + instance, if we had generated a novel prefix back when the tree was only + root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or + (False). But we could *not* have gotten (False, n), because both False and + n were novel at that point, and we stop at the first novel node — False. + + I won't belabor this example. Here's what the tree looks like when fully + explored: + + ┌──────┐ + ┌──────┤ root ├──────┐ + │ └──────┘ │ + ┌──┴───┐ ┌─┴─────┐ + ┌──┤ True ├──┐ ┌───┤ False ├──┐ + │ └──┬───┘ │ │ └──┬────┘ │ + ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + │ 1 │ │ 2 │ │ 3 │ │ 1 │ │ 2 │ │ 3 │ + └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + + You could imagine much more complicated trees than this arising in practice, + and indeed they do. In particular, the tree need not be balanced or 'nice' + like the tree above. For instance, + + b = data.draw_boolean() + if b: + data.draw_integer(1, 3) + + results in a tree with the entire right part lopped off, and False leading + straight to a conclusion node with Status.VALID. As another example, + + n = data.draw_integers() + assume(n >= 3) + data.draw_string() + + results in a tree with the 0, 1, and 2 nodes leading straight to a + conclusion node with Status.INVALID, and the rest branching off into all + the possibilities of draw_string. + + Notes + ----- + + The above examples are slightly simplified and are intended to convey + intuition. In practice, there are some implementation details to be aware + of. + + - In draw nodes, we store the kwargs used in addition to the value drawn. + E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5) + would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for + other kwargs omitted). + + The kwargs parameters have the potential to change both the range of + possible outputs of a node, and the probability distribution within that + range, so we need to use these when drawing in DataTree as well. We draw + values using these kwargs when (1) generating a novel value for a node + and (2) choosing a random child when traversing the tree. + + - For space efficiency, rather than tracking the full tree structure, we + store DataTree as a radix tree. This is conceptually equivalent (radix + trees can always be "unfolded" to the full tree) but it means the internal + representation may differ in practice. + + See TreeNode for more information. + """ def __init__(self): self.root = TreeNode() + self._children_cache = {} @property def is_exhausted(self): - """Returns True if every possible node is dead and thus the language - described must have been fully explored.""" + """ + Returns True if every node is exhausted, and therefore the tree has + been fully explored. + """ return self.root.is_exhausted def generate_novel_prefix(self, random): @@ -201,26 +619,43 @@ class DataTree: for it to be uniform at random, but previous attempts to do that have proven too expensive. """ + assert not self.is_exhausted novel_prefix = bytearray() - def append_int(n_bits, value): - novel_prefix.extend(int_to_bytes(value, bits_to_bytes(n_bits))) + def append_buf(buf): + novel_prefix.extend(buf) current_node = self.root while True: assert not current_node.is_exhausted - for i, (n_bits, value) in enumerate( - zip(current_node.bit_lengths, current_node.values) + for i, (ir_type, kwargs, value) in enumerate( + zip(current_node.ir_types, current_node.kwargs, current_node.values) ): if i in current_node.forced: - append_int(n_bits, value) + if ir_type == "float": + value = int_to_float(value) + (_value, buf) = self._draw( + ir_type, kwargs, forced=value, random=random + ) + append_buf(buf) else: + attempts = 0 while True: - k = random.getrandbits(n_bits) - if k != value: - append_int(n_bits, k) + if attempts <= 10: + (v, buf) = self._draw(ir_type, kwargs, random=random) + else: + (v, buf) = self._draw_from_cache( + ir_type, kwargs, key=id(current_node), random=random + ) + + if v != value: + append_buf(buf) break + attempts += 1 + self._reject_child( + ir_type, kwargs, child=v, key=id(current_node) + ) # We've now found a value that is allowed to # vary, so what follows is not fixed. return bytes(novel_prefix) @@ -230,27 +665,37 @@ class DataTree: return bytes(novel_prefix) branch = current_node.transition assert isinstance(branch, Branch) - n_bits = branch.bit_length - check_counter = 0 + attempts = 0 while True: - k = random.getrandbits(n_bits) + if attempts <= 10: + (v, buf) = self._draw( + branch.ir_type, branch.kwargs, random=random + ) + else: + (v, buf) = self._draw_from_cache( + branch.ir_type, branch.kwargs, key=id(branch), random=random + ) try: - child = branch.children[k] + child = branch.children[v] except KeyError: - append_int(n_bits, k) + append_buf(buf) return bytes(novel_prefix) if not child.is_exhausted: - append_int(n_bits, k) + append_buf(buf) current_node = child break - check_counter += 1 + attempts += 1 + self._reject_child( + branch.ir_type, branch.kwargs, child=v, key=id(branch) + ) + # We don't expect this assertion to ever fire, but coverage # wants the loop inside to run if you have branch checking # on, hence the pragma. assert ( # pragma: no cover - check_counter != 1000 - or len(branch.children) < (2**n_bits) + attempts != 1000 + or len(branch.children) < branch.max_children or any(not v.is_exhausted for v in branch.children.values()) ) @@ -274,13 +719,22 @@ class DataTree: or ``start_example`` as these are not currently recorded in the tree. This will likely change in future.""" node = self.root + + def draw(ir_type, kwargs, *, forced=None): + draw_func = getattr(data, f"draw_{ir_type}") + value = draw_func(**kwargs, forced=forced) + + if ir_type == "float": + value = float_to_int(value) + return value + try: while True: - for i, (n_bits, previous) in enumerate( - zip(node.bit_lengths, node.values) + for i, (ir_type, kwargs, previous) in enumerate( + zip(node.ir_types, node.kwargs, node.values) ): - v = data.draw_bits( - n_bits, forced=node.values[i] if i in node.forced else None + v = draw( + ir_type, kwargs, forced=previous if i in node.forced else None ) if v != previous: raise PreviouslyUnseenBehaviour @@ -290,7 +744,7 @@ class DataTree: elif node.transition is None: raise PreviouslyUnseenBehaviour elif isinstance(node.transition, Branch): - v = data.draw_bits(node.transition.bit_length) + v = draw(node.transition.ir_type, node.transition.kwargs) try: node = node.transition.children[v] except KeyError as err: @@ -305,6 +759,97 @@ class DataTree: def new_observer(self): return TreeRecordingObserver(self) + def _draw(self, ir_type, kwargs, *, random, forced=None): + # we should possibly pull out BUFFER_SIZE to a common file to avoid this + # circular import. + from hypothesis.internal.conjecture.engine import BUFFER_SIZE + + cd = ConjectureData(max_length=BUFFER_SIZE, prefix=b"", random=random) + draw_func = getattr(cd, f"draw_{ir_type}") + + value = draw_func(**kwargs, forced=forced) + buf = cd.buffer + + # using floats as keys into branch.children breaks things, because + # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are + # in fact distinct child branches. + # To distinguish floats here we'll use their bits representation. This + # entails some bookkeeping such that we're careful about when the + # float key is in its bits form (as a key into branch.children) and + # when it is in its float form (as a value we want to write to the + # buffer), and converting between the two forms as appropriate. + if ir_type == "float": + value = float_to_int(value) + return (value, buf) + + def _get_children_cache(self, ir_type, kwargs, *, key): + # cache the state of the children generator per node/branch (passed as + # `key` here), such that we track which children we've already tried + # for this branch across draws. + # We take advantage of python generators here as one-way iterables, + # so each time we iterate we implicitly store our position in the + # children generator and don't re-draw children. `children` is the + # concrete list of children draw from the generator that we will work + # with. Whenever we need to top up this list, we will draw a new value + # from the generator. + if key not in self._children_cache: + generator = all_children(ir_type, kwargs) + children = [] + rejected = set() + self._children_cache[key] = (generator, children, rejected) + + return self._children_cache[key] + + def _draw_from_cache(self, ir_type, kwargs, *, key, random): + (generator, children, rejected) = self._get_children_cache( + ir_type, kwargs, key=key + ) + # Keep a stock of 100 potentially-valid children at all times. + # This number is chosen to balance memory/speed vs randomness. Ideally + # we would sample uniformly from all not-yet-rejected children, but + # computing and storing said children is not free. + # no-branch because coverage of the fall-through case here is a bit + # annoying. + if len(children) < 100: # pragma: no branch + for v in generator: + if ir_type == "float": + v = float_to_int(v) + if v in rejected: + continue + children.append(v) + if len(children) >= 100: + break + + forced = random.choice(children) + if ir_type == "float": + forced = int_to_float(forced) + (value, buf) = self._draw(ir_type, kwargs, forced=forced, random=random) + return (value, buf) + + def _reject_child(self, ir_type, kwargs, *, child, key): + (_generator, children, rejected) = self._get_children_cache( + ir_type, kwargs, key=key + ) + rejected.add(child) + # we remove a child from the list of possible children *only* when it is + # rejected, and not when it is initially drawn in _draw_from_cache. The + # reason is that a child being drawn does not guarantee that child will + # be used in a way such that it is written back to the tree, so it needs + # to be available for future draws until we are certain it has been + # used. + # + # For instance, if we generated novel prefixes in a loop (but never used + # those prefixes to generate new values!) then we don't want to remove + # the drawn children from the available pool until they are actually + # used. + # + # This does result in a small inefficiency: we may draw a child, + # immediately use it (so we know it cannot be drawn again), but still + # wait to draw and reject it here, because DataTree cannot guarantee + # the drawn child has been used. + if child in children: + children.remove(child) + class TreeRecordingObserver(DataObserver): def __init__(self, tree): @@ -313,13 +858,49 @@ class TreeRecordingObserver(DataObserver): self.__trail = [self.__current_node] self.killed = False - def draw_bits(self, n_bits, forced, value): + def draw_integer( + self, value: int, *, was_forced: bool, kwargs: IntegerKWargs + ) -> None: + self.draw_value("integer", value, was_forced=was_forced, kwargs=kwargs) + + def draw_float( + self, value: float, *, was_forced: bool, kwargs: FloatKWargs + ) -> None: + self.draw_value("float", value, was_forced=was_forced, kwargs=kwargs) + + def draw_string( + self, value: str, *, was_forced: bool, kwargs: StringKWargs + ) -> None: + self.draw_value("string", value, was_forced=was_forced, kwargs=kwargs) + + def draw_bytes( + self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs + ) -> None: + self.draw_value("bytes", value, was_forced=was_forced, kwargs=kwargs) + + def draw_boolean( + self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs + ) -> None: + self.draw_value("boolean", value, was_forced=was_forced, kwargs=kwargs) + + def draw_value( + self, + ir_type: IRLiteralType, + value: IRType, + *, + was_forced: bool, + kwargs: IRKWargsType, + ) -> None: i = self.__index_in_current_node self.__index_in_current_node += 1 node = self.__current_node - assert len(node.bit_lengths) == len(node.values) - if i < len(node.bit_lengths): - if n_bits != node.bit_lengths[i]: + + if isinstance(value, float): + value = float_to_int(value) + + assert len(node.kwargs) == len(node.values) == len(node.ir_types) + if i < len(node.values): + if ir_type != node.ir_types[i] or kwargs != node.kwargs[i]: inconsistent_generation() # Note that we don't check whether a previously # forced value is now free. That will be caught @@ -327,23 +908,43 @@ class TreeRecordingObserver(DataObserver): # may pass silently. This is acceptable because it # means we skip a hash set lookup on every # draw and that's a pretty niche failure mode. - if forced and i not in node.forced: + if was_forced and i not in node.forced: inconsistent_generation() if value != node.values[i]: node.split_at(i) assert i == len(node.values) new_node = TreeNode() - branch = node.transition - branch.children[value] = new_node + node.transition.children[value] = new_node self.__current_node = new_node self.__index_in_current_node = 0 else: trans = node.transition if trans is None: - node.bit_lengths.append(n_bits) + node.ir_types.append(ir_type) + node.kwargs.append(kwargs) node.values.append(value) - if forced: + if was_forced: node.mark_forced(i) + # generate_novel_prefix assumes the following invariant: any one + # of the series of draws in a particular node can vary, i.e. the + # max number of children is at least 2. However, some draws are + # pseudo-choices and only have a single value, such as + # integers(0, 0). + # + # Currently, we address this by forcefully splitting such + # single-valued nodes into a transition when we see them. An + # exception to this is if it was forced: forced pseudo-choices + # do not cause the above issue because they inherently cannot + # vary, and moreover they trip other invariants about never + # splitting forced nodes. + # + # An alternative is not writing such choices to the tree at + # all, and thus guaranteeing that each node has at least 2 max + # children. + if compute_max_children(ir_type, kwargs) == 1 and not was_forced: + node.split_at(i) + self.__current_node = node.transition.children[value] + self.__index_in_current_node = 0 elif isinstance(trans, Conclusion): assert trans.status != Status.OVERRUN # We tried to draw where history says we should have @@ -351,7 +952,7 @@ class TreeRecordingObserver(DataObserver): inconsistent_generation() else: assert isinstance(trans, Branch), trans - if n_bits != trans.bit_length: + if ir_type != trans.ir_type or kwargs != trans.kwargs: inconsistent_generation() try: self.__current_node = trans.children[value] diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py index 61f9d742bb..5e77437a78 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py @@ -101,13 +101,12 @@ class Sampler: table: List[Tuple[int, int, float]] # (base_idx, alt_idx, alt_chance) - def __init__(self, weights: Sequence[float]): - n = len(weights) + def __init__(self, weights: Sequence[float], *, observe: bool = True): + self.observe = observe + n = len(weights) table: "list[list[int | float | None]]" = [[i, None, None] for i in range(n)] - total = sum(weights) - num_type = type(total) zero = num_type(0) # type: ignore @@ -179,7 +178,7 @@ class Sampler: ) ) base, alternate, alternate_chance = data.choice( - self.table, forced=forced_choice + self.table, forced=forced_choice, observe=self.observe ) forced_use_alternate = None if forced is not None: @@ -189,7 +188,9 @@ class Sampler: forced_use_alternate = forced == alternate and alternate_chance > 0 assert forced == base or forced_use_alternate - use_alternate = data.draw_boolean(alternate_chance, forced=forced_use_alternate) + use_alternate = data.draw_boolean( + alternate_chance, forced=forced_use_alternate, observe=self.observe + ) data.stop_example() if use_alternate: assert forced is None or alternate == forced, (forced, alternate) @@ -200,7 +201,7 @@ class Sampler: INT_SIZES = (8, 16, 32, 64, 128) -INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5)) +INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False) class many: @@ -223,6 +224,7 @@ class many: average_size: Union[int, float], *, forced: Optional[int] = None, + observe: bool = True, ) -> None: assert 0 <= min_size <= average_size <= max_size assert forced is None or min_size <= forced <= max_size @@ -236,17 +238,17 @@ class many: self.drawn = False self.force_stop = False self.rejected = False + self.observe = observe def more(self) -> bool: """Should I draw another element to add to the collection?""" if self.drawn: - self.data.stop_example(discard=self.rejected) + self.data.stop_example() self.drawn = True self.rejected = False self.data.start_example(ONE_FROM_MANY_LABEL) - if self.min_size == self.max_size: # if we have to hit an exact size, draw unconditionally until that # point, and no further. @@ -265,7 +267,7 @@ class many: elif self.forced_size is not None: forced_result = self.count < self.forced_size should_continue = self.data.draw_boolean( - self.p_continue, forced=forced_result + self.p_continue, forced=forced_result, observe=self.observe ) if should_continue: diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py index 8ab02d0b39..97dd2bf9b3 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py @@ -117,7 +117,7 @@ from hypothesis.strategies._internal.collections import ( from hypothesis.strategies._internal.deferred import DeferredStrategy from hypothesis.strategies._internal.functions import FunctionStrategy from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies -from hypothesis.strategies._internal.misc import just, none, nothing +from hypothesis.strategies._internal.misc import BooleansStrategy, just, none, nothing from hypothesis.strategies._internal.numbers import ( IntegersStrategy, Real, @@ -158,14 +158,14 @@ else: @cacheable -@defines_strategy() +@defines_strategy(force_reusable_values=True) def booleans() -> SearchStrategy[bool]: """Returns a strategy which generates instances of :class:`python:bool`. Examples from this strategy will shrink towards ``False`` (i.e. shrinking will replace ``True`` with ``False`` where possible). """ - return SampledFromStrategy([False, True], repr_="booleans()") + return BooleansStrategy() @overload diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py index ad37107f73..3d0b0c97e0 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py @@ -116,3 +116,11 @@ def nothing() -> SearchStrategy: Examples from this strategy do not shrink (because there are none). """ return NOTHING + + +class BooleansStrategy(SearchStrategy): + def do_draw(self, data): + return data.draw_boolean() + + def __repr__(self): + return "booleans()" diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index 3d7dd6774b..843784dc96 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -__version_info__ = (6, 98, 0) +__version_info__ = (6, 98, 2) __version__ = ".".join(map(str, __version_info__)) diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make index 5b197c3343..03a62edeb0 100644 --- a/contrib/python/hypothesis/py3/ya.make +++ b/contrib/python/hypothesis/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.98.0) +VERSION(6.98.2) LICENSE(MPL-2.0) |