aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2024-02-21 10:09:19 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2024-02-21 10:19:17 +0300
commit31031664807af57b64c42818e69cf4fafdcd6b75 (patch)
tree0c3c5008d7c14d6eefe21d4e6237e22625516282
parenta42e56882cf1b2f89af4a517a8e25fe3a67cebfc (diff)
downloadydb-31031664807af57b64c42818e69cf4fafdcd6b75.tar.gz
Intermediate changes
-rw-r--r--contrib/libs/libpq/COPYRIGHT2
-rw-r--r--contrib/libs/libpq/src/common/scram-common.c11
-rw-r--r--contrib/libs/libpq/src/common/wchar.c1
-rw-r--r--contrib/libs/libpq/src/include/mb/pg_wchar.h69
-rw-r--r--contrib/libs/libpq/src/include/pg_config-linux.h17
-rw-r--r--contrib/libs/libpq/src/include/utils/ascii.h84
-rw-r--r--contrib/libs/libpq/src/interfaces/libpq/fe-exec.c65
-rw-r--r--contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c9
-rw-r--r--contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c99
-rw-r--r--contrib/libs/libpq/src/interfaces/libpq/fe-secure.c7
-rw-r--r--contrib/libs/libpq/src/interfaces/libpq/libpq-int.h7
-rw-r--r--contrib/libs/libpq/src/port/pg_config_paths.h18
-rw-r--r--contrib/libs/libpq/ya.make4
-rw-r--r--contrib/python/hypothesis/py3/.dist-info/METADATA2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py189
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py801
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py22
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py6
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py8
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/version.py2
-rw-r--r--contrib/python/hypothesis/py3/ya.make2
21 files changed, 1116 insertions, 309 deletions
diff --git a/contrib/libs/libpq/COPYRIGHT b/contrib/libs/libpq/COPYRIGHT
index d33e0f599d..16f9b46a3f 100644
--- a/contrib/libs/libpq/COPYRIGHT
+++ b/contrib/libs/libpq/COPYRIGHT
@@ -1,7 +1,7 @@
PostgreSQL Database Management System
(formerly known as Postgres, then as Postgres95)
-Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
Portions Copyright (c) 1994, The Regents of the University of California
diff --git a/contrib/libs/libpq/src/common/scram-common.c b/contrib/libs/libpq/src/common/scram-common.c
index ef997ef684..22ad929218 100644
--- a/contrib/libs/libpq/src/common/scram-common.c
+++ b/contrib/libs/libpq/src/common/scram-common.c
@@ -22,6 +22,9 @@
#include "common/base64.h"
#include "common/hmac.h"
#include "common/scram-common.h"
+#ifndef FRONTEND
+#error #include "miscadmin.h"
+#endif
#include "port/pg_bswap.h"
/*
@@ -73,6 +76,14 @@ scram_SaltedPassword(const char *password,
/* Subsequent iterations */
for (i = 2; i <= iterations; i++)
{
+#ifndef FRONTEND
+ /*
+ * Make sure that this is interruptible as scram_iterations could be
+ * set to a large value.
+ */
+ CHECK_FOR_INTERRUPTS();
+#endif
+
if (pg_hmac_init(hmac_ctx, (uint8 *) password, password_len) < 0 ||
pg_hmac_update(hmac_ctx, (uint8 *) Ui_prev, key_length) < 0 ||
pg_hmac_final(hmac_ctx, Ui, key_length) < 0)
diff --git a/contrib/libs/libpq/src/common/wchar.c b/contrib/libs/libpq/src/common/wchar.c
index fb9d9f5c85..fbac11deb4 100644
--- a/contrib/libs/libpq/src/common/wchar.c
+++ b/contrib/libs/libpq/src/common/wchar.c
@@ -13,6 +13,7 @@
#include "c.h"
#include "mb/pg_wchar.h"
+#include "utils/ascii.h"
/*
diff --git a/contrib/libs/libpq/src/include/mb/pg_wchar.h b/contrib/libs/libpq/src/include/mb/pg_wchar.h
index 25276b199f..06a56a41bb 100644
--- a/contrib/libs/libpq/src/include/mb/pg_wchar.h
+++ b/contrib/libs/libpq/src/include/mb/pg_wchar.h
@@ -19,8 +19,6 @@
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
-#include "port/simd.h"
-
/*
* The pg_wchar type
*/
@@ -702,71 +700,4 @@ extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
#endif
-
-/*
- * Verify a chunk of bytes for valid ASCII.
- *
- * Returns false if the input contains any zero bytes or bytes with the
- * high-bit set. Input len must be a multiple of the chunk size (8 or 16).
- */
-static inline bool
-is_valid_ascii(const unsigned char *s, int len)
-{
- const unsigned char *const s_end = s + len;
- Vector8 chunk;
- Vector8 highbit_cum = vector8_broadcast(0);
-#ifdef USE_NO_SIMD
- Vector8 zero_cum = vector8_broadcast(0x80);
-#endif
-
- Assert(len % sizeof(chunk) == 0);
-
- while (s < s_end)
- {
- vector8_load(&chunk, s);
-
- /* Capture any zero bytes in this chunk. */
-#ifdef USE_NO_SIMD
-
- /*
- * First, add 0x7f to each byte. This sets the high bit in each byte,
- * unless it was a zero. If any resulting high bits are zero, the
- * corresponding high bits in the zero accumulator will be cleared.
- *
- * If none of the bytes in the chunk had the high bit set, the max
- * value each byte can have after the addition is 0x7f + 0x7f = 0xfe,
- * and we don't need to worry about carrying over to the next byte. If
- * any input bytes did have the high bit set, it doesn't matter
- * because we check for those separately.
- */
- zero_cum &= (chunk + vector8_broadcast(0x7F));
-#else
-
- /*
- * Set all bits in each lane of the highbit accumulator where input
- * bytes are zero.
- */
- highbit_cum = vector8_or(highbit_cum,
- vector8_eq(chunk, vector8_broadcast(0)));
-#endif
-
- /* Capture all set bits in this chunk. */
- highbit_cum = vector8_or(highbit_cum, chunk);
-
- s += sizeof(chunk);
- }
-
- /* Check if any high bits in the high bit accumulator got set. */
- if (vector8_is_highbit_set(highbit_cum))
- return false;
-
-#ifdef USE_NO_SIMD
- /* Check if any high bits in the zero accumulator got cleared. */
- if (zero_cum != vector8_broadcast(0x80))
- return false;
-#endif
-
- return true;
-}
-
#endif /* PG_WCHAR_H */
diff --git a/contrib/libs/libpq/src/include/pg_config-linux.h b/contrib/libs/libpq/src/include/pg_config-linux.h
index afcf2967c5..b4592fa70a 100644
--- a/contrib/libs/libpq/src/include/pg_config-linux.h
+++ b/contrib/libs/libpq/src/include/pg_config-linux.h
@@ -32,7 +32,7 @@
#define BLCKSZ 8192
/* Saved arguments from configure */
-#define CONFIGURE_ARGS " '--prefix=/var/empty/postgresql-16.1' '--with-openssl' '--with-libxml' '--with-icu' '--sysconfdir=/etc' '--libdir=$(lib)/lib' '--with-system-tzdata=/var/empty/tzdata-2022f/share/zoneinfo' '--enable-debug' '--with-systemd' '--with-ossp-uuid' '--with-lz4' '--with-gssapi' '--without-gssapi' 'CC=cc' 'CXX=g++' 'PKG_CONFIG=pkg-config' 'PKG_CONFIG_PATH=/var/empty/libxcrypt-4.4.30/lib/pkgconfig:/var/empty/zlib-1.2.13-dev/lib/pkgconfig:/var/empty/ncurses-6.3-p20220507-dev/lib/pkgconfig:/var/empty/openssl-3.0.7-dev/lib/pkgconfig:/var/empty/libxml2-2.10.3-dev/lib/pkgconfig:/var/empty/icu4c-72.1-dev/lib/pkgconfig:/var/empty/lz4-1.9.4-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/share/pkgconfig:/var/empty/libkrb5-1.20-dev/lib/pkgconfig:/var/empty/libossp-uuid-1.6.2/lib/pkgconfig'"
+#define CONFIGURE_ARGS " '--prefix=/var/empty/postgresql-16.2' '--with-openssl' '--with-libxml' '--with-icu' '--sysconfdir=/etc' '--libdir=$(lib)/lib' '--with-system-tzdata=/var/empty/tzdata-2022f/share/zoneinfo' '--enable-debug' '--with-systemd' '--with-ossp-uuid' '--with-lz4' '--with-gssapi' '--without-gssapi' 'CC=cc' 'CXX=g++' 'PKG_CONFIG=pkg-config' 'PKG_CONFIG_PATH=/var/empty/libxcrypt-4.4.30/lib/pkgconfig:/var/empty/zlib-1.2.13-dev/lib/pkgconfig:/var/empty/ncurses-6.3-p20220507-dev/lib/pkgconfig:/var/empty/openssl-3.0.7-dev/lib/pkgconfig:/var/empty/libxml2-2.10.3-dev/lib/pkgconfig:/var/empty/icu4c-72.1-dev/lib/pkgconfig:/var/empty/lz4-1.9.4-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/lib/pkgconfig:/var/empty/systemd-251.7-dev/share/pkgconfig:/var/empty/libkrb5-1.20-dev/lib/pkgconfig:/var/empty/libossp-uuid-1.6.2/lib/pkgconfig'"
/* Define to the default TCP port number on which the server listens and to
which clients will try to connect. This can be overridden at run-time, but
@@ -71,9 +71,6 @@
/* Define to 1 if you have the `backtrace_symbols' function. */
#define HAVE_BACKTRACE_SYMBOLS 1
-/* Define to 1 if you have the `BIO_get_data' function. */
-#define HAVE_BIO_GET_DATA 1
-
/* Define to 1 if you have the `BIO_meth_new' function. */
#define HAVE_BIO_METH_NEW 1
@@ -607,7 +604,7 @@
#define PACKAGE_NAME "PostgreSQL"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PostgreSQL 16.1"
+#define PACKAGE_STRING "PostgreSQL 16.2"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "postgresql"
@@ -616,7 +613,7 @@
#define PACKAGE_URL "https://www.postgresql.org/"
/* Define to the version of this package. */
-#define PACKAGE_VERSION "16.1"
+#define PACKAGE_VERSION "16.2"
/* Define to the name of a signed 128-bit integer type. */
#define PG_INT128_TYPE __int128
@@ -635,7 +632,7 @@
#define PG_MAJORVERSION_NUM 16
/* PostgreSQL minor version number */
-#define PG_MINORVERSION_NUM 1
+#define PG_MINORVERSION_NUM 2
/* Define to best printf format archetype, usually gnu_printf if available. */
#define PG_PRINTF_ATTRIBUTE gnu_printf
@@ -644,13 +641,13 @@
#define PG_USE_STDBOOL 1
/* PostgreSQL version as a string */
-#define PG_VERSION "16.1"
+#define PG_VERSION "16.2"
/* PostgreSQL version as a number */
-#define PG_VERSION_NUM 160001
+#define PG_VERSION_NUM 160002
/* A string containing the version number, platform, and C compiler */
-#define PG_VERSION_STR "PostgreSQL 16.1 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 11.3.0, 64-bit"
+#define PG_VERSION_STR "PostgreSQL 16.2 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 11.3.0, 64-bit"
/* Define to 1 to allow profiling output to be saved separately for each
process. */
diff --git a/contrib/libs/libpq/src/include/utils/ascii.h b/contrib/libs/libpq/src/include/utils/ascii.h
new file mode 100644
index 0000000000..7df024dad3
--- /dev/null
+++ b/contrib/libs/libpq/src/include/utils/ascii.h
@@ -0,0 +1,84 @@
+/*-----------------------------------------------------------------------
+ * ascii.h
+ *
+ * Portions Copyright (c) 1999-2023, PostgreSQL Global Development Group
+ *
+ * src/include/utils/ascii.h
+ *
+ *-----------------------------------------------------------------------
+ */
+
+#ifndef _ASCII_H_
+#define _ASCII_H_
+
+#include "port/simd.h"
+
+extern void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz);
+
+/*
+ * Verify a chunk of bytes for valid ASCII.
+ *
+ * Returns false if the input contains any zero bytes or bytes with the
+ * high-bit set. Input len must be a multiple of the chunk size (8 or 16).
+ */
+static inline bool
+is_valid_ascii(const unsigned char *s, int len)
+{
+ const unsigned char *const s_end = s + len;
+ Vector8 chunk;
+ Vector8 highbit_cum = vector8_broadcast(0);
+#ifdef USE_NO_SIMD
+ Vector8 zero_cum = vector8_broadcast(0x80);
+#endif
+
+ Assert(len % sizeof(chunk) == 0);
+
+ while (s < s_end)
+ {
+ vector8_load(&chunk, s);
+
+ /* Capture any zero bytes in this chunk. */
+#ifdef USE_NO_SIMD
+
+ /*
+ * First, add 0x7f to each byte. This sets the high bit in each byte,
+ * unless it was a zero. If any resulting high bits are zero, the
+ * corresponding high bits in the zero accumulator will be cleared.
+ *
+ * If none of the bytes in the chunk had the high bit set, the max
+ * value each byte can have after the addition is 0x7f + 0x7f = 0xfe,
+ * and we don't need to worry about carrying over to the next byte. If
+ * any input bytes did have the high bit set, it doesn't matter
+ * because we check for those separately.
+ */
+ zero_cum &= (chunk + vector8_broadcast(0x7F));
+#else
+
+ /*
+ * Set all bits in each lane of the highbit accumulator where input
+ * bytes are zero.
+ */
+ highbit_cum = vector8_or(highbit_cum,
+ vector8_eq(chunk, vector8_broadcast(0)));
+#endif
+
+ /* Capture all set bits in this chunk. */
+ highbit_cum = vector8_or(highbit_cum, chunk);
+
+ s += sizeof(chunk);
+ }
+
+ /* Check if any high bits in the high bit accumulator got set. */
+ if (vector8_is_highbit_set(highbit_cum))
+ return false;
+
+#ifdef USE_NO_SIMD
+ /* Check if any high bits in the zero accumulator got cleared. */
+ if (zero_cum != vector8_broadcast(0x80))
+ return false;
+#endif
+
+ return true;
+}
+
+#endif /* _ASCII_H_ */
diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c b/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c
index 26564955d0..fb11997eff 100644
--- a/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c
+++ b/contrib/libs/libpq/src/interfaces/libpq/fe-exec.c
@@ -841,6 +841,8 @@ pqSaveWriteError(PGconn *conn)
* using whatever is in conn->errorMessage. In any case, clear the async
* result storage, and update our notion of how much error text has been
* returned to the application.
+ *
+ * Note that in no case (not even OOM) do we return NULL.
*/
PGresult *
pqPrepareAsyncResult(PGconn *conn)
@@ -2112,19 +2114,12 @@ PQgetResult(PGconn *conn)
break;
case PGASYNC_READY:
-
- /*
- * For any query type other than simple query protocol, we advance
- * the command queue here. This is because for simple query
- * protocol we can get the READY state multiple times before the
- * command is actually complete, since the command string can
- * contain many queries. In simple query protocol, the queue
- * advance is done by fe-protocol3 when it receives ReadyForQuery.
- */
- if (conn->cmd_queue_head &&
- conn->cmd_queue_head->queryclass != PGQUERY_SIMPLE)
- pqCommandQueueAdvance(conn);
res = pqPrepareAsyncResult(conn);
+
+ /* Advance the queue as appropriate */
+ pqCommandQueueAdvance(conn, false,
+ res->resultStatus == PGRES_PIPELINE_SYNC);
+
if (conn->pipelineStatus != PQ_PIPELINE_OFF)
{
/*
@@ -2144,7 +2139,7 @@ PQgetResult(PGconn *conn)
* (In other words: we don't return a NULL after a pipeline
* sync.)
*/
- if (res && res->resultStatus == PGRES_PIPELINE_SYNC)
+ if (res->resultStatus == PGRES_PIPELINE_SYNC)
pqPipelineProcessQueue(conn);
}
else
@@ -3012,18 +3007,44 @@ PQexitPipelineMode(PGconn *conn)
/*
* pqCommandQueueAdvance
- * Remove one query from the command queue, when we receive
- * all results from the server that pertain to it.
+ * Remove one query from the command queue, if appropriate.
+ *
+ * If we have received all results corresponding to the head element
+ * in the command queue, remove it.
+ *
+ * In simple query protocol we must not advance the command queue until the
+ * ReadyForQuery message has been received. This is because in simple mode a
+ * command can have multiple queries, and we must process result for all of
+ * them before moving on to the next command.
+ *
+ * Another consideration is synchronization during error processing in
+ * extended query protocol: we refuse to advance the queue past a SYNC queue
+ * element, unless the result we've received is also a SYNC. In particular
+ * this protects us from advancing when an error is received at an
+ * inappropriate moment.
*/
void
-pqCommandQueueAdvance(PGconn *conn)
+pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery, bool gotSync)
{
PGcmdQueueEntry *prevquery;
if (conn->cmd_queue_head == NULL)
return;
- /* delink from queue */
+ /*
+ * If processing a query of simple query protocol, we only advance the
+ * queue when we receive the ReadyForQuery message for it.
+ */
+ if (conn->cmd_queue_head->queryclass == PGQUERY_SIMPLE && !isReadyForQuery)
+ return;
+
+ /*
+ * If we're waiting for a SYNC, don't advance the queue until we get one.
+ */
+ if (conn->cmd_queue_head->queryclass == PGQUERY_SYNC && !gotSync)
+ return;
+
+ /* delink element from queue */
prevquery = conn->cmd_queue_head;
conn->cmd_queue_head = conn->cmd_queue_head->next;
@@ -3031,7 +3052,7 @@ pqCommandQueueAdvance(PGconn *conn)
if (conn->cmd_queue_head == NULL)
conn->cmd_queue_tail = NULL;
- /* and make it recyclable */
+ /* and make the queue element recyclable */
prevquery->next = NULL;
pqRecycleCmdQueueEntry(conn, prevquery);
}
@@ -3240,6 +3261,14 @@ PQsendFlushRequest(PGconn *conn)
return 0;
}
+ /*
+ * Give the data a push (in pipeline mode, only if we're past the size
+ * threshold). In nonblock mode, don't complain if we're unable to send
+ * it all; PQgetResult() will do any additional flushing needed.
+ */
+ if (pqPipelineFlush(conn) < 0)
+ return 0;
+
return 1;
}
diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c b/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c
index 32b66d561c..9c4aa7e2c7 100644
--- a/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c
+++ b/contrib/libs/libpq/src/interfaces/libpq/fe-protocol3.c
@@ -236,13 +236,8 @@ pqParseInput3(PGconn *conn)
}
else
{
- /*
- * In simple query protocol, advance the command queue
- * (see PQgetResult).
- */
- if (conn->cmd_queue_head &&
- conn->cmd_queue_head->queryclass == PGQUERY_SIMPLE)
- pqCommandQueueAdvance(conn);
+ /* Advance the command queue and set us idle */
+ pqCommandQueueAdvance(conn, true, false);
conn->asyncStatus = PGASYNC_IDLE;
}
break;
diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c b/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c
index 5c220e0e69..8b845994a1 100644
--- a/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c
+++ b/contrib/libs/libpq/src/interfaces/libpq/fe-secure-openssl.c
@@ -207,7 +207,7 @@ rloop:
*/
goto rloop;
case SSL_ERROR_SYSCALL:
- if (n < 0)
+ if (n < 0 && SOCK_ERRNO != 0)
{
result_errno = SOCK_ERRNO;
if (result_errno == EPIPE ||
@@ -308,7 +308,13 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
n = 0;
break;
case SSL_ERROR_SYSCALL:
- if (n < 0)
+
+ /*
+ * If errno is still zero then assume it's a read EOF situation,
+ * and report EOF. (This seems possible because SSL_write can
+ * also do reads.)
+ */
+ if (n < 0 && SOCK_ERRNO != 0)
{
result_errno = SOCK_ERRNO;
if (result_errno == EPIPE || result_errno == ECONNRESET)
@@ -1523,11 +1529,12 @@ open_client_SSL(PGconn *conn)
* was using the system CA pool. For other errors, log
* them using the normal SYSCALL logging.
*/
- if (!save_errno && vcode == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY &&
+ if (save_errno == 0 &&
+ vcode == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY &&
strcmp(conn->sslrootcert, "system") == 0)
libpq_append_conn_error(conn, "SSL error: certificate verify failed: %s",
X509_verify_cert_error_string(vcode));
- else if (r == -1)
+ else if (r == -1 && save_errno != 0)
libpq_append_conn_error(conn, "SSL SYSCALL error: %s",
SOCK_STRERROR(save_errno, sebuf, sizeof(sebuf)));
else
@@ -1834,11 +1841,7 @@ PQsslAttribute(PGconn *conn, const char *attribute_name)
* to retry; do we need to adopt their logic for that?
*/
-#ifndef HAVE_BIO_GET_DATA
-#define BIO_get_data(bio) (bio->ptr)
-#define BIO_set_data(bio, data) (bio->ptr = data)
-#endif
-
+/* protected by ssl_config_mutex */
static BIO_METHOD *my_bio_methods;
static int
@@ -1846,7 +1849,7 @@ my_sock_read(BIO *h, char *buf, int size)
{
int res;
- res = pqsecure_raw_read((PGconn *) BIO_get_data(h), buf, size);
+ res = pqsecure_raw_read((PGconn *) BIO_get_app_data(h), buf, size);
BIO_clear_retry_flags(h);
if (res < 0)
{
@@ -1876,7 +1879,7 @@ my_sock_write(BIO *h, const char *buf, int size)
{
int res;
- res = pqsecure_raw_write((PGconn *) BIO_get_data(h), buf, size);
+ res = pqsecure_raw_write((PGconn *) BIO_get_app_data(h), buf, size);
BIO_clear_retry_flags(h);
if (res < 0)
{
@@ -1904,6 +1907,15 @@ my_sock_write(BIO *h, const char *buf, int size)
static BIO_METHOD *
my_BIO_s_socket(void)
{
+ BIO_METHOD *res;
+
+#ifdef ENABLE_THREAD_SAFETY
+ if (pthread_mutex_lock(&ssl_config_mutex))
+ return NULL;
+#endif
+
+ res = my_bio_methods;
+
if (!my_bio_methods)
{
BIO_METHOD *biom = (BIO_METHOD *) BIO_s_socket();
@@ -1912,39 +1924,58 @@ my_BIO_s_socket(void)
my_bio_index = BIO_get_new_index();
if (my_bio_index == -1)
- return NULL;
+ goto err;
my_bio_index |= (BIO_TYPE_DESCRIPTOR | BIO_TYPE_SOURCE_SINK);
- my_bio_methods = BIO_meth_new(my_bio_index, "libpq socket");
- if (!my_bio_methods)
- return NULL;
+ res = BIO_meth_new(my_bio_index, "libpq socket");
+ if (!res)
+ goto err;
/*
* As of this writing, these functions never fail. But check anyway,
* like OpenSSL's own examples do.
*/
- if (!BIO_meth_set_write(my_bio_methods, my_sock_write) ||
- !BIO_meth_set_read(my_bio_methods, my_sock_read) ||
- !BIO_meth_set_gets(my_bio_methods, BIO_meth_get_gets(biom)) ||
- !BIO_meth_set_puts(my_bio_methods, BIO_meth_get_puts(biom)) ||
- !BIO_meth_set_ctrl(my_bio_methods, BIO_meth_get_ctrl(biom)) ||
- !BIO_meth_set_create(my_bio_methods, BIO_meth_get_create(biom)) ||
- !BIO_meth_set_destroy(my_bio_methods, BIO_meth_get_destroy(biom)) ||
- !BIO_meth_set_callback_ctrl(my_bio_methods, BIO_meth_get_callback_ctrl(biom)))
+ if (!BIO_meth_set_write(res, my_sock_write) ||
+ !BIO_meth_set_read(res, my_sock_read) ||
+ !BIO_meth_set_gets(res, BIO_meth_get_gets(biom)) ||
+ !BIO_meth_set_puts(res, BIO_meth_get_puts(biom)) ||
+ !BIO_meth_set_ctrl(res, BIO_meth_get_ctrl(biom)) ||
+ !BIO_meth_set_create(res, BIO_meth_get_create(biom)) ||
+ !BIO_meth_set_destroy(res, BIO_meth_get_destroy(biom)) ||
+ !BIO_meth_set_callback_ctrl(res, BIO_meth_get_callback_ctrl(biom)))
{
- BIO_meth_free(my_bio_methods);
- my_bio_methods = NULL;
- return NULL;
+ goto err;
}
#else
- my_bio_methods = malloc(sizeof(BIO_METHOD));
- if (!my_bio_methods)
- return NULL;
- memcpy(my_bio_methods, biom, sizeof(BIO_METHOD));
- my_bio_methods->bread = my_sock_read;
- my_bio_methods->bwrite = my_sock_write;
+ res = malloc(sizeof(BIO_METHOD));
+ if (!res)
+ goto err;
+ memcpy(res, biom, sizeof(BIO_METHOD));
+ res->bread = my_sock_read;
+ res->bwrite = my_sock_write;
#endif
}
- return my_bio_methods;
+
+ my_bio_methods = res;
+
+#ifdef ENABLE_THREAD_SAFETY
+ pthread_mutex_unlock(&ssl_config_mutex);
+#endif
+
+ return res;
+
+err:
+#ifdef HAVE_BIO_METH_NEW
+ if (res)
+ BIO_meth_free(res);
+#else
+ if (res)
+ free(res);
+#endif
+
+#ifdef ENABLE_THREAD_SAFETY
+ pthread_mutex_unlock(&ssl_config_mutex);
+#endif
+ return NULL;
}
/* This should exactly match OpenSSL's SSL_set_fd except for using my BIO */
@@ -1967,7 +1998,7 @@ my_SSL_set_fd(PGconn *conn, int fd)
SSLerr(SSL_F_SSL_SET_FD, ERR_R_BUF_LIB);
goto err;
}
- BIO_set_data(bio, conn);
+ BIO_set_app_data(bio, conn);
SSL_set_bio(conn->ssl, bio, bio);
BIO_set_fd(bio, fd, BIO_NOCLOSE);
diff --git a/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c b/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c
index 8069e38142..8444f19f08 100644
--- a/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c
+++ b/contrib/libs/libpq/src/interfaces/libpq/fe-secure.c
@@ -233,6 +233,8 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
int result_errno = 0;
char sebuf[PG_STRERROR_R_BUFLEN];
+ SOCK_ERRNO_SET(0);
+
n = recv(conn->sock, ptr, len, 0);
if (n < 0)
@@ -259,6 +261,11 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
"\tbefore or while processing the request.");
break;
+ case 0:
+ /* If errno didn't get set, treat it as regular EOF */
+ n = 0;
+ break;
+
default:
libpq_append_conn_error(conn, "could not receive data from server: %s",
SOCK_STRERROR(result_errno,
diff --git a/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h b/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h
index 0045f83cbf..a951f4992b 100644
--- a/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h
+++ b/contrib/libs/libpq/src/interfaces/libpq/libpq-int.h
@@ -585,8 +585,8 @@ struct pg_conn
int gss_SendLength; /* End of data available in gss_SendBuffer */
int gss_SendNext; /* Next index to send a byte from
* gss_SendBuffer */
- int gss_SendConsumed; /* Number of *unencrypted* bytes consumed
- * for current contents of gss_SendBuffer */
+ int gss_SendConsumed; /* Number of source bytes encrypted but
+ * not yet reported as sent */
char *gss_RecvBuffer; /* Received, encrypted data */
int gss_RecvLength; /* End of data available in gss_RecvBuffer */
char *gss_ResultBuffer; /* Decryption of data in gss_RecvBuffer */
@@ -705,7 +705,8 @@ extern void pqSaveMessageField(PGresult *res, char code,
extern void pqSaveParameterStatus(PGconn *conn, const char *name,
const char *value);
extern int pqRowProcessor(PGconn *conn, const char **errmsgp);
-extern void pqCommandQueueAdvance(PGconn *conn);
+extern void pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery,
+ bool gotSync);
extern int PQsendQueryContinue(PGconn *conn, const char *query);
/* === in fe-protocol3.c === */
diff --git a/contrib/libs/libpq/src/port/pg_config_paths.h b/contrib/libs/libpq/src/port/pg_config_paths.h
index 2e2db6a40c..ba4db149b6 100644
--- a/contrib/libs/libpq/src/port/pg_config_paths.h
+++ b/contrib/libs/libpq/src/port/pg_config_paths.h
@@ -1,12 +1,12 @@
-#define PGBINDIR "/var/empty/postgresql-16.1/bin"
-#define PGSHAREDIR "/var/empty/postgresql-16.1/share"
+#define PGBINDIR "/var/empty/postgresql-16.2/bin"
+#define PGSHAREDIR "/var/empty/postgresql-16.2/share"
#define SYSCONFDIR "/etc/postgresql"
-#define INCLUDEDIR "/var/empty/postgresql-16.1/include"
-#define PKGINCLUDEDIR "/var/empty/postgresql-16.1/include"
-#define INCLUDEDIRSERVER "/var/empty/postgresql-16.1/include/server"
+#define INCLUDEDIR "/var/empty/postgresql-16.2/include"
+#define PKGINCLUDEDIR "/var/empty/postgresql-16.2/include"
+#define INCLUDEDIRSERVER "/var/empty/postgresql-16.2/include/server"
#define LIBDIR "/var/empty/tmp/out/lib"
#define PKGLIBDIR "/var/empty/tmp/out/lib/postgresql"
-#define LOCALEDIR "/var/empty/postgresql-16.1/share/locale"
-#define DOCDIR "/var/empty/postgresql-16.1/share/doc/"
-#define HTMLDIR "/var/empty/postgresql-16.1/share/doc/"
-#define MANDIR "/var/empty/postgresql-16.1/share/man"
+#define LOCALEDIR "/var/empty/postgresql-16.2/share/locale"
+#define DOCDIR "/var/empty/postgresql-16.2/share/doc/"
+#define HTMLDIR "/var/empty/postgresql-16.2/share/doc/"
+#define MANDIR "/var/empty/postgresql-16.2/share/man"
diff --git a/contrib/libs/libpq/ya.make b/contrib/libs/libpq/ya.make
index d7acd293f3..66923a8a11 100644
--- a/contrib/libs/libpq/ya.make
+++ b/contrib/libs/libpq/ya.make
@@ -13,9 +13,9 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(16.1)
+VERSION(16.2)
-ORIGINAL_SOURCE(https://github.com/postgres/postgres/archive/REL_16_1.tar.gz)
+ORIGINAL_SOURCE(https://github.com/postgres/postgres/archive/REL_16_2.tar.gz)
PEERDIR(
contrib/libs/libc_compat
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA
index 01f8ae0a0a..21f727a546 100644
--- a/contrib/python/hypothesis/py3/.dist-info/METADATA
+++ b/contrib/python/hypothesis/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: hypothesis
-Version: 6.98.0
+Version: 6.98.2
Summary: A library for property-based testing
Home-page: https://hypothesis.works
Author: David R. MacIver and Zac Hatfield-Dodds
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
index bc850254b9..1f56d2ccf3 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
@@ -30,6 +30,7 @@ from typing import (
Set,
Tuple,
Type,
+ TypedDict,
TypeVar,
Union,
)
@@ -163,6 +164,8 @@ NASTY_FLOATS.extend([-x for x in NASTY_FLOATS])
FLOAT_INIT_LOGIC_CACHE = LRUReusedCache(4096)
+DRAW_STRING_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
+
class Example:
"""Examples track the hierarchical structure of draws from the byte stream,
@@ -794,6 +797,34 @@ global_test_counter = 0
MAX_DEPTH = 100
+class IntegerKWargs(TypedDict):
+ min_value: Optional[int]
+ max_value: Optional[int]
+ weights: Optional[Sequence[float]]
+ shrink_towards: int
+
+
+class FloatKWargs(TypedDict):
+ min_value: float
+ max_value: float
+ allow_nan: bool
+ smallest_nonzero_magnitude: float
+
+
+class StringKWargs(TypedDict):
+ intervals: IntervalSet
+ min_size: int
+ max_size: Optional[int]
+
+
+class BytesKWargs(TypedDict):
+ size: int
+
+
+class BooleanKWargs(TypedDict):
+ p: float
+
+
class DataObserver:
"""Observer class for recording the behaviour of a
ConjectureData object, primarily used for tracking
@@ -810,18 +841,34 @@ class DataObserver:
Note that this is called after ``freeze`` has completed.
"""
- def draw_bits(self, n_bits: int, *, forced: bool, value: int) -> None:
- """Called when ``draw_bits`` is called on on the
- observed ``ConjectureData``.
- * ``n_bits`` is the number of bits drawn.
- * ``forced`` is True if the corresponding
- draw was forced or ``False`` otherwise.
- * ``value`` is the result that ``draw_bits`` returned.
- """
-
def kill_branch(self) -> None:
"""Mark this part of the tree as not worth re-exploring."""
+ def draw_integer(
+ self, value: int, *, was_forced: bool, kwargs: IntegerKWargs
+ ) -> None:
+ pass
+
+ def draw_float(
+ self, value: float, *, was_forced: bool, kwargs: FloatKWargs
+ ) -> None:
+ pass
+
+ def draw_string(
+ self, value: str, *, was_forced: bool, kwargs: StringKWargs
+ ) -> None:
+ pass
+
+ def draw_bytes(
+ self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs
+ ) -> None:
+ pass
+
+ def draw_boolean(
+ self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs
+ ) -> None:
+ pass
+
@dataclass_transform()
@attr.s(slots=True)
@@ -995,7 +1042,7 @@ class PrimitiveProvider:
assert min_value is not None
assert max_value is not None
- sampler = Sampler(weights)
+ sampler = Sampler(weights, observe=False)
gap = max_value - shrink_towards
forced_idx = None
@@ -1023,7 +1070,7 @@ class PrimitiveProvider:
probe = shrink_towards + self._draw_unbounded_integer(
forced=None if forced is None else forced - shrink_towards
)
- self._cd.stop_example(discard=max_value < probe)
+ self._cd.stop_example()
return probe
if max_value is None:
@@ -1034,7 +1081,7 @@ class PrimitiveProvider:
probe = shrink_towards + self._draw_unbounded_integer(
forced=None if forced is None else forced - shrink_towards
)
- self._cd.stop_example(discard=probe < min_value)
+ self._cd.stop_example()
return probe
return self._draw_bounded_integer(
@@ -1091,7 +1138,7 @@ class PrimitiveProvider:
assert pos_clamper is not None
clamped = pos_clamper(result)
if clamped != result and not (math.isnan(result) and allow_nan):
- self._cd.stop_example(discard=True)
+ self._cd.stop_example()
self._cd.start_example(DRAW_FLOAT_LABEL)
self._draw_float(forced=clamped)
result = clamped
@@ -1113,7 +1160,7 @@ class PrimitiveProvider:
forced: Optional[str] = None,
) -> str:
if max_size is None:
- max_size = 10**10 # "arbitrarily large"
+ max_size = DRAW_STRING_DEFAULT_MAX_SIZE
assert forced is None or min_size <= len(forced) <= max_size
@@ -1129,6 +1176,7 @@ class PrimitiveProvider:
max_size=max_size,
average_size=average_size,
forced=None if forced is None else len(forced),
+ observe=False,
)
while elements.more():
forced_i: Optional[int] = None
@@ -1264,7 +1312,7 @@ class PrimitiveProvider:
probe = self._cd.draw_bits(
bits, forced=None if forced is None else abs(forced - center)
)
- self._cd.stop_example(discard=probe > gap)
+ self._cd.stop_example()
if above:
result = center + probe
@@ -1356,7 +1404,7 @@ class PrimitiveProvider:
]
nasty_floats = [f for f in NASTY_FLOATS + boundary_values if permitted(f)]
weights = [0.2 * len(nasty_floats)] + [0.8] * len(nasty_floats)
- sampler = Sampler(weights) if nasty_floats else None
+ sampler = Sampler(weights, observe=False) if nasty_floats else None
pos_clamper = neg_clamper = None
if sign_aware_lte(0.0, max_value):
@@ -1465,6 +1513,19 @@ class ConjectureData:
", frozen" if self.frozen else "",
)
+ # A bit of explanation of the `observe` argument in our draw_* functions.
+ #
+ # There are two types of draws: sub-ir and super-ir. For instance, some ir
+ # nodes use `many`, which in turn calls draw_boolean. But some strategies
+ # also use many, at the super-ir level. We don't want to write sub-ir draws
+ # to the DataTree (and consequently use them when computing novel prefixes),
+ # since they are fully recorded by writing the ir node itself.
+ # But super-ir draws are not included in the ir node, so we do want to write
+ # these to the tree.
+ #
+ # `observe` formalizes this distinction. The draw will only be written to
+ # the DataTree if observe is True.
+
def draw_integer(
self,
min_value: Optional[int] = None,
@@ -1474,6 +1535,7 @@ class ConjectureData:
weights: Optional[Sequence[float]] = None,
shrink_towards: int = 0,
forced: Optional[int] = None,
+ observe: bool = True,
) -> int:
# Validate arguments
if weights is not None:
@@ -1494,13 +1556,18 @@ class ConjectureData:
if forced is not None and max_value is not None:
assert forced <= max_value
- return self.provider.draw_integer(
- min_value=min_value,
- max_value=max_value,
- weights=weights,
- shrink_towards=shrink_towards,
- forced=forced,
- )
+ kwargs: IntegerKWargs = {
+ "min_value": min_value,
+ "max_value": max_value,
+ "weights": weights,
+ "shrink_towards": shrink_towards,
+ }
+ value = self.provider.draw_integer(**kwargs, forced=forced)
+ if observe:
+ self.observer.draw_integer(
+ value, was_forced=forced is not None, kwargs=kwargs
+ )
+ return value
def draw_float(
self,
@@ -1514,6 +1581,7 @@ class ConjectureData:
# width: Literal[16, 32, 64] = 64,
# exclude_min and exclude_max handled higher up,
forced: Optional[float] = None,
+ observe: bool = True,
) -> float:
assert smallest_nonzero_magnitude > 0
assert not math.isnan(min_value)
@@ -1523,13 +1591,18 @@ class ConjectureData:
assert allow_nan or not math.isnan(forced)
assert math.isnan(forced) or min_value <= forced <= max_value
- return self.provider.draw_float(
- min_value=min_value,
- max_value=max_value,
- allow_nan=allow_nan,
- smallest_nonzero_magnitude=smallest_nonzero_magnitude,
- forced=forced,
- )
+ kwargs: FloatKWargs = {
+ "min_value": min_value,
+ "max_value": max_value,
+ "allow_nan": allow_nan,
+ "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
+ }
+ value = self.provider.draw_float(**kwargs, forced=forced)
+ if observe:
+ self.observer.draw_float(
+ value, kwargs=kwargs, was_forced=forced is not None
+ )
+ return value
def draw_string(
self,
@@ -1538,19 +1611,44 @@ class ConjectureData:
min_size: int = 0,
max_size: Optional[int] = None,
forced: Optional[str] = None,
+ observe: bool = True,
) -> str:
assert forced is None or min_size <= len(forced)
- return self.provider.draw_string(
- intervals, min_size=min_size, max_size=max_size, forced=forced
- )
- def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes:
+ kwargs: StringKWargs = {
+ "intervals": intervals,
+ "min_size": min_size,
+ "max_size": max_size,
+ }
+ value = self.provider.draw_string(**kwargs, forced=forced)
+ if observe:
+ self.observer.draw_string(
+ value, kwargs=kwargs, was_forced=forced is not None
+ )
+ return value
+
+ def draw_bytes(
+ self,
+ # TODO move to min_size and max_size here.
+ size: int,
+ *,
+ forced: Optional[bytes] = None,
+ observe: bool = True,
+ ) -> bytes:
assert forced is None or len(forced) == size
assert size >= 0
- return self.provider.draw_bytes(size, forced=forced)
+ kwargs: BytesKWargs = {"size": size}
+ value = self.provider.draw_bytes(**kwargs, forced=forced)
+ if observe:
+ self.observer.draw_bytes(
+ value, kwargs=kwargs, was_forced=forced is not None
+ )
+ return value
- def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool:
+ def draw_boolean(
+ self, p: float = 0.5, *, forced: Optional[bool] = None, observe: bool = True
+ ) -> bool:
# Internally, we treat probabilities lower than 1 / 2**64 as
# unconditionally false.
#
@@ -1561,7 +1659,13 @@ class ConjectureData:
if forced is False:
assert p < (1 - 2 ** (-64))
- return self.provider.draw_boolean(p, forced=forced)
+ kwargs: BooleanKWargs = {"p": p}
+ value = self.provider.draw_boolean(**kwargs, forced=forced)
+ if observe:
+ self.observer.draw_boolean(
+ value, kwargs=kwargs, was_forced=forced is not None
+ )
+ return value
def as_result(self) -> Union[ConjectureResult, _Overrun]:
"""Convert the result of running this test into
@@ -1735,9 +1839,15 @@ class ConjectureData:
self.buffer = bytes(self.buffer)
self.observer.conclude_test(self.status, self.interesting_origin)
- def choice(self, values: Sequence[T], *, forced: Optional[T] = None) -> T:
+ def choice(
+ self,
+ values: Sequence[T],
+ *,
+ forced: Optional[T] = None,
+ observe: bool = True,
+ ) -> T:
forced_i = None if forced is None else values.index(forced)
- i = self.draw_integer(0, len(values) - 1, forced=forced_i)
+ i = self.draw_integer(0, len(values) - 1, forced=forced_i, observe=observe)
return values[i]
def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int:
@@ -1774,7 +1884,6 @@ class ConjectureData:
buf = bytes(buf)
result = int_from_bytes(buf)
- self.observer.draw_bits(n, forced=forced is not None, value=result)
self.__example_record.draw_bits(n, forced)
initial = self.index
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
index d82ed3ca67..a9a6e5b196 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
@@ -8,17 +8,38 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
+import itertools
+import math
+from typing import TYPE_CHECKING, List, Literal, Optional, Union
+
import attr
from hypothesis.errors import Flaky, HypothesisException, StopTest
+from hypothesis.internal import floats as flt
from hypothesis.internal.compat import int_to_bytes
from hypothesis.internal.conjecture.data import (
+ BooleanKWargs,
+ BytesKWargs,
ConjectureData,
DataObserver,
+ FloatKWargs,
+ IntegerKWargs,
Status,
- bits_to_bytes,
+ StringKWargs,
)
-from hypothesis.internal.conjecture.junkdrawer import IntList
+from hypothesis.internal.floats import count_between_floats, float_to_int, int_to_float
+
+if TYPE_CHECKING:
+ from typing import TypeAlias
+else:
+ TypeAlias = object
+
+IRType: TypeAlias = Union[int, str, bool, float, bytes]
+IRKWargsType: TypeAlias = Union[
+ IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
+]
+# this would be "IRTypeType", but that's just confusing.
+IRLiteralType: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]
class PreviouslyUnseenBehaviour(HypothesisException):
@@ -51,12 +72,15 @@ class Branch:
"""Represents a transition where multiple choices can be made as to what
 to be drawn."""
- bit_length = attr.ib()
+ kwargs = attr.ib()
+ ir_type = attr.ib()
children = attr.ib(repr=False)
@property
def max_children(self):
- return 1 << self.bit_length
+ max_children = compute_max_children(self.ir_type, self.kwargs)
+ assert max_children > 0
+ return max_children
@attr.s(slots=True, frozen=True)
@@ -67,61 +91,276 @@ class Conclusion:
interesting_origin = attr.ib()
+# The number of max children where, beyond this, it is practically impossible
+# for hypothesis to saturate / explore all children nodes in a reasonable time
+# frame. We use this to bail out of expensive max children computations early,
+# where the numbers involved are so large that we know they will be larger than
+# this number.
+#
+# Note that it's ok for us to underestimate the number of max children of a node
+# by using this. We just may think the node is exhausted when in fact it has more
+# possible children to be explored. This has the potential to finish generation
+# early due to exhausting the entire tree, but that is quite unlikely: (1) the
+# number of examples would have to be quite high, and (2) the tree would have to
+# contain only one or two nodes, or generate_novel_prefix would simply switch to
+# exploring another non-exhausted node.
+#
+# Also note that we may sometimes compute max children above this value. In other
+# words, this is *not* a hard maximum on the computed max children. It's the point
+# where further computation is not beneficial - but sometimes doing that computation
+# unconditionally is cheaper than estimating against this value.
+#
+# The one case where this may be detrimental is fuzzing, where the throughput of
+# examples is so high that it really may saturate important nodes. We'll cross
+# that bridge when we come to it.
+MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000
+
+
+def compute_max_children(ir_type, kwargs):
+ from hypothesis.internal.conjecture.data import DRAW_STRING_DEFAULT_MAX_SIZE
+
+ if ir_type == "integer":
+ min_value = kwargs["min_value"]
+ max_value = kwargs["max_value"]
+ weights = kwargs["weights"]
+
+ if min_value is None and max_value is None:
+ # full 128 bit range.
+ return 2**128 - 1
+ if min_value is not None and max_value is not None:
+ # count between min/max value.
+ n = max_value - min_value + 1
+ # remove any values with a zero probability of being drawn (weight=0).
+ if weights is not None:
+ n -= sum(weight == 0 for weight in weights)
+ return n
+
+ # hard case: only one bound was specified. Here we probe either upwards
+ # or downwards with our full 128 bit generation, but only half of these
+ # (plus one for the case of generating zero) result in a probe in the
+ # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127
+ assert (min_value is None) ^ (max_value is None)
+ return 2**127
+ elif ir_type == "boolean":
+ p = kwargs["p"]
+ # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice.
+ if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)):
+ return 1
+ return 2
+ elif ir_type == "bytes":
+ return 2 ** (8 * kwargs["size"])
+ elif ir_type == "string":
+ min_size = kwargs["min_size"]
+ max_size = kwargs["max_size"]
+ intervals = kwargs["intervals"]
+
+ if max_size is None:
+ max_size = DRAW_STRING_DEFAULT_MAX_SIZE
+
+ if len(intervals) == 0:
+ # Special-case the empty alphabet to avoid an error in math.log(0).
+ # Only possibility is the empty string.
+ return 1
+
+ # We want to estimate if we're going to have more children than
+ # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
+ # extremely expensive pow. We'll check if the number of strings in
+ # the largest string size alone is enough to put us over this limit.
+ # We'll also employ a trick of estimating against log, which is cheaper
+ # than computing a pow.
+ #
+ # x = max_size
+ # y = len(intervals)
+ # n = MAX_CHILDREN_EFFECTIVELY_INFINITE
+ #
+ # x**y > n
+ # <=> log(x**y) > log(n)
+ # <=> y * log(x) > log(n)
+
+ # avoid math.log(1) == 0 and incorrectly failing the below estimate,
+ # even when we definitely are too large.
+ if len(intervals) == 1:
+ definitely_too_large = max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE
+ else:
+ definitely_too_large = max_size * math.log(len(intervals)) > math.log(
+ MAX_CHILDREN_EFFECTIVELY_INFINITE
+ )
+
+ if definitely_too_large:
+ return MAX_CHILDREN_EFFECTIVELY_INFINITE
+
+ # number of strings of length k, for each k in [min_size, max_size].
+ return sum(len(intervals) ** k for k in range(min_size, max_size + 1))
+
+ elif ir_type == "float":
+ return count_between_floats(kwargs["min_value"], kwargs["max_value"])
+
+ raise NotImplementedError(f"unhandled ir_type {ir_type}")
+
+
+# In theory, this is a strict superset of the functionality of compute_max_children;
+#
+# assert len(all_children(ir_type, kwargs)) == compute_max_children(ir_type, kwargs)
+#
+# In practice, we maintain two distinct implementations for efficiency and space
+# reasons. If you just need the number of children, it is cheaper to use
+# compute_max_children than to reify the list of children (only to immediately
+# throw it away).
+def all_children(ir_type, kwargs):
+ if ir_type == "integer":
+ min_value = kwargs["min_value"]
+ max_value = kwargs["max_value"]
+ weights = kwargs["weights"]
+ # it's a bit annoying (but completely feasible) to implement the cases
+ # other than "both sides bounded" here. We haven't needed to yet because
+ # in practice we don't struggle with unbounded integer generation.
+ assert min_value is not None
+ assert max_value is not None
+
+ if weights is None:
+ yield from range(min_value, max_value + 1)
+ else:
+ # skip any values with a corresponding weight of 0 (can never be drawn).
+ for weight, n in zip(weights, range(min_value, max_value + 1)):
+ if weight == 0:
+ continue
+ yield n
+
+ if ir_type == "boolean":
+ p = kwargs["p"]
+ if p <= 2 ** (-64):
+ yield False
+ elif p >= (1 - 2 ** (-64)):
+ yield True
+ else:
+ yield from [False, True]
+ if ir_type == "bytes":
+ size = kwargs["size"]
+ yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size)))
+ if ir_type == "string":
+ min_size = kwargs["min_size"]
+ max_size = kwargs["max_size"]
+ intervals = kwargs["intervals"]
+
+ # written unidiomatically in order to handle the case of max_size=inf.
+ size = min_size
+ while size <= max_size:
+ for ords in itertools.product(intervals, repeat=size):
+ yield "".join(chr(n) for n in ords)
+ size += 1
+ if ir_type == "float":
+
+ def floats_between(a, b):
+ for n in range(float_to_int(a), float_to_int(b) + 1):
+ yield int_to_float(n)
+
+ min_value = kwargs["min_value"]
+ max_value = kwargs["max_value"]
+
+ if flt.is_negative(min_value):
+ if flt.is_negative(max_value):
+ # if both are negative, have to invert order
+ yield from floats_between(max_value, min_value)
+ else:
+ yield from floats_between(-0.0, min_value)
+ yield from floats_between(0.0, max_value)
+ else:
+ yield from floats_between(min_value, max_value)
+
+
@attr.s(slots=True)
class TreeNode:
- """Node in a tree that corresponds to previous interactions with
- a ``ConjectureData`` object according to some fixed test function.
-
- This is functionally a variant patricia trie.
- See https://en.wikipedia.org/wiki/Radix_tree for the general idea,
- but what this means in particular here is that we have a very deep
- but very lightly branching tree and rather than store this as a fully
- recursive structure we flatten prefixes and long branches into
- lists. This significantly compacts the storage requirements.
-
- A single ``TreeNode`` corresponds to a previously seen sequence
- of calls to ``ConjectureData`` which we have never seen branch,
- followed by a ``transition`` which describes what happens next.
"""
+ A node, or collection of directly descended nodes, in a DataTree.
+
+ We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree),
+ which means that nodes that are the only child of their parent are collapsed
+ into their parent to save space.
+
+ Conceptually, you can unfold a single TreeNode storing n values in its lists
+ into a sequence of n nodes, each a child of the last. In other words,
+ (kwargs[i], values[i], ir_types[i]) corresponds to the single node at index
+ i.
+
+ Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted
+ via the radix tree definition), then its lists will be empty and it will
+ store a `Branch` representing that choice in its `transition`.
+
+ Examples
+ --------
+
+ Consider sequentially drawing a boolean, then an integer.
+
+ data.draw_boolean()
+ data.draw_integer(1, 3)
+
+ If we draw True and then 2, the tree may conceptually look like this.
+
+ ┌──────┐
+ │ root │
+ └──┬───┘
+ ┌──┴───┐
+ │ True │
+ └──┬───┘
+ ┌──┴───┐
+ │ 2 │
+ └──────┘
+
+ But since 2 is the only child of True, we will compact these nodes and store
+ them as a single TreeNode.
+
+ ┌──────┐
+ │ root │
+ └──┬───┘
+ ┌────┴──────┐
+ │ [True, 2] │
+ └───────────┘
+
+ If we then draw True and then 3, True will have multiple children and we
+ can no longer store this compacted representation. We would call split_at(0)
+ on the [True, 2] node to indicate that we need to add a choice at 0-index
+ node (True).
- # Records the previous sequence of calls to ``data.draw_bits``,
- # with the ``n_bits`` argument going in ``bit_lengths`` and the
- # values seen in ``values``. These should always have the same
- # length.
- bit_lengths = attr.ib(factory=IntList)
- values = attr.ib(factory=IntList)
-
- # The indices of of the calls to ``draw_bits`` that we have stored
- # where ``forced`` is not None. Stored as None if no indices
- # have been forced, purely for space saving reasons (we force
- # quite rarely).
- __forced = attr.ib(default=None, init=False)
-
- # What happens next after observing this sequence of calls.
- # Either:
+ ┌──────┐
+ │ root │
+ └──┬───┘
+ ┌──┴───┐
+ ┌─┤ True ├─┐
+ │ └──────┘ │
+ ┌─┴─┐ ┌─┴─┐
+ │ 2 │ │ 3 │
+ └───┘ └───┘
+ """
+
+ # The kwargs, value, and ir_types of the nodes stored here. These always
+ # have the same length. The values at index i belong to node i.
+ kwargs: List[IRKWargsType] = attr.ib(factory=list)
+ values: List[IRType] = attr.ib(factory=list)
+ ir_types: List[IRLiteralType] = attr.ib(factory=list)
+
+ # The indices of nodes which had forced values.
#
- # * ``None``, indicating we don't know yet.
- # * A ``Branch`` object indicating that there is a ``draw_bits``
- # call that we have seen take multiple outcomes there.
- # * A ``Conclusion`` object indicating that ``conclude_test``
- # was called here.
- transition = attr.ib(default=None)
-
- # A tree node is exhausted if every possible sequence of
- # draws below it has been explored. We store this information
- # on a field and update it when performing operations that
- # could change the answer.
+ # Stored as None if no indices have been forced, purely for space saving
+ # reasons (we force quite rarely).
+ __forced: Optional[set] = attr.ib(default=None, init=False)
+
+ # What happens next after drawing these nodes. (conceptually, "what is the
+ # child/children of the last node stored here").
#
- # A node may start exhausted, e.g. because it it leads
- # immediately to a conclusion, but can only go from
- # non-exhausted to exhausted when one of its children
- # becomes exhausted or it is marked as a conclusion.
+ # One of:
+ # - None (we don't know yet)
+ # - Branch (we have seen multiple possible outcomes here)
+ # - Conclusion (ConjectureData.conclude_test was called here)
+ # - Killed (this branch is valid and may even have children, but should not
+ # be explored when generating novel prefixes)
+ transition: Union[None, Branch, Conclusion, Killed] = attr.ib(default=None)
+
+ # A tree node is exhausted if every possible sequence of draws below it has
+ # been explored. We only update this when performing operations that could
+ # change the answer.
#
- # Therefore we only need to check whether we need to update
- # this field when the node is first created in ``split_at``
- # or when we have walked a path through this node to a
- # conclusion in ``TreeRecordingObserver``.
- is_exhausted = attr.ib(default=False, init=False)
+ # See also TreeNode.check_exhausted.
+ is_exhausted: bool = attr.ib(default=False, init=False)
@property
def forced(self):
@@ -130,17 +369,21 @@ class TreeNode:
return self.__forced
def mark_forced(self, i):
- """Note that the value at index ``i`` was forced."""
+ """
+ Note that the draw at node i was forced.
+ """
assert 0 <= i < len(self.values)
if self.__forced is None:
self.__forced = set()
self.__forced.add(i)
def split_at(self, i):
- """Splits the tree so that it can incorporate
- a decision at the ``draw_bits`` call corresponding
- to position ``i``, or raises ``Flaky`` if that was
- meant to be a forced node."""
+ """
+ Splits the tree so that it can incorporate a decision at the draw call
+ corresponding to the node at position i.
+
+ Raises Flaky if node i was forced.
+ """
if i in self.forced:
inconsistent_generation()
@@ -150,26 +393,58 @@ class TreeNode:
key = self.values[i]
child = TreeNode(
- bit_lengths=self.bit_lengths[i + 1 :],
+ ir_types=self.ir_types[i + 1 :],
+ kwargs=self.kwargs[i + 1 :],
values=self.values[i + 1 :],
transition=self.transition,
)
- self.transition = Branch(bit_length=self.bit_lengths[i], children={key: child})
+ self.transition = Branch(
+ kwargs=self.kwargs[i], ir_type=self.ir_types[i], children={key: child}
+ )
if self.__forced is not None:
child.__forced = {j - i - 1 for j in self.__forced if j > i}
self.__forced = {j for j in self.__forced if j < i}
child.check_exhausted()
+ del self.ir_types[i:]
del self.values[i:]
- del self.bit_lengths[i:]
- assert len(self.values) == len(self.bit_lengths) == i
+ del self.kwargs[i:]
+ assert len(self.values) == len(self.kwargs) == len(self.ir_types) == i
def check_exhausted(self):
- """Recalculates ``self.is_exhausted`` if necessary then returns
- it."""
+ """
+ Recalculates is_exhausted if necessary, and then returns it.
+
+ A node is exhausted if:
+ - Its transition is Conclusion or Killed
+ - It has the maximum number of children (i.e. we have found all of its
+ possible children), and all its children are exhausted
+
+ Therefore, we only need to compute this for a node when:
+ - We first create it in split_at
+ - We set its transition to either Conclusion or Killed
+ (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch)
+ - We exhaust any of its children
+ """
+
if (
+ # a node cannot go from is_exhausted -> not is_exhausted.
not self.is_exhausted
- and len(self.forced) == len(self.values)
+ # if we don't know what happens after this node, we don't have
+ # enough information to tell if it's exhausted.
and self.transition is not None
+ # if there are still any nodes left which are the only child of their
+ # parent (len(self.values) > 0), then this TreeNode must be not
+ # exhausted, unless all of those nodes were forced.
+ #
+ # This is because we maintain an invariant of only adding nodes to
+ # DataTree which have at least 2 possible values, so we know that if
+ # they do not have any siblings that we still have more choices to
+ # discover.
+ #
+ # (We actually *do* currently add single-valued nodes to the tree,
+ # but immediately split them into a transition to avoid falsifying
+ # this check. this is a bit of a hack.)
+ and len(self.forced) == len(self.values)
):
if isinstance(self.transition, (Conclusion, Killed)):
self.is_exhausted = True
@@ -181,16 +456,159 @@ class TreeNode:
class DataTree:
- """Tracks the tree structure of a collection of ConjectureData
- objects, for use in ConjectureRunner."""
+ """
+ A DataTree tracks the structured history of draws in some test function,
+ across multiple ConjectureData objects.
+
+ This information is used by ConjectureRunner to generate novel prefixes of
+ this tree (see generate_novel_prefix). A novel prefix is a sequence of draws
+ which the tree has not seen before, and therefore the ConjectureRunner has
+ not generated as an input to the test function before.
+
+ DataTree tracks the following:
+
+ - Draws, at the ir level (with some ir_type, e.g. "integer")
+ - ConjectureData.draw_integer()
+ - ConjectureData.draw_float()
+ - ConjectureData.draw_string()
+ - ConjectureData.draw_boolean()
+ - ConjectureData.draw_bytes()
+ - Test conclusions (with some Status, e.g. Status.VALID)
+ - ConjectureData.conclude_test()
+
+ A DataTree is — surprise — a *tree*. A node in this tree is either a draw with
+ some value, a test conclusion with some Status, or a special `Killed` value,
+ which denotes that further draws may exist beyond this node but should not be
+ considered worth exploring when generating novel prefixes. A node is a leaf
+ iff it is a conclusion or Killed.
+
+ A branch from node A to node B indicates that we have previously seen some
+ sequence (a, b) of draws, where a and b are the values in nodes A and B.
+ Similar intuition holds for conclusion and Killed nodes.
+
+ Examples
+ --------
+
+ To see how a DataTree gets built through successive sets of draws, consider
+ the following code that calls through to some ConjectureData object `data`.
+ The first call can be either True or False, and the second call can be any
+ integer in the range [1, 3].
+
+ data.draw_boolean()
+ data.draw_integer(1, 3)
+
+ To start, the corresponding DataTree object is completely empty.
+
+ ┌──────┐
+ │ root │
+ └──────┘
+
+ We happen to draw True and then 2 in the above code. The tree tracks this.
+ (2 also connects to a child Conclusion node with Status.VALID since it's the
+ final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.)
+
+ ┌──────┐
+ │ root │
+ └──┬───┘
+ ┌──┴───┐
+ │ True │
+ └──┬───┘
+ ┌──┴───┐
+ │ 2 │
+ └──────┘
+
+ This is a very boring tree so far! But now we happen to draw False and
+ then 1. This causes a split in the tree. Remember, DataTree tracks history
+ over all invocations of a function, not just one. The end goal is to know
+ what invocations haven't been tried yet, after all.
+
+ ┌──────┐
+ ┌───┤ root ├───┐
+ │ └──────┘ │
+ ┌──┴───┐ ┌─┴─────┐
+ │ True │ │ False │
+ └──┬───┘ └──┬────┘
+ ┌─┴─┐ ┌─┴─┐
+ │ 2 │ │ 1 │
+ └───┘ └───┘
+
+ If we were to ask DataTree for a novel prefix at this point, it might
+ generate any of (True, 1), (True, 3), (False, 2), or (False, 3).
+
+ Note that the novel prefix stops as soon as it generates a novel node. For
+ instance, if we had generated a novel prefix back when the tree was only
+ root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or
+ (False). But we could *not* have gotten (False, n), because both False and
+ n were novel at that point, and we stop at the first novel node — False.
+
+ I won't belabor this example. Here's what the tree looks like when fully
+ explored:
+
+ ┌──────┐
+ ┌──────┤ root ├──────┐
+ │ └──────┘ │
+ ┌──┴───┐ ┌─┴─────┐
+ ┌──┤ True ├──┐ ┌───┤ False ├──┐
+ │ └──┬───┘ │ │ └──┬────┘ │
+ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐
+ │ 1 │ │ 2 │ │ 3 │ │ 1 │ │ 2 │ │ 3 │
+ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘
+
+ You could imagine much more complicated trees than this arising in practice,
+ and indeed they do. In particular, the tree need not be balanced or 'nice'
+ like the tree above. For instance,
+
+ b = data.draw_boolean()
+ if b:
+ data.draw_integer(1, 3)
+
+ results in a tree with the entire right part lopped off, and False leading
+ straight to a conclusion node with Status.VALID. As another example,
+
+ n = data.draw_integer()
+ assume(n >= 3)
+ data.draw_string()
+
+ results in a tree with the 0, 1, and 2 nodes leading straight to a
+ conclusion node with Status.INVALID, and the rest branching off into all
+ the possibilities of draw_string.
+
+ Notes
+ -----
+
+ The above examples are slightly simplified and are intended to convey
+ intuition. In practice, there are some implementation details to be aware
+ of.
+
+ - In draw nodes, we store the kwargs used in addition to the value drawn.
+ E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5)
+ would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for
+ other kwargs omitted).
+
+ The kwargs parameters have the potential to change both the range of
+ possible outputs of a node, and the probability distribution within that
+ range, so we need to use these when drawing in DataTree as well. We draw
+ values using these kwargs when (1) generating a novel value for a node
+ and (2) choosing a random child when traversing the tree.
+
+ - For space efficiency, rather than tracking the full tree structure, we
+ store DataTree as a radix tree. This is conceptually equivalent (radix
+ trees can always be "unfolded" to the full tree) but it means the internal
+ representation may differ in practice.
+
+ See TreeNode for more information.
+ """
def __init__(self):
self.root = TreeNode()
+ self._children_cache = {}
@property
def is_exhausted(self):
- """Returns True if every possible node is dead and thus the language
- described must have been fully explored."""
+ """
+ Returns True if every node is exhausted, and therefore the tree has
+ been fully explored.
+ """
return self.root.is_exhausted
def generate_novel_prefix(self, random):
@@ -201,26 +619,43 @@ class DataTree:
for it to be uniform at random, but previous attempts to do that
have proven too expensive.
"""
+
assert not self.is_exhausted
novel_prefix = bytearray()
- def append_int(n_bits, value):
- novel_prefix.extend(int_to_bytes(value, bits_to_bytes(n_bits)))
+ def append_buf(buf):
+ novel_prefix.extend(buf)
current_node = self.root
while True:
assert not current_node.is_exhausted
- for i, (n_bits, value) in enumerate(
- zip(current_node.bit_lengths, current_node.values)
+ for i, (ir_type, kwargs, value) in enumerate(
+ zip(current_node.ir_types, current_node.kwargs, current_node.values)
):
if i in current_node.forced:
- append_int(n_bits, value)
+ if ir_type == "float":
+ value = int_to_float(value)
+ (_value, buf) = self._draw(
+ ir_type, kwargs, forced=value, random=random
+ )
+ append_buf(buf)
else:
+ attempts = 0
while True:
- k = random.getrandbits(n_bits)
- if k != value:
- append_int(n_bits, k)
+ if attempts <= 10:
+ (v, buf) = self._draw(ir_type, kwargs, random=random)
+ else:
+ (v, buf) = self._draw_from_cache(
+ ir_type, kwargs, key=id(current_node), random=random
+ )
+
+ if v != value:
+ append_buf(buf)
break
+ attempts += 1
+ self._reject_child(
+ ir_type, kwargs, child=v, key=id(current_node)
+ )
# We've now found a value that is allowed to
# vary, so what follows is not fixed.
return bytes(novel_prefix)
@@ -230,27 +665,37 @@ class DataTree:
return bytes(novel_prefix)
branch = current_node.transition
assert isinstance(branch, Branch)
- n_bits = branch.bit_length
- check_counter = 0
+ attempts = 0
while True:
- k = random.getrandbits(n_bits)
+ if attempts <= 10:
+ (v, buf) = self._draw(
+ branch.ir_type, branch.kwargs, random=random
+ )
+ else:
+ (v, buf) = self._draw_from_cache(
+ branch.ir_type, branch.kwargs, key=id(branch), random=random
+ )
try:
- child = branch.children[k]
+ child = branch.children[v]
except KeyError:
- append_int(n_bits, k)
+ append_buf(buf)
return bytes(novel_prefix)
if not child.is_exhausted:
- append_int(n_bits, k)
+ append_buf(buf)
current_node = child
break
- check_counter += 1
+ attempts += 1
+ self._reject_child(
+ branch.ir_type, branch.kwargs, child=v, key=id(branch)
+ )
+
# We don't expect this assertion to ever fire, but coverage
# wants the loop inside to run if you have branch checking
# on, hence the pragma.
assert ( # pragma: no cover
- check_counter != 1000
- or len(branch.children) < (2**n_bits)
+ attempts != 1000
+ or len(branch.children) < branch.max_children
or any(not v.is_exhausted for v in branch.children.values())
)
@@ -274,13 +719,22 @@ class DataTree:
or ``start_example`` as these are not currently recorded in the
tree. This will likely change in future."""
node = self.root
+
+ def draw(ir_type, kwargs, *, forced=None):
+ draw_func = getattr(data, f"draw_{ir_type}")
+ value = draw_func(**kwargs, forced=forced)
+
+ if ir_type == "float":
+ value = float_to_int(value)
+ return value
+
try:
while True:
- for i, (n_bits, previous) in enumerate(
- zip(node.bit_lengths, node.values)
+ for i, (ir_type, kwargs, previous) in enumerate(
+ zip(node.ir_types, node.kwargs, node.values)
):
- v = data.draw_bits(
- n_bits, forced=node.values[i] if i in node.forced else None
+ v = draw(
+ ir_type, kwargs, forced=previous if i in node.forced else None
)
if v != previous:
raise PreviouslyUnseenBehaviour
@@ -290,7 +744,7 @@ class DataTree:
elif node.transition is None:
raise PreviouslyUnseenBehaviour
elif isinstance(node.transition, Branch):
- v = data.draw_bits(node.transition.bit_length)
+ v = draw(node.transition.ir_type, node.transition.kwargs)
try:
node = node.transition.children[v]
except KeyError as err:
@@ -305,6 +759,97 @@ class DataTree:
def new_observer(self):
return TreeRecordingObserver(self)
+ def _draw(self, ir_type, kwargs, *, random, forced=None):
+ # we should possibly pull out BUFFER_SIZE to a common file to avoid this
+ # circular import.
+ from hypothesis.internal.conjecture.engine import BUFFER_SIZE
+
+ cd = ConjectureData(max_length=BUFFER_SIZE, prefix=b"", random=random)
+ draw_func = getattr(cd, f"draw_{ir_type}")
+
+ value = draw_func(**kwargs, forced=forced)
+ buf = cd.buffer
+
+ # using floats as keys into branch.children breaks things, because
+ # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are
+ # in fact distinct child branches.
+ # To distinguish floats here we'll use their bits representation. This
+ # entails some bookkeeping such that we're careful about when the
+ # float key is in its bits form (as a key into branch.children) and
+ # when it is in its float form (as a value we want to write to the
+ # buffer), and converting between the two forms as appropriate.
+ if ir_type == "float":
+ value = float_to_int(value)
+ return (value, buf)
+
+ def _get_children_cache(self, ir_type, kwargs, *, key):
+ # cache the state of the children generator per node/branch (passed as
+ # `key` here), such that we track which children we've already tried
+ # for this branch across draws.
+ # We take advantage of python generators here as one-way iterables,
+ # so each time we iterate we implicitly store our position in the
+ # children generator and don't re-draw children. `children` is the
+ # concrete list of children draw from the generator that we will work
+ # with. Whenever we need to top up this list, we will draw a new value
+ # from the generator.
+ if key not in self._children_cache:
+ generator = all_children(ir_type, kwargs)
+ children = []
+ rejected = set()
+ self._children_cache[key] = (generator, children, rejected)
+
+ return self._children_cache[key]
+
+ def _draw_from_cache(self, ir_type, kwargs, *, key, random):
+ (generator, children, rejected) = self._get_children_cache(
+ ir_type, kwargs, key=key
+ )
+ # Keep a stock of 100 potentially-valid children at all times.
+ # This number is chosen to balance memory/speed vs randomness. Ideally
+ # we would sample uniformly from all not-yet-rejected children, but
+ # computing and storing said children is not free.
+ # no-branch because coverage of the fall-through case here is a bit
+ # annoying.
+ if len(children) < 100: # pragma: no branch
+ for v in generator:
+ if ir_type == "float":
+ v = float_to_int(v)
+ if v in rejected:
+ continue
+ children.append(v)
+ if len(children) >= 100:
+ break
+
+ forced = random.choice(children)
+ if ir_type == "float":
+ forced = int_to_float(forced)
+ (value, buf) = self._draw(ir_type, kwargs, forced=forced, random=random)
+ return (value, buf)
+
+ def _reject_child(self, ir_type, kwargs, *, child, key):
+ (_generator, children, rejected) = self._get_children_cache(
+ ir_type, kwargs, key=key
+ )
+ rejected.add(child)
+ # we remove a child from the list of possible children *only* when it is
+ # rejected, and not when it is initially drawn in _draw_from_cache. The
+ # reason is that a child being drawn does not guarantee that child will
+ # be used in a way such that it is written back to the tree, so it needs
+ # to be available for future draws until we are certain it has been
+ # used.
+ #
+ # For instance, if we generated novel prefixes in a loop (but never used
+ # those prefixes to generate new values!) then we don't want to remove
+ # the drawn children from the available pool until they are actually
+ # used.
+ #
+ # This does result in a small inefficiency: we may draw a child,
+ # immediately use it (so we know it cannot be drawn again), but still
+ # wait to draw and reject it here, because DataTree cannot guarantee
+ # the drawn child has been used.
+ if child in children:
+ children.remove(child)
+
class TreeRecordingObserver(DataObserver):
def __init__(self, tree):
@@ -313,13 +858,49 @@ class TreeRecordingObserver(DataObserver):
self.__trail = [self.__current_node]
self.killed = False
- def draw_bits(self, n_bits, forced, value):
+ def draw_integer(
+ self, value: int, *, was_forced: bool, kwargs: IntegerKWargs
+ ) -> None:
+ self.draw_value("integer", value, was_forced=was_forced, kwargs=kwargs)
+
+ def draw_float(
+ self, value: float, *, was_forced: bool, kwargs: FloatKWargs
+ ) -> None:
+ self.draw_value("float", value, was_forced=was_forced, kwargs=kwargs)
+
+ def draw_string(
+ self, value: str, *, was_forced: bool, kwargs: StringKWargs
+ ) -> None:
+ self.draw_value("string", value, was_forced=was_forced, kwargs=kwargs)
+
+ def draw_bytes(
+ self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs
+ ) -> None:
+ self.draw_value("bytes", value, was_forced=was_forced, kwargs=kwargs)
+
+ def draw_boolean(
+ self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs
+ ) -> None:
+ self.draw_value("boolean", value, was_forced=was_forced, kwargs=kwargs)
+
+ def draw_value(
+ self,
+ ir_type: IRLiteralType,
+ value: IRType,
+ *,
+ was_forced: bool,
+ kwargs: IRKWargsType,
+ ) -> None:
i = self.__index_in_current_node
self.__index_in_current_node += 1
node = self.__current_node
- assert len(node.bit_lengths) == len(node.values)
- if i < len(node.bit_lengths):
- if n_bits != node.bit_lengths[i]:
+
+ if isinstance(value, float):
+ value = float_to_int(value)
+
+ assert len(node.kwargs) == len(node.values) == len(node.ir_types)
+ if i < len(node.values):
+ if ir_type != node.ir_types[i] or kwargs != node.kwargs[i]:
inconsistent_generation()
# Note that we don't check whether a previously
# forced value is now free. That will be caught
@@ -327,23 +908,43 @@ class TreeRecordingObserver(DataObserver):
# may pass silently. This is acceptable because it
# means we skip a hash set lookup on every
# draw and that's a pretty niche failure mode.
- if forced and i not in node.forced:
+ if was_forced and i not in node.forced:
inconsistent_generation()
if value != node.values[i]:
node.split_at(i)
assert i == len(node.values)
new_node = TreeNode()
- branch = node.transition
- branch.children[value] = new_node
+ node.transition.children[value] = new_node
self.__current_node = new_node
self.__index_in_current_node = 0
else:
trans = node.transition
if trans is None:
- node.bit_lengths.append(n_bits)
+ node.ir_types.append(ir_type)
+ node.kwargs.append(kwargs)
node.values.append(value)
- if forced:
+ if was_forced:
node.mark_forced(i)
+ # generate_novel_prefix assumes the following invariant: any one
+ # of the series of draws in a particular node can vary, i.e. the
+ # max number of children is at least 2. However, some draws are
+ # pseudo-choices and only have a single value, such as
+ # integers(0, 0).
+ #
+ # Currently, we address this by forcefully splitting such
+ # single-valued nodes into a transition when we see them. An
+ # exception to this is if it was forced: forced pseudo-choices
+ # do not cause the above issue because they inherently cannot
+ # vary, and moreover they trip other invariants about never
+ # splitting forced nodes.
+ #
+ # An alternative is not writing such choices to the tree at
+ # all, and thus guaranteeing that each node has at least 2 max
+ # children.
+ if compute_max_children(ir_type, kwargs) == 1 and not was_forced:
+ node.split_at(i)
+ self.__current_node = node.transition.children[value]
+ self.__index_in_current_node = 0
elif isinstance(trans, Conclusion):
assert trans.status != Status.OVERRUN
# We tried to draw where history says we should have
@@ -351,7 +952,7 @@ class TreeRecordingObserver(DataObserver):
inconsistent_generation()
else:
assert isinstance(trans, Branch), trans
- if n_bits != trans.bit_length:
+ if ir_type != trans.ir_type or kwargs != trans.kwargs:
inconsistent_generation()
try:
self.__current_node = trans.children[value]
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py
index 61f9d742bb..5e77437a78 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py
@@ -101,13 +101,12 @@ class Sampler:
table: List[Tuple[int, int, float]] # (base_idx, alt_idx, alt_chance)
- def __init__(self, weights: Sequence[float]):
- n = len(weights)
+ def __init__(self, weights: Sequence[float], *, observe: bool = True):
+ self.observe = observe
+ n = len(weights)
table: "list[list[int | float | None]]" = [[i, None, None] for i in range(n)]
-
total = sum(weights)
-
num_type = type(total)
zero = num_type(0) # type: ignore
@@ -179,7 +178,7 @@ class Sampler:
)
)
base, alternate, alternate_chance = data.choice(
- self.table, forced=forced_choice
+ self.table, forced=forced_choice, observe=self.observe
)
forced_use_alternate = None
if forced is not None:
@@ -189,7 +188,9 @@ class Sampler:
forced_use_alternate = forced == alternate and alternate_chance > 0
assert forced == base or forced_use_alternate
- use_alternate = data.draw_boolean(alternate_chance, forced=forced_use_alternate)
+ use_alternate = data.draw_boolean(
+ alternate_chance, forced=forced_use_alternate, observe=self.observe
+ )
data.stop_example()
if use_alternate:
assert forced is None or alternate == forced, (forced, alternate)
@@ -200,7 +201,7 @@ class Sampler:
INT_SIZES = (8, 16, 32, 64, 128)
-INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5))
+INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False)
class many:
@@ -223,6 +224,7 @@ class many:
average_size: Union[int, float],
*,
forced: Optional[int] = None,
+ observe: bool = True,
) -> None:
assert 0 <= min_size <= average_size <= max_size
assert forced is None or min_size <= forced <= max_size
@@ -236,17 +238,17 @@ class many:
self.drawn = False
self.force_stop = False
self.rejected = False
+ self.observe = observe
def more(self) -> bool:
"""Should I draw another element to add to the collection?"""
if self.drawn:
- self.data.stop_example(discard=self.rejected)
+ self.data.stop_example()
self.drawn = True
self.rejected = False
self.data.start_example(ONE_FROM_MANY_LABEL)
-
if self.min_size == self.max_size:
# if we have to hit an exact size, draw unconditionally until that
# point, and no further.
@@ -265,7 +267,7 @@ class many:
elif self.forced_size is not None:
forced_result = self.count < self.forced_size
should_continue = self.data.draw_boolean(
- self.p_continue, forced=forced_result
+ self.p_continue, forced=forced_result, observe=self.observe
)
if should_continue:
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py
index 8ab02d0b39..97dd2bf9b3 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py
@@ -117,7 +117,7 @@ from hypothesis.strategies._internal.collections import (
from hypothesis.strategies._internal.deferred import DeferredStrategy
from hypothesis.strategies._internal.functions import FunctionStrategy
from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies
-from hypothesis.strategies._internal.misc import just, none, nothing
+from hypothesis.strategies._internal.misc import BooleansStrategy, just, none, nothing
from hypothesis.strategies._internal.numbers import (
IntegersStrategy,
Real,
@@ -158,14 +158,14 @@ else:
@cacheable
-@defines_strategy()
+@defines_strategy(force_reusable_values=True)
def booleans() -> SearchStrategy[bool]:
"""Returns a strategy which generates instances of :class:`python:bool`.
Examples from this strategy will shrink towards ``False`` (i.e.
shrinking will replace ``True`` with ``False`` where possible).
"""
- return SampledFromStrategy([False, True], repr_="booleans()")
+ return BooleansStrategy()
@overload
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py
index ad37107f73..3d0b0c97e0 100644
--- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py
+++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py
@@ -116,3 +116,11 @@ def nothing() -> SearchStrategy:
Examples from this strategy do not shrink (because there are none).
"""
return NOTHING
+
+
+class BooleansStrategy(SearchStrategy):
+ def do_draw(self, data):
+ return data.draw_boolean()
+
+ def __repr__(self):
+ return "booleans()"
diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py
index 3d7dd6774b..843784dc96 100644
--- a/contrib/python/hypothesis/py3/hypothesis/version.py
+++ b/contrib/python/hypothesis/py3/hypothesis/version.py
@@ -8,5 +8,5 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
-__version_info__ = (6, 98, 0)
+__version_info__ = (6, 98, 2)
__version__ = ".".join(map(str, __version_info__))
diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make
index 5b197c3343..03a62edeb0 100644
--- a/contrib/python/hypothesis/py3/ya.make
+++ b/contrib/python/hypothesis/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(6.98.0)
+VERSION(6.98.2)
LICENSE(MPL-2.0)