diff options
| author | vvvv <[email protected]> | 2026-05-15 19:37:53 +0300 |
|---|---|---|
| committer | vvvv <[email protected]> | 2026-05-15 20:02:45 +0300 |
| commit | 90eb93a7848694a3412fc5f421977b7a9c4b9c0b (patch) | |
| tree | 25158d58bac01a709bd86d6c9c9052d89f914750 /yql/essentials/parser/pg_wrapper/postgresql/src | |
| parent | 471c0d42103be493db5e150ed799099f992d06fc (diff) | |
YQL-21241 pg 16.14
commit_hash:b8bda7a3ca0d266411cc83ed1ca113dc4cb64203
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src')
212 files changed, 5015 insertions, 1733 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin.c index d69d0fdf09a..d17cec3c12a 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin.c @@ -362,7 +362,6 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) Relation heapRel; BrinOpaque *opaque; BlockNumber nblocks; - BlockNumber heapBlk; int64 totalpages = 0; FmgrInfo *consistentFn; MemoryContext oldcxt; @@ -522,9 +521,10 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) /* * Now scan the revmap. We start by querying for heap page 0, * incrementing by the number of pages per range; this gives us a full - * view of the table. + * view of the table. We make use of uint64 for heapBlk as a BlockNumber + * could wrap for tables with close to 2^32 pages. */ - for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange) + for (uint64 heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange) { bool addrange; bool gottuple = false; @@ -536,7 +536,7 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) MemoryContextResetAndDeleteChildren(perRangeCxt); - tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf, + tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, (BlockNumber) heapBlk, &buf, &off, &size, BUFFER_LOCK_SHARE, scan->xs_snapshot); if (tup) @@ -712,7 +712,7 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) /* add the pages in the range to the output bitmap, if needed */ if (addrange) { - BlockNumber pageno; + uint64 pageno; for (pageno = heapBlk; pageno <= Min(nblocks, heapBlk + opaque->bo_pagesPerRange) - 1; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin_pageops.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin_pageops.c index b578d259545..27eb2f62ad6 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin_pageops.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/brin/brin_pageops.c @@ -894,7 +894,11 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer) page = BufferGetPage(buffer); brin_page_init(page, BRIN_PAGETYPE_REGULAR); MarkBufferDirty(buffer); - log_newpage_buffer(buffer, true); + + /* XLOG stuff */ + if (RelationNeedsWAL(idxrel)) + log_newpage_buffer(buffer, true); + END_CRIT_SECTION(); /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/common/toast_internals.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/common/toast_internals.c index 588825ed85d..c82a914329e 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/common/toast_internals.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/common/toast_internals.c @@ -136,7 +136,7 @@ toast_save_datum(Relation rel, Datum value, char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ]; /* ensure union is aligned well enough: */ int32 align_it; - } chunk_data; + } chunk_data = {0}; /* silence compiler warning */ int32 chunk_size; int32 chunk_seq = 0; char *data_p; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginget.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginget.c index 99ce4a60bdb..acc2e08656d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginget.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginget.c @@ -1316,6 +1316,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, */ do { + CHECK_FOR_INTERRUPTS(); + ItemPointerSetMin(item); match = true; for (i = 0; i < so->nkeys && match; i++) @@ -1959,8 +1961,6 @@ gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm) for (;;) { - CHECK_FOR_INTERRUPTS(); - if (!scanGetItem(scan, iptr, &iptr, &recheck)) break; diff --git 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginscan.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginscan.c index d88d1ed2531..4d27447c2f0 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginscan.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gin/ginscan.c @@ -270,6 +270,7 @@ ginNewScanKey(IndexScanDesc scan) ScanKey scankey = scan->keyData; GinScanOpaque so = (GinScanOpaque) scan->opaque; int i; + int numExcludeOnly; bool hasNullQuery = false; bool attrHasNormalScan[INDEX_MAX_KEYS] = {false}; MemoryContext oldCtx; @@ -392,6 +393,7 @@ ginNewScanKey(IndexScanDesc scan) * excludeOnly scan key must receive a GIN_CAT_EMPTY_QUERY hidden entry * and be set to normal (excludeOnly = false). */ + numExcludeOnly = 0; for (i = 0; i < so->nkeys; i++) { GinScanKey key = &so->keys[i]; @@ -405,6 +407,47 @@ ginNewScanKey(IndexScanDesc scan) ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); attrHasNormalScan[key->attnum - 1] = true; } + else + numExcludeOnly++; + } + + /* + * If we left any excludeOnly scan keys as-is, move them to the end of the + * scan key array: they must appear after normal key(s). + */ + if (numExcludeOnly > 0) + { + GinScanKey tmpkeys; + int iNormalKey; + int iExcludeOnly; + + /* We'd better have made at least one normal key */ + Assert(numExcludeOnly < so->nkeys); + /* Make a temporary array to hold the re-ordered scan keys */ + tmpkeys = (GinScanKey) palloc(so->nkeys * sizeof(GinScanKeyData)); + /* Re-order the keys ... 
*/ + iNormalKey = 0; + iExcludeOnly = so->nkeys - numExcludeOnly; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = &so->keys[i]; + + if (key->excludeOnly) + { + memcpy(tmpkeys + iExcludeOnly, key, sizeof(GinScanKeyData)); + iExcludeOnly++; + } + else + { + memcpy(tmpkeys + iNormalKey, key, sizeof(GinScanKeyData)); + iNormalKey++; + } + } + Assert(iNormalKey == so->nkeys - numExcludeOnly); + Assert(iExcludeOnly == so->nkeys); + /* ... and copy them back to so->keys[] */ + memcpy(so->keys, tmpkeys, so->nkeys * sizeof(GinScanKeyData)); + pfree(tmpkeys); } /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gist/gistutil.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gist/gistutil.c index 8793e02a79c..52c70175a40 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gist/gistutil.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/gist/gistutil.c @@ -157,7 +157,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, { int i; GistEntryVector *evec; - int attrsize; + int attrsize = 0; /* silence compiler warning */ evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ); @@ -242,7 +242,7 @@ gistMakeUnionKey(GISTSTATE *giststate, int attno, char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ]; } storage; GistEntryVector *evec = &storage.gev; - int dstsize; + int dstsize = 0; /* silence compiler warning */ evec->n = 2; @@ -1034,7 +1034,7 @@ gistGetFakeLSN(Relation rel) * last call. 
*/ static __thread XLogRecPtr lastlsn = InvalidXLogRecPtr; - XLogRecPtr currlsn = GetXLogInsertRecPtr(); + XLogRecPtr currlsn = GetXLogInsertEndRecPtr(); /* Shouldn't be called for WAL-logging relations */ Assert(!RelationNeedsWAL(rel)); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/hash/hashfunc.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/hash/hashfunc.c index ac21884162f..933aa42ffc9 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/hash/hashfunc.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/hash/hashfunc.c @@ -235,6 +235,7 @@ hashoidvector(PG_FUNCTION_ARGS) { oidvector *key = (oidvector *) PG_GETARG_POINTER(0); + check_valid_oidvector(key); return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid)); } @@ -243,6 +244,7 @@ hashoidvectorextended(PG_FUNCTION_ARGS) { oidvector *key = (oidvector *) PG_GETARG_POINTER(0); + check_valid_oidvector(key); return hash_any_extended((unsigned char *) key->values, key->dim1 * sizeof(Oid), PG_GETARG_INT64(1)); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heapam.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heapam.c index e6c83334de4..b251653540e 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heapam.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heapam.c @@ -102,8 +102,11 @@ static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2); -static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, - ItemPointer ctid, TransactionId xid, +static TM_Result heap_lock_updated_tuple(Relation rel, + uint16 prior_infomask, + TransactionId prior_rawxmax, + const ItemPointerData *prior_ctid, + TransactionId xid, LockTupleMode mode); static int 
heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xl_heap_freeze_plan *plans_out, @@ -4586,11 +4589,13 @@ l3: * If there are updates, follow the update chain; bail out if * that cannot be done. */ - if (follow_updates && updated) + if (follow_updates && updated && + !ItemPointerEquals(&tuple->t_self, &t_ctid)) { TM_Result res; - res = heap_lock_updated_tuple(relation, tuple, &t_ctid, + res = heap_lock_updated_tuple(relation, + infomask, xwait, &t_ctid, GetCurrentTransactionId(), mode); if (res != TM_Ok) @@ -4833,11 +4838,13 @@ l3: } /* if there are updates, follow the update chain */ - if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask)) + if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask) && + !ItemPointerEquals(&tuple->t_self, &t_ctid)) { TM_Result res; - res = heap_lock_updated_tuple(relation, tuple, &t_ctid, + res = heap_lock_updated_tuple(relation, + infomask, xwait, &t_ctid, GetCurrentTransactionId(), mode); if (res != TM_Ok) @@ -5491,7 +5498,8 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid, * version as well. */ static TM_Result -heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, +heap_lock_updated_tuple_rec(Relation rel, TransactionId priorXmax, + const ItemPointerData *tid, TransactionId xid, LockTupleMode mode) { TM_Result result; @@ -5504,7 +5512,6 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, old_infomask2; TransactionId xmax, new_xmax; - TransactionId priorXmax = InvalidTransactionId; bool cleared_all_frozen = false; bool pinned_desired_page; Buffer vmbuffer = InvalidBuffer; @@ -5818,7 +5825,10 @@ out_unlocked: * Follow update chain when locking an updated tuple, acquiring locks (row * marks) on the updated versions. * - * The initial tuple is assumed to be already locked. + * 'prior_infomask', 'prior_raw_xmax' and 'prior_ctid' are the corresponding + * fields from the initial tuple. 
We will lock the tuples starting from the + * one that 'prior_ctid' points to. Note: This function does not lock the + * initial tuple itself. * * This function doesn't check visibility, it just unconditionally marks the * tuple(s) as locked. If any tuple in the updated chain is being deleted @@ -5836,16 +5846,20 @@ out_unlocked: * levels, because that would lead to a serializability failure. */ static TM_Result -heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, +heap_lock_updated_tuple(Relation rel, + uint16 prior_infomask, + TransactionId prior_raw_xmax, + const ItemPointerData *prior_ctid, TransactionId xid, LockTupleMode mode) { /* - * If the tuple has not been updated, or has moved into another partition - * (effectively a delete) stop here. + * If the tuple has moved into another partition (effectively a delete) + * stop here. */ - if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data) && - !ItemPointerEquals(&tuple->t_self, ctid)) + if (!ItemPointerIndicatesMovedPartitions(prior_ctid)) { + TransactionId prior_xmax; + /* * If this is the first possibly-multixact-able operation in the * current transaction, set my per-backend OldestMemberMXactId @@ -5857,7 +5871,9 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, */ MultiXactIdSetOldestMember(); - return heap_lock_updated_tuple_rec(rel, ctid, xid, mode); + prior_xmax = (prior_infomask & HEAP_XMAX_IS_MULTI) ? + MultiXactIdGetUpdateXid(prior_raw_xmax, prior_infomask) : prior_raw_xmax; + return heap_lock_updated_tuple_rec(rel, prior_xmax, prior_ctid, xid, mode); } /* nothing to lock */ @@ -6158,6 +6174,19 @@ heap_inplace_lock(Relation relation, Assert(BufferIsValid(buffer)); + /* + * Register shared cache invals if necessary. Other sessions may finish + * inplace updates of this tuple between this step and LockTuple(). Since + * inplace updates don't change cache keys, that's harmless. 
+ * + * While it's tempting to register invals only after confirming we can + * return true, the following obstacle precludes reordering steps that + * way. Registering invals might reach a CatalogCacheInitializeCache() + * that locks "buffer". That would hang indefinitely if running after our + * own LockBuffer(). Hence, we must register invals before LockBuffer(). + */ + CacheInvalidateHeapTupleInplace(relation, oldtup_ptr); + LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); @@ -6253,6 +6282,7 @@ heap_inplace_lock(Relation relation, if (!ret) { UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock); + ForgetInplace_Inval(); InvalidateCatalogSnapshot(); } return ret; @@ -6274,6 +6304,8 @@ heap_inplace_update_and_unlock(Relation relation, HeapTupleHeader htup = oldtup->t_data; uint32 oldlen; uint32 newlen; + char *dst; + char *src; Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self)); oldlen = oldtup->t_len - htup->t_hoff; @@ -6281,15 +6313,28 @@ heap_inplace_update_and_unlock(Relation relation, if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff) elog(ERROR, "wrong tuple length"); - /* NO EREPORT(ERROR) from here till changes are logged */ - START_CRIT_SECTION(); + dst = (char *) htup + htup->t_hoff; + src = (char *) tuple->t_data + tuple->t_data->t_hoff; - memcpy((char *) htup + htup->t_hoff, - (char *) tuple->t_data + tuple->t_data->t_hoff, - newlen); + /* + * Unlink relcache init files as needed. If unlinking, acquire + * RelCacheInitLock until after associated invalidations. By doing this + * in advance, if we checkpoint and then crash between inplace + * XLogInsert() and inval, we don't rely on StartupXLOG() -> + * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would + * neglect to PANIC on EIO. 
+ */ + PreInplace_Inval(); /*---------- - * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid: + * NO EREPORT(ERROR) from here till changes are complete + * + * Our buffer lock won't stop a reader having already pinned and checked + * visibility for this tuple. Hence, we write WAL first, then mutate the + * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(), + * checkpoint delay makes that acceptable. With the usual order of + * changes, a crash after memcpy() and before XLogInsert() could allow + * datfrozenxid to overtake relfrozenxid: * * ["D" is a VACUUM (ONLY_DATABASE_STATS)] * ["R" is a VACUUM tbl] @@ -6299,14 +6344,36 @@ heap_inplace_update_and_unlock(Relation relation, * D: raise pg_database.datfrozenxid, XLogInsert(), finish * [crash] * [recovery restores datfrozenxid w/o relfrozenxid] + * + * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(). + * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack. + * The stack copy facilitates a FPI of the post-mutation block before we + * accept other sessions seeing it. DELAY_CHKPT_START allows us to + * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint() + * can operate under BUFFER_LOCK_SHARE, it can't avoid DELAY_CHKPT_START. + * This function, however, likely could avoid it with the following order + * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use + * DELAY_CHKPT_START here, too, as a way to have fewer distinct code + * patterns to analyze. Inplace update isn't so frequent that it should + * pursue the small optimization of skipping DELAY_CHKPT_START. 
*/ - - MarkBufferDirty(buffer); + Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0); + START_CRIT_SECTION(); + MyProc->delayChkptFlags |= DELAY_CHKPT_START; /* XLOG stuff */ if (RelationNeedsWAL(relation)) { xl_heap_inplace xlrec; + PGAlignedBlock copied_buffer; + char *origdata = (char *) BufferGetBlock(buffer); + Page page = BufferGetPage(buffer); + uint16 lower = ((PageHeader) page)->pd_lower; + uint16 upper = ((PageHeader) page)->pd_upper; + uintptr_t dst_offset_in_block; + RelFileLocator rlocator; + ForkNumber forkno; + BlockNumber blkno; XLogRecPtr recptr; xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self); @@ -6314,27 +6381,47 @@ heap_inplace_update_and_unlock(Relation relation, XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfHeapInplace); - XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen); + /* register block matching what buffer will look like after changes */ + memcpy(copied_buffer.data, origdata, lower); + memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper); + dst_offset_in_block = dst - origdata; + memcpy(copied_buffer.data + dst_offset_in_block, src, newlen); + BufferGetTag(buffer, &rlocator, &forkno, &blkno); + Assert(forkno == MAIN_FORKNUM); + XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data, + REGBUF_STANDARD); + XLogRegisterBufData(0, src, newlen); /* inplace updates aren't decoded atm, don't log the origin */ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE); - PageSetLSN(BufferGetPage(buffer), recptr); + PageSetLSN(page, recptr); } + memcpy(dst, src, newlen); + + MarkBufferDirty(buffer); + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + /* + * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we + * do this before UnlockTuple(). 
+ */ + AtInplace_Inval(); + + MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; END_CRIT_SECTION(); + UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock); - heap_inplace_unlock(relation, oldtup, buffer); + AcceptInvalidationMessages(); /* local processing of just-sent inval */ /* - * Send out shared cache inval if necessary. Note that because we only - * pass the new version of the tuple, this mustn't be used for any - * operations that could change catcache lookup keys. But we aren't - * bothering with index updates either, so that's true a fortiori. - * - * XXX ROLLBACK discards the invalidation. See test inplace-inval.spec. + * Queue a transactional inval, for logical decoding and for third-party + * code that might have been relying on it since long before inplace + * update adopted immediate invalidation. See README.tuplock section + * "Reading inplace-updated columns" for logical decoding details. */ if (!IsBootstrapProcessingMode()) CacheInvalidateHeapTuple(relation, tuple, NULL); @@ -6349,6 +6436,7 @@ heap_inplace_unlock(Relation relation, { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); UnlockTuple(relation, &oldtup->t_self, InplaceUpdateTupleLock); + ForgetInplace_Inval(); } /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heaptoast.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heaptoast.c index 52ecd45654c..ec22fbe0816 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heaptoast.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/heap/heaptoast.c @@ -770,7 +770,7 @@ heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize, chcpyend = (sliceoffset + slicelength - 1) % TOAST_MAX_CHUNK_SIZE; memcpy(VARDATA(result) + - (curchunk * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, + curchunk * TOAST_MAX_CHUNK_SIZE - sliceoffset + chcpystrt, chunkdata + chcpystrt, (chcpyend - chcpystrt) + 1); diff --git 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtcompare.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtcompare.c index 976a2cc6447..720733b75d2 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtcompare.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtcompare.c @@ -299,6 +299,9 @@ btoidvectorcmp(PG_FUNCTION_ARGS) oidvector *b = (oidvector *) PG_GETARG_POINTER(1); int i; + check_valid_oidvector(a); + check_valid_oidvector(b); + /* We arbitrarily choose to sort first by vector length */ if (a->dim1 != b->dim1) PG_RETURN_INT32(a->dim1 - b->dim1); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtutils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtutils.c index c8c7f7eded4..22147906763 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtutils.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/nbtree/nbtutils.c @@ -1779,7 +1779,6 @@ _bt_killitems(IndexScanDesc scan) buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ); latestlsn = BufferGetLSNAtomic(buf); - Assert(!XLogRecPtrIsInvalid(so->currPos.lsn)); Assert(so->currPos.lsn <= latestlsn); if (so->currPos.lsn != latestlsn) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/multixact.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/multixact.c index 2e584ac5c3b..391f8ad00d8 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/multixact.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/multixact.c @@ -338,6 +338,19 @@ static __thread MemoryContext MXactContext = NULL; #define debug_elog6(a,b,c,d,e,f) #endif +/* + * Hack to deal with WAL generated with older minor versions. 
+ * + * last_initialized_offsets_page is the XLOG_MULTIXACT_ZERO_OFF_PAGE record + * that we saw during WAL replay, or -1 if we haven't seen any yet. + * + * pre_initialized_offsets_page is the last page that was implicitly + * initialized by replaying a XLOG_MULTIXACT_CREATE_ID record, when we had not + * seen a XLOG_MULTIXACT_ZERO_OFF_PAGE record for the page yet. + */ +static __thread int last_initialized_offsets_page = -1; +static __thread int pre_initialized_offsets_page = -1; + /* internal MultiXactId management */ static void MultiXactIdSetOldestVisible(void); static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, @@ -869,13 +882,101 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int entryno; int slotno; MultiXactOffset *offptr; - int i; + MultiXactId next; + int next_pageno; + int next_entryno; + MultiXactOffset *next_offptr; + MultiXactOffset next_offset; LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); + /* position of this multixid in the offsets SLRU area */ pageno = MultiXactIdToOffsetPage(multi); entryno = MultiXactIdToOffsetEntry(multi); + /* position of the next multixid */ + next = multi + 1; + if (next < FirstMultiXactId) + next = FirstMultiXactId; + next_pageno = MultiXactIdToOffsetPage(next); + next_entryno = MultiXactIdToOffsetEntry(next); + + /* + * Older minor versions didn't set the next multixid's offset in this + * function, and therefore didn't initialize the next page until the next + * multixid was assigned. If we're replaying WAL that was generated by + * such a version, the next page might not be initialized yet. Initialize + * it now. + */ + if (InRecovery && next_pageno != pageno) + { + bool init_needed; + + /*---------- + * Check if the page exists, and if not, initialize it now. + * + * The straightforward way to check if the page exists is to call + * SimpleLruDoesPhysicalPageExist(). However, there are two problems with + * that: + * + * 1. 
It's somewhat expensive to call on every page switch. + * + * 2. It does not take into account pages that have been initialized + * in the SLRU buffer cache but not yet flushed to disk. For such + * pages, it will incorrectly return false. + * + * To fix both of those problems, if we have replayed any + * XLOG_MULTIXACT_ZERO_OFF_PAGE records, we assume that the last page + * that was zeroed by XLOG_MULTIXACT_ZERO_OFF_PAGE is the last page + * that exists. This works because the XLOG_MULTIXACT_ZERO_OFF_PAGE + * records must appear in the WAL in order, unlike CREATE_ID records. + * We only resort to SimpleLruDoesPhysicalPageExist() if we haven't + * seen any XLOG_MULTIXACT_ZERO_OFF_PAGE records yet, which should + * happen at most once after starting WAL recovery. + * + * As an extra safety measure, if we do resort to + * SimpleLruDoesPhysicalPageExist(), flush the SLRU buffers first so + * that it will return an accurate result. + *---------- + */ + if (last_initialized_offsets_page == -1) + { + SimpleLruWriteAll(MultiXactOffsetCtl, false); + init_needed = !SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, next_pageno); + } + else + init_needed = (last_initialized_offsets_page == pageno); + + if (init_needed) + { + elog(DEBUG1, "next offsets page is not initialized, initializing it now"); + + /* Create and zero the page */ + slotno = SimpleLruZeroPage(MultiXactOffsetCtl, next_pageno); + + /* Make sure it's written out */ + SimpleLruWritePage(MultiXactOffsetCtl, slotno); + Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); + + /* + * Remember that we initialized the page, so that we don't zero it + * again at the XLOG_MULTIXACT_ZERO_OFF_PAGE record. + */ + pre_initialized_offsets_page = next_pageno; + last_initialized_offsets_page = next_pageno; + } + } + + /* + * Set the starting offset of this multixid's members. 
+ * + * In the common case, it was already set by the previous + * RecordNewMultiXact call, as this was the next multixid of the previous + * multixid. But if multiple backends are generating multixids + * concurrently, we might race ahead and get called before the previous + * multixid. + */ + /* * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction" * to complain about if there's any I/O error. This is kinda bogus, but @@ -887,9 +988,41 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; offptr += entryno; - *offptr = offset; + if (*offptr != offset) + { + /* should already be set to the correct value, or not at all */ + Assert(*offptr == 0); + *offptr = offset; + MultiXactOffsetCtl->shared->page_dirty[slotno] = true; + } - MultiXactOffsetCtl->shared->page_dirty[slotno] = true; + /* + * Set the next multixid's offset to the end of this multixid's members. + */ + if (next_pageno == pageno) + { + next_offptr = offptr + 1; + } + else + { + /* must be the first entry on the page */ + Assert(next_entryno == 0 || next == FirstMultiXactId); + slotno = SimpleLruReadPage(MultiXactOffsetCtl, next_pageno, true, next); + next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; + next_offptr += next_entryno; + } + + /* Like in GetNewMultiXactId(), skip over offset 0 */ + next_offset = offset + nmembers; + if (next_offset == 0) + next_offset = 1; + if (*next_offptr != next_offset) + { + /* should already be set to the correct value, or not at all */ + Assert(*next_offptr == 0); + *next_offptr = next_offset; + MultiXactOffsetCtl->shared->page_dirty[slotno] = true; + } /* Exchange our lock */ LWLockRelease(MultiXactOffsetSLRULock); @@ -898,7 +1031,7 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, prev_pageno = -1; - for (i = 0; i < nmembers; i++, offset++) + for (int i = 0; i < nmembers; i++, offset++) { TransactionId 
*memberptr; uint32 *flagsptr; @@ -1073,8 +1206,11 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) result = FirstMultiXactId; } - /* Make sure there is room for the MXID in the file. */ - ExtendMultiXactOffset(result); + /* + * Make sure there is room for the next MXID in the file. Assigning this + * MXID sets the next MXID's offset already. + */ + ExtendMultiXactOffset(result + 1); /* * Reserve the members space, similarly to above. Also, be careful not to @@ -1315,21 +1451,14 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, * one's. However, there are some corner cases to worry about: * * 1. This multixact may be the latest one created, in which case there is - * no next one to look at. In this case the nextOffset value we just - * saved is the correct endpoint. - * - * 2. The next multixact may still be in process of being filled in: that - * is, another process may have done GetNewMultiXactId but not yet written - * the offset entry for that ID. In that scenario, it is guaranteed that - * the offset entry for that multixact exists (because GetNewMultiXactId - * won't release MultiXactGenLock until it does) but contains zero - * (because we are careful to pre-zero offset pages). Because - * GetNewMultiXactId will never return zero as the starting offset for a - * multixact, when we read zero as the next multixact's offset, we know we - * have this case. We sleep for a bit and try again. + * no next one to look at. The next multixact's offset should be set + * already, as we set it in RecordNewMultiXact(), but we used to not do + * that in older minor versions. To cope with that case, if this + * multixact is the latest one created, use the nextOffset value we read + * above as the endpoint. * - * 3. Because GetNewMultiXactId increments offset zero to offset one to - * handle case #2, there is an ambiguity near the point of offset + * 2. 
Because GetNewMultiXactId skips over offset zero, to reserve zero + * to mean "unset", there is an ambiguity near the point of offset * wraparound. If we see next multixact's offset is one, is that our * multixact's actual endpoint, or did it end at zero with a subsequent * increment? We handle this using the knowledge that if the zero'th @@ -1341,7 +1470,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, * cases, so it seems better than holding the MultiXactGenLock for a long * time on every multixact creation. */ -retry: LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); pageno = MultiXactIdToOffsetPage(multi); @@ -1386,13 +1514,10 @@ retry: nextMXOffset = *offptr; if (nextMXOffset == 0) - { - /* Corner case 2: next multixact is still being filled in */ - LWLockRelease(MultiXactOffsetSLRULock); - CHECK_FOR_INTERRUPTS(); - pg_usleep(1000L); - goto retry; - } + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("MultiXact %u has invalid next offset", + multi))); length = nextMXOffset - offset; } @@ -1428,7 +1553,10 @@ retry: if (!TransactionIdIsValid(*xactptr)) { - /* Corner case 3: we must be looking at unused slot zero */ + /* + * Corner case 2: offset must have wrapped around to unused slot + * zero. + */ Assert(offset == 0); continue; } @@ -2055,24 +2183,32 @@ TrimMultiXact(void) MultiXactOffsetCtl->shared->latest_page_number = pageno; /* - * Zero out the remainder of the current offsets page. See notes in - * TrimCLOG() for background. Unlike CLOG, some WAL record covers every - * pg_multixact SLRU mutation. Since, also unlike CLOG, we ignore the WAL - * rule "write xlog before data," nextMXact successors may carry obsolete, - * nonzero offset values. Zero those so case 2 of GetMultiXactIdMembers() - * operates normally. + * Set the offset of nextMXact on the offsets page. This is normally done + * in RecordNewMultiXact() of the previous multixact, but we used to not + * do that in older minor versions. 
To ensure that the next offset is set + * if the binary was just upgraded from an older minor version, do it now. + * + * Zero out the remainder of the page. See notes in TrimCLOG() for + * background. Unlike CLOG, some WAL record covers every pg_multixact + * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write + * xlog before data," nextMXact successors may carry obsolete, nonzero + * offset values. */ entryno = MultiXactIdToOffsetEntry(nextMXact); - if (entryno != 0) { int slotno; MultiXactOffset *offptr; - slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact); + if (entryno == 0) + slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno); + else + slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact); offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno]; offptr += entryno; - MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset))); + *offptr = offset; + if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ) + MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset)); MultiXactOffsetCtl->shared->page_dirty[slotno] = true; } @@ -3055,6 +3191,23 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) return; } + /* + * On crash, MultiXactIdCreateFromMembers() can leave behind multixids + * that were not yet written out and hence have zero offset on disk. If + * such a multixid becomes oldestMulti, we won't be able to look up its + * offset. That should be rare, so we don't try to do anything smart about + * it. Just skip the truncation, and hope that by the next truncation + * attempt, oldestMulti has advanced to a valid multixid. 
+ */ + if (newOldestOffset == 0) + { + ereport(LOG, + (errmsg("cannot truncate up to MultiXact %u because it has invalid offset, skipping truncation", + newOldestMulti))); + LWLockRelease(MultiXactTruncationLock); + return; + } + elog(DEBUG1, "performing multixact truncation: " "offsets [%u, %u), offsets segments [%x, %x), " "members [%u, %u), members segments [%x, %x)", @@ -3251,13 +3404,23 @@ multixact_redo(XLogReaderState *record) memcpy(&pageno, XLogRecGetData(record), sizeof(int)); - LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); - - slotno = ZeroMultiXactOffsetPage(pageno, false); - SimpleLruWritePage(MultiXactOffsetCtl, slotno); - Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); + /* + * Skip the record if we already initialized the page at the previous + * XLOG_MULTIXACT_CREATE_ID record. See RecordNewMultiXact(). + */ + if (pre_initialized_offsets_page != pageno) + { + LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); + slotno = ZeroMultiXactOffsetPage(pageno, false); + SimpleLruWritePage(MultiXactOffsetCtl, slotno); + Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); + LWLockRelease(MultiXactOffsetSLRULock); - LWLockRelease(MultiXactOffsetSLRULock); + last_initialized_offsets_page = pageno; + } + else + elog(DEBUG1, "skipping initialization of offsets page %d because it was already initialized on multixid creation", pageno); + pre_initialized_offsets_page = -1; } else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { @@ -3281,6 +3444,22 @@ multixact_redo(XLogReaderState *record) TransactionId max_xid; int i; + if (pre_initialized_offsets_page != -1) + { + /* + * If we implicitly initialized the next offsets page while + * replaying an XLOG_MULTIXACT_CREATE_ID record that was generated + * with an older minor version, we still expect to see an + * XLOG_MULTIXACT_ZERO_OFF_PAGE record for it before any other + * XLOG_MULTIXACT_CREATE_ID records. Therefore this case should + * not happen. 
If it does, we'll continue with the replay, but + * log a message to note that something's funny. + */ + elog(LOG, "expected to see an XLOG_MULTIXACT_ZERO_OFF_PAGE record for page %d that was implicitly initialized earlier", + pre_initialized_offsets_page); + pre_initialized_offsets_page = -1; + } + /* Store the data back into the SLRU files */ RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers, xlrec->members); @@ -3306,7 +3485,6 @@ multixact_redo(XLogReaderState *record) else if (info == XLOG_MULTIXACT_TRUNCATE_ID) { xl_multixact_truncate xlrec; - int pageno; memcpy(&xlrec, XLogRecGetData(record), SizeOfMultiXactTruncate); @@ -3331,14 +3509,6 @@ multixact_redo(XLogReaderState *record) SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false); PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb); - - /* - * During XLOG replay, latest_page_number isn't necessarily set up - * yet; insert a suitable value to bypass the sanity test in - * SimpleLruTruncate. - */ - pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff); - MultiXactOffsetCtl->shared->latest_page_number = pageno; PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff); LWLockRelease(MultiXactTruncationLock); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/parallel.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/parallel.c index 871e5504e53..76e823b5d63 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/parallel.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/parallel.c @@ -503,8 +503,12 @@ InitializeParallelDSM(ParallelContext *pcxt) void ReinitializeParallelDSM(ParallelContext *pcxt) { + MemoryContext oldcontext; FixedParallelState *fps; + /* We might be running in a very short-lived memory context. */ + oldcontext = MemoryContextSwitchTo(TopTransactionContext); + /* Wait for any old workers to exit. 
*/ if (pcxt->nworkers_launched > 0) { @@ -542,6 +546,9 @@ ReinitializeParallelDSM(ParallelContext *pcxt) pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL); } } + + /* Restore previous memory context. */ + MemoryContextSwitchTo(oldcontext); } /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xact.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xact.c index baaa77c6ee8..3d489760d3c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xact.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xact.c @@ -1338,14 +1338,24 @@ RecordTransactionCommit(void) /* * Transactions without an assigned xid can contain invalidation - * messages (e.g. explicit relcache invalidations or catcache - * invalidations for inplace updates); standbys need to process those. - * We can't emit a commit record without an xid, and we don't want to - * force assigning an xid, because that'd be problematic for e.g. - * vacuum. Hence we emit a bespoke record for the invalidations. We - * don't want to use that in case a commit record is emitted, so they - * happen synchronously with commits (besides not wanting to emit more - * WAL records). + * messages. While inplace updates do this, this is not known to be + * necessary; see comment at inplace CacheInvalidateHeapTuple(). + * Extensions might still rely on this capability, and standbys may + * need to process those invals. We can't emit a commit record + * without an xid, and we don't want to force assigning an xid, + * because that'd be problematic for e.g. vacuum. Hence we emit a + * bespoke record for the invalidations. We don't want to use that in + * case a commit record is emitted, so they happen synchronously with + * commits (besides not wanting to emit more WAL records). + * + * XXX Every known use of this capability is a defect. 
Since an XID + * isn't controlling visibility of the change that prompted invals, + * other sessions need the inval even if this transactions aborts. + * + * ON COMMIT DELETE ROWS does a nontransactional index_build(), which + * queues a relcache inval, including in transactions without an xid + * that had read the (empty) table. Standbys don't need any ON COMMIT + * DELETE ROWS invals, but we've not done the work to withhold them. */ if (nmsgs != 0) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlog.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlog.c index 4d1ce502cd3..d7aee0fdb80 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlog.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlog.c @@ -6701,8 +6701,18 @@ CreateCheckPoint(int flags) * according to synchronized LSNs of replication slots. The slot's LSN * might be advanced concurrently, so we call this before * CheckPointReplicationSlots() synchronizes replication slots. + * + * We acquire the Allocation lock to serialize the minimum LSN calculation + * with concurrent slot WAL reservation. This ensures that the WAL + * position being reserved is either included in the miminum LSN or is + * beyond or equal to the redo pointer of the current checkpoint (See + * ReplicationSlotReserveWal for details), thus preventing its removal by + * checkpoints. Note that this lock is required only during checkpoints + * where WAL removal is dictated by the slot's minimum LSN. */ + LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED); slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); + LWLockRelease(ReplicationSlotAllocationLock); /* * In some cases there are groups of actions that must all occur on one @@ -6877,7 +6887,10 @@ CreateCheckPoint(int flags) /* * Recalculate the current minimum LSN to be used in the WAL segment * cleanup. 
Then, we must synchronize the replication slots again in - * order to make this LSN safe to use. + * order to make this LSN safe to use. Here, we don't need to acquire + * the ReplicationSlotAllocationLock to serialize the minimum LSN + * computation with slot reservation as the RedoRecPtr is not updated + * after the previous computation of minimum LSN. */ slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); CheckPointReplicationSlots(); @@ -7248,8 +7261,16 @@ CreateRestartPoint(int flags) * according to synchronized LSNs of replication slots. The slot's LSN * might be advanced concurrently, so we call this before * CheckPointReplicationSlots() synchronizes replication slots. + * + * We acquire the Allocation lock to serialize the minimum LSN calculation + * with concurrent slot WAL reservation. This ensures that the WAL + * position being reserved is either included in the miminum LSN or is + * beyond or equal to the redo pointer of the current checkpoint (See + * ReplicationSlotReserveWal for details). */ + LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED); slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); + LWLockRelease(ReplicationSlotAllocationLock); if (log_checkpoints) LogCheckpointStart(flags, true); @@ -7341,7 +7362,10 @@ CreateRestartPoint(int flags) /* * Recalculate the current minimum LSN to be used in the WAL segment * cleanup. Then, we must synchronize the replication slots again in - * order to make this LSN safe to use. + * order to make this LSN safe to use. Here, we don't need to acquire + * the ReplicationSlotAllocationLock to serialize the minimum LSN + * computation with slot reservation as the RedoRecPtr is not updated + * after the previous computation of minimum LSN. 
*/ slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); CheckPointReplicationSlots(); @@ -8985,6 +9009,22 @@ GetXLogInsertRecPtr(void) } /* + * Get latest WAL record end pointer + */ +XLogRecPtr +GetXLogInsertEndRecPtr(void) +{ + XLogCtlInsert *Insert = &XLogCtl->Insert; + uint64 current_bytepos; + + SpinLockAcquire(&Insert->insertpos_lck); + current_bytepos = Insert->CurrBytePos; + SpinLockRelease(&Insert->insertpos_lck); + + return XLogBytePosToEndRecPtr(current_bytepos); +} + +/* * Get latest WAL write pointer */ XLogRecPtr @@ -9015,10 +9055,7 @@ void XLogShutdownWalRcv(void) { ShutdownWalRcv(); - - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - XLogCtl->InstallXLogFileSegmentActive = false; - LWLockRelease(ControlFileLock); + ResetInstallXLogFileSegmentActive(); } /* Enable WAL file recycling and preallocation. */ @@ -9030,6 +9067,15 @@ SetInstallXLogFileSegmentActive(void) LWLockRelease(ControlFileLock); } +/* Disable WAL file recycling and preallocation. */ +void +ResetInstallXLogFileSegmentActive(void) +{ + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + XLogCtl->InstallXLogFileSegmentActive = false; + LWLockRelease(ControlFileLock); +} + bool IsInstallXLogFileSegmentActive(void) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogrecovery.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogrecovery.c index c24162b2727..877b707e8cc 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogrecovery.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogrecovery.c @@ -813,6 +813,16 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, } memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint)); wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN); + + /* Make sure that REDO location exists. 
*/ + if (checkPoint.redo < CheckPointLoc) + { + XLogPrefetcherBeginRead(xlogprefetcher, checkPoint.redo); + if (!ReadRecord(xlogprefetcher, LOG, false, checkPoint.ThisTimeLineID)) + ereport(PANIC, + errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X", + LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc))); + } } /* @@ -3613,8 +3623,19 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, * Before we leave XLOG_FROM_STREAM state, make sure that * walreceiver is not active, so that it won't overwrite * WAL that we restore from archive. + * + * If walreceiver is actively streaming (or attempting to + * connect), we must shut it down. However, if it's + * already in WAITING state (e.g., due to timeline + * divergence), we only need to reset the install flag to + * allow archive restoration. */ - XLogShutdownWalRcv(); + if (WalRcvStreaming()) + XLogShutdownWalRcv(); + else + { + ResetInstallXLogFileSegmentActive(); + } /* * Before we sleep, re-scan for possible new timelines if @@ -4690,9 +4711,20 @@ RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue bool check_primary_slot_name(char **newval, void **extra, GucSource source) { + int err_code; + char *err_msg = NULL; + char *err_hint = NULL; + if (*newval && strcmp(*newval, "") != 0 && - !ReplicationSlotValidateName(*newval, WARNING)) + !ReplicationSlotValidateNameInternal(*newval, &err_code, &err_msg, + &err_hint)) + { + GUC_check_errcode(err_code); + GUC_check_errdetail("%s", err_msg); + if (err_hint != NULL) + GUC_check_errhint("%s", err_hint); return false; + } return true; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogstats.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogstats.c index 2d315df67a0..029a0392f3d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogstats.c +++ 
b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/access/transam/xlogstats.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * xlogstats.c - * Functions for WAL Statitstics + * Functions for WAL Statistics * * Copyright (c) 2022-2023, PostgreSQL Global Development Group * diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/backup/backup_manifest.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/backup/backup_manifest.c index cee62165246..b636296ec52 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/backup/backup_manifest.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/backup/backup_manifest.c @@ -251,7 +251,7 @@ AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, if (first_wal_range && endtli != entry->tli) ereport(ERROR, errmsg("expected end timeline %u but found timeline %u", - starttli, entry->tli)); + endtli, entry->tli)); /* * If this timeline entry matches with the timeline on which the diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/heap.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/heap.c index 3484481a16b..4250bd8a0a5 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/heap.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/heap.c @@ -508,9 +508,13 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind, */ for (i = 0; i < natts; i++) { - CheckAttributeType(NameStr(TupleDescAttr(tupdesc, i)->attname), - TupleDescAttr(tupdesc, i)->atttypid, - TupleDescAttr(tupdesc, i)->attcollation, + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + + if (attr->attisdropped) + continue; + CheckAttributeType(NameStr(attr->attname), + attr->atttypid, + attr->attcollation, NIL, /* assume we're creating a new rowtype */ flags); } @@ -647,6 +651,16 @@ CheckAttributeType(const char *attname, containing_rowtypes, flags); } + else if 
(att_typtype == TYPTYPE_MULTIRANGE) + { + /* + * If it's a multirange, recurse to check its plain range type. + */ + CheckAttributeType(attname, get_multirange_range(atttypid), + InvalidOid, /* range types are not collatable */ + containing_rowtypes, + flags); + } else if (OidIsValid((att_typelem = get_element_type(atttypid)))) { /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/index.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/index.c index d4d00adb1f4..f276623598c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/index.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/index.c @@ -2931,12 +2931,19 @@ index_update_stats(Relation rel, if (dirty) { systable_inplace_update_finish(state, tuple); - /* the above sends a cache inval message */ + /* the above sends transactional and immediate cache inval messages */ } else { systable_inplace_update_cancel(state); - /* no need to change tuple, but force relcache inval anyway */ + + /* + * While we didn't change relhasindex, CREATE INDEX needs a + * transactional inval for when the new index's catalog rows become + * visible. Other CREATE INDEX and REINDEX code happens to also queue + * this inval, but keep this in case rare callers rely on this part of + * our API contract. 
+ */ CacheInvalidateRelcacheByTuple(tuple); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/namespace.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/namespace.c index 841006ca918..5cf5655fb75 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/namespace.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/namespace.c @@ -2313,6 +2313,9 @@ StatisticsObjIsVisible(Oid relid) { Oid namespaceId = lfirst_oid(l); + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + if (namespaceId == stxnamespace) { /* Found it first in path */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_depend.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_depend.c index 02e0ce71a07..b3d1c2fba99 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_depend.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_depend.c @@ -23,11 +23,13 @@ #include "catalog/pg_constraint.h" #include "catalog/pg_depend.h" #include "catalog/pg_extension.h" +#include "catalog/pg_type.h" #include "commands/extension.h" #include "miscadmin.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/syscache.h" static bool isObjectPinned(const ObjectAddress *object); @@ -813,6 +815,77 @@ getAutoExtensionsOfObject(Oid classId, Oid objectId) } /* + * Look up a type belonging to an extension. + * + * Returns the type's OID, or InvalidOid if not found. + * + * Notice that the type is specified by name only, without a schema. + * That's because this will typically be used by relocatable extensions + * which can't make a-priori assumptions about which schema their objects + * are in. As long as the extension only defines one type of this name, + * the answer is unique anyway. + * + * We might later add the ability to look up functions, operators, etc. 
+ */ +Oid +getExtensionType(Oid extensionOid, const char *typname) +{ + Oid result = InvalidOid; + Relation depRel; + ScanKeyData key[3]; + SysScanDesc scan; + HeapTuple tup; + + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ExtensionRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(extensionOid)); + ScanKeyInit(&key[2], + Anum_pg_depend_refobjsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(0)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 3, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup); + + if (depform->classid == TypeRelationId && + depform->deptype == DEPENDENCY_EXTENSION) + { + Oid typoid = depform->objid; + HeapTuple typtup; + + typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typoid)); + if (!HeapTupleIsValid(typtup)) + continue; /* should we throw an error? 
*/ + if (strcmp(NameStr(((Form_pg_type) GETSTRUCT(typtup))->typname), + typname) == 0) + { + result = typoid; + ReleaseSysCache(typtup); + break; /* no need to keep searching */ + } + ReleaseSysCache(typtup); + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + + return result; +} + +/* * Detect whether a sequence is marked as "owned" by a column * * An ownership marker is an AUTO or INTERNAL dependency from the sequence to the diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_proc.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_proc.c index bb85225f105..f1167dcb90f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_proc.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_proc.c @@ -1162,7 +1162,7 @@ match_prosrc_to_literal(const char *prosrc, const char *literal, if (cursorpos > 0) newcp++; } - chlen = pg_mblen(prosrc); + chlen = pg_mblen_cstr(prosrc); if (strncmp(prosrc, literal, chlen) != 0) goto fail; prosrc += chlen; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_type.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_type.c index 23453b0da71..c68f25096a4 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_type.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_type.c @@ -927,7 +927,7 @@ char * makeMultirangeTypeName(const char *rangeTypeName, Oid typeNamespace) { char *buf; - char *rangestr; + const char *rangestr; /* * If the range type name contains "range" then change that to diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/async.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/async.c index 6ed633c7b7e..25ba0acfa18 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/async.c +++ 
b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/async.c @@ -461,9 +461,8 @@ static double asyncQueueUsage(void); static void asyncQueueFillWarning(void); static void SignalBackends(void); static void asyncQueueReadAllNotifications(void); -static bool asyncQueueProcessPageEntries(volatile QueuePosition *current, +static bool asyncQueueProcessPageEntries(QueuePosition *current, QueuePosition stop, - char *page_buffer, Snapshot snapshot); static void asyncQueueAdvanceTail(void); static void ProcessIncomingNotify(bool flush); @@ -1471,6 +1470,7 @@ asyncQueueAddEntries(ListCell *nextNotify) */ qe.length = QUEUE_PAGESIZE - offset; qe.dboid = InvalidOid; + qe.xid = InvalidTransactionId; qe.data[0] = '\0'; /* empty channel */ qe.data[1] = '\0'; /* empty payload */ } @@ -1899,17 +1899,10 @@ ProcessNotifyInterrupt(bool flush) static void asyncQueueReadAllNotifications(void) { - volatile QueuePosition pos; + QueuePosition pos; QueuePosition head; Snapshot snapshot; - /* page_buffer must be adequately aligned, so use a union */ - union - { - char buf[QUEUE_PAGESIZE]; - AsyncQueueEntry align; - } page_buffer; - /* Fetch current state */ LWLockAcquire(NotifyQueueLock, LW_SHARED); /* Assert checks that we have a valid state entry */ @@ -1969,49 +1962,27 @@ asyncQueueReadAllNotifications(void) * It is possible that we fail while trying to send a message to our * frontend (for example, because of encoding conversion failure). If * that happens it is critical that we not try to send the same message - * over and over again. Therefore, we place a PG_TRY block here that will - * forcibly advance our queue position before we lose control to an error. - * (We could alternatively retake NotifyQueueLock and move the position - * before handling each individual message, but that seems like too much - * lock traffic.) + * over and over again. Therefore, we set ExitOnAnyError to upgrade any + * ERRORs to FATAL, causing the client connection to be closed on error. 
+ * + * We used to only skip over the offending message and try to soldier on, + * but it was somewhat questionable to lose a notification and give the + * client an ERROR instead. A client application is not be prepared for + * that and can't tell that a notification was missed. It was also not + * very useful in practice because notifications are often processed while + * a connection is idle and reading a message from the client, and in that + * state, any error is upgraded to FATAL anyway. Closing the connection + * is a clear signal to the application that it might have missed + * notifications. */ - PG_TRY(); { + bool save_ExitOnAnyError = ExitOnAnyError; bool reachedStop; + ExitOnAnyError = true; + do { - int curpage = QUEUE_POS_PAGE(pos); - int curoffset = QUEUE_POS_OFFSET(pos); - int slotno; - int copysize; - - /* - * We copy the data from SLRU into a local buffer, so as to avoid - * holding the NotifySLRULock while we are examining the entries - * and possibly transmitting them to our frontend. Copy only the - * part of the page we will actually inspect. - */ - slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage, - InvalidTransactionId); - if (curpage == QUEUE_POS_PAGE(head)) - { - /* we only want to read as far as head */ - copysize = QUEUE_POS_OFFSET(head) - curoffset; - if (copysize < 0) - copysize = 0; /* just for safety */ - } - else - { - /* fetch all the rest of the page */ - copysize = QUEUE_PAGESIZE - curoffset; - } - memcpy(page_buffer.buf + curoffset, - NotifyCtl->shared->page_buffer[slotno] + curoffset, - copysize); - /* Release lock that we got from SimpleLruReadPage_ReadOnly() */ - LWLockRelease(NotifySLRULock); - /* * Process messages up to the stop position, end of page, or an * uncommitted message. @@ -2027,19 +1998,16 @@ asyncQueueReadAllNotifications(void) * rewrite pages under us. Especially we don't want to hold a lock * while sending the notifications to the frontend. 
*/ - reachedStop = asyncQueueProcessPageEntries(&pos, head, - page_buffer.buf, - snapshot); + reachedStop = asyncQueueProcessPageEntries(&pos, head, snapshot); } while (!reachedStop); - } - PG_FINALLY(); - { + /* Update shared state */ LWLockAcquire(NotifyQueueLock, LW_SHARED); QUEUE_BACKEND_POS(MyBackendId) = pos; LWLockRelease(NotifyQueueLock); + + ExitOnAnyError = save_ExitOnAnyError; } - PG_END_TRY(); /* Done with snapshot */ UnregisterSnapshot(snapshot); @@ -2049,31 +2017,42 @@ asyncQueueReadAllNotifications(void) * Fetch notifications from the shared queue, beginning at position current, * and deliver relevant ones to my frontend. * - * The current page must have been fetched into page_buffer from shared - * memory. (We could access the page right in shared memory, but that - * would imply holding the NotifySLRULock throughout this routine.) - * - * We stop if we reach the "stop" position, or reach a notification from an - * uncommitted transaction, or reach the end of the page. - * * The function returns true once we have reached the stop position or an * uncommitted notification, and false if we have finished with the page. * In other words: once it returns true there is no need to look further. * The QueuePosition *current is advanced past all processed messages. */ static bool -asyncQueueProcessPageEntries(volatile QueuePosition *current, +asyncQueueProcessPageEntries(QueuePosition *current, QueuePosition stop, - char *page_buffer, Snapshot snapshot) { + int64 curpage = QUEUE_POS_PAGE(*current); + int slotno; + char *page_buffer; bool reachedStop = false; bool reachedEndOfPage; - AsyncQueueEntry *qe; + + /* + * We copy the entries into a local buffer to avoid holding the SLRU lock + * while we transmit them to our frontend. The local buffer must be + * adequately aligned, so use a union. 
+ */ + union + { + char buf[QUEUE_PAGESIZE]; + AsyncQueueEntry align; + } local_buf; + char *local_buf_end = local_buf.buf; + + slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage, + InvalidTransactionId); + page_buffer = NotifyCtl->shared->page_buffer[slotno]; do { QueuePosition thisentry = *current; + AsyncQueueEntry *qe; if (QUEUE_POS_EQUAL(thisentry, stop)) break; @@ -2115,18 +2094,23 @@ asyncQueueProcessPageEntries(volatile QueuePosition *current, reachedStop = true; break; } - else if (TransactionIdDidCommit(qe->xid)) - { - /* qe->data is the null-terminated channel name */ - char *channel = qe->data; - if (IsListeningOn(channel)) - { - /* payload follows channel name */ - char *payload = qe->data + strlen(channel) + 1; + /* + * Quick check for the case that we're not listening on any + * channels, before calling TransactionIdDidCommit(). This makes + * that case a little faster, but more importantly, it ensures + * that if there's a bad entry in the queue for which + * TransactionIdDidCommit() fails for some reason, we can skip + * over it on the first LISTEN in a session, and not get stuck on + * it indefinitely. 
+ */ + if (listenChannels == NIL) + continue; - NotifyMyFrontEnd(channel, payload, qe->srcPid); - } + if (TransactionIdDidCommit(qe->xid)) + { + memcpy(local_buf_end, qe, qe->length); + local_buf_end += qe->length; } else { @@ -2140,6 +2124,32 @@ asyncQueueProcessPageEntries(volatile QueuePosition *current, /* Loop back if we're not at end of page */ } while (!reachedEndOfPage); + /* Release lock that we got from SimpleLruReadPage_ReadOnly() */ + LWLockRelease(NotifySLRULock); + + /* + * Now that we have let go of the SLRU bank lock, send the notifications + * to our backend + */ + Assert(local_buf_end - local_buf.buf <= BLCKSZ); + for (char *p = local_buf.buf; p < local_buf_end;) + { + AsyncQueueEntry *qe = (AsyncQueueEntry *) p; + + /* qe->data is the null-terminated channel name */ + char *channel = qe->data; + + if (IsListeningOn(channel)) + { + /* payload follows channel name */ + char *payload = qe->data + strlen(channel) + 1; + + NotifyMyFrontEnd(channel, payload, qe->srcPid); + } + + p += qe->length; + } + if (QUEUE_POS_EQUAL(*current, stop)) reachedStop = true; @@ -2223,6 +2233,117 @@ asyncQueueAdvanceTail(void) } /* + * AsyncNotifyFreezeXids + * + * Prepare the async notification queue for CLOG truncation by freezing + * transaction IDs that are about to become inaccessible. + * + * This function is called by VACUUM before advancing datfrozenxid. It scans + * the notification queue and replaces XIDs that would become inaccessible + * after CLOG truncation with special markers: + * - Committed transactions are set to FrozenTransactionId + * - Aborted/crashed transactions are set to InvalidTransactionId + * + * Only XIDs < newFrozenXid are processed, as those are the ones whose CLOG + * pages will be truncated. If XID < newFrozenXid, it cannot still be running + * (or it would have held back newFrozenXid through ProcArray). 
+ * Therefore, if TransactionIdDidCommit returns false, we know the transaction + * either aborted explicitly or crashed, and we can safely mark it invalid. + */ +void +AsyncNotifyFreezeXids(TransactionId newFrozenXid) +{ + QueuePosition pos; + QueuePosition head; + int64 curpage = -1; + int slotno = -1; + char *page_buffer = NULL; + bool page_dirty = false; + + /* + * Acquire locks in the correct order to avoid deadlocks. As per the + * locking protocol: NotifyQueueTailLock, then NotifyQueueLock, then + * NotifySLRULock. + * + * We only need SHARED mode since we're just reading the head/tail + * positions, not modifying them. + */ + LWLockAcquire(NotifyQueueTailLock, LW_SHARED); + LWLockAcquire(NotifyQueueLock, LW_SHARED); + + pos = QUEUE_TAIL; + head = QUEUE_HEAD; + + /* Release NotifyQueueLock early, we only needed to read the positions */ + LWLockRelease(NotifyQueueLock); + + /* + * Scan the queue from tail to head, freezing XIDs as needed. We hold + * NotifyQueueTailLock throughout to ensure the tail doesn't move while + * we're working. 
+ */ + while (!QUEUE_POS_EQUAL(pos, head)) + { + AsyncQueueEntry *qe; + TransactionId xid; + int64 pageno = QUEUE_POS_PAGE(pos); + int offset = QUEUE_POS_OFFSET(pos); + + /* If we need a different page, release old lock and get new one */ + if (pageno != curpage) + { + /* Release previous page if any */ + if (slotno >= 0) + { + if (page_dirty) + { + NotifyCtl->shared->page_dirty[slotno] = true; + page_dirty = false; + } + LWLockRelease(NotifySLRULock); + } + + LWLockAcquire(NotifySLRULock, LW_EXCLUSIVE); + slotno = SimpleLruReadPage(NotifyCtl, pageno, true, + InvalidTransactionId); + page_buffer = NotifyCtl->shared->page_buffer[slotno]; + curpage = pageno; + } + + qe = (AsyncQueueEntry *) (page_buffer + offset); + xid = qe->xid; + + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, newFrozenXid)) + { + if (TransactionIdDidCommit(xid)) + { + qe->xid = FrozenTransactionId; + page_dirty = true; + } + else + { + qe->xid = InvalidTransactionId; + page_dirty = true; + } + } + + /* Advance to next entry */ + asyncQueueAdvance(&pos, qe->length); + } + + /* Release final page lock if we acquired one */ + if (slotno >= 0) + { + if (page_dirty) + NotifyCtl->shared->page_dirty[slotno] = true; + LWLockRelease(NotifySLRULock); + } + + LWLockRelease(NotifyQueueTailLock); +} + +/* * ProcessIncomingNotify * * Scan the queue for arriving notifications and report them to the front diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/copy.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/copy.c index f14fae33083..a469825bdce 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/copy.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/copy.c @@ -135,6 +135,9 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, if (stmt->whereClause) { + Bitmapset *expr_attrs = NULL; + int i; + /* add nsitem to query namespace */ addNSItemToQuery(pstate, nsitem, false, true, true); @@ -147,6 +150,41 @@ 
DoCopy(ParseState *pstate, const CopyStmt *stmt, /* we have to fix its collations too */ assign_expr_collations(pstate, whereClause); + /* + * Examine all the columns in the WHERE clause expression. When + * the whole-row reference is present, examine all the columns of + * the table. + */ + pull_varattnos(whereClause, 1, &expr_attrs); + if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, expr_attrs)) + { + expr_attrs = bms_add_range(expr_attrs, + 1 - FirstLowInvalidHeapAttributeNumber, + RelationGetNumberOfAttributes(rel) - FirstLowInvalidHeapAttributeNumber); + expr_attrs = bms_del_member(expr_attrs, 0 - FirstLowInvalidHeapAttributeNumber); + } + + i = -1; + while ((i = bms_next_member(expr_attrs, i)) >= 0) + { + AttrNumber attno = i + FirstLowInvalidHeapAttributeNumber; + + Assert(attno != 0); + + /* + * Prohibit generated columns in the WHERE clause. Stored + * generated columns are not yet computed when the filtering + * happens. + */ + if (attno > 0 && + TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated) + ereport(ERROR, + errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("generated columns are not supported in COPY FROM WHERE conditions"), + errdetail("Column \"%s\" is a generated column.", + get_attname(RelationGetRelid(rel), attno, false))); + } + whereClause = eval_const_expressions(NULL, whereClause); whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/dbcommands.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/dbcommands.c index a100d5b37e0..f39709666ea 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/dbcommands.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/dbcommands.c @@ -3289,6 +3289,7 @@ dbase_redo(XLogReaderState *record) parent_path = pstrdup(dbpath); get_parent_directory(parent_path); recovery_create_dbdir(parent_path, true); + pfree(parent_path); /* Create 
the database directory with the version file. */ CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/event_trigger.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/event_trigger.c index 9462a8892b7..22b32ead436 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/event_trigger.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/event_trigger.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/genam.h" #include "access/htup_details.h" #include "access/table.h" #include "access/xact.h" @@ -20,10 +21,12 @@ #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" +#include "catalog/pg_attrdef.h" #include "catalog/pg_event_trigger.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" +#include "catalog/pg_policy.h" #include "catalog/pg_proc.h" #include "catalog/pg_trigger.h" #include "catalog/pg_ts_config.h" @@ -98,6 +101,8 @@ static Oid insert_event_trigger_tuple(const char *trigname, const char *eventnam static void validate_ddl_tags(const char *filtervar, List *taglist); static void validate_table_rewrite_tags(const char *filtervar, List *taglist); static void EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata); +static bool obtain_object_name_namespace(const ObjectAddress *object, + SQLDropObject *obj); static const char *stringify_grant_objtype(ObjectType objtype); static const char *stringify_adefprivs_objtype(ObjectType objtype); @@ -1180,12 +1185,6 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no Assert(EventTriggerSupportsObjectClass(getObjectClass(object))); - /* don't report temp schemas except my own */ - if (object->classId == NamespaceRelationId && - (isAnyTempNamespace(object->objectId) && - !isTempNamespace(object->objectId))) - return; - oldcxt = 
MemoryContextSwitchTo(currentEventTriggerState->cxt); obj = palloc0(sizeof(SQLDropObject)); @@ -1193,21 +1192,172 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no obj->original = original; obj->normal = normal; + if (object->classId == NamespaceRelationId) + { + /* Special handling is needed for temp namespaces */ + if (isTempNamespace(object->objectId)) + obj->istemp = true; + else if (isAnyTempNamespace(object->objectId)) + { + /* don't report temp schemas except my own */ + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + obj->objname = get_namespace_name(object->objectId); + } + else if (object->classId == AttrDefaultRelationId) + { + /* We treat a column default as temp if its table is temp */ + ObjectAddress colobject; + + colobject = GetAttrDefaultColumnAddress(object->objectId); + if (OidIsValid(colobject.objectId)) + { + if (!obtain_object_name_namespace(&colobject, obj)) + { + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + } + else if (object->classId == TriggerRelationId) + { + /* Similarly, a trigger is temp if its table is temp */ + /* Sadly, there's no lsyscache.c support for trigger objects */ + Relation pg_trigger_rel; + ScanKeyData skey[1]; + SysScanDesc sscan; + HeapTuple tuple; + Oid relid; + + /* Fetch the trigger's table OID the hard way */ + pg_trigger_rel = table_open(TriggerRelationId, AccessShareLock); + ScanKeyInit(&skey[0], + Anum_pg_trigger_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(object->objectId)); + sscan = systable_beginscan(pg_trigger_rel, TriggerOidIndexId, true, + NULL, 1, skey); + tuple = systable_getnext(sscan); + if (HeapTupleIsValid(tuple)) + relid = ((Form_pg_trigger) GETSTRUCT(tuple))->tgrelid; + else + relid = InvalidOid; /* shouldn't happen */ + systable_endscan(sscan); + table_close(pg_trigger_rel, AccessShareLock); + /* Do nothing if we didn't find the trigger */ + if (OidIsValid(relid)) + { + ObjectAddress relobject; + + relobject.classId 
= RelationRelationId; + relobject.objectId = relid; + /* Arbitrarily set objectSubId nonzero so as not to fill objname */ + relobject.objectSubId = 1; + if (!obtain_object_name_namespace(&relobject, obj)) + { + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + } + else if (object->classId == PolicyRelationId) + { + /* Similarly, a policy is temp if its table is temp */ + /* Sadly, there's no lsyscache.c support for policy objects */ + Relation pg_policy_rel; + ScanKeyData skey[1]; + SysScanDesc sscan; + HeapTuple tuple; + Oid relid; + + /* Fetch the policy's table OID the hard way */ + pg_policy_rel = table_open(PolicyRelationId, AccessShareLock); + ScanKeyInit(&skey[0], + Anum_pg_policy_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(object->objectId)); + sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true, + NULL, 1, skey); + tuple = systable_getnext(sscan); + if (HeapTupleIsValid(tuple)) + relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid; + else + relid = InvalidOid; /* shouldn't happen */ + systable_endscan(sscan); + table_close(pg_policy_rel, AccessShareLock); + /* Do nothing if we didn't find the policy */ + if (OidIsValid(relid)) + { + ObjectAddress relobject; + + relobject.classId = RelationRelationId; + relobject.objectId = relid; + /* Arbitrarily set objectSubId nonzero so as not to fill objname */ + relobject.objectSubId = 1; + if (!obtain_object_name_namespace(&relobject, obj)) + { + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + } + else + { + /* Generic handling for all other object classes */ + if (!obtain_object_name_namespace(object, obj)) + { + /* don't report temp objects except my own */ + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + + /* object identity, objname and objargs */ + obj->objidentity = + getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs, + false); + + /* object type */ + obj->objecttype = getObjectTypeDescription(&obj->address, 
false); + + slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next); + + MemoryContextSwitchTo(oldcxt); +} + +/* + * Fill obj->objname, obj->schemaname, and obj->istemp based on object. + * + * Returns true if this object should be reported, false if it should + * be ignored because it is a temporary object of another session. + */ +static bool +obtain_object_name_namespace(const ObjectAddress *object, SQLDropObject *obj) +{ /* * Obtain schema names from the object's catalog tuple, if one exists; * this lets us skip objects in temp schemas. We trust that * ObjectProperty contains all object classes that can be * schema-qualified. + * + * Currently, this function does nothing for object classes that are not + * in ObjectProperty, but we might sometime add special cases for that. */ if (is_objectclass_supported(object->classId)) { Relation catalog; HeapTuple tuple; - catalog = table_open(obj->address.classId, AccessShareLock); + catalog = table_open(object->classId, AccessShareLock); tuple = get_catalog_object_by_oid(catalog, get_object_attnum_oid(object->classId), - obj->address.objectId); + object->objectId); if (tuple) { @@ -1215,7 +1365,7 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no Datum datum; bool isnull; - attnum = get_object_attnum_namespace(obj->address.classId); + attnum = get_object_attnum_namespace(object->classId); if (attnum != InvalidAttrNumber) { datum = heap_getattr(tuple, attnum, @@ -1233,10 +1383,9 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no } else if (isAnyTempNamespace(namespaceId)) { - pfree(obj); + /* no need to fill any fields of *obj */ table_close(catalog, AccessShareLock); - MemoryContextSwitchTo(oldcxt); - return; + return false; } else { @@ -1246,10 +1395,10 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no } } - if (get_object_namensp_unique(obj->address.classId) && - obj->address.objectSubId == 0) + if 
(get_object_namensp_unique(object->classId) && + object->objectSubId == 0) { - attnum = get_object_attnum_name(obj->address.classId); + attnum = get_object_attnum_name(object->classId); if (attnum != InvalidAttrNumber) { datum = heap_getattr(tuple, attnum, @@ -1262,24 +1411,8 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no table_close(catalog, AccessShareLock); } - else - { - if (object->classId == NamespaceRelationId && - isTempNamespace(object->objectId)) - obj->istemp = true; - } - - /* object identity, objname and objargs */ - obj->objidentity = - getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs, - false); - /* object type */ - obj->objecttype = getObjectTypeDescription(&obj->address, false); - - slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next); - - MemoryContextSwitchTo(oldcxt); + return true; } /* @@ -1772,8 +1905,11 @@ EventTriggerCollectAlterTSConfig(AlterTSConfigurationStmt *stmt, Oid cfgId, command->in_extension = creating_extension; ObjectAddressSet(command->d.atscfg.address, TSConfigRelationId, cfgId); - command->d.atscfg.dictIds = palloc(sizeof(Oid) * ndicts); - memcpy(command->d.atscfg.dictIds, dictIds, sizeof(Oid) * ndicts); + if (ndicts > 0) + { + command->d.atscfg.dictIds = palloc_array(Oid, ndicts); + memcpy(command->d.atscfg.dictIds, dictIds, sizeof(Oid) * ndicts); + } command->d.atscfg.ndicts = ndicts; command->parsetree = (Node *) copyObject(stmt); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/extension.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/extension.c index c810b888246..15a86de25d3 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/extension.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/extension.c @@ -46,6 +46,7 @@ #include "catalog/pg_depend.h" #include "catalog/pg_extension.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" #include 
"catalog/pg_type.h" #include "commands/alter.h" #include "commands/comment.h" @@ -62,10 +63,12 @@ #include "utils/builtins.h" #include "utils/conffiles.h" #include "utils/fmgroids.h" +#include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/snapmgr.h" +#include "utils/syscache.h" #include "utils/varlena.h" @@ -108,7 +111,26 @@ typedef struct ExtensionVersionInfo struct ExtensionVersionInfo *previous; /* current best predecessor */ } ExtensionVersionInfo; +/* + * Cache structure for get_function_sibling_type (and maybe later, + * allied lookup functions). + */ +typedef struct ExtensionSiblingCache +{ + struct ExtensionSiblingCache *next; /* list link */ + /* lookup key: requesting function's OID and type name */ + Oid reqfuncoid; + const char *typname; + bool valid; /* is entry currently valid? */ + uint32 exthash; /* cache hash of owning extension's OID */ + Oid typeoid; /* OID associated with typname */ +} ExtensionSiblingCache; + +/* Head of linked list of ExtensionSiblingCache structs */ +static __thread ExtensionSiblingCache *ext_sibling_list = NULL; + /* Local functions */ +static void ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue); static List *find_update_path(List *evi_list, ExtensionVersionInfo *evi_start, ExtensionVersionInfo *evi_target, @@ -259,6 +281,114 @@ get_extension_schema_original(Oid ext_oid) } /* + * get_function_sibling_type - find a type belonging to same extension as func + * + * Returns the type's OID, or InvalidOid if not found. + * + * This is useful in extensions, which won't have fixed object OIDs. + * We work from the calling function's own OID, which it can get from its + * FunctionCallInfo parameter, and look up the owning extension and thence + * a type belonging to the same extension. + * + * Notice that the type is specified by name only, without a schema. 
+ * That's because this will typically be used by relocatable extensions + * which can't make a-priori assumptions about which schema their objects + * are in. As long as the extension only defines one type of this name, + * the answer is unique anyway. + * + * We might later add the ability to look up functions, operators, etc. + * + * This code is simply a frontend for some pg_depend lookups. Those lookups + * are fairly expensive, so we provide a simple cache facility. We assume + * that the passed typname is actually a C constant, or at least permanently + * allocated, so that we need not copy that string. + */ +Oid +get_function_sibling_type(Oid funcoid, const char *typname) +{ + ExtensionSiblingCache *cache_entry; + Oid extoid; + Oid typeoid; + + /* + * See if we have the answer cached. Someday there may be enough callers + * to justify a hash table, but for now, a simple linked list is fine. + */ + for (cache_entry = ext_sibling_list; cache_entry != NULL; + cache_entry = cache_entry->next) + { + if (funcoid == cache_entry->reqfuncoid && + strcmp(typname, cache_entry->typname) == 0) + break; + } + if (cache_entry && cache_entry->valid) + return cache_entry->typeoid; + + /* + * Nope, so do the expensive lookups. We do not expect failures, so we do + * not cache negative results. + */ + extoid = getExtensionOfObject(ProcedureRelationId, funcoid); + if (!OidIsValid(extoid)) + return InvalidOid; + typeoid = getExtensionType(extoid, typname); + if (!OidIsValid(typeoid)) + return InvalidOid; + + /* + * Build, or revalidate, cache entry. 
+ */ + if (cache_entry == NULL) + { + /* Register invalidation hook if this is first entry */ + if (ext_sibling_list == NULL) + CacheRegisterSyscacheCallback(EXTENSIONOID, + ext_sibling_callback, + (Datum) 0); + + /* Momentarily zero the space to ensure valid flag is false */ + cache_entry = (ExtensionSiblingCache *) + MemoryContextAllocZero(CacheMemoryContext, + sizeof(ExtensionSiblingCache)); + cache_entry->next = ext_sibling_list; + ext_sibling_list = cache_entry; + } + + cache_entry->reqfuncoid = funcoid; + cache_entry->typname = typname; + cache_entry->exthash = GetSysCacheHashValue1(EXTENSIONOID, + ObjectIdGetDatum(extoid)); + cache_entry->typeoid = typeoid; + /* Mark it valid only once it's fully populated */ + cache_entry->valid = true; + + return typeoid; +} + +/* + * ext_sibling_callback + * Syscache inval callback function for EXTENSIONOID cache + * + * It seems sufficient to invalidate ExtensionSiblingCache entries when + * the owning extension's pg_extension entry is modified or deleted. + * Neither a requesting function's OID, nor the OID of the object it's + * looking for, could change without an extension update or drop/recreate. 
+ */ +static void +ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue) +{ + ExtensionSiblingCache *cache_entry; + + for (cache_entry = ext_sibling_list; cache_entry != NULL; + cache_entry = cache_entry->next) + { + if (hashvalue == 0 || + cache_entry->exthash == hashvalue) + cache_entry->valid = false; + } +} + +/* * Utility functions to check validity of extension and version names */ static void diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/foreigncmds.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/foreigncmds.c index 6baf80cf09c..ec648051c6f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/foreigncmds.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/foreigncmds.c @@ -740,6 +740,11 @@ AlterForeignDataWrapper(ParseState *pstate, AlterFdwStmt *stmt) ereport(WARNING, (errmsg("changing the foreign-data wrapper handler can change behavior of existing foreign tables"))); } + else + { + /* handler unchanged */ + fdwhandler = fdwForm->fdwhandler; + } if (validator_given) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/operatorcmds.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/operatorcmds.c index cd7f83136f7..5166ad85881 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/operatorcmds.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/operatorcmds.c @@ -276,7 +276,6 @@ ValidateRestrictionEstimator(List *restrictionName) { Oid typeId[4]; Oid restrictionOid; - AclResult aclresult; typeId[0] = INTERNALOID; /* PlannerInfo */ typeId[1] = OIDOID; /* operator OID */ @@ -292,11 +291,33 @@ ValidateRestrictionEstimator(List *restrictionName) errmsg("restriction estimator function %s must return type %s", NameListToString(restrictionName), "float8"))); - /* Require EXECUTE rights for the estimator */ - aclresult = object_aclcheck(ProcedureRelationId, 
restrictionOid, GetUserId(), ACL_EXECUTE); - if (aclresult != ACLCHECK_OK) - aclcheck_error(aclresult, OBJECT_FUNCTION, - NameListToString(restrictionName)); + /* + * If the estimator is not a built-in function, require superuser + * privilege to install it. This protects against using something that is + * not a restriction estimator or has hard-wired assumptions about what + * data types it is working with. (Built-in estimators are required to + * defend themselves adequately against unexpected data type choices, but + * it seems impractical to expect that of extensions' estimators.) + * + * If it is built-in, only require EXECUTE rights. + */ + if (restrictionOid >= FirstGenbkiObjectId) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to specify a non-built-in restriction estimator function"))); + } + else + { + AclResult aclresult; + + aclresult = object_aclcheck(ProcedureRelationId, restrictionOid, + GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(restrictionName)); + } return restrictionOid; } @@ -312,7 +333,6 @@ ValidateJoinEstimator(List *joinName) Oid typeId[5]; Oid joinOid; Oid joinOid2; - AclResult aclresult; typeId[0] = INTERNALOID; /* PlannerInfo */ typeId[1] = OIDOID; /* operator OID */ @@ -350,11 +370,24 @@ ValidateJoinEstimator(List *joinName) errmsg("join estimator function %s must return type %s", NameListToString(joinName), "float8"))); - /* Require EXECUTE rights for the estimator */ - aclresult = object_aclcheck(ProcedureRelationId, joinOid, GetUserId(), ACL_EXECUTE); - if (aclresult != ACLCHECK_OK) - aclcheck_error(aclresult, OBJECT_FUNCTION, - NameListToString(joinName)); + /* privilege checks are the same as in ValidateRestrictionEstimator */ + if (joinOid >= FirstGenbkiObjectId) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to specify a 
non-built-in join estimator function"))); + } + else + { + AclResult aclresult; + + aclresult = object_aclcheck(ProcedureRelationId, joinOid, + GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(joinName)); + } return joinOid; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/publicationcmds.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/publicationcmds.c index f4ba572697a..874a8ce6e14 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/publicationcmds.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/publicationcmds.c @@ -123,7 +123,12 @@ parse_publication_options(ParseState *pstate, pubactions->pubtruncate = false; *publish_given = true; - publish = defGetString(defel); + + /* + * SplitIdentifierString destructively modifies its input, so make + * a copy so we don't modify the memory of the executing statement + */ + publish = pstrdup(defGetString(defel)); if (!SplitIdentifierString(publish, ',', &publish_list)) ereport(ERROR, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/statscmds.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/statscmds.c index 3568edbb553..78b000a24d3 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/statscmds.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/statscmds.c @@ -62,7 +62,7 @@ compare_int16(const void *a, const void *b) * CREATE STATISTICS */ ObjectAddress -CreateStatistics(CreateStatsStmt *stmt) +CreateStatistics(CreateStatsStmt *stmt, bool check_rights) { int16 attnums[STATS_MAX_DIMENSIONS]; int nattnums = 0; @@ -137,7 +137,13 @@ CreateStatistics(CreateStatsStmt *stmt) RelationGetRelationName(rel)), errdetail_relkind_not_supported(rel->rd_rel->relkind))); - /* You must own the relation to create stats on it */ + /* + * You must own the relation to create 
stats on it. + * + * NB: Concurrent changes could cause this function's lookup to find a + * different relation than a previous lookup by the caller, so we must + * perform this check even when check_rights == false. + */ if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), stxowner)) aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind), RelationGetRelationName(rel)); @@ -173,6 +179,21 @@ CreateStatistics(CreateStatsStmt *stmt) namestrcpy(&stxname, namestr); /* + * Check we have creation rights in target namespace. Skip check if + * caller doesn't want it. + */ + if (check_rights) + { + AclResult aclresult; + + aclresult = object_aclcheck(NamespaceRelationId, namespaceId, + GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + + /* * Deal with the possibility that the statistics object already exists. */ if (SearchSysCacheExists2(STATEXTNAMENSP, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/subscriptioncmds.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/subscriptioncmds.c index e1260fc0e90..46cfed12731 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/subscriptioncmds.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/subscriptioncmds.c @@ -1008,10 +1008,10 @@ AlterSubscription_refresh(Subscription *sub, bool copy_data, * * It is possible that the origin is not yet created for * tablesync worker, this can happen for the states before - * SUBREL_STATE_FINISHEDCOPY. The tablesync worker or - * apply worker can also concurrently try to drop the - * origin and by this time the origin might be already - * removed. For these reasons, passing missing_ok = true. + * SUBREL_STATE_DATASYNC. The tablesync worker or apply + * worker can also concurrently try to drop the origin and + * by this time the origin might be already removed. 
For + * these reasons, passing missing_ok = true. */ ReplicationOriginNameForLogicalRep(sub->oid, relid, originname, sizeof(originname)); @@ -1491,10 +1491,12 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) bool must_use_password; /* - * Lock pg_subscription with AccessExclusiveLock to ensure that the - * launcher doesn't restart new worker during dropping the subscription + * The launcher may concurrently start a new worker for this subscription. + * During initialization, the worker checks for subscription validity and + * exits if the subscription has already been dropped. See + * InitializeLogRepWorker. */ - rel = table_open(SubscriptionRelationId, AccessExclusiveLock); + rel = table_open(SubscriptionRelationId, RowExclusiveLock); tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId, CStringGetDatum(stmt->subname)); @@ -1636,7 +1638,7 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) * * It is possible that the origin is not yet created for tablesync * worker so passing missing_ok = true. This can happen for the states - * before SUBREL_STATE_FINISHEDCOPY. + * before SUBREL_STATE_DATASYNC. 
*/ ReplicationOriginNameForLogicalRep(subid, relid, originname, sizeof(originname)); @@ -1984,9 +1986,14 @@ check_publications_origin(WalReceiverConn *wrconn, List *publications, Oid relid = subrel_local_oids[i]; char *schemaname = get_namespace_name(get_rel_namespace(relid)); char *tablename = get_rel_name(relid); + char *schemaname_lit = quote_literal_cstr(schemaname); + char *tablename_lit = quote_literal_cstr(tablename); + + appendStringInfo(&cmd, "AND NOT (N.nspname = %s AND C.relname = %s)\n", + schemaname_lit, tablename_lit); - appendStringInfo(&cmd, "AND NOT (N.nspname = '%s' AND C.relname = '%s')\n", - schemaname, tablename); + pfree(schemaname_lit); + pfree(tablename_lit); } res = walrcv_exec(wrconn, cmd.data, 1, tableRow); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/tablecmds.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/tablecmds.c index 0b212e623e3..cf4d2610e4b 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/tablecmds.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/tablecmds.c @@ -8873,7 +8873,7 @@ ATExecAddStatistics(AlteredTableInfo *tab, Relation rel, /* The CreateStatsStmt has already been through transformStatsStmt */ Assert(stmt->transformed); - address = CreateStatistics(stmt); + address = CreateStatistics(stmt, !is_rebuild); return address; } @@ -17636,6 +17636,8 @@ ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNu /* Expression */ Node *expr = pelem->expr; char partattname[16]; + Bitmapset *expr_attrs = NULL; + int i; Assert(expr != NULL); atttype = exprType(expr); @@ -17659,9 +17661,55 @@ ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNu while (IsA(expr, CollateExpr)) expr = (Node *) ((CollateExpr *) expr)->arg; + /* + * Examine all the columns in the partition key expression. 
When + * the whole-row reference is present, examine all the columns of + * the partitioned table. + */ + pull_varattnos(expr, 1, &expr_attrs); + if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, expr_attrs)) + { + expr_attrs = bms_add_range(expr_attrs, + 1 - FirstLowInvalidHeapAttributeNumber, + RelationGetNumberOfAttributes(rel) - FirstLowInvalidHeapAttributeNumber); + expr_attrs = bms_del_member(expr_attrs, 0 - FirstLowInvalidHeapAttributeNumber); + } + + i = -1; + while ((i = bms_next_member(expr_attrs, i)) >= 0) + { + AttrNumber attno = i + FirstLowInvalidHeapAttributeNumber; + + Assert(attno != 0); + + /* + * Cannot allow system column references, since that would + * make partition routing impossible: their values won't be + * known yet when we need to do that. + */ + if (attno < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("partition key expressions cannot contain system column references"))); + + /* + * Generated columns cannot work: They are computed after + * BEFORE triggers, but partition routing is done before all + * triggers. + */ + if (TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot use generated column in partition key"), + errdetail("Column \"%s\" is a generated column.", + get_attname(RelationGetRelid(rel), attno, false)), + parser_errposition(pstate, pelem->location))); + } + if (IsA(expr, Var) && ((Var *) expr)->varattno > 0) { + /* * User wrote "(column)" or "(column COLLATE something)". * Treat it like simple attribute anyway. 
@@ -17670,9 +17718,6 @@ ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNu } else { - Bitmapset *expr_attrs = NULL; - int i; - partattrs[attn] = 0; /* marks the column as expression */ *partexprs = lappend(*partexprs, expr); @@ -17683,41 +17728,6 @@ ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNu */ /* - * Cannot allow system column references, since that would - * make partition routing impossible: their values won't be - * known yet when we need to do that. - */ - pull_varattnos(expr, 1, &expr_attrs); - for (i = FirstLowInvalidHeapAttributeNumber; i < 0; i++) - { - if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber, - expr_attrs)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("partition key expressions cannot contain system column references"))); - } - - /* - * Generated columns cannot work: They are computed after - * BEFORE triggers, but partition routing is done before all - * triggers. - */ - i = -1; - while ((i = bms_next_member(expr_attrs, i)) >= 0) - { - AttrNumber attno = i + FirstLowInvalidHeapAttributeNumber; - - if (attno > 0 && - TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("cannot use generated column in partition key"), - errdetail("Column \"%s\" is a generated column.", - get_attname(RelationGetRelid(rel), attno, false)), - parser_errposition(pstate, pelem->location))); - } - - /* * Preprocess the expression before checking for mutability. * This is essential for the reasons described in * contain_mutable_functions_after_planning. However, we call @@ -18665,13 +18675,14 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel, Relation partRel; ObjectAddress address; Oid defaultPartOid; + PartitionDesc partdesc; /* * We must lock the default partition, because detaching this partition * will change its partition constraint. 
*/ - defaultPartOid = - get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true)); + partdesc = RelationGetPartitionDesc(rel, true); + defaultPartOid = get_default_oid_from_partdesc(partdesc); if (OidIsValid(defaultPartOid)) { /* @@ -18738,10 +18749,13 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel, char *partrelname; /* - * Add a new constraint to the partition being detached, which - * supplants the partition constraint (unless there is one already). + * For strategies other than hash, add a constraint to the partition + * being detached which supplants the partition constraint. For hash + * we cannot do that, because the constraint would reference the + * partitioned table OID, possibly causing problems later. */ - DetachAddConstraintIfNeeded(wqueue, partRel); + if (partdesc->boundinfo->strategy != PARTITION_STRATEGY_HASH) + DetachAddConstraintIfNeeded(wqueue, partRel); /* * We're almost done now; the only traces that remain are the @@ -19398,7 +19412,10 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partIdx)); - /* Silently do nothing if already in the right state */ + /* + * Check if the index is already attached to the correct parent, + * ultimately attempting one round of validation if already the case. + */ currParent = partIdx->rd_rel->relispartition ? get_partition_parent(partIdxId, false) : InvalidOid; if (currParent != RelationGetRelid(parentIdx)) @@ -19500,6 +19517,14 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) validatePartitionedIndex(parentIdx, parentTbl); } + else if (!parentIdx->rd_index->indisvalid) + { + /* + * The index is attached, but the parent is still invalid; see if it + * can be validated now. 
+ */ + validatePartitionedIndex(parentIdx, parentTbl); + } relation_close(parentTbl, AccessShareLock); /* keep these locks till commit */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/trigger.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/trigger.c index 878ca5f23e4..c6190f8051f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/trigger.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/trigger.c @@ -3892,21 +3892,10 @@ struct AfterTriggersTableData bool after_trig_done; /* did we already queue AS triggers? */ AfterTriggerEventList after_trig_events; /* if so, saved list pointer */ - /* - * We maintain separate transition tables for UPDATE/INSERT/DELETE since - * MERGE can run all three actions in a single statement. Note that UPDATE - * needs both old and new transition tables whereas INSERT needs only new, - * and DELETE needs only old. - */ - - /* "old" transition table for UPDATE, if any */ - Tuplestorestate *old_upd_tuplestore; - /* "new" transition table for UPDATE, if any */ - Tuplestorestate *new_upd_tuplestore; - /* "old" transition table for DELETE, if any */ - Tuplestorestate *old_del_tuplestore; - /* "new" transition table for INSERT, if any */ - Tuplestorestate *new_ins_tuplestore; + /* "old" transition table for UPDATE/DELETE, if any */ + Tuplestorestate *old_tuplestore; + /* "new" transition table for INSERT/UPDATE, if any */ + Tuplestorestate *new_tuplestore; TupleTableSlot *storeslot; /* for converting to tuplestore's format */ }; @@ -3933,6 +3922,7 @@ static Tuplestorestate *GetAfterTriggersTransitionTable(int event, TupleTableSlot *newslot, TransitionCaptureState *transition_capture); static void TransitionTableAddTuple(EState *estate, + int event, TransitionCaptureState *transition_capture, ResultRelInfo *relinfo, TupleTableSlot *slot, @@ -4492,19 +4482,13 @@ AfterTriggerExecute(EState *estate, { if (LocTriggerData.tg_trigger->tgoldtable) { - 
if (TRIGGER_FIRED_BY_UPDATE(evtshared->ats_event)) - LocTriggerData.tg_oldtable = evtshared->ats_table->old_upd_tuplestore; - else - LocTriggerData.tg_oldtable = evtshared->ats_table->old_del_tuplestore; + LocTriggerData.tg_oldtable = evtshared->ats_table->old_tuplestore; evtshared->ats_table->closed = true; } if (LocTriggerData.tg_trigger->tgnewtable) { - if (TRIGGER_FIRED_BY_INSERT(evtshared->ats_event)) - LocTriggerData.tg_newtable = evtshared->ats_table->new_ins_tuplestore; - else - LocTriggerData.tg_newtable = evtshared->ats_table->new_upd_tuplestore; + LocTriggerData.tg_newtable = evtshared->ats_table->new_tuplestore; evtshared->ats_table->closed = true; } } @@ -4836,6 +4820,11 @@ GetAfterTriggersTableData(Oid relid, CmdType cmdType) MemoryContext oldcxt; ListCell *lc; + /* At this level, cmdType should not be, eg, CMD_MERGE */ + Assert(cmdType == CMD_INSERT || + cmdType == CMD_UPDATE || + cmdType == CMD_DELETE); + /* Caller should have ensured query_depth is OK. */ Assert(afterTriggers.query_depth >= 0 && afterTriggers.query_depth < afterTriggers.maxquerydepth); @@ -4922,7 +4911,9 @@ MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType) need_new_upd, need_old_del, need_new_ins; - AfterTriggersTableData *table; + AfterTriggersTableData *ins_table; + AfterTriggersTableData *upd_table; + AfterTriggersTableData *del_table; MemoryContext oldcxt; ResourceOwner saveResourceOwner; @@ -4969,10 +4960,15 @@ MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType) AfterTriggerEnlargeQueryState(); /* - * Find or create an AfterTriggersTableData struct to hold the + * Find or create AfterTriggersTableData struct(s) to hold the * tuplestore(s). If there's a matching struct but it's marked closed, * ignore it; we need a newer one. 
* + * Note: MERGE must use the same AfterTriggersTableData structs as INSERT, + * UPDATE, and DELETE, so that any MERGE'd tuples are added to the same + * tuplestores as tuples from any INSERT, UPDATE, or DELETE commands + * running in the same top-level command (e.g., in a writable CTE). + * * Note: the AfterTriggersTableData list, as well as the tuplestores, are * allocated in the current (sub)transaction's CurTransactionContext, and * the tuplestores are managed by the (sub)transaction's resource owner. @@ -4980,21 +4976,34 @@ MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType) * transition tables to be deferrable; they will be fired during * AfterTriggerEndQuery, after which it's okay to delete the data. */ - table = GetAfterTriggersTableData(relid, cmdType); + if (need_new_ins) + ins_table = GetAfterTriggersTableData(relid, CMD_INSERT); + else + ins_table = NULL; + + if (need_old_upd || need_new_upd) + upd_table = GetAfterTriggersTableData(relid, CMD_UPDATE); + else + upd_table = NULL; + + if (need_old_del) + del_table = GetAfterTriggersTableData(relid, CMD_DELETE); + else + del_table = NULL; /* Now create required tuplestore(s), if we don't have them already. 
*/ oldcxt = MemoryContextSwitchTo(CurTransactionContext); saveResourceOwner = CurrentResourceOwner; CurrentResourceOwner = CurTransactionResourceOwner; - if (need_old_upd && table->old_upd_tuplestore == NULL) - table->old_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem); - if (need_new_upd && table->new_upd_tuplestore == NULL) - table->new_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem); - if (need_old_del && table->old_del_tuplestore == NULL) - table->old_del_tuplestore = tuplestore_begin_heap(false, false, work_mem); - if (need_new_ins && table->new_ins_tuplestore == NULL) - table->new_ins_tuplestore = tuplestore_begin_heap(false, false, work_mem); + if (need_old_upd && upd_table->old_tuplestore == NULL) + upd_table->old_tuplestore = tuplestore_begin_heap(false, false, work_mem); + if (need_new_upd && upd_table->new_tuplestore == NULL) + upd_table->new_tuplestore = tuplestore_begin_heap(false, false, work_mem); + if (need_old_del && del_table->old_tuplestore == NULL) + del_table->old_tuplestore = tuplestore_begin_heap(false, false, work_mem); + if (need_new_ins && ins_table->new_tuplestore == NULL) + ins_table->new_tuplestore = tuplestore_begin_heap(false, false, work_mem); CurrentResourceOwner = saveResourceOwner; MemoryContextSwitchTo(oldcxt); @@ -5005,7 +5014,9 @@ MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType) state->tcs_update_old_table = need_old_upd; state->tcs_update_new_table = need_new_upd; state->tcs_insert_new_table = need_new_ins; - state->tcs_private = table; + state->tcs_insert_private = ins_table; + state->tcs_update_private = upd_table; + state->tcs_delete_private = del_table; return state; } @@ -5183,20 +5194,12 @@ AfterTriggerFreeQuery(AfterTriggersQueryData *qs) { AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc); - ts = table->old_upd_tuplestore; - table->old_upd_tuplestore = NULL; - if (ts) - tuplestore_end(ts); - ts = table->new_upd_tuplestore; - 
table->new_upd_tuplestore = NULL; - if (ts) - tuplestore_end(ts); - ts = table->old_del_tuplestore; - table->old_del_tuplestore = NULL; + ts = table->old_tuplestore; + table->old_tuplestore = NULL; if (ts) tuplestore_end(ts); - ts = table->new_ins_tuplestore; - table->new_ins_tuplestore = NULL; + ts = table->new_tuplestore; + table->new_tuplestore = NULL; if (ts) tuplestore_end(ts); if (table->storeslot) @@ -5507,17 +5510,17 @@ GetAfterTriggersTransitionTable(int event, { Assert(TupIsNull(newslot)); if (event == TRIGGER_EVENT_DELETE && delete_old_table) - tuplestore = transition_capture->tcs_private->old_del_tuplestore; + tuplestore = transition_capture->tcs_delete_private->old_tuplestore; else if (event == TRIGGER_EVENT_UPDATE && update_old_table) - tuplestore = transition_capture->tcs_private->old_upd_tuplestore; + tuplestore = transition_capture->tcs_update_private->old_tuplestore; } else if (!TupIsNull(newslot)) { Assert(TupIsNull(oldslot)); if (event == TRIGGER_EVENT_INSERT && insert_new_table) - tuplestore = transition_capture->tcs_private->new_ins_tuplestore; + tuplestore = transition_capture->tcs_insert_private->new_tuplestore; else if (event == TRIGGER_EVENT_UPDATE && update_new_table) - tuplestore = transition_capture->tcs_private->new_upd_tuplestore; + tuplestore = transition_capture->tcs_update_private->new_tuplestore; } return tuplestore; @@ -5531,6 +5534,7 @@ GetAfterTriggersTransitionTable(int event, */ static void TransitionTableAddTuple(EState *estate, + int event, TransitionCaptureState *transition_capture, ResultRelInfo *relinfo, TupleTableSlot *slot, @@ -5549,9 +5553,26 @@ TransitionTableAddTuple(EState *estate, tuplestore_puttupleslot(tuplestore, original_insert_tuple); else if ((map = ExecGetChildToRootMap(relinfo)) != NULL) { - AfterTriggersTableData *table = transition_capture->tcs_private; + AfterTriggersTableData *table; TupleTableSlot *storeslot; + switch (event) + { + case TRIGGER_EVENT_INSERT: + table = 
transition_capture->tcs_insert_private; + break; + case TRIGGER_EVENT_UPDATE: + table = transition_capture->tcs_update_private; + break; + case TRIGGER_EVENT_DELETE: + table = transition_capture->tcs_delete_private; + break; + default: + elog(ERROR, "invalid after-trigger event code: %d", event); + table = NULL; /* keep compiler quiet */ + break; + } + storeslot = GetAfterTriggersStoreSlot(table, map->outdesc); execute_attr_map_slot(map->attrMap, slot, storeslot); tuplestore_puttupleslot(tuplestore, storeslot); @@ -6145,7 +6166,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, oldslot, NULL, transition_capture); - TransitionTableAddTuple(estate, transition_capture, relinfo, + TransitionTableAddTuple(estate, event, transition_capture, relinfo, oldslot, NULL, old_tuplestore); } @@ -6161,7 +6182,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, NULL, newslot, transition_capture); - TransitionTableAddTuple(estate, transition_capture, relinfo, + TransitionTableAddTuple(estate, event, transition_capture, relinfo, newslot, original_insert_tuple, new_tuplestore); } @@ -6463,7 +6484,24 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, new_shared.ats_firing_id = 0; if ((trigger->tgoldtable || trigger->tgnewtable) && transition_capture != NULL) - new_shared.ats_table = transition_capture->tcs_private; + { + switch (event) + { + case TRIGGER_EVENT_INSERT: + new_shared.ats_table = transition_capture->tcs_insert_private; + break; + case TRIGGER_EVENT_UPDATE: + new_shared.ats_table = transition_capture->tcs_update_private; + break; + case TRIGGER_EVENT_DELETE: + new_shared.ats_table = transition_capture->tcs_delete_private; + break; + default: + /* Must be TRUNCATE, see switch above */ + new_shared.ats_table = NULL; + break; + } + } else new_shared.ats_table = NULL; new_shared.ats_modifiedcols = modifiedCols; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/typecmds.c 
b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/typecmds.c index 9781ab5b20a..3bc2f3dc802 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/typecmds.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/typecmds.c @@ -1443,6 +1443,13 @@ DefineRange(ParseState *pstate, CreateRangeStmt *stmt) /* we can look up the subtype name immediately */ multirangeNamespace = QualifiedNameGetCreationNamespace(defGetQualifiedName(defel), &multirangeTypeName); + + /* Check we have creation rights in target namespace */ + aclresult = object_aclcheck(NamespaceRelationId, multirangeNamespace, + GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(multirangeNamespace)); } else ereport(ERROR, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/vacuum.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/vacuum.c index aa4fd5db159..8f5a8df305d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/vacuum.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/commands/vacuum.c @@ -39,6 +39,7 @@ #include "catalog/pg_database.h" #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" +#include "commands/async.h" #include "commands/cluster.h" #include "commands/defrem.h" #include "commands/vacuum.h" @@ -1943,6 +1944,12 @@ vac_truncate_clog(TransactionId frozenXID, } /* + * Freeze any old transaction IDs in the async notification queue before + * CLOG truncation. 
+ */ + AsyncNotifyFreezeXids(frozenXID); + + /* * Advance the oldest value for commit timestamps before truncating, so * that if a user requests a timestamp for a transaction we're truncating * away right after this point, they get NULL instead of an ugly "file not diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execExprInterp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execExprInterp.c index 98c21ebe779..1cfc5d28975 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execExprInterp.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execExprInterp.c @@ -170,6 +170,14 @@ static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); /* execution helper functions */ +static pg_attribute_always_inline void ExecEvalArrayCompareInternal(FunctionCallInfo fcinfo, + ArrayType *arr, + int16 typlen, + bool typbyval, + char typalign, + bool useOr, + Datum *result, + bool *resultnull); static pg_attribute_always_inline void ExecAggPlainTransByVal(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroup, @@ -3363,12 +3371,6 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op) int nitems; Datum result; bool resultnull; - int16 typlen; - bool typbyval; - char typalign; - char *s; - bits8 *bitmap; - int bitmask; /* * If the array is NULL then we return NULL --- it's not very meaningful @@ -3417,13 +3419,42 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op) op->d.scalararrayop.element_type = ARR_ELEMTYPE(arr); } - typlen = op->d.scalararrayop.typlen; - typbyval = op->d.scalararrayop.typbyval; - typalign = op->d.scalararrayop.typalign; + ExecEvalArrayCompareInternal(fcinfo, + arr, + op->d.scalararrayop.typlen, + op->d.scalararrayop.typbyval, + op->d.scalararrayop.typalign, + useOr, + &result, + &resultnull); + + *op->resvalue = result; + 
*op->resnull = resultnull; +} + +/* + * Shared helper for ExecEvalScalarArrayOp() and the NULL-LHS fallback for + * non-strict ExecEvalHashedScalarArrayOp(). + * + * Callers must handle the strict LHS-is-NULL; return NULL fast path prior to + * calling this. + */ +static pg_attribute_always_inline void +ExecEvalArrayCompareInternal(FunctionCallInfo fcinfo, ArrayType *arr, + int16 typlen, bool typbyval, char typalign, + bool useOr, Datum *result, bool *resultnull) +{ + int nitems; + char *s; + bits8 *bitmap; + int bitmask; + bool strictfunc = fcinfo->flinfo->fn_strict; + + nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); /* Initialize result appropriately depending on useOr */ - result = BoolGetDatum(!useOr); - resultnull = false; + *result = BoolGetDatum(!useOr); + *resultnull = false; /* Loop over the array elements */ s = (char *) ARR_DATA_PTR(arr); @@ -3459,18 +3490,18 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op) else { fcinfo->isnull = false; - thisresult = op->d.scalararrayop.fn_addr(fcinfo); + thisresult = fcinfo->flinfo->fn_addr(fcinfo); } /* Combine results per OR or AND semantics */ if (fcinfo->isnull) - resultnull = true; + *resultnull = true; else if (useOr) { if (DatumGetBool(thisresult)) { - result = BoolGetDatum(true); - resultnull = false; + *result = BoolGetDatum(true); + *resultnull = false; break; /* needn't look at any more elements */ } } @@ -3478,8 +3509,8 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op) { if (!DatumGetBool(thisresult)) { - result = BoolGetDatum(false); - resultnull = false; + *result = BoolGetDatum(false); + *resultnull = false; break; /* needn't look at any more elements */ } } @@ -3495,9 +3526,6 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op) } } } - - *op->resvalue = result; - *op->resnull = resultnull; } /* @@ -3576,7 +3604,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco * If the scalar is NULL, and the function is strict, return NULL; no 
* point in executing the search. */ - if (fcinfo->args[0].isnull && strictfunc) + if (scalar_isnull && strictfunc) { *op->resnull = true; return; @@ -3674,8 +3702,51 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco * non-strict functions with a null lhs value if no match is found. */ op->d.hashedscalararrayop.has_nulls = has_nulls; + + /* + * When we have a non-strict equality function, check and cache the + * result from looking up a NULL. Non-strict functions are free to + * treat a NULL as equal to any other value, e.g. a 0 or an empty + * string. Here we perform a linear search over the array and cache + * the outcome so that we can use that result any time we receive a + * NULL. + */ + if (!strictfunc) + { + bool null_lhs_result; + + fcinfo->args[0].value = (Datum) 0; + fcinfo->args[0].isnull = true; + + ExecEvalArrayCompareInternal(fcinfo, arr, typlen, typbyval, + typalign, true, &result, + &resultnull); + + null_lhs_result = DatumGetBool(result); + + /* invert non-NULL results for NOT IN */ + if (!resultnull && !inclause) + null_lhs_result = !null_lhs_result; + + op->d.hashedscalararrayop.null_lhs_isnull = resultnull; + op->d.hashedscalararrayop.null_lhs_result = null_lhs_result; + } + } + + /* + * When looking up an SQL NULL value with non-strict functions, we defer + * to the value we cached when building the hash table. + */ + if (scalar_isnull) + { + Assert(!strictfunc); + + *op->resnull = op->d.hashedscalararrayop.null_lhs_isnull; + *op->resvalue = BoolGetDatum(op->d.hashedscalararrayop.null_lhs_result); + return; } + /* Check the hash to see if we have a match. 
*/ hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execGrouping.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execGrouping.c index ba4f238ed9f..dcb0c23a284 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execGrouping.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execGrouping.c @@ -149,6 +149,12 @@ execTuplesHashPrepare(int numCols, * * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in * storage that will live as long as the hashtable does. + * + * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak + * memory in the tempcxt. It is caller's responsibility to reset that context + * reasonably often, typically once per tuple. (We do it that way, rather + * than managing an extra context within the hashtable, because in many cases + * the caller can specify a tempcxt that it needs to reset per-tuple anyway.) */ TupleHashTable BuildTupleHashTableExt(PlanState *parent, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execMain.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execMain.c index ebc0b38344b..60013b92ac6 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execMain.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execMain.c @@ -1010,11 +1010,19 @@ InitPlan(QueryDesc *queryDesc, int eflags) * Generally the parser and/or planner should have noticed any such mistake * already, but let's make sure. * + * For INSERT ON CONFLICT, the result relation is required to support the + * onConflictAction, regardless of whether a conflict actually occurs. + * + * For MERGE, mergeActions is the list of actions that may be performed. The + * result relation is required to support every action, regardless of whether + * or not they are all executed. 
+ * * Note: when changing this function, you probably also need to look at * CheckValidRowMarkRel. */ void -CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation) +CheckValidResultRelNew(ResultRelInfo *resultRelInfo, CmdType operation, + OnConflictAction onConflictAction, List *mergeActions) { Relation resultRel = resultRelInfo->ri_RelationDesc; TriggerDesc *trigDesc = resultRel->trigdesc; @@ -1028,7 +1036,31 @@ CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation) { case RELKIND_RELATION: case RELKIND_PARTITIONED_TABLE: - CheckCmdReplicaIdentity(resultRel, operation); + + /* + * For MERGE, check that the target relation supports each action. + * For other operations, just check the operation itself. + */ + if (operation == CMD_MERGE) + { + ListCell *lc; + + foreach(lc, mergeActions) + { + MergeAction *action = (MergeAction *) lfirst(lc); + + CheckCmdReplicaIdentity(resultRel, action->commandType); + } + } + else + CheckCmdReplicaIdentity(resultRel, operation); + + /* + * For INSERT ON CONFLICT DO UPDATE, additionally check that the + * target relation supports UPDATE. + */ + if (onConflictAction == ONCONFLICT_UPDATE) + CheckCmdReplicaIdentity(resultRel, CMD_UPDATE); break; case RELKIND_SEQUENCE: ereport(ERROR, @@ -1148,6 +1180,16 @@ CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation) } /* + * ABI-compatible wrapper to emulate old version of the above function. + * Do not call this version in new code. + */ +void +CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation) +{ + CheckValidResultRelNew(resultRelInfo, operation, ONCONFLICT_NONE, NIL); +} + +/* * Check that a proposed rowmark target relation is a legal target * * In most cases parser and/or planner should have noticed this already, but @@ -1301,10 +1343,9 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, * Get a ResultRelInfo for a trigger target relation. 
* * Most of the time, triggers are fired on one of the result relations of the - * query, and so we can just return a member of the es_result_relations array, - * or the es_tuple_routing_result_relations list (if any). (Note: in self-join - * situations there might be multiple members with the same OID; if so it - * doesn't matter which one we pick.) + * query, and so we can just return a suitable one we already made and stored + * in the es_opened_result_relations or es_tuple_routing_result_relations + * Lists. * * However, it is sometimes necessary to fire triggers on other relations; * this happens mainly when an RI update trigger queues additional triggers @@ -1324,11 +1365,20 @@ ExecGetTriggerResultRel(EState *estate, Oid relid, Relation rel; MemoryContext oldcontext; + /* + * Before creating a new ResultRelInfo, check if we've already made and + * cached one for this relation. We must ensure that the given + * 'rootRelInfo' matches the one stored in the cached ResultRelInfo as + * trigger handling for partitions can result in mixed requirements for + * what ri_RootResultRelInfo is set to. 
+ */ + /* Search through the query result relations */ foreach(l, estate->es_opened_result_relations) { rInfo = lfirst(l); - if (RelationGetRelid(rInfo->ri_RelationDesc) == relid) + if (RelationGetRelid(rInfo->ri_RelationDesc) == relid && + rInfo->ri_RootResultRelInfo == rootRelInfo) return rInfo; } @@ -1339,7 +1389,8 @@ ExecGetTriggerResultRel(EState *estate, Oid relid, foreach(l, estate->es_tuple_routing_result_relations) { rInfo = (ResultRelInfo *) lfirst(l); - if (RelationGetRelid(rInfo->ri_RelationDesc) == relid) + if (RelationGetRelid(rInfo->ri_RelationDesc) == relid && + rInfo->ri_RootResultRelInfo == rootRelInfo) return rInfo; } @@ -1347,7 +1398,8 @@ ExecGetTriggerResultRel(EState *estate, Oid relid, foreach(l, estate->es_trig_target_relations) { rInfo = (ResultRelInfo *) lfirst(l); - if (RelationGetRelid(rInfo->ri_RelationDesc) == relid) + if (RelationGetRelid(rInfo->ri_RelationDesc) == relid && + rInfo->ri_RootResultRelInfo == rootRelInfo) return rInfo; } /* Nope, so we need a new one */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execPartition.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execPartition.c index f56117bbae8..04ea47b1e41 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execPartition.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execPartition.c @@ -360,8 +360,12 @@ ExecFindPartition(ModifyTableState *mtstate, true, false); if (rri) { + ModifyTable *node = (ModifyTable *) mtstate->ps.plan; + /* Verify this ResultRelInfo allows INSERTs */ - CheckValidResultRel(rri, CMD_INSERT); + CheckValidResultRelNew(rri, CMD_INSERT, + node ? node->onConflictAction : ONCONFLICT_NONE, + NIL); /* * Initialize information needed to insert this and @@ -527,7 +531,9 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, * partition-key becomes a DELETE+INSERT operation, so this check is still * required when the operation is CMD_UPDATE. 
*/ - CheckValidResultRel(leaf_part_rri, CMD_INSERT); + CheckValidResultRelNew(leaf_part_rri, CMD_INSERT, + node ? node->onConflictAction : ONCONFLICT_NONE, + NIL); /* * Open partition indices. The user may have asked to check for conflicts diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execScan.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execScan.c index a47c8f5f712..ef9b85f1adf 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execScan.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/execScan.c @@ -55,16 +55,24 @@ ExecScanFetch(ScanState *node, { /* * This is a ForeignScan or CustomScan which has pushed down a - * join to the remote side. The recheck method is responsible not - * only for rechecking the scan/join quals but also for storing - * the correct tuple in the slot. + * join to the remote side. If it is a descendant node in the EPQ + * recheck plan tree, run the recheck method function. Otherwise, + * run the access method function below. */ + if (bms_is_member(epqstate->epqParam, node->ps.plan->extParam)) + { + /* + * The recheck method is responsible not only for rechecking + * the scan/join quals but also for storing the correct tuple + * in the slot. + */ - TupleTableSlot *slot = node->ss_ScanTupleSlot; + TupleTableSlot *slot = node->ss_ScanTupleSlot; - if (!(*recheckMtd) (node, slot)) - ExecClearTuple(slot); /* would not be returned by scan */ - return slot; + if (!(*recheckMtd) (node, slot)) + ExecClearTuple(slot); /* would not be returned by scan */ + return slot; + } } else if (epqstate->relsubs_done[scanrelid - 1]) { @@ -324,7 +332,7 @@ ExecScanReScan(ScanState *node) /* * If an FDW or custom scan provider has replaced the join with a - * scan, there are multiple RTIs; reset the epqScanDone flag for + * scan, there are multiple RTIs; reset the relsubs_done flag for * all of them. 
*/ if (IsA(node->ps.plan, ForeignScan)) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeAgg.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeAgg.c index 468db94fe5b..1059d265136 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeAgg.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeAgg.c @@ -4318,7 +4318,7 @@ ExecEndAgg(AggState *node) { AggregateInstrumentation *si; - Assert(ParallelWorkerNumber <= node->shared_info->num_workers); + Assert(ParallelWorkerNumber < node->shared_info->num_workers); si = &node->shared_info->sinstrument[ParallelWorkerNumber]; si->hash_batches_used = node->hash_batches_used; si->hash_disk_used = node->hash_disk_used; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeIncrementalSort.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeIncrementalSort.c index 7683e3341cd..ddefb1ac950 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeIncrementalSort.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeIncrementalSort.c @@ -103,7 +103,7 @@ if ((node)->shared_info && (node)->am_worker) \ { \ Assert(IsParallelWorker()); \ - Assert(ParallelWorkerNumber <= (node)->shared_info->num_workers); \ + Assert(ParallelWorkerNumber < (node)->shared_info->num_workers); \ instrumentSortedGroup(&(node)->shared_info->sinfo[ParallelWorkerNumber].groupName##GroupInfo, \ (node)->groupName##_state); \ } \ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeMemoize.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeMemoize.c index 3c54ca54214..732441a398d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeMemoize.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeMemoize.c @@ -1109,7 +1109,7 @@ ExecEndMemoize(MemoizeState *node) if 
(node->stats.mem_peak == 0) node->stats.mem_peak = node->mem_used; - Assert(ParallelWorkerNumber <= node->shared_info->num_workers); + Assert(ParallelWorkerNumber < node->shared_info->num_workers); si = &node->shared_info->sinstrument[ParallelWorkerNumber]; memcpy(si, &node->stats, sizeof(MemoizeInstrumentation)); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeModifyTable.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeModifyTable.c index a31c24c3cc3..2005674838d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeModifyTable.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeModifyTable.c @@ -2644,14 +2644,6 @@ ExecOnConflictUpdate(ModifyTableContext *context, errmsg("could not serialize access due to concurrent update"))); /* - * As long as we don't support an UPDATE of INSERT ON CONFLICT for - * a partitioned table we shouldn't reach to a case where tuple to - * be lock is moved to another partition due to concurrent update - * of the partition key. - */ - Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); - - /* * Tell caller to try again from the very start. * * It does not make sense to use the usual EvalPlanQual() style @@ -2668,7 +2660,6 @@ ExecOnConflictUpdate(ModifyTableContext *context, errmsg("could not serialize access due to concurrent delete"))); /* see TM_Updated case */ - Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); ExecClearTuple(existing); return false; @@ -3086,6 +3077,11 @@ lmerge_matched: *inputslot; LockTupleMode lockmode; + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + /* * The target tuple was concurrently updated by some other * transaction. 
Run EvalPlanQual() with the new version of @@ -3146,7 +3142,7 @@ lmerge_matched: * the tuple moved, and setting our current * resultRelInfo to that. */ - if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) + if (ItemPointerIndicatesMovedPartitions(tupleid)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("tuple to be deleted was already moved to another partition due to concurrent update"))); @@ -3158,14 +3154,14 @@ lmerge_matched: * that the first qualifying WHEN MATCHED action * is executed. * - * Update tupleid to that of the new tuple, for - * the refetch we do at the top. + * tupleid has been updated to that of the new + * tuple, as required for the refetch we do at the + * top. */ if (resultRelInfo->ri_needLockTagTuple) UnlockTuple(resultRelInfo->ri_RelationDesc, &lockedtid, InplaceUpdateTupleLock); - ItemPointerCopy(&context->tmfd.ctid, tupleid); goto lmerge_matched; case TM_Deleted: @@ -3925,8 +3921,12 @@ ExecModifyTable(PlanState *pstate) relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { - /* ri_RowIdAttNo refers to a ctid attribute */ - Assert(AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)); + /* + * ri_RowIdAttNo refers to a ctid attribute. See the comment + * in ExecInitModifyTable(). 
+ */ + Assert(AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo) || + relkind == RELKIND_PARTITIONED_TABLE); datum = ExecGetJunkAttribute(slot, resultRelInfo->ri_RowIdAttNo, &isNull); @@ -4240,6 +4240,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) foreach(l, node->resultRelations) { Index resultRelation = lfirst_int(l); + List *mergeActions = NIL; + + if (node->mergeActionLists) + mergeActions = list_nth(node->mergeActionLists, i); if (resultRelInfo != mtstate->rootResultRelInfo) { @@ -4261,7 +4265,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Verify result relation is a valid target for the current operation */ - CheckValidResultRel(resultRelInfo, operation); + CheckValidResultRelNew(resultRelInfo, operation, + node->onConflictAction, mergeActions); resultRelInfo++; i++; @@ -4311,7 +4316,16 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) { resultRelInfo->ri_RowIdAttNo = ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); - if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) + + /* + * For heap relations, a ctid junk attribute must be present. + * Partitioned tables should only appear here when all leaf + * partitions were pruned, in which case no rows can be + * produced and ctid is not needed. 
+ */ + if (relkind == RELKIND_PARTITIONED_TABLE) + Assert(nrels == 1); + else if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) elog(ERROR, "could not find junk ctid column"); } else if (relkind == RELKIND_FOREIGN_TABLE) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSort.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSort.c index c6c72c6e678..e9e58d07338 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSort.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSort.c @@ -175,7 +175,7 @@ ExecSort(PlanState *pstate) TuplesortInstrumentation *si; Assert(IsParallelWorker()); - Assert(ParallelWorkerNumber <= node->shared_info->num_workers); + Assert(ParallelWorkerNumber < node->shared_info->num_workers); si = &node->shared_info->sinstrument[ParallelWorkerNumber]; tuplesort_get_stats(tuplesortstate, si); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSubplan.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSubplan.c index c136f75ac24..2a777c78007 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSubplan.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeSubplan.c @@ -102,6 +102,7 @@ ExecHashSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull) { + bool result = false; SubPlan *subplan = node->subplan; PlanState *planstate = node->planstate; TupleTableSlot *slot; @@ -133,14 +134,6 @@ ExecHashSubPlan(SubPlanState *node, slot = ExecProject(node->projLeft); /* - * Note: because we are typically called in a per-tuple context, we have - * to explicitly clear the projected tuple before returning. Otherwise, - * we'll have a double-free situation: the per-tuple context will probably - * be reset before we're called again, and then the tuple slot will think - * it still needs to free the tuple. 
- */ - - /* * If the LHS is all non-null, probe for an exact match in the main hash * table. If we find one, the result is TRUE. Otherwise, scan the * partly-null table to see if there are any rows that aren't provably @@ -161,19 +154,10 @@ ExecHashSubPlan(SubPlanState *node, slot, node->cur_eq_comp, node->lhs_hash_funcs) != NULL) - { - ExecClearTuple(slot); - return BoolGetDatum(true); - } - if (node->havenullrows && - findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + result = true; + else if (node->havenullrows && + findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } - ExecClearTuple(slot); - return BoolGetDatum(false); } /* @@ -186,34 +170,31 @@ ExecHashSubPlan(SubPlanState *node, * aren't provably unequal to the LHS; if so, the result is UNKNOWN. * Otherwise, the result is FALSE. */ - if (node->hashnulls == NULL) - { - ExecClearTuple(slot); - return BoolGetDatum(false); - } - if (slotAllNulls(slot)) - { - ExecClearTuple(slot); + else if (node->hashnulls == NULL) + /* just return FALSE */ ; + else if (slotAllNulls(slot)) *isNull = true; - return BoolGetDatum(false); - } /* Scan partly-null table first, since more likely to get a match */ - if (node->havenullrows && - findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + else if (node->havenullrows && + findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } - if (node->havehashrows && - findPartialMatch(node->hashtable, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + else if (node->havehashrows && + findPartialMatch(node->hashtable, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } + + /* + * Note: because we are typically called in a per-tuple context, we have + * to explicitly clear the projected tuple before returning. 
Otherwise, + * we'll have a double-free situation: the per-tuple context will probably + * be reset before we're called again, and then the tuple slot will think + * it still needs to free the tuple. + */ ExecClearTuple(slot); - return BoolGetDatum(false); + + /* Also must reset the hashtempcxt after each hashtable lookup. */ + MemoryContextReset(node->hashtempcxt); + + return BoolGetDatum(result); } /* @@ -643,6 +624,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) * during ExecProject. */ ResetExprContext(innerecontext); + + /* Also must reset the hashtempcxt after each hashtable lookup. */ + MemoryContextReset(node->hashtempcxt); } /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidrangescan.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidrangescan.c index 2124c55ef53..30b0b66d2be 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidrangescan.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidrangescan.c @@ -273,6 +273,16 @@ TidRangeNext(TidRangeScanState *node) static bool TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot) { + if (!TidRangeEval(node)) + return false; + + Assert(ItemPointerIsValid(&slot->tts_tid)); + + /* Recheck the ctid is still within range */ + if (ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 || + ItemPointerCompare(&slot->tts_tid, &node->trss_maxtid) > 0) + return false; + return true; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidscan.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidscan.c index 862bd0330bc..3a3f1644265 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidscan.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeTidscan.c @@ -403,12 +403,23 @@ TidNext(TidScanState *node) static bool TidRecheck(TidScanState *node, TupleTableSlot *slot) { 
+ ItemPointer match; + + /* WHERE CURRENT OF always intends to resolve to the latest tuple */ + if (node->tss_isCurrentOf) + return true; + + if (node->tss_TidList == NULL) + TidListEval(node); + /* - * XXX shouldn't we check here to make sure tuple matches TID list? In - * runtime-key case this is not certain, is it? However, in the WHERE - * CURRENT OF case it might not match anyway ... + * Binary search the TidList to see if this ctid is mentioned and return + * true if it is. */ - return true; + match = (ItemPointer) bsearch(&slot->tts_tid, node->tss_TidList, + node->tss_NumTids, sizeof(ItemPointerData), + itemptr_comparator); + return match != NULL; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeWindowAgg.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeWindowAgg.c index 066a410cd31..7b37767648f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeWindowAgg.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/executor/nodeWindowAgg.c @@ -37,6 +37,7 @@ #include "catalog/objectaccess.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_proc.h" +#include "common/int.h" #include "executor/executor.h" #include "executor/nodeWindowAgg.h" #include "miscadmin.h" @@ -1424,12 +1425,21 @@ row_is_in_frame(WindowAggState *winstate, int64 pos, TupleTableSlot *slot) if (frameOptions & FRAMEOPTION_ROWS) { int64 offset = DatumGetInt64(winstate->endOffsetValue); + int64 frameendpos = 0; /* rows after current row + offset are out of frame */ if (frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING) offset = -offset; - if (pos > winstate->currentpos + offset) + /* + * If we have an overflow, it means the frame end is beyond the + * range of int64. Since currentpos >= 0, this can only be a + * positive overflow. We treat this as meaning that the frame + * extends to end of partition. 
+ */ + if (!pg_add_s64_overflow(winstate->currentpos, offset, + &frameendpos) && + pos > frameendpos) return -1; } else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS)) @@ -1564,7 +1574,16 @@ update_frameheadpos(WindowAggState *winstate) if (frameOptions & FRAMEOPTION_START_OFFSET_PRECEDING) offset = -offset; - winstate->frameheadpos = winstate->currentpos + offset; + /* + * If we have an overflow, it means the frame head is beyond the + * range of int64. Since currentpos >= 0, this can only be a + * positive overflow. We treat this as being beyond end of + * partition. + */ + if (pg_add_s64_overflow(winstate->currentpos, offset, + &winstate->frameheadpos)) + winstate->frameheadpos = PG_INT64_MAX; + /* frame head can't go before first row */ if (winstate->frameheadpos < 0) winstate->frameheadpos = 0; @@ -1676,12 +1695,21 @@ update_frameheadpos(WindowAggState *winstate) * framehead_slot empty. */ int64 offset = DatumGetInt64(winstate->startOffsetValue); - int64 minheadgroup; + int64 minheadgroup = 0; if (frameOptions & FRAMEOPTION_START_OFFSET_PRECEDING) minheadgroup = winstate->currentgroup - offset; else - minheadgroup = winstate->currentgroup + offset; + { + /* + * If we have an overflow, it means the target group is beyond + * the range of int64. We treat this as "infinity", which + * ensures the loop below advances to end of partition. + */ + if (pg_add_s64_overflow(winstate->currentgroup, offset, + &minheadgroup)) + minheadgroup = PG_INT64_MAX; + } tuplestore_select_read_pointer(winstate->buffer, winstate->framehead_ptr); @@ -1818,7 +1846,18 @@ update_frametailpos(WindowAggState *winstate) if (frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING) offset = -offset; - winstate->frametailpos = winstate->currentpos + offset + 1; + /* + * If we have an overflow, it means the frame tail is beyond the + * range of int64. Since currentpos >= 0, this can only be a + * positive overflow. We treat this as being beyond end of + * partition. 
+ */ + if (pg_add_s64_overflow(winstate->currentpos, offset, + &winstate->frametailpos) || + pg_add_s64_overflow(winstate->frametailpos, 1, + &winstate->frametailpos)) + winstate->frametailpos = PG_INT64_MAX; + /* smallest allowable value of frametailpos is 0 */ if (winstate->frametailpos < 0) winstate->frametailpos = 0; @@ -1930,12 +1969,21 @@ update_frametailpos(WindowAggState *winstate) * leave frametailpos = end+1 and frametail_slot empty. */ int64 offset = DatumGetInt64(winstate->endOffsetValue); - int64 maxtailgroup; + int64 maxtailgroup = 0; if (frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING) maxtailgroup = winstate->currentgroup - offset; else - maxtailgroup = winstate->currentgroup + offset; + { + /* + * If we have an overflow, it means the target group is beyond + * the range of int64. We treat this as "infinity", which + * ensures the loop below advances to end of partition. + */ + if (pg_add_s64_overflow(winstate->currentgroup, offset, + &maxtailgroup)) + maxtailgroup = PG_INT64_MAX; + } tuplestore_select_read_pointer(winstate->buffer, winstate->frametail_ptr); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth-scram.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth-scram.c index a91f1968752..937877eaeec 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth-scram.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth-scram.c @@ -577,7 +577,7 @@ scram_verify_plain_password(const char *username, const char *password, * Compare the secret's Server Key with the one computed from the * user-supplied password. 
*/ - return memcmp(computed_key, server_key, key_length) == 0; + return timingsafe_bcmp(computed_key, server_key, key_length) == 0; } @@ -1125,9 +1125,9 @@ verify_final_nonce(scram_state *state) if (final_nonce_len != client_nonce_len + server_nonce_len) return false; - if (memcmp(state->client_final_nonce, state->client_nonce, client_nonce_len) != 0) + if (timingsafe_bcmp(state->client_final_nonce, state->client_nonce, client_nonce_len) != 0) return false; - if (memcmp(state->client_final_nonce + client_nonce_len, state->server_nonce, server_nonce_len) != 0) + if (timingsafe_bcmp(state->client_final_nonce + client_nonce_len, state->server_nonce, server_nonce_len) != 0) return false; return true; @@ -1182,7 +1182,7 @@ verify_client_proof(scram_state *state) client_StoredKey, &errstr) < 0) elog(ERROR, "could not hash stored key: %s", errstr); - if (memcmp(client_StoredKey, state->StoredKey, state->key_length) != 0) + if (timingsafe_bcmp(client_StoredKey, state->StoredKey, state->key_length) != 0) return false; return true; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth.c index 05e78ff7653..950dbba2526 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/auth.c @@ -3250,7 +3250,7 @@ PerformRadiusTransaction(const char *server, const char *secret, const char *por } pfree(cryptvector); - if (memcmp(receivepacket->vector, encryptedpassword, RADIUS_VECTOR_LENGTH) != 0) + if (timingsafe_bcmp(receivepacket->vector, encryptedpassword, RADIUS_VECTOR_LENGTH) != 0) { ereport(LOG, (errmsg("RADIUS response from %s has incorrect MD5 signature", diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/crypt.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/crypt.c index ef496a0bea9..3e67e75fe84 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/crypt.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/libpq/crypt.c @@ -197,7 +197,8 @@ md5_crypt_verify(const char *role, const char *shadow_pass, return STATUS_ERROR; } - if (strcmp(client_pass, crypt_pwd) == 0) + if (strlen(client_pass) == strlen(crypt_pwd) && + timingsafe_bcmp(client_pass, crypt_pwd, strlen(crypt_pwd)) == 0) retval = STATUS_OK; else { @@ -259,7 +260,8 @@ plain_crypt_verify(const char *role, const char *shadow_pass, *logdetail = errstr; return STATUS_ERROR; } - if (strcmp(crypt_client_pass, shadow_pass) == 0) + if (strlen(crypt_client_pass) == strlen(shadow_pass) && + timingsafe_bcmp(crypt_client_pass, shadow_pass, strlen(shadow_pass)) == 0) return STATUS_OK; else { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/costsize.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/costsize.c index 0d3f773a841..5fe2518bac6 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/costsize.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/costsize.c @@ -4167,10 +4167,24 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan) { QualCost sp_cost; - /* Figure any cost for evaluating the testexpr */ + /* + * Figure any cost for evaluating the testexpr. + * + * Usually, SubPlan nodes are built very early, before we have constructed + * any RelOptInfos for the parent query level, which means the parent root + * does not yet contain enough information to safely consult statistics. + * Therefore, we pass root as NULL here. cost_qual_eval() is already + * well-equipped to handle a NULL root. + * + * One exception is SubPlan nodes built for the initplans of MIN/MAX + * aggregates from indexes (cf. SS_make_initplan_from_plan). In this + * case, having a NULL root is safe because testexpr will be NULL. 
+ * Besides, an initplan will by definition not consult anything from the + * parent plan. + */ cost_qual_eval(&sp_cost, make_ands_implicit((Expr *) subplan->testexpr), - root); + NULL); if (subplan->useHashTable) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/indxpath.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/indxpath.c index 0065c8992bd..1e179f717a4 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/indxpath.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/path/indxpath.c @@ -3404,6 +3404,16 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel) if (is_target_rel) continue; + /* + * If index is !amoptionalkey, also leave indrestrictinfo as set + * above. Otherwise we risk removing all quals for the first index + * key and then not being able to generate an indexscan at all. It + * would be better to be more selective, but we've not yet identified + * which if any of the quals match the first index key. + */ + if (!index->amoptionalkey) + continue; + /* Else compute indrestrictinfo as the non-implied quals */ index->indrestrictinfo = NIL; foreach(lcr, rel->baserestrictinfo) @@ -3577,16 +3587,19 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, * The condition's equality operator must be a member of the * index opfamily, else it is not asserting the right kind of * equality behavior for this index. We check this first - * since it's probably cheaper than match_index_to_operand(). + * since it's probably the cheapest test. */ if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c])) continue; /* - * XXX at some point we may need to check collations here too. - * For the moment we assume all collations reduce to the same - * notion of equality. 
+ * The index's collation must agree with the clause's input + * collation on equality, else the index's uniqueness does not + * imply uniqueness under the clause's equality semantics. */ + if (!collations_agree_on_equality(ind->indexcollations[c], + exprInputCollation((Node *) rinfo->clause))) + continue; /* OK, see if the condition operand matches the index key */ if (rinfo->outer_is_left) @@ -3624,10 +3637,13 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, continue; /* - * XXX at some point we may need to check collations here too. - * For the moment we assume all collations reduce to the same - * notion of equality. + * The index's collation must agree with the operand's + * collation on equality, else the index's uniqueness does not + * imply uniqueness under the operator's equality semantics. */ + if (!collations_agree_on_equality(ind->indexcollations[c], + exprCollation(expr))) + continue; matched = true; /* column is unique */ break; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/analyzejoins.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/analyzejoins.c index 5f3cce873a0..ec554b7c55c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/analyzejoins.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/analyzejoins.c @@ -33,6 +33,20 @@ #include "optimizer/tlist.h" #include "utils/lsyscache.h" +/* + * One element of the list passed to query_is_distinct_for_with_collations(). + * Each entry names a subquery output column that the caller needs to be + * distinct over, plus the upper-level equality operator and its input + * collation, so that the subquery's own DISTINCT/GROUP BY/set-op clauses can + * be compared for compatibility. 
+ */ +typedef struct DistinctColInfo +{ + int colno; /* subquery output column resno */ + Oid opid; /* upper-level equality operator */ + Oid collid; /* input collation of opid */ +} DistinctColInfo; + /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, @@ -45,7 +59,9 @@ static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list); -static Oid distinct_col_search(int colno, List *colnos, List *opids); +static bool query_is_distinct_for_with_collations(Query *query, + List *distinct_cols); +static DistinctColInfo *distinct_col_search(int colno, List *distinct_cols); static bool is_innerrel_unique_for(PlannerInfo *root, Relids joinrelids, Relids outerrelids, @@ -544,7 +560,7 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) /* * Remove any references to relid or ojrelid from the RestrictInfo. * - * We only bother to clean out bits in clause_relids and required_relids, + * We only bother to clean out bits in the RestrictInfo's various relid sets, * not nullingrel bits in contained Vars and PHVs. (This might have to be * improved sometime.) However, if the RestrictInfo contains an OR clause * we have to also clean up the sub-clauses. 
@@ -563,6 +579,22 @@ remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid) rinfo->required_relids = bms_copy(rinfo->required_relids); rinfo->required_relids = bms_del_member(rinfo->required_relids, relid); rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid); + /* Likewise for incompatible_relids */ + rinfo->incompatible_relids = bms_copy(rinfo->incompatible_relids); + rinfo->incompatible_relids = bms_del_member(rinfo->incompatible_relids, relid); + rinfo->incompatible_relids = bms_del_member(rinfo->incompatible_relids, ojrelid); + /* Likewise for outer_relids */ + rinfo->outer_relids = bms_copy(rinfo->outer_relids); + rinfo->outer_relids = bms_del_member(rinfo->outer_relids, relid); + rinfo->outer_relids = bms_del_member(rinfo->outer_relids, ojrelid); + /* Likewise for left_relids */ + rinfo->left_relids = bms_copy(rinfo->left_relids); + rinfo->left_relids = bms_del_member(rinfo->left_relids, relid); + rinfo->left_relids = bms_del_member(rinfo->left_relids, ojrelid); + /* Likewise for right_relids */ + rinfo->right_relids = bms_copy(rinfo->right_relids); + rinfo->right_relids = bms_del_member(rinfo->right_relids, relid); + rinfo->right_relids = bms_del_member(rinfo->right_relids, ojrelid); /* If it's an OR, recurse to clean up sub-clauses */ if (restriction_is_or_clause(rinfo)) @@ -628,6 +660,8 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid) bms_is_member(ojrelid, cur_em->em_relids)) { Assert(!cur_em->em_is_const); + /* em_relids is likely to be shared with some RestrictInfo */ + cur_em->em_relids = bms_copy(cur_em->em_relids); cur_em->em_relids = bms_del_member(cur_em->em_relids, relid); cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid); if (bms_is_empty(cur_em->em_relids)) @@ -869,15 +903,17 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) { Index relid = rel->relid; Query *subquery = root->simple_rte_array[relid]->subquery; - List *colnos = NIL; - List 
*opids = NIL; + List *distinct_cols = NIL; ListCell *l; /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. + * Build the argument list for query_is_distinct_for_with_collations: + * a list of DistinctColInfo entries, each holding an output column + * number that the query needs to be distinct over, the equality + * operator that the column needs to be distinct according to, and + * that operator's input collation. The collation matters because the + * subquery's own DISTINCT / GROUP BY / set-op proves uniqueness under + * its own collation, which need not agree with the operator's. * * (XXX we are not considering restriction clauses attached to the * subquery; is that worth doing?) @@ -885,18 +921,18 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) foreach(l, clause_list) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Oid op; + OpExpr *opexpr; Var *var; + DistinctColInfo *dcinfo; /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * caller's mergejoinability test should have selected only - * OpExprs. + * The caller's mergejoinability test should have selected only + * OpExprs. The operator might be a cross-type operator and thus + * not exactly the same operator the subquery would consider; + * that's all right since query_is_distinct_for_with_collations + * can resolve such cases. 
*/ - op = castNode(OpExpr, rinfo->clause)->opno; + opexpr = castNode(OpExpr, rinfo->clause); /* caller identified the inner side for us */ if (rinfo->outer_is_left) @@ -920,11 +956,14 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) var->varno != relid || var->varlevelsup != 0) continue; - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); + dcinfo = palloc(sizeof(DistinctColInfo)); + dcinfo->colno = var->varattno; + dcinfo->opid = opexpr->opno; + dcinfo->collid = opexpr->inputcollid; + distinct_cols = lappend(distinct_cols, dcinfo); } - if (query_is_distinct_for(subquery, colnos, opids)) + if (query_is_distinct_for_with_collations(subquery, distinct_cols)) return true; } return false; @@ -962,31 +1001,71 @@ query_supports_distinctness(Query *query) } /* - * query_is_distinct_for - does query never return duplicates of the - * specified columns? + * query_is_distinct_for - ABI-preserving wrapper around + * query_is_distinct_for_with_collations(). * - * query is a not-yet-planned subquery (in current usage, it's always from - * a subquery RTE, which the planner avoids scribbling on). - * - * colnos is an integer list of output column numbers (resno's). We are - * interested in whether rows consisting of just these columns are certain - * to be distinct. "Distinctness" is defined according to whether the - * corresponding upper-level equality operators listed in opids would think - * the values are distinct. (Note: the opids entries could be cross-type - * operators, and thus not exactly the equality operators that the subquery - * would use itself. We use equality_ops_are_compatible() to check - * compatibility. That looks at btree or hash opfamily membership, and so - * should give trustworthy answers for all operators that we might need - * to deal with here.) + * The original signature took parallel colnos/opids lists and did not + * consider collations. 
External callers built against earlier minor + * releases continue to call it with the historical (collation-blind) + * semantics; we forward with InvalidOid collations, which makes the + * collation check a no-op (see collations_agree_on_equality()). */ bool query_is_distinct_for(Query *query, List *colnos, List *opids) { - ListCell *l; - Oid opid; + List *distinct_cols = NIL; + ListCell *lc1; + ListCell *lc2; Assert(list_length(colnos) == list_length(opids)); + forboth(lc1, colnos, lc2, opids) + { + DistinctColInfo *dcinfo = palloc(sizeof(DistinctColInfo)); + + dcinfo->colno = lfirst_int(lc1); + dcinfo->opid = lfirst_oid(lc2); + dcinfo->collid = InvalidOid; + distinct_cols = lappend(distinct_cols, dcinfo); + } + + return query_is_distinct_for_with_collations(query, distinct_cols); +} + +/* + * query_is_distinct_for_with_collations - does query never return duplicates + * of the specified columns? + * + * query is a not-yet-planned subquery (in current usage, it's always from + * a subquery RTE, which the planner avoids scribbling on). + * + * distinct_cols is a list of DistinctColInfo, one per requested output column. + * Each entry names the subquery output column number we want distinct, the + * upper-level equality operator we'll compare values with, and that operator's + * input collation. We are interested in whether rows consisting of just these + * columns are certain to be distinct. + * + * "Distinctness" is defined according to whether the corresponding upper-level + * equality operators would think the values are distinct. (Note: each opid + * could be a cross-type operator, and thus not exactly the equality operator + * that the subquery would use itself. We use equality_ops_are_compatible() to + * check compatibility. 
That looks at opfamily membership for index AMs that + * have declared that they support consistent equality semantics within an + * opfamily, and so should give trustworthy answers for all operators that we + * might need to deal with here.) + * + * The collid must also agree on equality with the collation the subquery's own + * DISTINCT/GROUP BY/set-op uses to deduplicate the column, else the subquery's + * distinctness does not carry over to the caller's equality semantics. Two + * collations agree on equality if they match or if both are deterministic (in + * which case both reduce equality to byte-equality; see CREATE COLLATION). + */ +static bool +query_is_distinct_for_with_collations(Query *query, List *distinct_cols) +{ + ListCell *l; + DistinctColInfo *dcinfo; + /* * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the * columns in the DISTINCT clause appear in colnos and operator semantics @@ -1001,9 +1080,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); - opid = distinct_col_search(tle->resno, colnos, opids); - if (!OidIsValid(opid) || - !equality_ops_are_compatible(opid, sgc->eqop)) + dcinfo = distinct_col_search(tle->resno, distinct_cols); + if (dcinfo == NULL || + !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) || + !collations_agree_on_equality(dcinfo->collid, + exprCollation((Node *) tle->expr))) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? 
*/ @@ -1032,9 +1113,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); - opid = distinct_col_search(tle->resno, colnos, opids); - if (!OidIsValid(opid) || - !equality_ops_are_compatible(opid, sgc->eqop)) + dcinfo = distinct_col_search(tle->resno, distinct_cols); + if (dcinfo == NULL || + !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) || + !collations_agree_on_equality(dcinfo->collid, + exprCollation((Node *) tle->expr))) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -1100,9 +1183,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) sgc = (SortGroupClause *) lfirst(lg); lg = lnext(topop->groupClauses, lg); - opid = distinct_col_search(tle->resno, colnos, opids); - if (!OidIsValid(opid) || - !equality_ops_are_compatible(opid, sgc->eqop)) + dcinfo = distinct_col_search(tle->resno, distinct_cols); + if (dcinfo == NULL || + !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) || + !collations_agree_on_equality(dcinfo->collid, + exprCollation((Node *) tle->expr))) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -1122,24 +1207,27 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) } /* - * distinct_col_search - subroutine for query_is_distinct_for + * distinct_col_search - subroutine for query_is_distinct_for_with_collations * - * If colno is in colnos, return the corresponding element of opids, - * else return InvalidOid. (Ordinarily colnos would not contain duplicates, - * but if it does, we arbitrarily select the first match.) + * If colno matches the colno field of an entry in distinct_cols, return a + * pointer to that entry; else return NULL. (Ordinarily distinct_cols would + * not contain duplicate colnos, but if it does, we arbitrarily select the + * first match.) 
*/ -static Oid -distinct_col_search(int colno, List *colnos, List *opids) +static DistinctColInfo * +distinct_col_search(int colno, List *distinct_cols) { - ListCell *lc1, - *lc2; + ListCell *lc; - forboth(lc1, colnos, lc2, opids) + foreach(lc, distinct_cols) { - if (colno == lfirst_int(lc1)) - return lfirst_oid(lc2); + DistinctColInfo *dcinfo = (DistinctColInfo *) lfirst(lc); + + if (dcinfo->colno == colno) + return dcinfo; } - return InvalidOid; + + return NULL; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/planner.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/planner.c index dd687856b57..e2b89487ea3 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/planner.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/plan/planner.c @@ -5796,6 +5796,41 @@ optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists) } } } + + /* + * XXX remove any duplicate WindowFuncs from each WindowClause. This has + * been done only in the back branches. Previously, the deduplication was + * done in find_window_functions(), but that caused issues with the code + * above when moving a WindowFunc to another WindowClause as any duplicate + * WindowFuncs won't receive the adjusted winref when merging + * WindowClauses. The deduplication below has been done only so that we + * maintain the same cost calculations. As it turns out, the previous + * deduplication code thought it was saving effort during execution by + * getting rid of duplicates, but that was not true as the expression + * evaluation code will evaluate each WindowFunc mentioned in the + * targetlist. 
+ */ + foreach(lc, windowClause) + { + WindowClause *wc = lfirst_node(WindowClause, lc); + ListCell *lc2; + List *list = wflists->windowFuncs[wc->winref]; + List *newlist = NIL; + + if (list == NIL) + continue; + + foreach(lc2, list) + { + if (!list_member(newlist, lfirst(lc2))) + newlist = lappend(newlist, lfirst(lc2)); + else + wflists->numWindowFuncs--; + } + list_free(list); + + wflists->windowFuncs[wc->winref] = newlist; + } } /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/appendinfo.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/appendinfo.c index f456b3b0a44..a72704da425 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/appendinfo.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/appendinfo.c @@ -233,8 +233,9 @@ adjust_appendrel_attrs_mutator(Node *node, * You might think we need to adjust var->varnullingrels, but that * shouldn't need any changes. It will contain outer-join relids, * while the transformation we are making affects only baserels. - * Below, we just propagate var->varnullingrels into the translated - * Var. + * Below, we just merge var->varnullingrels into the translated Var. + * (We must merge not just copy: the child Var could have some + * nullingrel bits set already, and we mustn't drop those.) * * If var->varnullingrels isn't empty, and the translation wouldn't be * a Var, we have to fail. 
One could imagine wrapping the translated @@ -279,7 +280,12 @@ adjust_appendrel_attrs_mutator(Node *node, elog(ERROR, "attribute %d of relation \"%s\" does not exist", var->varattno, get_rel_name(appinfo->parent_reloid)); if (IsA(newnode, Var)) - ((Var *) newnode)->varnullingrels = var->varnullingrels; + { + Var *newvar = (Var *) newnode; + + newvar->varnullingrels = bms_add_members(newvar->varnullingrels, + var->varnullingrels); + } else if (var->varnullingrels != NULL) elog(ERROR, "failed to apply nullingrels to a non-Var"); return newnode; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/clauses.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/clauses.c index 85ee860a136..7bb9dbbcd89 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/clauses.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/clauses.c @@ -250,13 +250,10 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists) if (wfunc->winref > lists->maxWinRef) elog(ERROR, "WindowFunc contains out-of-range winref %u", wfunc->winref); - /* eliminate duplicates, so that we avoid repeated computation */ - if (!list_member(lists->windowFuncs[wfunc->winref], wfunc)) - { - lists->windowFuncs[wfunc->winref] = - lappend(lists->windowFuncs[wfunc->winref], wfunc); - lists->numWindowFuncs++; - } + + lists->windowFuncs[wfunc->winref] = + lappend(lists->windowFuncs[wfunc->winref], wfunc); + lists->numWindowFuncs++; /* * We assume that the parser checked that there are no window @@ -1094,6 +1091,8 @@ contain_nonstrict_functions_walker(Node *node, void *context) return true; if (IsA(node, BooleanTest)) return true; + if (IsA(node, JsonConstructorExpr)) + return true; /* Check other function-containing nodes */ if (check_functions_in_node(node, contain_nonstrict_functions_checker, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/inherit.c 
b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/inherit.c index f9a8bc5cd8f..5e0bb6a9ac3 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/inherit.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/inherit.c @@ -322,7 +322,6 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, PlanRowMark *top_parentrc, LOCKMODE lockmode) { PartitionDesc partdesc; - Bitmapset *live_parts; int num_live_parts; int i; @@ -356,10 +355,10 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, * that survive pruning. Below, we will initialize child objects for the * surviving partitions. */ - relinfo->live_parts = live_parts = prune_append_rel_partitions(relinfo); + relinfo->live_parts = prune_append_rel_partitions(relinfo); /* Expand simple_rel_array and friends to hold child objects. */ - num_live_parts = bms_num_members(live_parts); + num_live_parts = bms_num_members(relinfo->live_parts); if (num_live_parts > 0) expand_planner_arrays(root, num_live_parts); @@ -378,7 +377,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, * table itself, because it's not going to be scanned. */ i = -1; - while ((i = bms_next_member(live_parts, i)) >= 0) + while ((i = bms_next_member(relinfo->live_parts, i)) >= 0) { Oid childOID = partdesc->oids[i]; Relation childrel; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/plancat.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/plancat.c index 7e7f636db52..13eafeb8fe1 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/plancat.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/optimizer/util/plancat.c @@ -672,6 +672,11 @@ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, * the purposes of inference. 
If no opclass (or collation) is specified, then * all matching indexes (that may or may not match the default in terms of * each attribute opclass/collation) are used for inference. + * + * Note: during index CONCURRENTLY operations, different transactions may + * reference different sets of arbiter indexes. This can lead to false unique + * constraint violations that wouldn't occur during normal operations. For + * more information, see insert.sgml. */ List * infer_arbiter_indexes(PlannerInfo *root) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/analyze.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/analyze.c index e01ff1f9e28..72768d01067 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/analyze.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/analyze.c @@ -2785,6 +2785,7 @@ transformPLAssignStmt(ParseState *pstate, PLAssignStmt *stmt) qry->sortClause, EXPR_KIND_GROUP_BY, false /* allow SQL92 rules */ ); + qry->groupDistinct = sstmt->groupDistinct; if (sstmt->distinctClause == NIL) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_agg.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_agg.c index e78c297fa03..f9f066ed129 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_agg.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_agg.c @@ -36,6 +36,8 @@ typedef struct ParseState *pstate; int min_varlevel; int min_agglevel; + int min_ctelevel; + RangeTblEntry *min_cte; int sublevels_up; } check_agg_arguments_context; @@ -55,7 +57,8 @@ typedef struct static int check_agg_arguments(ParseState *pstate, List *directargs, List *args, - Expr *filter); + Expr *filter, + int agglocation); static bool check_agg_arguments_walker(Node *node, check_agg_arguments_context *context); static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry, @@ -333,7 
+336,8 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr) min_varlevel = check_agg_arguments(pstate, directargs, args, - filter); + filter, + location); *p_levelsup = min_varlevel; @@ -634,7 +638,8 @@ static int check_agg_arguments(ParseState *pstate, List *directargs, List *args, - Expr *filter) + Expr *filter, + int agglocation) { int agglevel; check_agg_arguments_context context; @@ -642,6 +647,8 @@ check_agg_arguments(ParseState *pstate, context.pstate = pstate; context.min_varlevel = -1; /* signifies nothing found yet */ context.min_agglevel = -1; + context.min_ctelevel = -1; + context.min_cte = NULL; context.sublevels_up = 0; (void) check_agg_arguments_walker((Node *) args, &context); @@ -680,6 +687,20 @@ check_agg_arguments(ParseState *pstate, } /* + * If there's a non-local CTE that's below the aggregate's semantic level, + * complain. It's not quite clear what we should do to fix up such a case + * (treating the CTE reference like a Var seems wrong), and it's also + * unclear whether there is a real-world use for such cases. + */ + if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("outer-level aggregate cannot use a nested CTE"), + errdetail("CTE \"%s\" is below the aggregate's semantic level.", + context.min_cte->eref->aliasname), + parser_errposition(pstate, agglocation))); + + /* * Now check for vars/aggs in the direct arguments, and throw error if * needed. Note that we allow a Var of the agg's semantic level, but not * an Agg of that level. 
In principle such Aggs could probably be @@ -692,6 +713,7 @@ check_agg_arguments(ParseState *pstate, { context.min_varlevel = -1; context.min_agglevel = -1; + context.min_ctelevel = -1; (void) check_agg_arguments_walker((Node *) directargs, &context); if (context.min_varlevel >= 0 && context.min_varlevel < agglevel) ereport(ERROR, @@ -707,6 +729,13 @@ check_agg_arguments(ParseState *pstate, parser_errposition(pstate, locate_agg_of_level((Node *) directargs, context.min_agglevel)))); + if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("outer-level aggregate cannot use a nested CTE"), + errdetail("CTE \"%s\" is below the aggregate's semantic level.", + context.min_cte->eref->aliasname), + parser_errposition(pstate, agglocation))); } return agglevel; } @@ -784,6 +813,30 @@ check_agg_arguments_walker(Node *node, parser_errposition(context->pstate, ((WindowFunc *) node)->location))); } + + if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + if (rte->rtekind == RTE_CTE) + { + int ctelevelsup = rte->ctelevelsup; + + /* convert levelsup to frame of reference of original query */ + ctelevelsup -= context->sublevels_up; + /* ignore local CTEs of subqueries */ + if (ctelevelsup >= 0) + { + if (context->min_ctelevel < 0 || + context->min_ctelevel > ctelevelsup) + { + context->min_ctelevel = ctelevelsup; + context->min_cte = rte; + } + } + } + return false; /* allow range_table_walker to continue */ + } if (IsA(node, Query)) { /* Recurse into subselects */ @@ -793,7 +846,7 @@ check_agg_arguments_walker(Node *node, result = query_tree_walker((Query *) node, check_agg_arguments_walker, (void *) context, - 0); + QTW_EXAMINE_RTES_BEFORE); context->sublevels_up--; return result; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_utilcmd.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_utilcmd.c index 
8fcaf15cd51..e327a0a5fc7 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_utilcmd.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/parser/parse_utilcmd.c @@ -1897,7 +1897,10 @@ generateClonedIndexStmt(RangeVar *heapRel, Relation source_idx, * extended statistic "source_statsid", for the rel identified by heapRel and * heapRelid. * - * Attribute numbers in expression Vars are adjusted according to attmap. + * stxkeys in the source statistic holds attribute numbers from the parent + * relation. Those attnums, along with the attribute numbers referenced by + * Vars inside the expression tree, are remapped to the new relation's + * numbering according to attmap. */ static CreateStatsStmt * generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid, @@ -1955,7 +1958,8 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid, StatsElem *selem = makeNode(StatsElem); AttrNumber attnum = statsrec->stxkeys.values[i]; - selem->name = get_attname(heapRelid, attnum, false); + selem->name = + get_attname(heapRelid, attmap->attnums[attnum - 1], false); selem->expr = NULL; def_names = lappend(def_names, selem); @@ -4176,12 +4180,14 @@ transformPartitionRangeBounds(ParseState *pstate, List *blist, int i, j; - i = j = 0; + j = 0; foreach(lc, blist) { Node *expr = lfirst(lc); PartitionRangeDatum *prd = NULL; + i = foreach_current_index(lc); + /* * Infinite range bounds -- "minvalue" and "maxvalue" -- get passed in * as ColumnRefs. 
@@ -4259,7 +4265,6 @@ transformPartitionRangeBounds(ParseState *pstate, List *blist, prd = makeNode(PartitionRangeDatum); prd->kind = PARTITION_RANGE_DATUM_VALUE; prd->value = (Node *) value; - ++i; } prd->location = exprLocation(expr); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/autovacuum.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/autovacuum.c index 0b73987d719..3dacb63a2fa 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/autovacuum.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/autovacuum.c @@ -2680,7 +2680,10 @@ deleted: workitem->avw_active = true; LWLockRelease(AutovacuumLock); + PushActiveSnapshot(GetTransactionSnapshot()); perform_work_item(workitem); + if (ActiveSnapshotSet()) /* transaction could have aborted */ + PopActiveSnapshot(); /* * Check for config changes before acquiring lock for further jobs. diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/interrupt.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/interrupt.c index 4ed5475e711..972d5f28aac 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/interrupt.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/interrupt.c @@ -98,9 +98,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS) * shut down and exit. * * Typically, this handler would be used for SIGTERM, but some processes use - * other signals. In particular, the checkpointer exits on SIGUSR2, and the WAL - * writer and the logical replication parallel apply worker exits on either - * SIGINT or SIGTERM. + * other signals. In particular, the checkpointer and parallel apply worker + * exit on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM. * * ShutdownRequestPending should be checked at a convenient place within the * main loop, or else the main loop should call HandleMainLoopInterrupts. 
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/postmaster.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/postmaster.c index 7c903229ec6..f5fc2232f41 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/postmaster.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/postmaster/postmaster.c @@ -290,12 +290,13 @@ static __thread bool FatalError = false; /* T if recovering from backend crash * * * When the startup process is ready to start archive recovery, it signals the * postmaster, and we switch to PM_RECOVERY state. The background writer and - * checkpointer are launched, while the startup process continues applying WAL. - * If Hot Standby is enabled, then, after reaching a consistent point in WAL - * redo, startup process signals us again, and we switch to PM_HOT_STANDBY - * state and begin accepting connections to perform read-only queries. When - * archive recovery is finished, the startup process exits with exit code 0 - * and we switch to PM_RUN state. + * checkpointer are already running (as these are launched during PM_STARTUP), + * and the startup process continues applying WAL. If Hot Standby is enabled, + * then, after reaching a consistent point in WAL redo, startup process + * signals us again, and we switch to PM_HOT_STANDBY state and begin accepting + * connections to perform read-only queries. When archive recovery is + * finished, the startup process exits with exit code 0 and we switch to + * PM_RUN state. * * Normal child backends can only be launched when we are in PM_RUN or * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.) 
@@ -956,7 +957,9 @@ PostmasterMain(int argc, char *argv[]) /* For debugging: display postmaster environment */ { +#if !defined(WIN32) || defined(_MSC_VER) extern char **environ; +#endif char **p; ereport(DEBUG3, @@ -1959,6 +1962,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ProtocolVersion proto; MemoryContext oldcontext; +retry: pq_startmsgread(); /* @@ -2088,7 +2092,16 @@ retry1: * another SSL negotiation request, and a GSS request should only * follow if SSL was rejected (client may negotiate in either order) */ - return ProcessStartupPacket(port, true, SSLok == 'S'); + ssl_done = true; + if (SSLok == 'S') + { + /* + * We are done with SSL and negotiated correctly, so consider the + * same for GSS. + */ + gss_done = true; + } + goto retry; } else if (proto == NEGOTIATE_GSS_CODE && !gss_done) { @@ -2132,7 +2145,16 @@ retry1: * another GSS negotiation request, and an SSL request should only * follow if GSS was rejected (client may negotiate in either order) */ - return ProcessStartupPacket(port, GSSok == 'G', true); + gss_done = true; + if (GSSok == 'G') + { + /* + * We are done with GSS and negotiated correctly, so consider the + * same for SSL. + */ + ssl_done = true; + } + goto retry; } /* Could add additional special packet types here */ @@ -3026,29 +3048,13 @@ process_pm_child_exit(void) } /* - * Unexpected exit of startup process (including FATAL exit) - * during PM_STARTUP is treated as catastrophic. There are no - * other processes running yet, so we can just exit. 
- */ - if (pmState == PM_STARTUP && - StartupStatus != STARTUP_SIGNALED && - !EXIT_STATUS_0(exitstatus)) - { - LogChildExit(LOG, _("startup process"), - pid, exitstatus); - ereport(LOG, - (errmsg("aborting startup due to startup process failure"))); - ExitPostmaster(1); - } - - /* - * After PM_STARTUP, any unexpected exit (including FATAL exit) of - * the startup process is catastrophic, so kill other children, - * and set StartupStatus so we don't try to reinitialize after - * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED, - * then we previously sent the startup process a SIGQUIT; so - * that's probably the reason it died, and we do want to try to - * restart in that case. + * Any unexpected exit (including FATAL exit) of the startup + * process is catastrophic, so kill other children, and set + * StartupStatus so we don't try to reinitialize after they're + * gone. Exception: if StartupStatus is STARTUP_SIGNALED, then we + * previously sent the startup process a SIGQUIT; so that's + * probably the reason it died, and we do want to try to restart + * in that case. 
* * This stanza also handles the case where we sent a SIGQUIT * during PM_STARTUP due to some dead_end child crashing: in that @@ -3626,7 +3632,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) FatalError = true; /* We now transit into a state of waiting for children to die */ - if (pmState == PM_RECOVERY || + if (pmState == PM_STARTUP || + pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_RUN || pmState == PM_STOP_BACKENDS || diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_color.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_color.c index 8ae788f5195..1587f452ea3 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_color.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_color.c @@ -218,6 +218,7 @@ newcolor(struct colormap *cm) n = cm->ncds * 2; if (n > MAX_COLOR + 1) n = MAX_COLOR + 1; + /* the MAX_COLOR+1 limit ensures these alloc sizes can't overflow: */ if (cm->cd == cm->cdspace) { newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc)); @@ -434,9 +435,8 @@ newhicolorrow(struct colormap *cm, CERR(REG_ESPACE); return 0; } - newarray = (color *) REALLOC(cm->hicolormap, - cm->maxarrayrows * 2 * - cm->hiarraycols * sizeof(color)); + newarray = REALLOC_ARRAY(cm->hicolormap, color, + cm->maxarrayrows * 2 * cm->hiarraycols); if (newarray == NULL) { CERR(REG_ESPACE); @@ -477,9 +477,8 @@ newhicolorcols(struct colormap *cm) CERR(REG_ESPACE); return; } - newarray = (color *) REALLOC(cm->hicolormap, - cm->maxarrayrows * - cm->hiarraycols * 2 * sizeof(color)); + newarray = REALLOC_ARRAY(cm->hicolormap, color, + cm->maxarrayrows * cm->hiarraycols * 2); if (newarray == NULL) { CERR(REG_ESPACE); @@ -652,8 +651,7 @@ subcoloronechr(struct vars *v, * Potentially, we could need two more colormapranges than we have now, if * the given chr is in the middle of some existing range. 
*/ - newranges = (colormaprange *) - MALLOC((cm->numcmranges + 2) * sizeof(colormaprange)); + newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges + 2); if (newranges == NULL) { CERR(REG_ESPACE); @@ -766,8 +764,7 @@ subcoloronerange(struct vars *v, * Potentially, if we have N non-adjacent ranges, we could need as many as * 2N+1 result ranges (consider case where new range spans 'em all). */ - newranges = (colormaprange *) - MALLOC((cm->numcmranges * 2 + 1) * sizeof(colormaprange)); + newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges * 2 + 1); if (newranges == NULL) { CERR(REG_ESPACE); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_cvec.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_cvec.c index 10306215596..8dbcf3c55e3 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_cvec.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_cvec.c @@ -40,6 +40,9 @@ /* * newcvec - allocate a new cvec + * + * Note: in current usage, nchrs and nranges are never so large that we risk + * integer overflow in these size calculations, even with 32-bit size_t. */ static struct cvec * newcvec(int nchrs, /* to hold this many chrs... */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_nfa.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_nfa.c index bad9a8b36a1..37d57d4ec55 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_nfa.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regc_nfa.c @@ -3466,6 +3466,10 @@ compact(struct nfa *nfa, assert(!NISERR()); + /* + * The REG_MAX_COMPILE_SPACE restriction ensures that integer overflow + * can't occur in this loop nor in the allocation requests below. 
+ */ nstates = 0; narcs = 0; for (s = nfa->states; s != NULL; s = s->next) @@ -3518,6 +3522,12 @@ compact(struct nfa *nfa, case LACON: assert(s->no != cnfa->pre); assert(a->co >= 0); + /* make sure the modified color number will fit */ + if (a->co > MAX_COLOR - cnfa->ncolors) + { + NERR(REG_ECOLORS); + return; + } ca->co = (color) (cnfa->ncolors + a->co); ca->to = a->to->no; ca++; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regcomp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regcomp.c index 15b264e50f1..4b8eed1b384 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regcomp.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regcomp.c @@ -561,6 +561,7 @@ moresubs(struct vars *v, assert(wanted > 0 && (size_t) wanted >= v->nsubs); n = (size_t) wanted * 3 / 2 + 1; + /* n is bounded by the number of states, so no chance of overflow here */ if (v->subs == v->sub10) { p = (struct subre **) MALLOC(n * sizeof(struct subre *)); @@ -2405,8 +2406,8 @@ newlacon(struct vars *v, else { n = v->nlacons; - newlacons = (struct subre *) REALLOC(v->lacons, - (n + 1) * sizeof(struct subre)); + /* better use REALLOC_ARRAY here, as struct subre is big */ + newlacons = REALLOC_ARRAY(v->lacons, struct subre, n + 1); } if (newlacons == NULL) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/rege_dfa.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/rege_dfa.c index 1f8f2ab1441..5b57fed60a9 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/rege_dfa.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/rege_dfa.c @@ -640,20 +640,29 @@ newdfa(struct vars *v, } else { + /* + * Restrict the ranges of nstates and ncolors enough that the arrays + * we allocate here have no more than INT_MAX members. This protects + * not only the allocation calculations just below, but later indexing + * into these arrays. 
+ */ + if (wordsper >= INT_MAX / (nss + WORK) || + cnfa->ncolors >= INT_MAX / nss) + { + ERR(REG_ETOOBIG); + return NULL; + } d = (struct dfa *) MALLOC(sizeof(struct dfa)); if (d == NULL) { ERR(REG_ESPACE); return NULL; } - d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset)); - d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper * - sizeof(unsigned)); + d->ssets = MALLOC_ARRAY(struct sset, nss); + d->statesarea = MALLOC_ARRAY(unsigned, (nss + WORK) * wordsper); d->work = &d->statesarea[nss * wordsper]; - d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors * - sizeof(struct sset *)); - d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors * - sizeof(struct arcp)); + d->outsarea = MALLOC_ARRAY(struct sset *, nss * cnfa->ncolors); + d->incarea = MALLOC_ARRAY(struct arcp, nss * cnfa->ncolors); d->ismalloced = true; d->arraysmalloced = true; /* now freedfa() will behave sanely */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regexec.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regexec.c index 2a1d5bebda3..665aa31bd03 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regexec.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/regex/regexec.c @@ -231,7 +231,7 @@ pg_regexec(regex_t *re, if (v->nmatch <= LOCALMAT) v->pmatch = mat; else - v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t)); + v->pmatch = MALLOC_ARRAY(regmatch_t, v->nmatch); if (v->pmatch == NULL) return REG_ESPACE; zapallsubs(v->pmatch, v->nmatch); @@ -265,6 +265,7 @@ pg_regexec(regex_t *re, v->subdfas = subdfas; else { + /* ntree is surely less than the number of states, so this is safe: */ v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); if (v->subdfas == NULL) { @@ -279,6 +280,7 @@ pg_regexec(regex_t *re, n = (size_t) v->g->nlacons; if (n > 0) { + /* nlacons is surely less than the number of arcs, so this is safe: */ v->ladfas = (struct dfa **) MALLOC(n * 
sizeof(struct dfa *)); if (v->ladfas == NULL) { @@ -1163,7 +1165,7 @@ citerdissect(struct vars *v, max_matches = t->max; if (max_matches < min_matches) max_matches = min_matches; - endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); + endpts = MALLOC_ARRAY(chr *, max_matches + 1); if (endpts == NULL) return REG_ESPACE; endpts[0] = begin; @@ -1370,7 +1372,7 @@ creviterdissect(struct vars *v, max_matches = t->max; if (max_matches < min_matches) max_matches = min_matches; - endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); + endpts = MALLOC_ARRAY(chr *, max_matches + 1); if (endpts == NULL) return REG_ESPACE; endpts[0] = begin; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/applyparallelworker.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/applyparallelworker.c index 889e5f1edf3..e8d15199d92 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/applyparallelworker.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/applyparallelworker.c @@ -814,6 +814,15 @@ LogicalParallelApplyLoop(shm_mq_handle *mqh) if (rc & WL_LATCH_SET) ResetLatch(MyLatch); + + /* + * Force stats reporting to avoid long delays. There can be long + * idle gaps before the leader assigns the next transaction, and + * the only opportunity to report stats during such gaps is + * here. + */ + if ((rc & WL_TIMEOUT) && !IsTransactionState()) + pgstat_report_stat(true); } } else @@ -872,10 +881,17 @@ ParallelApplyWorkerMain(Datum main_arg) InitializingApplyWorker = true; - /* Setup signal handling. */ + /* + * Setup signal handling. + * + * Note: We intentionally used SIGUSR2 to trigger a graceful shutdown + * initiated by the leader apply worker. This helps to differentiate it + * from the case where we abort the current transaction and exit on + * receiving SIGTERM. 
+ */ pqsignal(SIGHUP, SignalHandlerForConfigReload); - pqsignal(SIGINT, SignalHandlerForShutdownRequest); pqsignal(SIGTERM, die); + pqsignal(SIGUSR2, SignalHandlerForShutdownRequest); BackgroundWorkerUnblockSignals(); /* @@ -974,9 +990,9 @@ ParallelApplyWorkerMain(Datum main_arg) /* * The parallel apply worker must not get here because the parallel apply - * worker will only stop when it receives a SIGTERM or SIGINT from the - * leader, or when there is an error. None of these cases will allow the - * code to reach here. + * worker will only stop when it receives a SIGTERM or SIGUSR2 from the + * leader, or SIGINT from itself, or when there is an error. None of these + * cases will allow the code to reach here. */ Assert(false); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/decode.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/decode.c index db8b2c230c5..a591bf08e1f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/decode.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/decode.c @@ -523,20 +523,13 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) /* * Inplace updates are only ever performed on catalog tuples and * can, per definition, not change tuple visibility. Since we - * don't decode catalog tuples, we're not interested in the + * also don't decode catalog tuples, we're not interested in the * record's contents. - * - * In-place updates can be used either by XID-bearing transactions - * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less - * transactions (e.g. VACUUM). In the former case, the commit - * record will include cache invalidations, so we mark the - * transaction as catalog modifying here. Currently that's - * redundant because the commit will do that as well, but once we - * support decoding in-progress relations, this will be important. 
*/ if (!TransactionIdIsValid(xid)) break; + /* PostgreSQL 13 was the last to need these actions. */ (void) SnapBuildProcessChange(builder, xid, buf->origptr); ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr); break; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/launcher.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/launcher.c index 887d59b1324..20407aabe40 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/launcher.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/launcher.c @@ -624,7 +624,7 @@ logicalrep_worker_stop(Oid subid, Oid relid) /* * Stop the given logical replication parallel apply worker. * - * Node that the function sends SIGINT instead of SIGTERM to the parallel apply + * Node that the function sends SIGUSR2 instead of SIGTERM to the parallel apply * worker so that the worker exits cleanly. */ void @@ -662,7 +662,7 @@ logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo) * Only stop the worker if the generation matches and the worker is alive. */ if (worker->generation == generation && worker->proc) - logicalrep_worker_stop_internal(worker, SIGINT); + logicalrep_worker_stop_internal(worker, SIGUSR2); LWLockRelease(LogicalRepWorkerLock); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/logical.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/logical.c index 6e1879d8149..f4ed7b0b93f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/logical.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/logical.c @@ -404,11 +404,11 @@ CreateInitDecodingContext(const char *plugin, * without further interlock its return value might immediately be out of * date. 
* - * So we have to acquire the ProcArrayLock to prevent computation of new - * xmin horizons by other backends, get the safe decoding xid, and inform - * the slot machinery about the new limit. Once that's done the - * ProcArrayLock can be released as the slot machinery now is - * protecting against vacuum. + * So we have to acquire both the ReplicationSlotControlLock and the + * ProcArrayLock to prevent concurrent computation and update of new xmin + * horizons by other backends, get the safe decoding xid, and inform the + * slot machinery about the new limit. Once that's done both locks can be + * released as the slot machinery now is protecting against vacuum. * * Note that, temporarily, the data, not just the catalog, xmin has to be * reserved if a data snapshot is to be exported. Otherwise the initial @@ -421,6 +421,7 @@ CreateInitDecodingContext(const char *plugin, * * ---- */ + LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot); @@ -435,6 +436,7 @@ CreateInitDecodingContext(const char *plugin, ReplicationSlotsComputeRequiredXmin(true); LWLockRelease(ProcArrayLock); + LWLockRelease(ReplicationSlotControlLock); ReplicationSlotMarkDirty(); ReplicationSlotSave(); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/tablesync.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/tablesync.c index 784c5b89110..1760a3e11fe 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/tablesync.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/tablesync.c @@ -1411,12 +1411,26 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos) MyLogicalRepWorker->relstate_lsn = InvalidXLogRecPtr; SpinLockRelease(&MyLogicalRepWorker->relmutex); - /* Update the state and make it visible to others. 
*/ + /* + * Update the state, create the replication origin, and make them visible + * to others. + */ StartTransactionCommand(); UpdateSubscriptionRelState(MyLogicalRepWorker->subid, MyLogicalRepWorker->relid, MyLogicalRepWorker->relstate, MyLogicalRepWorker->relstate_lsn); + + /* + * Create the replication origin in a separate transaction from the one + * that sets up the origin in shared memory. This prevents the risk that + * changes to the origin in shared memory cannot be rolled back if the + * transaction aborts. + */ + originid = replorigin_by_name(originname, true); + if (!OidIsValid(originid)) + originid = replorigin_create(originname); + CommitTransactionCommand(); pgstat_report_stat(true); @@ -1455,37 +1469,21 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos) CRS_USE_SNAPSHOT, origin_startpos); /* - * Setup replication origin tracking. The purpose of doing this before the - * copy is to avoid doing the copy again due to any error in setting up - * origin tracking. + * Advance the origin to the LSN got from walrcv_create_slot and then set + * up the origin. The advancement is WAL logged for the purpose of + * recovery. Locks are to prevent the replication origin from vanishing + * while advancing. + * + * The purpose of doing these before the copy is to avoid doing the copy + * again due to any error in advancing or setting up origin tracking. */ - originid = replorigin_by_name(originname, true); - if (!OidIsValid(originid)) - { - /* - * Origin tracking does not exist, so create it now. - * - * Then advance to the LSN got from walrcv_create_slot. This is WAL - * logged for the purpose of recovery. Locks are to prevent the - * replication origin from vanishing while advancing. 
- */ - originid = replorigin_create(originname); - - LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock); - replorigin_advance(originid, *origin_startpos, InvalidXLogRecPtr, - true /* go backward */ , true /* WAL log */ ); - UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock); + LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock); + replorigin_advance(originid, *origin_startpos, InvalidXLogRecPtr, + true /* go backward */ , true /* WAL log */ ); + UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock); - replorigin_session_setup(originid, 0); - replorigin_session_origin = originid; - } - else - { - ereport(ERROR, - (errcode(ERRCODE_DUPLICATE_OBJECT), - errmsg("replication origin \"%s\" already exists", - originname))); - } + replorigin_session_setup(originid, 0); + replorigin_session_origin = originid; /* * Make sure that the copy command runs as the table owner, unless the diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/worker.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/worker.c index 199f10da7fe..1aab5a776d4 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/worker.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/logical/worker.c @@ -4492,6 +4492,13 @@ InitializeApplyWorker(void) StartTransactionCommand(); oldctx = MemoryContextSwitchTo(ApplyContext); + /* + * Lock the subscription to prevent it from being concurrently dropped, + * then re-verify its existence. After the initialization, the worker will + * be terminated gracefully if the subscription is dropped. 
+ */ + LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid, 0, + AccessShareLock); MySubscription = GetSubscription(MyLogicalRepWorker->subid, true); if (!MySubscription) { @@ -4537,6 +4544,23 @@ InitializeApplyWorker(void) MySubscription->name))); CommitTransactionCommand(); + + /* + * Register a callback to reset the origin state before aborting any + * pending transaction during shutdown (see ShutdownPostgres()). This will + * avoid origin advancement for an incomplete transaction which could + * otherwise lead to its loss as such a transaction won't be sent by the + * server again. + * + * Note that even a LOG or DEBUG statement placed after setting the origin + * state may process a shutdown signal before committing the current apply + * operation. So, it is important to register such a callback here. + * + * Register this callback here to ensure that all types of logical + * replication workers that set up origins and apply remote transactions + * are protected. + */ + before_shmem_exit(replorigin_reset, (Datum) 0); } /* Logical Replication Apply worker entry point */ @@ -4574,19 +4598,6 @@ ApplyWorkerMain(Datum main_arg) InitializeApplyWorker(); - /* - * Register a callback to reset the origin state before aborting any - * pending transaction during shutdown (see ShutdownPostgres()). This will - * avoid origin advancement for an in-complete transaction which could - * otherwise lead to its loss as such a transaction won't be sent by the - * server again. - * - * Note that even a LOG or DEBUG statement placed after setting the origin - * state may process a shutdown signal before committing the current apply - * operation. So, it is important to register such a callback here. - */ - before_shmem_exit(replorigin_reset, (Datum) 0); - InitializingApplyWorker = false; /* Connect to the origin and start the replication. 
*/ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/slot.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/slot.c index f35ee6b2c77..3c058c2c19d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/slot.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/slot.c @@ -190,31 +190,62 @@ ReplicationSlotShmemExit(int code, Datum arg) /* * Check whether the passed slot name is valid and report errors at elevel. * + * See comments for ReplicationSlotValidateNameInternal(). + */ +bool +ReplicationSlotValidateName(const char *name, int elevel) +{ + int err_code; + char *err_msg = NULL; + char *err_hint = NULL; + + if (!ReplicationSlotValidateNameInternal(name, &err_code, &err_msg, + &err_hint)) + { + ereport(elevel, + errcode(err_code), + errmsg_internal("%s", err_msg), + (err_hint != NULL) ? errhint("%s", err_hint) : 0); + + pfree(err_msg); + if (err_hint != NULL) + pfree(err_hint); + return false; + } + + return true; +} + +/* + * Check whether the passed slot name is valid. + * * Slot names may consist out of [a-z0-9_]{1,NAMEDATALEN-1} which should allow * the name to be used as a directory name on every supported OS. * - * Returns whether the directory name is valid or not if elevel < ERROR. + * Returns true if the slot name is valid. Otherwise, returns false and stores + * the error code, error message, and optional hint in err_code, err_msg, and + * err_hint, respectively. The caller is responsible for freeing err_msg and + * err_hint, which are palloc'd. 
*/ bool -ReplicationSlotValidateName(const char *name, int elevel) +ReplicationSlotValidateNameInternal(const char *name, int *err_code, + char **err_msg, char **err_hint) { const char *cp; if (strlen(name) == 0) { - ereport(elevel, - (errcode(ERRCODE_INVALID_NAME), - errmsg("replication slot name \"%s\" is too short", - name))); + *err_code = ERRCODE_INVALID_NAME; + *err_msg = psprintf(_("replication slot name \"%s\" is too short"), name); + *err_hint = NULL; return false; } if (strlen(name) >= NAMEDATALEN) { - ereport(elevel, - (errcode(ERRCODE_NAME_TOO_LONG), - errmsg("replication slot name \"%s\" is too long", - name))); + *err_code = ERRCODE_NAME_TOO_LONG; + *err_msg = psprintf(_("replication slot name \"%s\" is too long"), name); + *err_hint = NULL; return false; } @@ -224,11 +255,9 @@ ReplicationSlotValidateName(const char *name, int elevel) || (*cp >= '0' && *cp <= '9') || (*cp == '_'))) { - ereport(elevel, - (errcode(ERRCODE_INVALID_NAME), - errmsg("replication slot name \"%s\" contains invalid character", - name), - errhint("Replication slot names may only contain lower case letters, numbers, and the underscore character."))); + *err_code = ERRCODE_INVALID_NAME; + *err_msg = psprintf(_("replication slot name \"%s\" contains invalid character"), name); + *err_hint = psprintf(_("Replication slot names may only contain lower case letters, numbers, and the underscore character.")); return false; } } @@ -496,7 +525,7 @@ retry: SpinLockRelease(&s->mutex); } else - active_pid = MyProcPid; + s->active_pid = active_pid = MyProcPid; LWLockRelease(ReplicationSlotControlLock); /* @@ -829,8 +858,11 @@ ReplicationSlotPersist(void) /* * Compute the oldest xmin across all slots and store it in the ProcArray. * - * If already_locked is true, ProcArrayLock has already been acquired - * exclusively. + * If already_locked is true, both the ReplicationSlotControlLock and the + * ProcArrayLock have already been acquired exclusively. 
It is crucial that the + * caller first acquires the ReplicationSlotControlLock, followed by the + * ProcArrayLock, to prevent any undetectable deadlocks since this function + * acquires them in that order. */ void ReplicationSlotsComputeRequiredXmin(bool already_locked) @@ -840,8 +872,33 @@ ReplicationSlotsComputeRequiredXmin(bool already_locked) TransactionId agg_catalog_xmin = InvalidTransactionId; Assert(ReplicationSlotCtl != NULL); + Assert(!already_locked || + (LWLockHeldByMeInMode(ReplicationSlotControlLock, LW_EXCLUSIVE) && + LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE))); - LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); + /* + * Hold the ReplicationSlotControlLock until after updating the slot xmin + * values, so no backend updates the initial xmin for newly created slot + * concurrently. A shared lock is used here to minimize lock contention, + * especially when many slots exist and advancements occur frequently. + * This is safe since an exclusive lock is taken during initial slot xmin + * update in slot creation. + * + * One might think that we can hold the ProcArrayLock exclusively and + * update the slot xmin values, but it could increase lock contention on + * the ProcArrayLock, which is not great since this function can be called + * at non-negligible frequency. + * + * Concurrent invocation of this function may cause the computed slot xmin + * to regress. However, this is harmless because tuples prior to the most + * recent xmin are no longer useful once advancement occurs (see + * LogicalConfirmReceivedLocation where the slot's xmin value is flushed + * before updating the effective_xmin). Thus, such regression merely + * prevents VACUUM from prematurely removing tuples without causing the + * early deletion of required data. 
+ */ + if (!already_locked) + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); for (i = 0; i < max_replication_slots; i++) { @@ -876,9 +933,10 @@ ReplicationSlotsComputeRequiredXmin(bool already_locked) agg_catalog_xmin = effective_catalog_xmin; } - LWLockRelease(ReplicationSlotControlLock); - ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked); + + if (!already_locked) + LWLockRelease(ReplicationSlotControlLock); } /* @@ -1174,62 +1232,65 @@ void ReplicationSlotReserveWal(void) { ReplicationSlot *slot = MyReplicationSlot; + XLogSegNo segno; + XLogRecPtr restart_lsn; Assert(slot != NULL); Assert(slot->data.restart_lsn == InvalidXLogRecPtr); /* - * The replication slot mechanism is used to prevent removal of required - * WAL. As there is no interlock between this routine and checkpoints, WAL - * segments could concurrently be removed when a now stale return value of - * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that - * this happens we'll just retry. + * The replication slot mechanism is used to prevent the removal of + * required WAL. + * + * Acquire an exclusive lock to prevent the checkpoint process from + * concurrently computing the minimum slot LSN (see the call to + * XLogGetReplicationSlotMinimumLSN in CreateCheckPoint). This ensures + * that the WAL reserved for replication cannot be removed during a + * checkpoint. + * + * The mechanism is reliable because if WAL reservation occurs first, the + * checkpoint must wait for the restart_lsn update before determining the + * minimum non-removable LSN. On the other hand, if the checkpoint happens + * first, subsequent WAL reservations will select positions at or beyond + * the redo pointer of that checkpoint. */ - while (true) - { - XLogSegNo segno; - XLogRecPtr restart_lsn; + LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE); - /* - * For logical slots log a standby snapshot and start logical decoding - * at exactly that position. 
That allows the slot to start up more - * quickly. But on a standby we cannot do WAL writes, so just use the - * replay pointer; effectively, an attempt to create a logical slot on - * standby will cause it to wait for an xl_running_xact record to be - * logged independently on the primary, so that a snapshot can be - * built using the record. - * - * None of this is needed (or indeed helpful) for physical slots as - * they'll start replay at the last logged checkpoint anyway. Instead - * return the location of the last redo LSN. While that slightly - * increases the chance that we have to retry, it's where a base - * backup has to start replay at. - */ - if (SlotIsPhysical(slot)) - restart_lsn = GetRedoRecPtr(); - else if (RecoveryInProgress()) - restart_lsn = GetXLogReplayRecPtr(NULL); - else - restart_lsn = GetXLogInsertRecPtr(); + /* + * For logical slots log a standby snapshot and start logical decoding at + * exactly that position. That allows the slot to start up more quickly. + * But on a standby we cannot do WAL writes, so just use the replay + * pointer; effectively, an attempt to create a logical slot on standby + * will cause it to wait for an xl_running_xact record to be logged + * independently on the primary, so that a snapshot can be built using the + * record. + * + * None of this is needed (or indeed helpful) for physical slots as + * they'll start replay at the last logged checkpoint anyway. Instead, + * return the location of the last redo LSN, where a base backup has to + * start replay at. 
+ */ + if (SlotIsPhysical(slot)) + restart_lsn = GetRedoRecPtr(); + else if (RecoveryInProgress()) + restart_lsn = GetXLogReplayRecPtr(NULL); + else + restart_lsn = GetXLogInsertRecPtr(); - SpinLockAcquire(&slot->mutex); - slot->data.restart_lsn = restart_lsn; - SpinLockRelease(&slot->mutex); + SpinLockAcquire(&slot->mutex); + slot->data.restart_lsn = restart_lsn; + SpinLockRelease(&slot->mutex); - /* prevent WAL removal as fast as possible */ - ReplicationSlotsComputeRequiredLSN(); + /* prevent WAL removal as fast as possible */ + ReplicationSlotsComputeRequiredLSN(); - /* - * If all required WAL is still there, great, otherwise retry. The - * slot should prevent further removal of WAL, unless there's a - * concurrent ReplicationSlotsComputeRequiredLSN() after we've written - * the new restart_lsn above, so normally we should never need to loop - * more than twice. - */ - XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size); - if (XLogGetLastRemovedSegno() < segno) - break; - } + /* Checkpoint shouldn't remove the required WAL. 
*/ + XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size); + if (XLogGetLastRemovedSegno() >= segno) + elog(ERROR, "WAL required by replication slot %s has been removed concurrently", + NameStr(slot->data.name)); + + LWLockRelease(ReplicationSlotAllocationLock); if (!RecoveryInProgress() && SlotIsLogical(slot)) { @@ -1321,11 +1382,6 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, { int last_signaled_pid = 0; bool released_lock = false; - bool terminated = false; - TransactionId initial_effective_xmin = InvalidTransactionId; - TransactionId initial_catalog_effective_xmin = InvalidTransactionId; - XLogRecPtr initial_restart_lsn = InvalidXLogRecPtr; - ReplicationSlotInvalidationCause conflict_prev PG_USED_FOR_ASSERTS_ONLY = RS_INVAL_NONE; for (;;) { @@ -1360,24 +1416,11 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, */ if (s->data.invalidated == RS_INVAL_NONE) { - /* - * The slot's mutex will be released soon, and it is possible that - * those values change since the process holding the slot has been - * terminated (if any), so record them here to ensure that we - * would report the correct conflict cause. 
- */ - if (!terminated) - { - initial_restart_lsn = s->data.restart_lsn; - initial_effective_xmin = s->effective_xmin; - initial_catalog_effective_xmin = s->effective_catalog_xmin; - } - switch (cause) { case RS_INVAL_WAL_REMOVED: - if (initial_restart_lsn != InvalidXLogRecPtr && - initial_restart_lsn < oldestLSN) + if (s->data.restart_lsn != InvalidXLogRecPtr && + s->data.restart_lsn < oldestLSN) conflict = cause; break; case RS_INVAL_HORIZON: @@ -1386,12 +1429,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, /* invalid DB oid signals a shared relation */ if (dboid != InvalidOid && dboid != s->data.database) break; - if (TransactionIdIsValid(initial_effective_xmin) && - TransactionIdPrecedesOrEquals(initial_effective_xmin, + if (TransactionIdIsValid(s->effective_xmin) && + TransactionIdPrecedesOrEquals(s->effective_xmin, snapshotConflictHorizon)) conflict = cause; - else if (TransactionIdIsValid(initial_catalog_effective_xmin) && - TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin, + else if (TransactionIdIsValid(s->effective_catalog_xmin) && + TransactionIdPrecedesOrEquals(s->effective_catalog_xmin, snapshotConflictHorizon)) conflict = cause; break; @@ -1404,13 +1447,6 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, } } - /* - * The conflict cause recorded previously should not change while the - * process owning the slot (if any) has been terminated. - */ - Assert(!(conflict_prev != RS_INVAL_NONE && terminated && - conflict_prev != conflict)); - /* if there's no conflict, we're done */ if (conflict == RS_INVAL_NONE) { @@ -1485,8 +1521,6 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, (void) kill(active_pid, SIGTERM); last_signaled_pid = active_pid; - terminated = true; - conflict_prev = conflict; } /* Wait until the slot is released. 
*/ @@ -1497,6 +1531,14 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, * Re-acquire lock and start over; we expect to invalidate the * slot next time (unless another process acquires the slot in the * meantime). + * + * Note: It is possible for a slot to advance its restart_lsn or + * xmin values sufficiently between when we release the mutex and + * when we recheck, moving from a conflicting state to a non + * conflicting state. This is intentional and safe: if the slot + * has caught up while we're busy here, the resources we were + * concerned about (WAL segments or tuples) have not yet been + * removed, and there's no reason to invalidate the slot. */ LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); continue; @@ -1830,6 +1872,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) pgstat_report_wait_end(); CloseTransientFile(fd); + unlink(tmppath); LWLockRelease(&slot->io_in_progress_lock); /* if write didn't set errno, assume problem is no disk space */ @@ -1850,7 +1893,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) pgstat_report_wait_end(); CloseTransientFile(fd); + unlink(tmppath); LWLockRelease(&slot->io_in_progress_lock); + errno = save_errno; ereport(elevel, (errcode_for_file_access(), @@ -1864,7 +1909,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) { int save_errno = errno; + unlink(tmppath); LWLockRelease(&slot->io_in_progress_lock); + errno = save_errno; ereport(elevel, (errcode_for_file_access(), @@ -1878,7 +1925,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) { int save_errno = errno; + unlink(tmppath); LWLockRelease(&slot->io_in_progress_lock); + errno = save_errno; ereport(elevel, (errcode_for_file_access(), diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/walsender.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/walsender.c index 62dd5559342..c96ba89446a 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/walsender.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/replication/walsender.c @@ -218,6 +218,20 @@ typedef struct int write_head; int read_heads[NUM_SYNC_REP_WAIT_MODE]; WalTimeSample last_read[NUM_SYNC_REP_WAIT_MODE]; + + /* + * Overflow entries for read heads that collide with the write head. + * + * When the cyclic buffer fills (write head is about to collide with a + * read head), we save that read head's current sample here and mark it as + * using overflow (read_heads[i] = -1). This allows the write head to + * continue advancing while the overflowed mode continues lag computation + * using the saved sample. + * + * Once the standby's reported LSN advances past the overflow entry's LSN, + * we transition back to normal buffer-based tracking. + */ + WalTimeSample overflowed[NUM_SYNC_REP_WAIT_MODE]; } LagTracker; static __thread LagTracker *lag_tracker; @@ -1585,9 +1599,15 @@ WalSndWaitForWal(XLogRecPtr loc) * If we're shutting down, trigger pending WAL to be written out, * otherwise we'd possibly end up waiting for WAL that never gets * written, because walwriter has shut down already. + * + * Note that GetXLogInsertEndRecPtr() is used to obtain the WAL flush + * request location instead of GetXLogInsertRecPtr(). Because if the + * last WAL record ends at a page boundary, GetXLogInsertRecPtr() can + * return an LSN pointing past the page header, which may cause + * XLogFlush() to report an error. */ - if (got_STOPPING) - XLogBackgroundFlush(); + if (got_STOPPING && !RecoveryInProgress()) + XLogFlush(GetXLogInsertEndRecPtr()); /* Update our idea of the currently flushed position. 
*/ if (!RecoveryInProgress()) @@ -2095,7 +2115,9 @@ ProcessStandbyReplyMessage(void) TimestampTz now; TimestampTz replyTime; - static __thread bool fullyAppliedLastTime = false; + static __thread XLogRecPtr prevWritePtr = InvalidXLogRecPtr; + static __thread XLogRecPtr prevFlushPtr = InvalidXLogRecPtr; + static __thread XLogRecPtr prevApplyPtr = InvalidXLogRecPtr; /* the caller already consumed the msgtype byte */ writePtr = pq_getmsgint64(&reply_message); @@ -2128,22 +2150,23 @@ ProcessStandbyReplyMessage(void) applyLag = LagTrackerRead(SYNC_REP_WAIT_APPLY, applyPtr, now); /* - * If the standby reports that it has fully replayed the WAL in two - * consecutive reply messages, then the second such message must result - * from wal_receiver_status_interval expiring on the standby. This is a - * convenient time to forget the lag times measured when it last - * wrote/flushed/applied a WAL record, to avoid displaying stale lag data - * until more WAL traffic arrives. + * If the standby reports that it has fully replayed the WAL, and the + * write/flush/apply positions remain unchanged across two consecutive + * reply messages, forget the lag times measured when it last + * wrote/flushed/applied a WAL record. + * + * The second message with unchanged positions typically results from + * wal_receiver_status_interval expiring on the standby, so lag values are + * usually cleared after that interval when there is no activity. This + * avoids displaying stale lag data until more WAL traffic arrives. */ - clearLagTimes = false; - if (applyPtr == sentPtr) - { - if (fullyAppliedLastTime) - clearLagTimes = true; - fullyAppliedLastTime = true; - } - else - fullyAppliedLastTime = false; + clearLagTimes = (applyPtr == sentPtr && flushPtr == sentPtr && + writePtr == prevWritePtr && flushPtr == prevFlushPtr && + applyPtr == prevApplyPtr); + + prevWritePtr = writePtr; + prevFlushPtr = flushPtr; + prevApplyPtr = applyPtr; /* Send a reply if the standby requested one. 
*/ if (replyRequested) @@ -3797,7 +3820,6 @@ WalSndKeepaliveIfNecessary(void) static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time) { - bool buffer_full; int new_write_head; int i; @@ -3819,25 +3841,19 @@ LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time) * of space. */ new_write_head = (lag_tracker->write_head + 1) % LAG_TRACKER_BUFFER_SIZE; - buffer_full = false; for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; ++i) { + /* + * If the buffer is full, move the slowest reader to a separate + * overflow entry and free its space in the buffer so the write head + * can advance. + */ if (new_write_head == lag_tracker->read_heads[i]) - buffer_full = true; - } - - /* - * If the buffer is full, for now we just rewind by one slot and overwrite - * the last sample, as a simple (if somewhat uneven) way to lower the - * sampling rate. There may be better adaptive compaction algorithms. - */ - if (buffer_full) - { - new_write_head = lag_tracker->write_head; - if (lag_tracker->write_head > 0) - lag_tracker->write_head--; - else - lag_tracker->write_head = LAG_TRACKER_BUFFER_SIZE - 1; + { + lag_tracker->overflowed[i] = + lag_tracker->buffer[lag_tracker->read_heads[i]]; + lag_tracker->read_heads[i] = -1; + } } /* Store a sample at the current write head position. */ @@ -3864,6 +3880,28 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now) { TimestampTz time = 0; + /* + * If 'lsn' has not passed the WAL position stored in the overflow entry, + * return the elapsed time (in microseconds) since the saved local flush + * time. If the flush time is in the future (due to clock drift), return + * -1 to treat as no valid sample. + * + * Otherwise, switch back to using the buffer to control the read head and + * compute the elapsed time. The read head is then reset to point to the + * oldest entry in the buffer. 
+ */ + if (lag_tracker->read_heads[head] == -1) + { + if (lag_tracker->overflowed[head].lsn > lsn) + return (now >= lag_tracker->overflowed[head].time) ? + now - lag_tracker->overflowed[head].time : -1; + + time = lag_tracker->overflowed[head].time; + lag_tracker->last_read[head] = lag_tracker->overflowed[head]; + lag_tracker->read_heads[head] = + (lag_tracker->write_head + 1) % LAG_TRACKER_BUFFER_SIZE; + } + /* Read all unread samples up to this LSN or end of buffer. */ while (lag_tracker->read_heads[head] != lag_tracker->write_head && lag_tracker->buffer[lag_tracker->read_heads[head]].lsn <= lsn) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/rewrite/rewriteHandler.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/rewrite/rewriteHandler.c index 57f075de394..d02a040ccd6 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/rewrite/rewriteHandler.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/rewrite/rewriteHandler.c @@ -98,6 +98,7 @@ static List *matchLocks(CmdType event, RuleLock *rulelocks, static Query *fireRIRrules(Query *parsetree, List *activeRIRs); static bool view_has_instead_trigger(Relation view, CmdType event); static Bitmapset *adjust_view_column_set(Bitmapset *cols, List *targetlist); +static List *get_generated_columns(Relation rel, int rt_index); /* @@ -592,7 +593,10 @@ rewriteRuleAction(Query *parsetree, } } - /* OK, it's safe to combine the CTE lists */ + /* + * OK, it's safe to combine the CTE lists. Beware that RewriteQuery + * knows we concatenate the lists in this order. + */ sub_action->cteList = list_concat(sub_action->cteList, copyObject(parsetree->cteList)); /* ... 
and don't forget about the associated flags */ @@ -637,12 +641,45 @@ rewriteRuleAction(Query *parsetree, if ((event == CMD_INSERT || event == CMD_UPDATE) && sub_action->commandType != CMD_UTILITY) { + RangeTblEntry *new_rte = rt_fetch(new_varno, sub_action->rtable); + Relation new_rel; + List *gen_cols; + + /* + * The target list does not contain entries for generated columns + * (they are removed by rewriteTargetListIU), so we must build entries + * for them here, so that new.gen_col can be rewritten correctly. + */ + new_rel = relation_open(new_rte->relid, NoLock); + gen_cols = get_generated_columns(new_rel, new_varno); + relation_close(new_rel, NoLock); + + /* + * The generated column expressions refer to new.attribute, so they + * must be rewritten before they can be used as replacements. + */ + gen_cols = (List *) + ReplaceVarsFromTargetList((Node *) gen_cols, + new_varno, + 0, + new_rte, + parsetree->targetList, + (event == CMD_UPDATE) ? + REPLACEVARS_CHANGE_VARNO : + REPLACEVARS_SUBSTITUTE_NULL, + current_varno, + &sub_action->hasSubLinks); + + /* + * Now rewrite new.attribute in sub_action, using both the target list + * and the rewritten generated column expressions. + */ sub_action = (Query *) ReplaceVarsFromTargetList((Node *) sub_action, new_varno, 0, - rt_fetch(new_varno, sub_action->rtable), - parsetree->targetList, + new_rte, + list_concat(gen_cols, parsetree->targetList), (event == CMD_UPDATE) ? REPLACEVARS_CHANGE_VARNO : REPLACEVARS_SUBSTITUTE_NULL, @@ -2340,17 +2377,48 @@ CopyAndAddInvertedQual(Query *parsetree, ChangeVarNodes(new_qual, PRS2_OLD_VARNO, rt_index, 0); /* Fix references to NEW */ if (event == CMD_INSERT || event == CMD_UPDATE) + { + RangeTblEntry *rte = rt_fetch(rt_index, parsetree->rtable); + Relation rel; + List *gen_cols; + + /* + * As in rewriteRuleAction, build entries for generated columns so + * that new.gen_col in the rule qualification can be rewritten + * correctly. 
+ */ + rel = relation_open(rte->relid, NoLock); + gen_cols = get_generated_columns(rel, PRS2_NEW_VARNO); + relation_close(rel, NoLock); + + /* + * The generated column expressions refer to new.attribute, so they + * must be rewritten before they can be used as replacements. + */ + gen_cols = (List *) + ReplaceVarsFromTargetList((Node *) gen_cols, + PRS2_NEW_VARNO, + 0, + rte, + parsetree->targetList, + (event == CMD_UPDATE) ? + REPLACEVARS_CHANGE_VARNO : + REPLACEVARS_SUBSTITUTE_NULL, + rt_index, + &parsetree->hasSubLinks); + new_qual = ReplaceVarsFromTargetList(new_qual, PRS2_NEW_VARNO, 0, - rt_fetch(rt_index, - parsetree->rtable), - parsetree->targetList, + rte, + list_concat(gen_cols, + parsetree->targetList), (event == CMD_UPDATE) ? REPLACEVARS_CHANGE_VARNO : REPLACEVARS_SUBSTITUTE_NULL, rt_index, &parsetree->hasSubLinks); + } /* And attach the fixed qual */ AddInvertedQual(parsetree, new_qual); @@ -3673,9 +3741,13 @@ rewriteTargetView(Query *parsetree, Relation view) * orig_rt_length is the length of the originating query's rtable, for product * queries created by fireRules(), and 0 otherwise. This is used to skip any * already-processed VALUES RTEs from the original query. + * + * num_ctes_processed is the number of CTEs at the end of the query's cteList + * that have already been rewritten, and must not be rewritten again. */ static List * -RewriteQuery(Query *parsetree, List *rewrite_events, int orig_rt_length) +RewriteQuery(Query *parsetree, List *rewrite_events, int orig_rt_length, + int num_ctes_processed) { CmdType event = parsetree->commandType; bool instead = false; @@ -3689,17 +3761,29 @@ RewriteQuery(Query *parsetree, List *rewrite_events, int orig_rt_length) * First, recursively process any insert/update/delete statements in WITH * clauses. (We have to do this first because the WITH clauses may get * copied into rule actions below.) + * + * Any new WITH clauses from rule actions are processed when we recurse + * into product queries below. 
However, when recursing, we must take care + * to avoid rewriting a CTE query more than once (because expanding + * generated columns in the targetlist more than once would fail). Since + * new CTEs from product queries are added to the start of the list (see + * rewriteRuleAction), we just skip the last num_ctes_processed items. */ foreach(lc1, parsetree->cteList) { CommonTableExpr *cte = lfirst_node(CommonTableExpr, lc1); Query *ctequery = castNode(Query, cte->ctequery); + int i = foreach_current_index(lc1); List *newstuff; + /* Skip already-processed CTEs at the end of the list */ + if (i >= list_length(parsetree->cteList) - num_ctes_processed) + break; + if (ctequery->commandType == CMD_SELECT) continue; - newstuff = RewriteQuery(ctequery, rewrite_events, 0); + newstuff = RewriteQuery(ctequery, rewrite_events, 0, 0); /* * Currently we can only handle unconditional, single-statement DO @@ -3758,6 +3842,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events, int orig_rt_length) errmsg("multi-statement DO INSTEAD rules are not supported for data-modifying statements in WITH"))); } } + num_ctes_processed = list_length(parsetree->cteList); /* * If the statement is an insert, update, delete, or merge, adjust its @@ -4120,7 +4205,8 @@ RewriteQuery(Query *parsetree, List *rewrite_events, int orig_rt_length) newstuff = RewriteQuery(pt, rewrite_events, pt == parsetree ? orig_rt_length : - product_orig_rt_length); + product_orig_rt_length, + num_ctes_processed); rewritten = list_concat(rewritten, newstuff); } @@ -4242,6 +4328,65 @@ RewriteQuery(Query *parsetree, List *rewrite_events, int orig_rt_length) /* + * Get a table's generated columns + * + * Returns a list of TargetEntry, one for each generated column, containing + * the attribute numbers and generation expressions. 
+ */ +static List * +get_generated_columns(Relation rel, int rt_index) +{ + List *gen_cols = NIL; + TupleDesc tupdesc; + + tupdesc = RelationGetDescr(rel); + if (tupdesc->constr && tupdesc->constr->has_generated_stored) + { + for (int i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + + if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED) + { + Node *defexpr; + TargetEntry *te; + Oid attcollid; + + defexpr = build_column_default(rel, i + 1); + if (defexpr == NULL) + elog(ERROR, "no generation expression found for column number %d of table \"%s\"", + i + 1, RelationGetRelationName(rel)); + + /* + * If the column definition has a collation and it is + * different from the collation of the generation expression, + * put a COLLATE clause around the expression. + */ + attcollid = attr->attcollation; + if (attcollid && attcollid != exprCollation(defexpr)) + { + CollateExpr *ce = makeNode(CollateExpr); + + ce->arg = (Expr *) defexpr; + ce->collOid = attcollid; + ce->location = -1; + + defexpr = (Node *) ce; + } + + ChangeVarNodes(defexpr, 1, rt_index, 0); + + te = makeTargetEntry((Expr *) defexpr, i + 1, 0, false); + gen_cols = lappend(gen_cols, te); + } + } + } + + return gen_cols; +} + + +/* * QueryRewrite - * Primary entry point to the query rewriter. 
* Rewrite one query via query rewrite system, possibly returning 0 @@ -4272,7 +4417,7 @@ QueryRewrite(Query *parsetree) * * Apply all non-SELECT rules possibly getting 0 or many queries */ - querylist = RewriteQuery(parsetree, NIL, 0); + querylist = RewriteQuery(parsetree, NIL, 0, 0); /* * Step 2 diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/statistics/extended_stats.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/statistics/extended_stats.c index 1e4ba8d6e16..aeba6ef896a 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/statistics/extended_stats.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/statistics/extended_stats.c @@ -764,6 +764,16 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs, stats[i] = examine_attribute(expr); /* + * If the expression has been found as non-analyzable, give up. We + * will not be able to build extended stats with it. + */ + if (stats[i] == NULL) + { + pfree(stats); + return NULL; + } + + /* * XXX We need tuple descriptor later, and we just grab it from * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded * examine_attribute does not set that, so just grab it from the first @@ -2425,6 +2435,9 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs) /* * Loads pg_statistic record from expression statistics for expression * identified by the supplied index. + * + * Returns the pg_statistic record found, or NULL if there is no statistics + * data to use. */ HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx) @@ -2453,6 +2466,13 @@ statext_expressions_load(Oid stxoid, bool inh, int idx) deconstruct_expanded_array(eah); + if (eah->dnulls && eah->dnulls[idx]) + { + /* No data found for this expression, give up. 
*/ + ReleaseSysCache(htup); + return NULL; + } + td = DatumGetHeapTupleHeader(eah->dvalues[idx]); /* Build a temporary HeapTuple control structure */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/buffer/bufmgr.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/buffer/bufmgr.c index 3202b33b10f..1eb29b03be5 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/buffer/bufmgr.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/buffer/bufmgr.c @@ -37,6 +37,9 @@ #include "access/xloginsert.h" #include "access/xlogutils.h" #include "catalog/catalog.h" +#ifdef USE_ASSERT_CHECKING +#include "catalog/pg_tablespace_d.h" +#endif #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "executor/instrument.h" @@ -498,6 +501,10 @@ static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, ForkNumber forkNum, bool permanent); static void AtProcExit_Buffers(int code, Datum arg); static void CheckForBufferLeaks(void); +#ifdef USE_ASSERT_CHECKING +static void AssertNotCatalogBufferLock(LWLock *lock, LWLockMode mode, + void *unused_context); +#endif static int rlocator_comparator(const void *p1, const void *p2); static inline int buffertag_comparator(const BufferTag *ba, const BufferTag *bb); static inline int ckpt_buforder_comparator(const CkptSortItem *a, const CkptSortItem *b); @@ -3225,6 +3232,66 @@ CheckForBufferLeaks(void) #endif } +#ifdef USE_ASSERT_CHECKING +/* + * Check for exclusive-locked catalog buffers. This is the core of + * AssertCouldGetRelation(). + * + * A backend would self-deadlock on LWLocks if the catalog scan read the + * exclusive-locked buffer. The main threat is exclusive-locked buffers of + * catalogs used in relcache, because a catcache search on any catalog may + * build that catalog's relcache entry. We don't have an inventory of + * catalogs relcache uses, so just check buffers of most catalogs. 
+ * + * It's better to minimize waits while holding an exclusive buffer lock, so it + * would be nice to broaden this check not to be catalog-specific. However, + * bttextcmp() accesses pg_collation, and non-core opclasses might similarly + * read tables. That is deadlock-free as long as there's no loop in the + * dependency graph: modifying table A may cause an opclass to read table B, + * but it must not cause a read of table A. + */ +void +AssertBufferLocksPermitCatalogRead(void) +{ + ForEachLWLockHeldByMe(AssertNotCatalogBufferLock, NULL); +} + +static void +AssertNotCatalogBufferLock(LWLock *lock, LWLockMode mode, + void *unused_context) +{ + BufferDesc *bufHdr; + BufferTag tag; + Oid relid; + + if (mode != LW_EXCLUSIVE) + return; + + if (!((BufferDescPadded *) lock > BufferDescriptors && + (BufferDescPadded *) lock < BufferDescriptors + NBuffers)) + return; /* not a buffer lock */ + + bufHdr = (BufferDesc *) + ((char *) lock - offsetof(BufferDesc, content_lock)); + tag = bufHdr->tag; + + /* + * This relNumber==relid assumption holds until a catalog experiences + * VACUUM FULL or similar. After a command like that, relNumber will be + * in the normal (non-catalog) range, and we lose the ability to detect + * hazardous access to that catalog. Calling RelidByRelfilenumber() would + * close that gap, but RelidByRelfilenumber() might then deadlock with a + * held lock. + */ + relid = tag.relNumber; + + Assert(!IsCatalogRelationOid(relid)); + /* Shared rels are always catalogs: detect even after VACUUM FULL. 
*/ + Assert(tag.spcOid != GLOBALTABLESPACE_OID); +} +#endif + + /* * Helper routine to issue warnings when a buffer is unexpectedly pinned */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/file/fd.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/file/fd.c index ec2520b80aa..5d1f045a38c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/file/fd.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/file/fd.c @@ -166,6 +166,9 @@ __thread bool data_sync_retry = false; /* How SyncDataDirectory() should do its job. */ __thread int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC; +/* How data files should be bulk-extended with zeros. */ +__thread int file_extend_method = DEFAULT_FILE_EXTEND_METHOD; + /* Which kinds of files should be opened with PG_O_DIRECT. */ __thread int io_direct_flags; @@ -519,7 +522,7 @@ retry: { int elevel; - if (rc == EINTR) + if (errno == EINTR) goto retry; /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/freespace/freespace.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/freespace/freespace.c index e27e8ca8ca4..152f3ba0ab9 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/freespace/freespace.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/freespace/freespace.c @@ -231,8 +231,18 @@ XLogRecordPageWithFreeSpace(RelFileLocator rlocator, BlockNumber heapBlk, if (PageIsNew(page)) PageInit(page, BLCKSZ, 0); + /* + * Changes to FSM are usually marked as changed using MarkBufferDirtyHint; + * however, during recovery, it does nothing if checksums are enabled. It + * is assumed that the page should not be dirtied during recovery while + * modifying hints to prevent torn pages, since no new WAL data can be + * generated at this point to store FPI. This is not relevant to the FSM + * case, as its blocks are zeroed when a checksum mismatch occurs. 
So, we + * need to use regular MarkBufferDirty here to mark the FSM block as + * modified during recovery, otherwise changes to the FSM may be lost. + */ if (fsm_set_avail(page, slot, new_cat)) - MarkBufferDirtyHint(buf, false); + MarkBufferDirty(buf); UnlockReleaseBuffer(buf); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/ipc.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/ipc.c index a53a5f11938..5a5e6473f28 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/ipc.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/ipc.c @@ -29,6 +29,7 @@ #endif #include "storage/dsm.h" #include "storage/ipc.h" +#include "storage/lwlock.h" #include "tcop/tcopprot.h" @@ -230,12 +231,18 @@ shmem_exit(int code) shmem_exit_inprogress = true; /* + * Release any LWLocks we might be holding before callbacks run. This + * prevents accessing locks in detached DSM segments and allows callbacks + * to acquire new locks. + */ + LWLockReleaseAll(); + + /* * Call before_shmem_exit callbacks. * * These should be things that need most of the system to still be up and * working, such as cleanup of temp relations, which requires catalog - * access; or things that need to be completed because later cleanup steps - * depend on them, such as releasing lwlocks. + * access. 
*/ elog(DEBUG3, "shmem_exit(%d): %d before_shmem_exit callbacks to make", code, before_shmem_exit_index); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/shmem.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/shmem.c index 91b78321429..661c448ad36 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/shmem.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/ipc/shmem.c @@ -495,42 +495,6 @@ ShmemInitStruct(const char *name, Size size, bool *foundPtr) } -/* - * Add two Size values, checking for overflow - */ -Size -add_size(Size s1, Size s2) -{ - Size result; - - result = s1 + s2; - /* We are assuming Size is an unsigned type here... */ - if (result < s1 || result < s2) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("requested shared memory size overflows size_t"))); - return result; -} - -/* - * Multiply two Size values, checking for overflow - */ -Size -mul_size(Size s1, Size s2) -{ - Size result; - - if (s1 == 0 || s2 == 0) - return 0; - result = s1 * s2; - /* We are assuming Size is an unsigned type here... 
*/ - if (result / s2 != s1) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("requested shared memory size overflows size_t"))); - return result; -} - /* SQL SRF showing allocated shared memory */ Datum pg_get_shmem_allocations(PG_FUNCTION_ARGS) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/lwlock.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/lwlock.c index fc61b184c73..1c0463a0543 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/lwlock.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/lwlock.c @@ -1019,7 +1019,7 @@ LWLockWakeup(LWLock *lock) else desired_state &= ~LW_FLAG_RELEASE_OK; - if (proclist_is_empty(&wakeup)) + if (proclist_is_empty(&lock->waiters)) desired_state &= ~LW_FLAG_HAS_WAITERS; desired_state &= ~LW_FLAG_LOCKED; /* release lock */ @@ -1897,6 +1897,10 @@ LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val) * unchanged by this operation. This is necessary since InterruptHoldoffCount * has been set to an appropriate level earlier in error recovery. We could * decrement it below zero if we allow it to drop for each released lock! + * + * Note that this function must be safe to call even before the LWLock + * subsystem has been initialized (e.g., during early startup failures). + * In that case, num_held_lwlocks will be 0 and we do nothing. */ void LWLockReleaseAll(void) @@ -1907,10 +1911,27 @@ LWLockReleaseAll(void) LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock); } + + Assert(num_held_lwlocks == 0); } /* + * ForEachLWLockHeldByMe - run a callback for each held lock + * + * This is meant as debug support only. 
+ */ +void +ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *), + void *context) +{ + int i; + + for (i = 0; i < num_held_lwlocks; i++) + callback(held_lwlocks[i].lock, held_lwlocks[i].mode, context); +} + +/* * LWLockHeldByMe - test whether my process holds a lock in any mode * * This is meant as debug support only. diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/proc.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/proc.c index 8081aa74b93..58cfa516285 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/proc.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/lmgr/proc.c @@ -1630,7 +1630,7 @@ ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus) proc->waitLock = NULL; proc->waitProcLock = NULL; proc->waitStatus = waitStatus; - pg_atomic_write_u64(&MyProc->waitStart, 0); + pg_atomic_write_u64(&proc->waitStart, 0); /* And awaken it */ SetLatch(&proc->procLatch); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/md.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/md.c index 6bc4e78396d..592ffdc32d6 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/md.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/md.c @@ -577,13 +577,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, * that decision should be made though? For now just use a cutoff of * 8, anything between 4 and 8 worked OK in some local testing. 
*/ - if (numblocks > 8) + if (numblocks > 8 && + file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS) { - int ret; + int ret = 0; - ret = FileFallocate(v->mdfd_vfd, - seekpos, (off_t) BLCKSZ * numblocks, - WAIT_EVENT_DATA_FILE_EXTEND); +#ifdef HAVE_POSIX_FALLOCATE + if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE) + { + ret = FileFallocate(v->mdfd_vfd, + seekpos, (off_t) BLCKSZ * numblocks, + WAIT_EVENT_DATA_FILE_EXTEND); + } + else +#endif + { + elog(ERROR, "unsupported file_extend_method: %d", + file_extend_method); + } if (ret != 0) { ereport(ERROR, @@ -997,6 +1008,9 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * functions for this relation or handled interrupts in between. This makes * sure we have opened all active segments, so that truncate loop will get * them all! + * + * If nblocks > curnblk, the request is ignored when we are InRecovery, + * otherwise, an error is raised. */ void mdtruncate(SMgrRelation reln, ForkNumber forknum, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/smgr.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/smgr.c index 906b4f07eb8..be1aff2e8df 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/smgr.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/storage/smgr/smgr.c @@ -713,11 +713,20 @@ smgrtruncate2(SMgrRelation reln, ForkNumber *forknum, int nforks, /* * We might as well update the local smgr_cached_nblocks values. The * smgr cache inval message that this function sent will cause other - * backends to invalidate their copies of smgr_fsm_nblocks and - * smgr_vm_nblocks, and these ones too at the next command boundary. - * But these ensure they aren't outright wrong until then. + * backends to invalidate their copies of smgr_cached_nblocks, and + * these ones too at the next command boundary. But ensure they aren't + * outright wrong until then. 
+ * + * We can have nblocks > old_nblocks when a relation was truncated + * multiple times, a replica applied all the truncations, and later + * restarts from a restartpoint located before the truncations. The + * relation on disk will be the size of the last truncate. When + * replaying the first truncate, we will have nblocks > current size. + * In such cases, smgr_truncate does nothing, so set the cached size + * to the old size rather than the requested size. */ - reln->smgr_cached_nblocks[forknum[i]] = nblocks[i]; + reln->smgr_cached_nblocks[forknum[i]] = + nblocks[i] > old_nblocks[i] ? old_nblocks[i] : nblocks[i]; } } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tcop/utility.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tcop/utility.c index 97df8227ca1..4b0ed7f4993 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tcop/utility.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tcop/utility.c @@ -1884,7 +1884,7 @@ ProcessUtilitySlow(ParseState *pstate, if (!IsA(rel, RangeVar)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("only a single relation is allowed in CREATE STATISTICS"))); + errmsg("CREATE STATISTICS only supports relation names in the FROM clause"))); /* * CREATE STATISTICS will influence future execution plans @@ -1902,7 +1902,7 @@ ProcessUtilitySlow(ParseState *pstate, /* Run parse analysis ... 
*/ stmt = transformStatsStmt(relid, stmt, queryString); - address = CreateStatistics(stmt); + address = CreateStatistics(stmt, true); } break; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_synonym.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_synonym.c index c7cf7c04b60..fe7ca30ce97 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_synonym.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_synonym.c @@ -47,8 +47,8 @@ findwrd(char *in, char **end, uint16 *flags) char *lastchar; /* Skip leading spaces */ - while (*in && t_isspace(in)) - in += pg_mblen(in); + while (*in && t_isspace_cstr(in)) + in += pg_mblen_cstr(in); /* Return NULL on empty lines */ if (*in == '\0') @@ -60,10 +60,10 @@ findwrd(char *in, char **end, uint16 *flags) lastchar = start = in; /* Find end of word */ - while (*in && !t_isspace(in)) + while (*in && !t_isspace_cstr(in)) { lastchar = in; - in += pg_mblen(in); + in += pg_mblen_cstr(in); } if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_thesaurus.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_thesaurus.c index 5f38354188f..a18d1851ddd 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_thesaurus.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/dict_thesaurus.c @@ -190,8 +190,8 @@ thesaurusRead(const char *filename, DictThesaurus *d) ptr = line; /* is it a comment? 
*/ - while (*ptr && t_isspace(ptr)) - ptr += pg_mblen(ptr); + while (*ptr && t_isspace_cstr(ptr)) + ptr += pg_mblen_cstr(ptr); if (t_iseq(ptr, '#') || *ptr == '\0' || t_iseq(ptr, '\n') || t_iseq(ptr, '\r')) @@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) errmsg("unexpected delimiter"))); state = TR_WAITSUBS; } - else if (!t_isspace(ptr)) + else if (!t_isspace_cstr(ptr)) { beginwrd = ptr; state = TR_INLEX; @@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITSUBS; } - else if (t_isspace(ptr)) + else if (t_isspace_cstr(ptr)) { newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITLEX; @@ -237,15 +237,15 @@ thesaurusRead(const char *filename, DictThesaurus *d) { useasis = true; state = TR_INSUBS; - beginwrd = ptr + pg_mblen(ptr); + beginwrd = ptr + pg_mblen_cstr(ptr); } else if (t_iseq(ptr, '\\')) { useasis = false; state = TR_INSUBS; - beginwrd = ptr + pg_mblen(ptr); + beginwrd = ptr + pg_mblen_cstr(ptr); } - else if (!t_isspace(ptr)) + else if (!t_isspace_cstr(ptr)) { useasis = false; beginwrd = ptr; @@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) } else if (state == TR_INSUBS) { - if (t_isspace(ptr)) + if (t_isspace_cstr(ptr)) { if (ptr == beginwrd) ereport(ERROR, @@ -267,7 +267,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) else elog(ERROR, "unrecognized thesaurus state: %d", state); - ptr += pg_mblen(ptr); + ptr += pg_mblen_cstr(ptr); } if (state == TR_INSUBS) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/regis.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/regis.c index 0c74c6d0c1c..ee5bc378350 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/regis.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/regis.c @@ -37,7 +37,7 @@ RS_isRegis(const char *str) { if (state == RS_IN_WAIT) { - if (t_isalpha(c)) + 
if (t_isalpha_cstr(c)) /* okay */ ; else if (t_iseq(c, '[')) state = RS_IN_ONEOF; @@ -48,14 +48,14 @@ RS_isRegis(const char *str) { if (t_iseq(c, '^')) state = RS_IN_NONEOF; - else if (t_isalpha(c)) + else if (t_isalpha_cstr(c)) state = RS_IN_ONEOF_IN; else return false; } else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) { - if (t_isalpha(c)) + if (t_isalpha_cstr(c)) /* okay */ ; else if (t_iseq(c, ']')) state = RS_IN_WAIT; @@ -64,7 +64,7 @@ RS_isRegis(const char *str) } else elog(ERROR, "internal error in RS_isRegis: state %d", state); - c += pg_mblen(c); + c += pg_mblen_cstr(c); } return (state == RS_IN_WAIT); @@ -96,15 +96,14 @@ RS_compile(Regis *r, bool issuffix, const char *str) { if (state == RS_IN_WAIT) { - if (t_isalpha(c)) + if (t_isalpha_cstr(c)) { if (ptr) ptr = newRegisNode(ptr, len); else ptr = r->node = newRegisNode(NULL, len); - COPYCHAR(ptr->data, c); ptr->type = RSF_ONEOF; - ptr->len = pg_mblen(c); + ptr->len = ts_copychar_cstr(ptr->data, c); } else if (t_iseq(c, '[')) { @@ -125,10 +124,9 @@ RS_compile(Regis *r, bool issuffix, const char *str) ptr->type = RSF_NONEOF; state = RS_IN_NONEOF; } - else if (t_isalpha(c)) + else if (t_isalpha_cstr(c)) { - COPYCHAR(ptr->data, c); - ptr->len = pg_mblen(c); + ptr->len = ts_copychar_cstr(ptr->data, c); state = RS_IN_ONEOF_IN; } else /* shouldn't get here */ @@ -136,11 +134,8 @@ RS_compile(Regis *r, bool issuffix, const char *str) } else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) { - if (t_isalpha(c)) - { - COPYCHAR(ptr->data + ptr->len, c); - ptr->len += pg_mblen(c); - } + if (t_isalpha_cstr(c)) + ptr->len += ts_copychar_cstr(ptr->data + ptr->len, c); else if (t_iseq(c, ']')) state = RS_IN_WAIT; else /* shouldn't get here */ @@ -148,7 +143,7 @@ RS_compile(Regis *r, bool issuffix, const char *str) } else elog(ERROR, "internal error in RS_compile: state %d", state); - c += pg_mblen(c); + c += pg_mblen_cstr(c); } if (state != RS_IN_WAIT) /* shouldn't get here */ @@ -187,10 +182,10 @@ 
mb_strchr(char *str, char *c) char *ptr = str; bool res = false; - clen = pg_mblen(c); + clen = pg_mblen_cstr(c); while (*ptr && !res) { - plen = pg_mblen(ptr); + plen = pg_mblen_cstr(ptr); if (plen == clen) { i = plen; @@ -219,7 +214,7 @@ RS_execute(Regis *r, char *str) while (*c) { len++; - c += pg_mblen(c); + c += pg_mblen_cstr(c); } if (len < r->nchar) @@ -230,7 +225,7 @@ RS_execute(Regis *r, char *str) { len -= r->nchar; while (len-- > 0) - c += pg_mblen(c); + c += pg_mblen_cstr(c); } @@ -250,7 +245,7 @@ RS_execute(Regis *r, char *str) elog(ERROR, "unrecognized regis node type: %d", ptr->type); } ptr = ptr->next; - c += pg_mblen(c); + c += pg_mblen_cstr(c); } return true; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/spell.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/spell.c index 932c6b460b0..436e37dca4d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/spell.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/spell.c @@ -232,7 +232,7 @@ findchar(char *str, int c) { if (t_iseq(str, c)) return str; - str += pg_mblen(str); + str += pg_mblen_cstr(str); } return NULL; @@ -245,7 +245,7 @@ findchar2(char *str, int c1, int c2) { if (t_iseq(str, c1) || t_iseq(str, c2)) return str; - str += pg_mblen(str); + str += pg_mblen_cstr(str); } return NULL; @@ -352,6 +352,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) char *next, *sbuf = *sflagset; int maxstep; + int clen; bool stop = false; bool met_comma = false; @@ -363,11 +364,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) { case FM_LONG: case FM_CHAR: - COPYCHAR(sflag, *sflagset); - sflag += pg_mblen(*sflagset); + clen = ts_copychar_cstr(sflag, *sflagset); + sflag += clen; /* Go to start of the next flag */ - *sflagset += pg_mblen(*sflagset); + *sflagset += clen; /* Check if we get all characters of flag */ maxstep--; @@ -391,7 +392,7 @@ 
getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) *sflagset = next; while (**sflagset) { - if (t_isdigit(*sflagset)) + if (t_isdigit_cstr(*sflagset)) { if (!met_comma) ereport(ERROR, @@ -409,7 +410,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) *sflagset))); met_comma = true; } - else if (!t_isspace(*sflagset)) + else if (!t_isspace_cstr(*sflagset)) { ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), @@ -417,7 +418,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) *sflagset))); } - *sflagset += pg_mblen(*sflagset); + *sflagset += pg_mblen_cstr(*sflagset); } stop = true; break; @@ -543,7 +544,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename) while (*s) { /* we allow only single encoded flags for faster works */ - if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s)) + if (pg_mblen_cstr(s) == 1 && t_isprint_unbounded(s) && !t_isspace_unbounded(s)) s++; else { @@ -559,12 +560,12 @@ NIImportDictionary(IspellDict *Conf, const char *filename) s = line; while (*s) { - if (t_isspace(s)) + if (t_isspace_cstr(s)) { *s = '\0'; break; } - s += pg_mblen(s); + s += pg_mblen_cstr(s); } pstr = lowerstr_ctx(Conf, line); @@ -796,17 +797,17 @@ get_nextfield(char **str, char *next) while (**str) { + int clen = pg_mblen_cstr(*str); + if (state == PAE_WAIT_MASK) { if (t_iseq(*str, '#')) return false; - else if (!t_isspace(*str)) + else if (!t_isspace_cstr(*str)) { - int clen = pg_mblen(*str); - if (clen < avail) { - COPYCHAR(next, *str); + ts_copychar_with_len(next, *str, clen); next += clen; avail -= clen; } @@ -815,24 +816,22 @@ get_nextfield(char **str, char *next) } else /* state == PAE_INMASK */ { - if (t_isspace(*str)) + if (t_isspace_cstr(*str)) { *next = '\0'; return true; } else { - int clen = pg_mblen(*str); - if (clen < avail) { - COPYCHAR(next, *str); + ts_copychar_with_len(next, *str, clen); next += clen; avail -= clen; } } } - *str += pg_mblen(*str); + *str += clen; } *next = '\0'; @@ 
-909,27 +908,35 @@ parse_ooaffentry(char *str, char *type, char *flag, char *find, * * An .affix file entry has the following format: * <mask> > [-<find>,]<replace> + * + * Output buffers mask, find, repl must be of length BUFSIZ; + * we truncate the input to fit. */ static bool -parse_affentry(char *str, char *mask, char *find, char *repl) +parse_affentry(const char *str, char *mask, char *find, char *repl) { int state = PAE_WAIT_MASK; char *pmask = mask, *pfind = find, *prepl = repl; + char *emask = mask + BUFSIZ; + char *efind = find + BUFSIZ; + char *erepl = repl + BUFSIZ; *mask = *find = *repl = '\0'; while (*str) { + int clen = pg_mblen_cstr(str); + if (state == PAE_WAIT_MASK) { if (t_iseq(str, '#')) return false; - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) { - COPYCHAR(pmask, str); - pmask += pg_mblen(str); + if (pmask < emask - clen) + pmask += ts_copychar_with_len(pmask, str, clen); state = PAE_INMASK; } } @@ -940,10 +947,10 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *pmask = '\0'; state = PAE_WAIT_FIND; } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) { - COPYCHAR(pmask, str); - pmask += pg_mblen(str); + if (pmask < emask - clen) + pmask += ts_copychar_with_len(pmask, str, clen); } } else if (state == PAE_WAIT_FIND) @@ -952,13 +959,13 @@ parse_affentry(char *str, char *mask, char *find, char *repl) { state = PAE_INFIND; } - else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ ) + else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ ) { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); + if (prepl < erepl - clen) + prepl += ts_copychar_with_len(prepl, str, clen); state = PAE_INREPL; } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -970,12 +977,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *pfind = '\0'; state = PAE_WAIT_REPL; } - else if (t_isalpha(str)) + else if 
(t_isalpha_cstr(str)) { - COPYCHAR(pfind, str); - pfind += pg_mblen(str); + if (pfind < efind - clen) + pfind += ts_copychar_with_len(pfind, str, clen); } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -986,13 +993,13 @@ parse_affentry(char *str, char *mask, char *find, char *repl) { break; /* void repl */ } - else if (t_isalpha(str)) + else if (t_isalpha_cstr(str)) { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); + if (prepl < erepl - clen) + prepl += ts_copychar_with_len(prepl, str, clen); state = PAE_INREPL; } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -1004,12 +1011,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *prepl = '\0'; break; } - else if (t_isalpha(str)) + else if (t_isalpha_cstr(str)) { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); + if (prepl < erepl - clen) + prepl += ts_copychar_with_len(prepl, str, clen); } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -1017,7 +1024,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) else elog(ERROR, "unrecognized state in parse_affentry: %d", state); - str += pg_mblen(str); + str += clen; } *pmask = *pfind = *prepl = '\0'; @@ -1065,15 +1072,14 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, * val: affix parameter. 
*/ static void -addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) +addCompoundAffixFlagValue(IspellDict *Conf, const char *s, uint32 val) { CompoundAffixFlag *newValue; char sbuf[BUFSIZ]; char *sflag; - int clen; - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); if (!*s) ereport(ERROR, @@ -1082,11 +1088,13 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) /* Get flag without \n */ sflag = sbuf; - while (*s && !t_isspace(s) && *s != '\n') + while (*s && !t_isspace_cstr(s) && *s != '\n') { - clen = pg_mblen(s); - COPYCHAR(sflag, s); - sflag += clen; + int clen = pg_mblen_cstr(s); + + /* Truncate the input to fit in BUFSIZ */ + if (sflag < sbuf + BUFSIZ - clen) + sflag += ts_copychar_with_len(sflag, s, clen); s += clen; } *sflag = '\0'; @@ -1228,7 +1236,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) while ((recoded = tsearch_readline(&trst)) != NULL) { - if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#')) { pfree(recoded); continue; @@ -1265,8 +1273,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) { char *s = recoded + strlen("FLAG"); - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); if (*s) { @@ -1301,7 +1309,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) { int fields_read; - if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#')) goto nextline; fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask); @@ -1464,12 +1472,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s = findchar2(recoded, 'l', 'L'); if (s) { - while (*s && !t_isspace(s)) - s += pg_mblen(s); - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && !t_isspace_cstr(s)) + s += pg_mblen_cstr(s); + while 
(*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); - if (*s && pg_mblen(s) == 1) + if (*s && pg_mblen_cstr(s) == 1) { addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG); Conf->usecompound = true; @@ -1497,8 +1505,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s = recoded + 4; /* we need non-lowercased string */ flagflags = 0; - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); if (*s == '*') { @@ -1519,14 +1527,13 @@ NIImportAffixes(IspellDict *Conf, const char *filename) * be followed by EOL, whitespace, or ':'. Otherwise this is a * new-format flag command. */ - if (*s && pg_mblen(s) == 1) + if (*s && pg_mblen_cstr(s) == 1) { - COPYCHAR(flag, s); + flag[0] = *s++; flag[1] = '\0'; - s++; if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' || - t_isspace(s)) + t_isspace_cstr(s)) { oldformat = true; goto nextline; @@ -1750,7 +1757,7 @@ NISortDictionary(IspellDict *Conf) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", Conf->Spell[i]->p.flag))); - if (*end != '\0' && !t_isdigit(end) && !t_isspace(end)) + if (*end != '\0' && !t_isdigit_cstr(end) && !t_isspace_cstr(end)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", @@ -2067,9 +2074,32 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) return NULL; } +/* + * Checks to see if affix applies to word, transforms word if so. + * The transformation consists of replacing Affix->replen leading or + * trailing bytes with the Affix->find string. 
+ * + * word: input word + * len: length of input word + * Affix: affix to consider + * flagflags: context flags showing whether we are handling a compound word + * newword: output buffer (MUST be of length 2 * MAXNORMLEN) + * baselen: input/output argument + * + * If baselen isn't NULL, then *baselen is used to return the length of + * the non-changed part of the word when applying a suffix, and is used + * to detect whether the input contained only a prefix and suffix when + * later applying a prefix. + * + * Returns newword on success, or NULL if the affix can't be applied. + * On success, the modified word is stored into newword. + */ static char * CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen) { + size_t keeplen, + findlen; + /* * Check compound allow flags */ @@ -2103,14 +2133,26 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww } /* + * Protect against output buffer overrun (len < Affix->replen would be + * caller error, but check anyway) + */ + Assert(len == strlen(word)); + if (len < Affix->replen) + return NULL; + keeplen = len - Affix->replen; /* how much of word we will keep */ + findlen = strlen(Affix->find); + if (keeplen + findlen >= 2 * MAXNORMLEN) + return NULL; + + /* * make replace pattern of affix */ if (Affix->type == FF_SUFFIX) { - strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); + memcpy(newword, word, keeplen); + strcpy(newword + keeplen, Affix->find); if (baselen) /* store length of non-changed part of word */ - *baselen = len - Affix->replen; + *baselen = keeplen; } else { @@ -2118,10 +2160,10 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww * if prefix is an all non-changed part's length then all word * contains only prefix and suffix, so out */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) + if (baselen && *baselen + findlen <= Affix->replen) return NULL; - 
strcpy(newword, Affix->find); - strcat(newword, word + Affix->replen); + memcpy(newword, Affix->find, findlen); + strcpy(newword + findlen, word + Affix->replen); } /* @@ -2315,7 +2357,7 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) } else { - char *affbegin; + const char *affbegin; while ((*ptr)->affix) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_locale.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_locale.c index f1150d30b71..4a01b65d577 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_locale.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_locale.c @@ -33,81 +33,44 @@ static void tsearch_readline_callback(void *arg); */ #define WC_BUF_LEN 3 -int -t_isdigit(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isdigit(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswdigit((wint_t) character[0]); -} - -int -t_isspace(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isspace(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswspace((wint_t) character[0]); -} - -int -t_isalpha(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isalpha(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswalpha((wint_t) character[0]); -} - -int -t_isalnum(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isalnum(TOUCHAR(ptr)); - - char2wchar(character, 
WC_BUF_LEN, ptr, clen, mylocale); - - return iswalnum((wint_t) character[0]); -} - -int -t_isprint(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isprint(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswprint((wint_t) character[0]); +#define GENERATE_T_ISCLASS_DEF(character_class) \ +/* mblen shall be that of the first character */ \ +int \ +t_is##character_class##_with_len(const char *ptr, int mblen) \ +{ \ + int clen = pg_mblen_with_len(ptr, mblen); \ + wchar_t character[WC_BUF_LEN]; \ + pg_locale_t mylocale = 0; /* TODO */ \ + if (clen == 1 || database_ctype_is_c) \ + return is##character_class(TOUCHAR(ptr)); \ + char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); \ + return isw##character_class((wint_t) character[0]); \ +} \ +\ +/* ptr shall point to a NUL-terminated string */ \ +int \ +t_is##character_class##_cstr(const char *ptr) \ +{ \ + return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \ +} \ +/* ptr shall point to a string with pre-validated encoding */ \ +int \ +t_is##character_class##_unbounded(const char *ptr) \ +{ \ + return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \ +} \ +/* historical name for _unbounded */ \ +int \ +t_is##character_class(const char *ptr) \ +{ \ + return t_is##character_class##_unbounded(ptr); \ } +GENERATE_T_ISCLASS_DEF(alnum) +GENERATE_T_ISCLASS_DEF(alpha) +GENERATE_T_ISCLASS_DEF(digit) +GENERATE_T_ISCLASS_DEF(print) +GENERATE_T_ISCLASS_DEF(space) /* * Set up to read a file using tsearch_readline(). 
This facility is diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_selfuncs.c index 92afc67a5c4..511ba6be7ff 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_selfuncs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_selfuncs.c @@ -109,12 +109,14 @@ tsmatchsel(PG_FUNCTION_ARGS) * OK, there's a Var and a Const we're dealing with here. We need the * Const to be a TSQuery, else we can't do anything useful. We have to * check this because the Var might be the TSQuery not the TSVector. + * + * Also check that the Var really is a TSVector, in case this estimator is + * mistakenly attached to some other operator. */ - if (((Const *) other)->consttype == TSQUERYOID) + if (((Const *) other)->consttype == TSQUERYOID && + vardata.vartype == TSVECTOROID) { /* tsvector @@ tsquery or the other way around */ - Assert(vardata.vartype == TSVECTOROID); - selec = tsquerysel(&vardata, ((Const *) other)->constvalue); } else diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_utils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_utils.c index 7c4c2a91123..463e5253558 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_utils.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/ts_utils.c @@ -88,8 +88,8 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *)) char *pbuf = line; /* Trim trailing space */ - while (*pbuf && !t_isspace(pbuf)) - pbuf += pg_mblen(pbuf); + while (*pbuf && !t_isspace_cstr(pbuf)) + pbuf += pg_mblen_cstr(pbuf); *pbuf = '\0'; /* Skip empty lines */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/wparser_def.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/wparser_def.c index fb80fdd63f2..05d605ade51 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/wparser_def.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/tsearch/wparser_def.c @@ -1727,7 +1727,8 @@ TParserGet(TParser *prs) prs->state->charlen = 0; else prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen : - pg_mblen(prs->str + prs->state->posbyte); + pg_mblen_range(prs->str + prs->state->posbyte, + prs->str + prs->lenstr); Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr); Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null); @@ -2626,6 +2627,9 @@ prsd_headline(PG_FUNCTION_ARGS) int max_fragments = 0; bool highlightall = false; ListCell *l; + size_t startsellen; + size_t stopsellen; + size_t fragdelimlen; /* Extract configuration option values */ prs->startsel = NULL; @@ -2715,9 +2719,24 @@ prsd_headline(PG_FUNCTION_ARGS) prs->fragdelim = pstrdup(" ... "); /* Caller will need these lengths, too */ - prs->startsellen = strlen(prs->startsel); - prs->stopsellen = strlen(prs->stopsel); - prs->fragdelimlen = strlen(prs->fragdelim); + startsellen = strlen(prs->startsel); + stopsellen = strlen(prs->stopsel); + fragdelimlen = strlen(prs->fragdelim); + if (startsellen > PG_INT16_MAX) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("value for \"%s\" is too long", "StartSel"))); + if (stopsellen > PG_INT16_MAX) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("value for \"%s\" is too long", "StopSel"))); + if (fragdelimlen > PG_INT16_MAX) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("value for \"%s\" is too long", "FragmentDelimiter"))); + prs->startsellen = startsellen; + prs->stopsellen = stopsellen; + prs->fragdelimlen = fragdelimlen; PG_RETURN_POINTER(prs); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat.c index 44e75579547..0eae608773c 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat.c @@ -1652,6 +1652,16 @@ pgstat_read_statsfile(void) header = pgstat_init_entry(key.kind, p); dshash_release_lock(pgStatLocal.shared_hash, p); + if (header == NULL) + { + /* + * It would be tempting to switch this ERROR to a + * WARNING, but it would mean that all the statistics + * are discarded when the environment fails on OOM. + */ + elog(ERROR, "could not allocate entry %d/%u/%u", + key.kind, key.dboid, key.objoid); + } if (!read_chunk(fpin, pgstat_get_entry_data(key.kind, header), diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_database.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_database.c index 616d1f06336..39fff816c4f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_database.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_database.c @@ -197,7 +197,7 @@ pgstat_report_connect(Oid dboid) pgLastSessionReportTime = MyStartTimestamp; - dbentry = pgstat_prep_database_pending(MyDatabaseId); + dbentry = pgstat_prep_database_pending(dboid); dbentry->sessions++; } @@ -212,7 +212,7 @@ pgstat_report_disconnect(Oid dboid) if (!pgstat_should_report_connstat()) return; - dbentry = pgstat_prep_database_pending(MyDatabaseId); + dbentry = pgstat_prep_database_pending(dboid); switch (pgStatSessionEndCause) { @@ -356,7 +356,7 @@ pgstat_reset_database_timestamp(Oid dboid, TimestampTz ts) PgStat_EntryRef *dbref; PgStatShared_Database *dbentry; - dbref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, MyDatabaseId, InvalidOid, + dbref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, dboid, InvalidOid, false); dbentry = (PgStatShared_Database *) dbref->shared_stats; diff --git 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_shmem.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_shmem.c index 006cdb841fd..18b2523e925 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_shmem.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/activity/pgstat_shmem.c @@ -262,6 +262,13 @@ pgstat_detach_shmem(void) * ------------------------------------------------------------ */ +/* + * Initialize entry newly-created. + * + * Returns NULL in the event of an allocation failure, so as callers can + * take cleanup actions as the entry initialized is already inserted in the + * shared hashtable. + */ PgStatShared_Common * pgstat_init_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent) @@ -284,7 +291,12 @@ pgstat_init_entry(PgStat_Kind kind, pg_atomic_init_u32(&shhashent->generation, 0); shhashent->dropped = false; - chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size); + chunk = dsa_allocate_extended(pgStatLocal.dsa, + pgstat_get_kind_info(kind)->shared_size, + DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM); + if (chunk == InvalidDsaPointer) + return NULL; + shheader = dsa_get_address(pgStatLocal.dsa, chunk); shheader->magic = 0xdeadbeef; @@ -482,6 +494,20 @@ pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create, if (!shfound) { shheader = pgstat_init_entry(kind, shhashent); + if (shheader == NULL) + { + /* + * Failed the allocation of a new entry, so clean up the + * shared hashtable before giving up. 
+ */ + dshash_delete_entry(pgStatLocal.shared_hash, shhashent); + + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("Failed while allocating entry %d/%u/%u.", + key.kind, key.dboid, key.objoid))); + } pgstat_acquire_entry_ref(entry_ref, shhashent, shheader); if (created_entry != NULL) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_userfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_userfuncs.c index 5c4fdcfba46..69e9cf13e76 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_userfuncs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/array_userfuncs.c @@ -21,6 +21,7 @@ #include "utils/datum.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/memutils.h" #include "utils/typcache.h" /* @@ -969,10 +970,11 @@ array_agg_array_combine(PG_FUNCTION_ARGS) } /* We only need to combine the two states if state2 has any items */ - else if (state2->nitems > 0) + if (state2->nitems > 0) { MemoryContext oldContext; - int reqsize = state1->nbytes + state2->nbytes; + int reqsize; + int newnitems; int i; /* @@ -995,6 +997,17 @@ array_agg_array_combine(PG_FUNCTION_ARGS) errmsg("cannot accumulate arrays of different dimensionality"))); } + /* Types should match already. */ + Assert(state1->array_type == state2->array_type); + Assert(state1->element_type == state2->element_type); + + /* Calculate new sizes, guarding against overflow. 
*/ + if (pg_add_s32_overflow(state1->nbytes, state2->nbytes, &reqsize) || + pg_add_s32_overflow(state1->nitems, state2->nitems, &newnitems)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%zu)", + MaxArraySize))); oldContext = MemoryContextSwitchTo(state1->mcontext); @@ -1009,17 +1022,16 @@ array_agg_array_combine(PG_FUNCTION_ARGS) state1->data = (char *) repalloc(state1->data, state1->abytes); } - if (state2->nullbitmap) + /* Combine the null bitmaps, if present. */ + if (state1->nullbitmap || state2->nullbitmap) { - int newnitems = state1->nitems + state2->nitems; - if (state1->nullbitmap == NULL) { /* * First input with nulls; we must retrospectively handle any * previous inputs by marking all their items non-null. */ - state1->aitems = pg_nextpower2_32(Max(256, newnitems + 1)); + state1->aitems = pg_nextpower2_32(Max(256, newnitems)); state1->nullbitmap = (bits8 *) palloc((state1->aitems + 7) / 8); array_bitmap_copy(state1->nullbitmap, 0, NULL, 0, @@ -1027,17 +1039,17 @@ array_agg_array_combine(PG_FUNCTION_ARGS) } else if (newnitems > state1->aitems) { - int newaitems = state1->aitems + state2->aitems; - - state1->aitems = pg_nextpower2_32(newaitems); + state1->aitems = pg_nextpower2_32(newnitems); state1->nullbitmap = (bits8 *) repalloc(state1->nullbitmap, (state1->aitems + 7) / 8); } + /* This will do the right thing if state2->nullbitmap is NULL: */ array_bitmap_copy(state1->nullbitmap, state1->nitems, state2->nullbitmap, 0, state2->nitems); } + /* Finally, combine the data and adjust sizes. 
*/ memcpy(state1->data + state1->nbytes, state2->data, state2->nbytes); state1->nbytes += state2->nbytes; state1->nitems += state2->nitems; @@ -1045,9 +1057,6 @@ array_agg_array_combine(PG_FUNCTION_ARGS) state1->dims[0] += state2->dims[0]; /* remaining dims already match, per test above */ - Assert(state1->array_type == state2->array_type); - Assert(state1->element_type == state2->element_type); - MemoryContextSwitchTo(oldContext); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayfuncs.c index 934d4cd2096..372ae4cb6c1 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayfuncs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/arrayfuncs.c @@ -3746,6 +3746,12 @@ deconstruct_array_builtin(ArrayType *array, elmalign = TYPALIGN_SHORT; break; + case INT4OID: + elmlen = sizeof(int32); + elmbyval = true; + elmalign = TYPALIGN_INT; + break; + case OIDOID: elmlen = sizeof(Oid); elmbyval = true; @@ -5573,6 +5579,7 @@ accumArrayResultArr(ArrayBuildStateArr *astate, ndatabytes; char *data; int i; + int newnitems; /* * We disallow accumulating null subarrays. 
Another plausible definition @@ -5602,6 +5609,14 @@ accumArrayResultArr(ArrayBuildStateArr *astate, nitems = ArrayGetNItems(ndims, dims); ndatabytes = ARR_SIZE(arg) - ARR_DATA_OFFSET(arg); + /* Check that the array doesn't grow too large */ + newnitems = astate->nitems + nitems; + if (newnitems > MaxArraySize) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array size exceeds the maximum allowed (%zu)", + MaxArraySize))); + if (astate->ndims == 0) { /* First input; check/save the dimensionality info */ @@ -5667,8 +5682,6 @@ accumArrayResultArr(ArrayBuildStateArr *astate, /* Deal with null bitmap if needed */ if (astate->nullbitmap || ARR_HASNULL(arg)) { - int newnitems = astate->nitems + nitems; - if (astate->nullbitmap == NULL) { /* @@ -5692,7 +5705,7 @@ accumArrayResultArr(ArrayBuildStateArr *astate, nitems); } - astate->nitems += nitems; + astate->nitems = newnitems; astate->dims[0] += 1; MemoryContextSwitchTo(oldcontext); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datum.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datum.c index 251dd23ca81..b0b20e60272 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datum.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/datum.c @@ -259,8 +259,13 @@ datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) /*------------------------------------------------------------------------- * datum_image_eq * - * Compares two datums for identical contents, based on byte images. Return - * true if the two datums are equal, false otherwise. + * Compares two datums for identical contents when coerced to a signed integer + * of typLen bytes. Return true if the two datums are equal, false otherwise. + * + * The coercion is required as we're not always careful to use the correct + * PG_RETURN_* macro. 
If we didn't do this, a Datum that's been formed and + * deformed into a tuple may not have the same signed representation as the + * other datum value. *------------------------------------------------------------------------- */ bool @@ -272,7 +277,21 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) if (typByVal) { - result = (value1 == value2); + switch (typLen) + { + case sizeof(char): + result = (DatumGetChar(value1) == DatumGetChar(value2)); + break; + case sizeof(int16): + result = (DatumGetInt16(value1) == DatumGetInt16(value2)); + break; + case sizeof(int32): + result = (DatumGetInt32(value1) == DatumGetInt32(value2)); + break; + default: + result = (value1 == value2); + break; + } } else if (typLen > 0) { @@ -329,10 +348,11 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) /*------------------------------------------------------------------------- * datum_image_hash * - * Generate a hash value based on the binary representation of 'value'. Most - * use cases will want to use the hash function specific to the Datum's type, - * however, some corner cases require generating a hash value based on the - * actual bits rather than the logical value. + * Generate a hash value based on the binary representation of 'value' when + * represented as a signed integer of typLen bytes. Most use cases will want + * to use the hash function specific to the Datum's type, however, some corner + * cases require generating a hash value based on the actual bits rather than + * the logical value. 
*------------------------------------------------------------------------- */ uint32 @@ -342,7 +362,23 @@ datum_image_hash(Datum value, bool typByVal, int typLen) uint32 result; if (typByVal) + { + switch (typLen) + { + case sizeof(char): + value = CharGetDatum(DatumGetChar(value)); + break; + case sizeof(int16): + value = Int16GetDatum(DatumGetInt16(value)); + break; + case sizeof(int32): + value = Int32GetDatum(DatumGetInt32(value)); + break; + /* Nothing needs done for 64-bit types */ + } + result = hash_bytes((unsigned char *) &value, sizeof(Datum)); + } else if (typLen > 0) result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen); else if (typLen == -1) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/dbsize.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/dbsize.c index 066cc44381a..8a5af5d1717 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/dbsize.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/dbsize.c @@ -927,6 +927,9 @@ pg_relation_filenode(PG_FUNCTION_ARGS) * * We don't fail but return NULL if we cannot find a mapping. * + * Temporary relations are not detected, returning NULL (see + * RelidByRelfilenumber() for the reasons). + * * InvalidOid can be passed instead of the current database's default * tablespace. 
*/ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/encode.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/encode.c index e5ac3ad23df..a20fbf18c24 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/encode.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/encode.c @@ -215,7 +215,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) ereturn(escontext, 0, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal digit: \"%.*s\"", - pg_mblen(s), s))); + pg_mblen_range(s, srcend), s))); s++; if (s >= srcend) ereturn(escontext, 0, @@ -225,7 +225,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) ereturn(escontext, 0, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal digit: \"%.*s\"", - pg_mblen(s), s))); + pg_mblen_range(s, srcend), s))); s++; *p++ = (v1 << 4) | v2; } @@ -354,7 +354,7 @@ pg_base64_decode(const char *src, size_t len, char *dst) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence", - pg_mblen(s - 1), s - 1))); + pg_mblen_range(s - 1, srcend), s - 1))); } /* add it to buffer */ buf = (buf << 6) + b; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/format_type.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/format_type.c index 12402a06379..a9054d11b0c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/format_type.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/format_type.c @@ -444,11 +444,15 @@ oidvectortypes(PG_FUNCTION_ARGS) { oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0); char *result; - int numargs = oidArray->dim1; + int numargs; int num; size_t total; size_t left; + /* validate input before fetching dim1 */ + check_valid_oidvector(oidArray); + numargs = oidArray->dim1; + total 
= 20 * numargs + 1; result = palloc(total); result[0] = '\0'; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/formatting.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/formatting.c index 1ca53a8c88a..8843cf232ee 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/formatting.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/formatting.c @@ -1001,8 +1001,9 @@ typedef struct NUMProc char *number, /* string with number */ *number_p, /* pointer to current number position */ *inout, /* in / out buffer */ - *inout_p, /* pointer to current inout position */ - *last_relevant, /* last relevant number after decimal point */ + *inout_p; /* pointer to current inout position */ + + const char *last_relevant, /* last relevant number after decimal point */ *L_negative_sign, /* Locale */ *L_positive_sign, @@ -1067,6 +1068,7 @@ static void NUM_prepare_locale(NUMProc *Np); static char *get_last_relevant_decnum(char *num); static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); static void NUM_numpart_to_char(NUMProc *Np, int id); +static void NUM_add_locale_symbol(NUMProc *Np, const char *pattern); static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number, int input_len, int to_char_out_pre_spaces, int sign, bool is_to_char, Oid collid); @@ -1385,7 +1387,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, ereport(ERROR, (errcode(ERRCODE_INVALID_DATETIME_FORMAT), errmsg("invalid datetime format separator: \"%s\"", - pnstrdup(str, pg_mblen(str))))); + pnstrdup(str, pg_mblen_cstr(str))))); if (*str == ' ') n->type = NODE_TYPE_SPACE; @@ -1415,7 +1417,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, /* backslash quotes the next character, if any */ if (*str == '\\' && *(str + 1)) str++; - chlen = pg_mblen(str); + chlen = pg_mblen_cstr(str); n->type = NODE_TYPE_CHAR; memcpy(n->character, str, chlen); 
n->character[chlen] = '\0'; @@ -1433,7 +1435,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, */ if (*str == '\\' && *(str + 1) == '"') str++; - chlen = pg_mblen(str); + chlen = pg_mblen_cstr(str); if ((flags & DCH_FLAG) && is_separator_char(str)) n->type = NODE_TYPE_SEPARATOR; @@ -2138,8 +2140,8 @@ asc_toupper_z(const char *buff) do { \ if (S_THth(_suf)) \ { \ - if (*(ptr)) (ptr) += pg_mblen(ptr); \ - if (*(ptr)) (ptr) += pg_mblen(ptr); \ + if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \ + if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \ } \ } while (0) @@ -3345,7 +3347,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, * insist that the consumed character match the format's * character. */ - s += pg_mblen(s); + s += pg_mblen_cstr(s); } continue; } @@ -3367,11 +3369,11 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, if (extra_skip > 0) extra_skip--; else - s += pg_mblen(s); + s += pg_mblen_cstr(s); } else { - int chlen = pg_mblen(s); + int chlen = pg_mblen_cstr(s); /* * Standard mode requires strict match of format characters. 
@@ -4008,7 +4010,7 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) /* * Allocate workspace for result as C string */ - result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1); + result = palloc(mul_size(fmt_len, DCH_MAX_ITEM_SIZ) + 1); *result = '\0'; if (fmt_len > DCH_CACHE_SIZE) @@ -4019,7 +4021,7 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) */ incache = false; - format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + format = palloc_array(FormatNode, fmt_len + 1); parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index, DCH_FLAG, NULL); @@ -4507,7 +4509,7 @@ do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, * Allocate new memory if format picture is bigger than static * cache and do not use cache (call parser always) */ - format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + format = palloc_array(FormatNode, fmt_len + 1); parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index, DCH_FLAG | (std ? STD_FLAG : 0), NULL); @@ -4959,7 +4961,7 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) * Allocate new memory if format picture is bigger than static cache * and do not use cache (call parser always) */ - format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode)); + format = palloc_array(FormatNode, len + 1); *shouldFree = true; @@ -5422,11 +5424,9 @@ NUM_numpart_to_char(NUMProc *Np, int id) { if (Np->Num->lsign == NUM_LSIGN_PRE) { - if (Np->sign == '-') - strcpy(Np->inout_p, Np->L_negative_sign); - else - strcpy(Np->inout_p, Np->L_positive_sign); - Np->inout_p += strlen(Np->inout_p); + NUM_add_locale_symbol(Np, (Np->sign == '-') ? 
+ Np->L_negative_sign : + Np->L_positive_sign); Np->sign_wrote = true; } } @@ -5491,8 +5491,7 @@ NUM_numpart_to_char(NUMProc *Np, int id) { if (!Np->last_relevant || *Np->last_relevant != '.') { - strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ - Np->inout_p += strlen(Np->inout_p); + NUM_add_locale_symbol(Np, Np->decimal); /* Write DEC/D */ } /* @@ -5501,8 +5500,7 @@ NUM_numpart_to_char(NUMProc *Np, int id) else if (IS_FILLMODE(Np->Num) && Np->last_relevant && *Np->last_relevant == '.') { - strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */ - Np->inout_p += strlen(Np->inout_p); + NUM_add_locale_symbol(Np, Np->decimal); /* Write DEC/D */ } } else @@ -5560,11 +5558,9 @@ NUM_numpart_to_char(NUMProc *Np, int id) } else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST) { - if (Np->sign == '-') - strcpy(Np->inout_p, Np->L_negative_sign); - else - strcpy(Np->inout_p, Np->L_positive_sign); - Np->inout_p += strlen(Np->inout_p); + NUM_add_locale_symbol(Np, (Np->sign == '-') ? + Np->L_negative_sign : + Np->L_positive_sign); } } } @@ -5573,18 +5569,37 @@ NUM_numpart_to_char(NUMProc *Np, int id) } /* + * Append locale-specific symbol to Np->inout. 
+ * Note we don't null-terminate the output + */ +static void +NUM_add_locale_symbol(NUMProc *Np, const char *pattern) +{ + size_t pattern_len = strlen(pattern); + + /* Truncate symbol if it's potentially too long */ + if (unlikely(pattern_len > NUM_MAX_ITEM_SIZ)) + pattern_len = pg_mbcliplen(pattern, pattern_len, + NUM_MAX_ITEM_SIZ); + memcpy(Np->inout_p, pattern, pattern_len); + Np->inout_p += pattern_len; +} + +/* * Skip over "n" input characters, but only if they aren't numeric data */ static void NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len) { + const char *end = Np->inout + input_len; + while (n-- > 0) { if (OVERLOAD_TEST) break; /* end of input */ if (strchr("0123456789.,+-", *Np->inout_p) != NULL) break; /* it's a data character */ - Np->inout_p += pg_mblen(Np->inout_p); + Np->inout_p += pg_mblen_range(Np->inout_p, end); } } @@ -5840,6 +5855,10 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, pattern_len = strlen(pattern); if (Np->is_to_char) { + /* Truncate symbol if it's potentially too long */ + if (unlikely(pattern_len > NUM_MAX_ITEM_SIZ)) + pattern_len = pg_mbcliplen(pattern, pattern_len, + NUM_MAX_ITEM_SIZ); if (!Np->num_in) { if (IS_FILLMODE(Np->Num)) @@ -5847,19 +5866,21 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, else { /* just in case there are MB chars */ - pattern_len = pg_mbstrlen(pattern); + pattern_len = pg_mbstrlen_with_len(pattern, + pattern_len); memset(Np->inout_p, ' ', pattern_len); Np->inout_p += pattern_len - 1; } } else { - strcpy(Np->inout_p, pattern); + memcpy(Np->inout_p, pattern, pattern_len); Np->inout_p += pattern_len - 1; } } else { + /* Here we do not truncate the symbol ... 
*/ if (!Np->num_in) { if (IS_FILLMODE(Np->Num)) @@ -5884,11 +5905,18 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, pattern = Np->L_currency_symbol; if (Np->is_to_char) { - strcpy(Np->inout_p, pattern); - Np->inout_p += strlen(pattern) - 1; + /* Truncate symbol if it's potentially too long */ + pattern_len = strlen(pattern); + if (unlikely(pattern_len > NUM_MAX_ITEM_SIZ)) + pattern_len = pg_mbcliplen(pattern, pattern_len, + NUM_MAX_ITEM_SIZ); + + memcpy(Np->inout_p, pattern, pattern_len); + Np->inout_p += pattern_len - 1; } else { + /* Here we do not truncate the symbol ... */ NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); continue; } @@ -6037,7 +6065,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, } else { - Np->inout_p += pg_mblen(Np->inout_p); + Np->inout_p += pg_mblen_range(Np->inout_p, Np->inout + input_len); } continue; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int.c index 44d1c7ad0c4..f9a08257ac3 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/int.c @@ -135,6 +135,30 @@ buildint2vector(const int16 *int2s, int n) } /* + * validate that an array object meets the restrictions of int2vector + * + * We need this because there are pathways by which a general int2[] array can + * be cast to int2vector, allowing the type's restrictions to be violated. + * All code that receives an int2vector as a SQL parameter should check this. + */ +static void +check_valid_int2vector(const int2vector *int2Array) +{ + /* + * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because + * otherwise the array's layout will not be what calling code expects. We + * needn't be picky about the index lower bound though. Checking elemtype + * is just paranoia. 
+ */ + if (int2Array->ndim != 1 || + int2Array->dataoffset != 0 || + int2Array->elemtype != INT2OID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array is not a valid int2vector"))); +} + +/* * int2vectorin - converts "num num ..." to internal form */ Datum @@ -208,10 +232,14 @@ int2vectorout(PG_FUNCTION_ARGS) { int2vector *int2Array = (int2vector *) PG_GETARG_POINTER(0); int num, - nnums = int2Array->dim1; + nnums; char *rp; char *result; + /* validate input before fetching dim1 */ + check_valid_int2vector(int2Array); + nnums = int2Array->dim1; + /* assumes sign, 5 digits, ' ' */ rp = result = (char *) palloc(nnums * 7 + 1); for (num = 0; num < nnums; num++) @@ -272,6 +300,7 @@ int2vectorrecv(PG_FUNCTION_ARGS) Datum int2vectorsend(PG_FUNCTION_ARGS) { + /* We don't do check_valid_int2vector, since array_send won't care */ return array_send(fcinfo); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonfuncs.c index 70cb922e6b7..42b886c621a 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonfuncs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonfuncs.c @@ -682,7 +682,7 @@ report_json_context(JsonLexContext *lex) { /* Advance to next multibyte character */ if (IS_HIGHBIT_SET(*context_start)) - context_start += pg_mblen(context_start); + context_start += pg_mblen_range(context_start, context_end); else context_start++; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.c index e178a58e8e0..64e51b6615a 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/jsonpath_gram.c @@ -2235,7 +2235,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, 
JsonPathString *pattern, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("invalid input syntax for type %s", "jsonpath"), errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.", - pg_mblen(flags->val + i), flags->val + i))); + pg_mblen_range(flags->val + i, flags->val + flags->len), + flags->val + i))); break; } } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like.c index 33a2f46aab0..776112c695f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like.c @@ -55,20 +55,20 @@ static int Generic_Text_IC_like(text *str, text *pat, Oid collation); *-------------------- */ static inline int -wchareq(const char *p1, const char *p2) +wchareq(const char *p1, int p1len, const char *p2, int p2len) { - int p1_len; + int p1clen; /* Optimization: quickly compare the first byte. */ if (*p1 != *p2) return 0; - p1_len = pg_mblen(p1); - if (pg_mblen(p2) != p1_len) + p1clen = pg_mblen_with_len(p1, p1len); + if (pg_mblen_with_len(p2, p2len) != p1clen) return 0; /* They are the same length */ - while (p1_len--) + while (p1clen--) { if (*p1++ != *p2++) return 0; @@ -107,11 +107,11 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) #define NextByte(p, plen) ((p)++, (plen)--) /* Set up to compile like_match.c for multibyte characters */ -#define CHAREQ(p1, p2) wchareq((p1), (p2)) +#define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len)) #define NextChar(p, plen) \ - do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) + do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0) #define CopyAdvChar(dst, src, srclen) \ - do { int __l = pg_mblen(src); \ + do { int __l = pg_mblen_with_len((src), (srclen)); \ (srclen) -= __l; \ while (__l-- > 0) \ *(dst)++ = *(src)++; \ @@ -123,7 +123,7 @@ 
SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) #include "like_match.c" /* Set up to compile like_match.c for single-byte characters */ -#define CHAREQ(p1, p2) (*(p1) == *(p2)) +#define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2)) #define NextChar(p, plen) NextByte((p), (plen)) #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_match.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_match.c index 2f32cdaf020..e586de9efd1 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_match.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/like_match.c @@ -16,7 +16,7 @@ * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar * MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars * - * Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Copyright (c) 1996-2023, PostgreSQL Global Development Group * * IDENTIFICATION * src/backend/utils/adt/like_match.c @@ -294,6 +294,7 @@ do_like_escape(text *pat, text *esc) errhint("Escape string must be empty or one character."))); e = VARDATA_ANY(esc); + elen = VARSIZE_ANY_EXHDR(esc); /* * If specified escape is '\', just copy the pattern as-is. 
@@ -312,7 +313,7 @@ do_like_escape(text *pat, text *esc) afterescape = false; while (plen > 0) { - if (CHAREQ(p, e) && !afterescape) + if (CHAREQ(p, plen, e, elen) && !afterescape) { *r++ = '\\'; NextChar(p, plen); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes.c index 9443c2b884a..5f98d856422 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/multirangetypes.c @@ -341,7 +341,7 @@ multirange_recv(PG_FUNCTION_ARGS) Oid mltrngtypoid = PG_GETARG_OID(1); int32 typmod = PG_GETARG_INT32(2); MultirangeIOData *cache; - uint32 range_count; + int32 range_count; RangeType **ranges; MultirangeType *ret; StringInfoData tmpbuf; @@ -349,7 +349,8 @@ multirange_recv(PG_FUNCTION_ARGS) cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_receive); range_count = pq_getmsgint(buf, 4); - ranges = palloc(range_count * sizeof(RangeType *)); + /* palloc_array will enforce a more-or-less-sane range_count value */ + ranges = palloc_array(RangeType *, range_count); initStringInfo(&tmpbuf); for (int i = 0; i < range_count; i++) @@ -484,8 +485,9 @@ multirange_canonicalize(TypeCacheEntry *rangetyp, int32 input_range_count, int32 output_range_count = 0; /* Sort the ranges so we can find the ones that overlap/meet. 
*/ - qsort_arg(ranges, input_range_count, sizeof(RangeType *), range_compare, - rangetyp); + if (ranges != NULL) + qsort_arg(ranges, input_range_count, sizeof(RangeType *), + range_compare, rangetyp); /* Now merge where possible: */ for (i = 0; i < input_range_count; i++) @@ -835,7 +837,7 @@ multirange_deserialize(TypeCacheEntry *rangetyp, { int i; - *ranges = palloc(*range_count * sizeof(RangeType *)); + *ranges = palloc_array(RangeType *, *range_count); for (i = 0; i < *range_count; i++) (*ranges)[i] = multirange_get_range(rangetyp, multirange, i); } @@ -999,7 +1001,7 @@ multirange_constructor2(PG_FUNCTION_ARGS) deconstruct_array(rangeArray, rngtypid, rangetyp->typlen, rangetyp->typbyval, rangetyp->typalign, &elements, &nulls, &range_count); - ranges = palloc0(range_count * sizeof(RangeType *)); + ranges = palloc_array(RangeType *, range_count); for (i = 0; i < range_count; i++) { if (nulls[i]) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_selfuncs.c index 315985215c3..141d26332fa 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_selfuncs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/network_selfuncs.c @@ -43,9 +43,9 @@ /* Maximum number of items to consider in join selectivity calculations */ #define MAX_CONSIDERED_ELEMS 1024 -static Selectivity networkjoinsel_inner(Oid operator, +static Selectivity networkjoinsel_inner(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2); -static Selectivity networkjoinsel_semi(Oid operator, +static Selectivity networkjoinsel_semi(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2); static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues); static Selectivity inet_hist_value_sel(Datum *values, int nvalues, @@ -82,6 +82,7 @@ networksel(PG_FUNCTION_ARGS) Oid 
operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + int opr_codenum; VariableStatData vardata; Node *other; bool varonleft; @@ -96,6 +97,14 @@ networksel(PG_FUNCTION_ARGS) FmgrInfo proc; /* + * Before all else, verify that the operator is one of the ones supported + * by this function, which in turn proves that the input datatypes are + * what we expect. Otherwise, attaching this selectivity function to some + * unexpected operator could cause trouble. + */ + opr_codenum = inet_opr_codenum(operator); + + /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. */ @@ -150,13 +159,12 @@ networksel(PG_FUNCTION_ARGS) STATISTIC_KIND_HISTOGRAM, InvalidOid, ATTSTATSSLOT_VALUES)) { - int opr_codenum = inet_opr_codenum(operator); + int h_codenum; /* Commute if needed, so we can consider histogram to be on the left */ - if (!varonleft) - opr_codenum = -opr_codenum; + h_codenum = varonleft ? opr_codenum : -opr_codenum; non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues, - constvalue, opr_codenum); + constvalue, h_codenum); free_attstatsslot(&hslot); } @@ -203,10 +211,19 @@ networkjoinsel(PG_FUNCTION_ARGS) #endif SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); double selec; + int opr_codenum; VariableStatData vardata1; VariableStatData vardata2; bool join_is_reversed; + /* + * Before all else, verify that the operator is one of the ones supported + * by this function, which in turn proves that the input datatypes are + * what we expect. Otherwise, attaching this selectivity function to some + * unexpected operator could cause trouble. 
+ */ + opr_codenum = inet_opr_codenum(operator); + get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &join_is_reversed); @@ -220,15 +237,18 @@ networkjoinsel(PG_FUNCTION_ARGS) * Selectivity for left/full join is not exactly the same as inner * join, but we neglect the difference, as eqjoinsel does. */ - selec = networkjoinsel_inner(operator, &vardata1, &vardata2); + selec = networkjoinsel_inner(operator, opr_codenum, + &vardata1, &vardata2); break; case JOIN_SEMI: case JOIN_ANTI: /* Here, it's important that we pass the outer var on the left. */ if (!join_is_reversed) - selec = networkjoinsel_semi(operator, &vardata1, &vardata2); + selec = networkjoinsel_semi(operator, opr_codenum, + &vardata1, &vardata2); else selec = networkjoinsel_semi(get_commutator(operator), + -opr_codenum, &vardata2, &vardata1); break; default: @@ -260,7 +280,7 @@ networkjoinsel(PG_FUNCTION_ARGS) * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner(). */ static Selectivity -networkjoinsel_inner(Oid operator, +networkjoinsel_inner(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2) { Form_pg_statistic stats; @@ -273,7 +293,6 @@ networkjoinsel_inner(Oid operator, mcv2_exists = false, hist1_exists = false, hist2_exists = false; - int opr_codenum; int mcv1_length = 0, mcv2_length = 0; AttStatsSlot mcv1_slot; @@ -325,8 +344,6 @@ networkjoinsel_inner(Oid operator, memset(&hist2_slot, 0, sizeof(hist2_slot)); } - opr_codenum = inet_opr_codenum(operator); - /* * Calculate selectivity for MCV vs MCV matches. */ @@ -387,7 +404,7 @@ networkjoinsel_inner(Oid operator, * histogram selectivity for semi/anti join cases. 
*/ static Selectivity -networkjoinsel_semi(Oid operator, +networkjoinsel_semi(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2) { Form_pg_statistic stats; @@ -401,7 +418,6 @@ networkjoinsel_semi(Oid operator, mcv2_exists = false, hist1_exists = false, hist2_exists = false; - int opr_codenum; FmgrInfo proc; int i, mcv1_length = 0, @@ -455,7 +471,6 @@ networkjoinsel_semi(Oid operator, memset(&hist2_slot, 0, sizeof(hist2_slot)); } - opr_codenum = inet_opr_codenum(operator); fmgr_info(get_opcode(operator), &proc); /* Estimate number of input rows represented by RHS histogram. */ @@ -827,6 +842,9 @@ inet_semi_join_sel(Datum lhs_value, /* * Assign useful code numbers for the subnet inclusion/overlap operators * + * This will throw an error if the operator is not one of the ones we + * support in networksel() and networkjoinsel(). + * * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend * on the exact codes assigned here; but many other places in this file * know that they can negate a code to obtain the code for the commutator diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oid.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oid.c index 3f7af5b3a06..066511443cf 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oid.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oid.c @@ -107,6 +107,30 @@ buildoidvector(const Oid *oids, int n) } /* + * validate that an array object meets the restrictions of oidvector + * + * We need this because there are pathways by which a general oid[] array can + * be cast to oidvector, allowing the type's restrictions to be violated. + * All code that receives an oidvector as a SQL parameter should check this. 
+ */ +void +check_valid_oidvector(const oidvector *oidArray) +{ + /* + * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because + * otherwise the array's layout will not be what calling code expects. We + * needn't be picky about the index lower bound though. Checking elemtype + * is just paranoia. + */ + if (oidArray->ndim != 1 || + oidArray->dataoffset != 0 || + oidArray->elemtype != OIDOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array is not a valid oidvector"))); +} + +/* * oidvectorin - converts "num num ..." to internal form */ Datum @@ -158,10 +182,14 @@ oidvectorout(PG_FUNCTION_ARGS) { oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0); int num, - nnums = oidArray->dim1; + nnums; char *rp; char *result; + /* validate input before fetching dim1 */ + check_valid_oidvector(oidArray); + nnums = oidArray->dim1; + /* assumes sign, 10 digits, ' ' */ rp = result = (char *) palloc(nnums * 12 + 1); for (num = 0; num < nnums; num++) @@ -224,6 +252,7 @@ oidvectorrecv(PG_FUNCTION_ARGS) Datum oidvectorsend(PG_FUNCTION_ARGS) { + /* We don't do check_valid_oidvector, since array_send won't care */ return array_send(fcinfo); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oracle_compat.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oracle_compat.c index 3b5b794afb3..8d025011e2f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oracle_compat.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/oracle_compat.c @@ -153,8 +153,8 @@ lpad(PG_FUNCTION_ARGS) char *ptr1, *ptr2, *ptr2start, - *ptr2end, *ptr_ret; + const char *ptr2end; int m, s1len, s2len; @@ -199,7 +199,7 @@ lpad(PG_FUNCTION_ARGS) while (m--) { - int mlen = pg_mblen(ptr2); + int mlen = pg_mblen_range(ptr2, ptr2end); memcpy(ptr_ret, ptr2, mlen); ptr_ret += mlen; @@ -212,7 +212,7 @@ lpad(PG_FUNCTION_ARGS) while (s1len--) { - int mlen = pg_mblen(ptr1); + int mlen = 
pg_mblen_unbounded(ptr1); memcpy(ptr_ret, ptr1, mlen); ptr_ret += mlen; @@ -251,8 +251,8 @@ rpad(PG_FUNCTION_ARGS) char *ptr1, *ptr2, *ptr2start, - *ptr2end, *ptr_ret; + const char *ptr2end; int m, s1len, s2len; @@ -292,11 +292,12 @@ rpad(PG_FUNCTION_ARGS) m = len - s1len; ptr1 = VARDATA_ANY(string1); + ptr_ret = VARDATA(ret); while (s1len--) { - int mlen = pg_mblen(ptr1); + int mlen = pg_mblen_unbounded(ptr1); memcpy(ptr_ret, ptr1, mlen); ptr_ret += mlen; @@ -308,7 +309,7 @@ rpad(PG_FUNCTION_ARGS) while (m--) { - int mlen = pg_mblen(ptr2); + int mlen = pg_mblen_range(ptr2, ptr2end); memcpy(ptr_ret, ptr2, mlen); ptr_ret += mlen; @@ -393,6 +394,7 @@ dotrim(const char *string, int stringlen, */ const char **stringchars; const char **setchars; + const char *setend; int *stringmblen; int *setmblen; int stringnchars; @@ -400,6 +402,7 @@ dotrim(const char *string, int stringlen, int resultndx; int resultnchars; const char *p; + const char *pend; int len; int mblen; const char *str_pos; @@ -410,10 +413,11 @@ dotrim(const char *string, int stringlen, stringnchars = 0; p = string; len = stringlen; + pend = p + len; while (len > 0) { stringchars[stringnchars] = p; - stringmblen[stringnchars] = mblen = pg_mblen(p); + stringmblen[stringnchars] = mblen = pg_mblen_range(p, pend); stringnchars++; p += mblen; len -= mblen; @@ -424,10 +428,11 @@ dotrim(const char *string, int stringlen, setnchars = 0; p = set; len = setlen; + setend = set + setlen; while (len > 0) { setchars[setnchars] = p; - setmblen[setnchars] = mblen = pg_mblen(p); + setmblen[setnchars] = mblen = pg_mblen_range(p, setend); setnchars++; p += mblen; len -= mblen; @@ -805,6 +810,8 @@ translate(PG_FUNCTION_ARGS) *to_end; char *source, *target; + const char *source_end; + const char *from_end; int m, fromlen, tolen, @@ -819,9 +826,11 @@ translate(PG_FUNCTION_ARGS) if (m <= 0) PG_RETURN_TEXT_P(string); source = VARDATA_ANY(string); + source_end = source + m; fromlen = VARSIZE_ANY_EXHDR(from); from_ptr = 
VARDATA_ANY(from); + from_end = from_ptr + fromlen; tolen = VARSIZE_ANY_EXHDR(to); to_ptr = VARDATA_ANY(to); to_end = to_ptr + tolen; @@ -845,12 +854,12 @@ translate(PG_FUNCTION_ARGS) while (m > 0) { - source_len = pg_mblen(source); + source_len = pg_mblen_range(source, source_end); from_index = 0; for (i = 0; i < fromlen; i += len) { - len = pg_mblen(&from_ptr[i]); + len = pg_mblen_range(&from_ptr[i], from_end); if (len == source_len && memcmp(source, &from_ptr[i], len) == 0) break; @@ -866,11 +875,11 @@ translate(PG_FUNCTION_ARGS) { if (p >= to_end) break; - p += pg_mblen(p); + p += pg_mblen_range(p, to_end); } if (p < to_end) { - len = pg_mblen(p); + len = pg_mblen_range(p, to_end); memcpy(target, p, len); target += len; retlen += len; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_locale.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_locale.c index c38d4b077c5..d6f3fd33335 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_locale.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pg_locale.c @@ -67,6 +67,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" +#include "utils/relcache.h" #include "utils/syscache.h" #ifdef USE_ICU @@ -152,8 +153,8 @@ static __thread UConverter *icu_converter = NULL; static UCollator *pg_ucol_open(const char *loc_str); static void init_icu_converter(void); -static size_t uchar_length(UConverter *converter, - const char *str, int32_t len); +static int32_t uchar_length(UConverter *converter, + const char *str, int32_t len); static int32_t uchar_convert(UConverter *converter, UChar *dest, int32_t destlen, const char *src, int32_t srclen); @@ -1100,7 +1101,7 @@ get_iso_localename(const char *winlocname) wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH]; wchar_t buffer[LOCALE_NAME_MAX_LENGTH]; static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH]; - char *period; + const char *period; int 
len; int ret_val; @@ -1230,6 +1231,8 @@ lookup_collation_cache(Oid collation, bool set_flags) Assert(OidIsValid(collation)); Assert(collation != DEFAULT_COLLATION_OID); + AssertCouldGetRelation(); + if (collation_cache == NULL) { /* First time through, initialize the hash table */ @@ -1440,6 +1443,9 @@ make_icu_collator(const char *iculocstr, ereport(ERROR, (errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s", iculocstr, icurules, u_errorName(status)))); + + pfree(my_rules); + pfree(agg_rules); } /* We will leak this string if the caller errors later :-( */ @@ -1778,8 +1784,9 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, char *buf = sbuf; char *a1p, *a2p; - int a1len = len1 * 2 + 2; - int a2len = len2 * 2 + 2; + size_t a1len, + a2len, + buflen; int r; int result; @@ -1789,8 +1796,16 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, Assert(false); #endif - if (a1len + a2len > TEXTBUFLEN) - buf = palloc(a1len + a2len); + /* + * In a 32-bit build, twice the input length can overflow size_t, so we + * must be careful. 
+ */ + a1len = add_size(add_size(len1, len1), 2); + a2len = add_size(add_size(len2, len2), 2); + buflen = add_size(a1len, a2len); + + if (buflen > TEXTBUFLEN) + buf = palloc(buflen); a1p = buf; a2p = buf + a1len; @@ -1941,12 +1956,11 @@ static int pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1, const char *arg2, int32_t len2, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; + UChar sbuf[TEXTBUFLEN / sizeof(UChar)]; + UChar *buf = sbuf; int32_t ulen1; int32_t ulen2; - size_t bufsize1; - size_t bufsize2; + size_t bufsize; UChar *uchar1, *uchar2; int result; @@ -1961,14 +1975,13 @@ pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1, ulen1 = uchar_length(icu_converter, arg1, len1); ulen2 = uchar_length(icu_converter, arg2, len2); - bufsize1 = (ulen1 + 1) * sizeof(UChar); - bufsize2 = (ulen2 + 1) * sizeof(UChar); + /* ulen1+1 or ulen2+1 doesn't risk overflow, but summing them might */ + bufsize = add_size(ulen1 + 1, ulen2 + 1); + if (bufsize > lengthof(sbuf)) + buf = palloc_array(UChar, bufsize); - if (bufsize1 + bufsize2 > TEXTBUFLEN) - buf = palloc(bufsize1 + bufsize2); - - uchar1 = (UChar *) buf; - uchar2 = (UChar *) (buf + bufsize1); + uchar1 = buf; + uchar2 = buf + ulen1 + 1; ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1); ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2); @@ -2149,11 +2162,9 @@ static size_t pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; - UChar *uchar; + UChar sbuf[TEXTBUFLEN / sizeof(UChar)]; + UChar *uchar = sbuf; int32_t ulen; - size_t uchar_bsize; Size result_bsize; Assert(locale->provider == COLLPROVIDER_ICU); @@ -2162,12 +2173,8 @@ pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize, ulen = uchar_length(icu_converter, src, srclen); - uchar_bsize = (ulen + 1) * sizeof(UChar); - - if (uchar_bsize > TEXTBUFLEN) - buf = palloc(uchar_bsize); - - uchar = 
(UChar *) buf; + if (ulen >= lengthof(sbuf)) + uchar = palloc_array(UChar, ulen + 1); ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); @@ -2182,8 +2189,8 @@ pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize, Assert(result_bsize > 0); result_bsize--; - if (buf != sbuf) - pfree(buf); + if (uchar != sbuf) + pfree(uchar); /* if dest is defined, it should be nul-terminated */ Assert(result_bsize >= destsize || dest[result_bsize] == '\0'); @@ -2196,14 +2203,12 @@ static size_t pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen, int32_t destsize, pg_locale_t locale) { - char sbuf[TEXTBUFLEN]; - char *buf = sbuf; + UChar sbuf[TEXTBUFLEN / sizeof(UChar)]; + UChar *uchar = sbuf; UCharIterator iter; uint32_t state[2]; UErrorCode status; - int32_t ulen = -1; - UChar *uchar = NULL; - size_t uchar_bsize; + int32_t ulen; Size result_bsize; Assert(locale->provider == COLLPROVIDER_ICU); @@ -2213,12 +2218,8 @@ pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen, ulen = uchar_length(icu_converter, src, srclen); - uchar_bsize = (ulen + 1) * sizeof(UChar); - - if (uchar_bsize > TEXTBUFLEN) - buf = palloc(uchar_bsize); - - uchar = (UChar *) buf; + if (ulen >= lengthof(sbuf)) + uchar = palloc_array(UChar, ulen + 1); ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); @@ -2236,6 +2237,9 @@ pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen, (errmsg("sort key generation failed: %s", u_errorName(status)))); + if (uchar != sbuf) + pfree(uchar); + return result_bsize; } @@ -2579,8 +2583,12 @@ init_icu_converter(void) /* * Find length, in UChars, of given string if converted to UChar string. + * + * Note: given the assumption that the input string fits in MaxAllocSize, + * the result cannot overflow int32_t. But callers must be careful about + * multiplying the result by sizeof(UChar). 
*/ -static size_t +static int32_t uchar_length(UConverter *converter, const char *str, int32_t len) { UErrorCode status = U_ZERO_ERROR; @@ -2604,7 +2612,6 @@ uchar_convert(UConverter *converter, UChar *dest, int32_t destlen, UErrorCode status = U_ZERO_ERROR; int32_t ulen; - status = U_ZERO_ERROR; ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status); if (U_FAILURE(status)) ereport(ERROR, @@ -2633,7 +2640,7 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) len_uchar = uchar_length(icu_converter, buff, nbytes); - *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar)); + *buff_uchar = palloc_array(UChar, len_uchar + 1); len_uchar = uchar_convert(icu_converter, *buff_uchar, len_uchar + 1, buff, nbytes); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pgstatfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pgstatfuncs.c index 05e7a048075..2d19bb2fe58 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pgstatfuncs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/pgstatfuncs.c @@ -762,7 +762,7 @@ pg_stat_get_backend_activity(PG_FUNCTION_ARGS) activity = beentry->st_activity_raw; clipped_activity = pgstat_clip_activity(activity); - ret = cstring_to_text(activity); + ret = cstring_to_text(clipped_activity); pfree(clipped_activity); PG_RETURN_TEXT_P(ret); @@ -780,8 +780,14 @@ pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS) wait_event_type = "<backend information not available>"; else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) wait_event_type = "<insufficient privilege>"; - else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL) - wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info); + else + { + proc = BackendPidGetProc(beentry->st_procpid); + if (!proc) + proc = AuxiliaryPidGetProc(beentry->st_procpid); + if (proc) + wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info); + } if 
(!wait_event_type) PG_RETURN_NULL(); @@ -801,8 +807,14 @@ pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS) wait_event = "<backend information not available>"; else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid)) wait_event = "<insufficient privilege>"; - else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL) - wait_event = pgstat_get_wait_event(proc->wait_event_info); + else + { + proc = BackendPidGetProc(beentry->st_procpid); + if (!proc) + proc = AuxiliaryPidGetProc(beentry->st_procpid); + if (proc) + wait_event = pgstat_get_wait_event(proc->wait_event_info); + } if (!wait_event) PG_RETURN_NULL(); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regexp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regexp.c index aaeb6c86d1f..2300d56a5ab 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regexp.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/regexp.c @@ -452,7 +452,7 @@ parse_re_flags(pg_re_flags *flags, text *opts) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid regular expression option: \"%.*s\"", - pg_mblen(opt_p + i), opt_p + i))); + pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i))); break; } } @@ -682,12 +682,13 @@ textregexreplace(PG_FUNCTION_ARGS) if (VARSIZE_ANY_EXHDR(opt) > 0) { char *opt_p = VARDATA_ANY(opt); + const char *end_p = opt_p + VARSIZE_ANY_EXHDR(opt); if (*opt_p >= '0' && *opt_p <= '9') ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid regular expression option: \"%.*s\"", - pg_mblen(opt_p), opt_p), + pg_mblen_range(opt_p, end_p), opt_p), errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly."))); } @@ -781,15 +782,15 @@ similar_escape_internal(text *pat_text, text *esc_text) *r; int plen, elen; + const char *pend; bool afterescape = false; int nquotes = 0; - int charclass_depth = 0; /* Nesting level of 
character classes, - * encompassed by square brackets */ - int charclass_start = 0; /* State of the character class start, - * for carets */ + int bracket_depth = 0; /* square bracket nesting level */ + int charclass_pos = 0; /* position inside a character class */ p = VARDATA_ANY(pat_text); plen = VARSIZE_ANY_EXHDR(pat_text); + pend = p + plen; if (esc_text == NULL) { /* No ESCAPE clause provided; default to backslash as escape */ @@ -845,6 +846,17 @@ similar_escape_internal(text *pat_text, text *esc_text) * the relevant part separators in the above expansion. If the result * of this function is used in a plain regexp match (SIMILAR TO), the * escape-double-quotes have no effect on the match behavior. + * + * While we don't fully validate character classes (bracket expressions), + * we do need to parse them well enough to know where they end. + * "charclass_pos" tracks where we are in a character class. + * Its value is uninteresting when bracket_depth is 0. + * But when bracket_depth > 0, it will be + * 1: right after the opening '[' (a following '^' will negate + * the class, while ']' is a literal character) + * 2: right after a '^' after the opening '[' (']' is still a literal + * character) + * 3 or more: further inside the character class (']' ends the class) *---------- */ @@ -878,7 +890,7 @@ similar_escape_internal(text *pat_text, text *esc_text) if (elen > 1) { - int mblen = pg_mblen(p); + int mblen = pg_mblen_range(p, pend); if (mblen > 1) { @@ -916,7 +928,7 @@ similar_escape_internal(text *pat_text, text *esc_text) /* fast path */ if (afterescape) { - if (pchar == '"' && charclass_depth < 1) /* escape-double-quote? */ + if (pchar == '"' && bracket_depth < 1) /* escape-double-quote? 
*/ { /* emit appropriate part separator, per notes above */ if (nquotes == 0) @@ -957,6 +969,12 @@ similar_escape_internal(text *pat_text, text *esc_text) */ *r++ = '\\'; *r++ = pchar; + + /* + * If we encounter an escaped character in a character class, + * we are no longer at the beginning. + */ + charclass_pos = 3; } afterescape = false; } @@ -965,41 +983,69 @@ similar_escape_internal(text *pat_text, text *esc_text) /* SQL escape character; do not send to output */ afterescape = true; } - else if (charclass_depth > 0) + else if (bracket_depth > 0) { + /* inside a character class */ if (pchar == '\\') + { + /* + * If we're here, backslash is not the SQL escape character, + * so treat it as a literal class element, which requires + * doubling it. (This matches our behavior for backslashes + * outside character classes.) + */ *r++ = '\\'; + } *r++ = pchar; - /* - * Ignore a closing bracket at the start of a character class. - * Such a bracket is taken literally rather than closing the - * class. "charclass_start" is 1 right at the beginning of a - * class and 2 after an initial caret. - */ - if (pchar == ']' && charclass_start > 2) - charclass_depth--; + /* parse the character class well enough to identify ending ']' */ + if (pchar == ']' && charclass_pos > 2) + { + /* found the real end of a bracket pair */ + bracket_depth--; + /* don't reset charclass_pos, this may be an inner bracket */ + } else if (pchar == '[') - charclass_depth++; + { + /* start of a nested bracket pair */ + bracket_depth++; - /* - * If there is a caret right after the opening bracket, it negates - * the character class, but a following closing bracket should - * still be treated as a normal character. That holds only for - * the first caret, so only the values 1 and 2 mean that closing - * brackets should be taken literally. - */ - if (pchar == '^') - charclass_start++; + /* + * We are no longer at the beginning of a character class. 
+ * (The nested bracket pair is a collating element, not a + * character class in its own right.) + */ + charclass_pos = 3; + } + else if (pchar == '^') + { + /* + * A caret right after the opening bracket negates the + * character class. In that case, the following will + * increment charclass_pos from 1 to 2, so that a following + * ']' is still a literal character and does not end the + * character class. If we are further inside a character + * class, charclass_pos might get incremented past 3, which is + * fine. + */ + charclass_pos++; + } else - charclass_start = 3; /* definitely past the start */ + { + /* + * Anything else (including a backslash or leading ']') is an + * element of the character class, so we are no longer at the + * beginning of the class. + */ + charclass_pos = 3; + } } else if (pchar == '[') { /* start of a character class */ *r++ = pchar; - charclass_depth++; - charclass_start = 1; + bracket_depth = 1; + charclass_pos = 1; } else if (pchar == '%') { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/selfuncs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/selfuncs.c index 1a8ed01a551..0c447970c66 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/selfuncs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/selfuncs.c @@ -5260,7 +5260,11 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, vardata->statsTuple = statext_expressions_load(info->statOid, rte->inh, pos); - vardata->freefunc = ReleaseDummy; + /* Nothing to release if no data found */ + if (vardata->statsTuple != NULL) + { + vardata->freefunc = ReleaseDummy; + } /* * Test if user has permission to access all rows from the diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/timestamp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/timestamp.c index b585551bca8..6a560d7ad6d 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/timestamp.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/timestamp.c @@ -1629,15 +1629,19 @@ Datum timeofday(PG_FUNCTION_ARGS) { struct timeval tp; - char templ[128]; - char buf[128]; pg_time_t tt; + struct pg_tm *tm; + char part1[128]; + char part2[128]; + char buf[128 + 128 + 10]; gettimeofday(&tp, NULL); tt = (pg_time_t) tp.tv_sec; - pg_strftime(templ, sizeof(templ), "%a %b %d %H:%M:%S.%%06d %Y %Z", - pg_localtime(&tt, session_timezone)); - snprintf(buf, sizeof(buf), templ, tp.tv_usec); + tm = pg_localtime(&tt, session_timezone); + + pg_strftime(part1, sizeof(part1), "%a %b %d %H:%M:%S", tm); + pg_strftime(part2, sizeof(part2), "%Y %Z", tm); + snprintf(buf, sizeof(buf), "%s.%06d %s", part1, (int) tp.tv_usec, part2); PG_RETURN_TEXT_P(cstring_to_text(buf)); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery.c index 77947f952ce..e692a0f9d11 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsquery.c @@ -120,7 +120,7 @@ get_modifiers(char *buf, int16 *weight, bool *prefix) return buf; buf++; - while (*buf && pg_mblen(buf) == 1) + while (*buf && pg_mblen_cstr(buf) == 1) { switch (*buf) { @@ -197,7 +197,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance) continue; } - if (!t_isdigit(ptr)) + if (!t_isdigit_cstr(ptr)) return false; errno = 0; @@ -259,12 +259,12 @@ parse_or_operator(TSQueryParserState pstate) return false; /* it shouldn't be a part of any word */ - if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr)) + if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum_cstr(ptr)) return false; for (;;) { - ptr += pg_mblen(ptr); + ptr += pg_mblen_cstr(ptr); if (*ptr == '\0') /* got end of string without operand */ return false; 
@@ -274,7 +274,7 @@ parse_or_operator(TSQueryParserState pstate) * So we still treat OR literal as operation with possibly incorrect * operand and will not search it as lexeme */ - if (!t_isspace(ptr)) + if (!t_isspace_cstr(ptr)) break; } @@ -315,7 +315,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, /* generic syntax error message is fine */ return PT_ERR; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { /* * We rely on the tsvector parser to parse the value for @@ -383,14 +383,14 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, { return (state->count) ? PT_ERR : PT_END; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { return PT_ERR; } break; } - state->buf += pg_mblen(state->buf); + state->buf += pg_mblen_cstr(state->buf); } } @@ -444,7 +444,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->state = WAITOPERAND; continue; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { /* * We rely on the tsvector parser to parse the value for @@ -492,7 +492,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->buf++; continue; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { /* insert implicit AND between operands */ state->state = WAITOPERAND; @@ -502,7 +502,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, break; } - state->buf += pg_mblen(state->buf); + state->buf += pg_mblen_cstr(state->buf); } } @@ -1014,9 +1014,8 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp) *(in->cur) = '\\'; in->cur++; } - COPYCHAR(in->cur, op); - clen = pg_mblen(op); + clen = ts_copychar_cstr(in->cur, op); op += clen; in->cur += clen; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector.c index 85c492d122a..39e16f8a7cd 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector.c @@ -320,9 +320,9 @@ tsvectorout(PG_FUNCTION_ARGS) lenbuf = 0, pp; WordEntry *ptr = ARRPTR(out); - char *curbegin, - *curin, + char *curin, *curout; + const char *curend; lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ; for (i = 0; i < out->size; i++) @@ -335,13 +335,14 @@ tsvectorout(PG_FUNCTION_ARGS) curout = outbuf = (char *) palloc(lenbuf); for (i = 0; i < out->size; i++) { - curbegin = curin = STRPTR(out) + ptr->pos; + curin = STRPTR(out) + ptr->pos; + curend = curin + ptr->len; if (i != 0) *curout++ = ' '; *curout++ = '\''; - while (curin - curbegin < ptr->len) + while (curin < curend) { - int len = pg_mblen(curin); + int len = pg_mblen_range(curin, curend); if (t_iseq(curin, '\'')) *curout++ = '\''; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_op.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_op.c index 4457c5d4f9f..2e89ebac35d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_op.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_op.c @@ -2606,11 +2606,15 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws) if (ws) { char *buf; + const char *end; buf = VARDATA_ANY(ws); - while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws)) + end = buf + VARSIZE_ANY_EXHDR(ws); + while (buf < end) { - if (pg_mblen(buf) == 1) + int len = pg_mblen_range(buf, end); + + if (len == 1) { switch (*buf) { @@ -2634,7 +2638,7 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws) stat->weight |= 0; } } - buf += pg_mblen(buf); + buf += len; } } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_parser.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_parser.c index 
13e075831fe..e4b91f8d3c4 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_parser.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/tsvector_parser.c @@ -206,10 +206,9 @@ gettoken_tsvector(TSVectorParseState state, else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) || (state->is_web && t_iseq(state->prsbuf, '"'))) PRSSYNTAXERROR; - else if (!t_isspace(state->prsbuf)) + else if (!t_isspace_cstr(state->prsbuf)) { - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); statecode = WAITENDWORD; } } @@ -223,8 +222,7 @@ gettoken_tsvector(TSVectorParseState state, else { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); Assert(oldstate != 0); statecode = oldstate; } @@ -236,7 +234,7 @@ gettoken_tsvector(TSVectorParseState state, statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } - else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || + else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0' || (state->oprisdelim && ISOPERATOR(state->prsbuf)) || (state->is_web && t_iseq(state->prsbuf, '"'))) { @@ -259,8 +257,7 @@ gettoken_tsvector(TSVectorParseState state, else { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); } } else if (statecode == WAITENDCMPLX) @@ -279,8 +276,7 @@ gettoken_tsvector(TSVectorParseState state, else { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); } } else if (statecode == WAITCHARCMPLX) @@ -288,8 +284,7 @@ gettoken_tsvector(TSVectorParseState state, if (!state->is_web && t_iseq(state->prsbuf, '\'')) { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, 
state->prsbuf); statecode = WAITENDCMPLX; } else @@ -300,7 +295,7 @@ gettoken_tsvector(TSVectorParseState state, PRSSYNTAXERROR; if (state->oprisdelim) { - /* state->prsbuf+=pg_mblen(state->prsbuf); */ + /* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */ RETURN_TOKEN; } else @@ -317,7 +312,7 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == INPOSINFO) { - if (t_isdigit(state->prsbuf)) + if (t_isdigit_cstr(state->prsbuf)) { if (posalen == 0) { @@ -372,10 +367,10 @@ gettoken_tsvector(TSVectorParseState state, PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 0); } - else if (t_isspace(state->prsbuf) || + else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0') RETURN_TOKEN; - else if (!t_isdigit(state->prsbuf)) + else if (!t_isdigit_cstr(state->prsbuf)) PRSSYNTAXERROR; } else /* internal error */ @@ -383,6 +378,6 @@ gettoken_tsvector(TSVectorParseState state, statecode); /* get next char */ - state->prsbuf += pg_mblen(state->prsbuf); + state->prsbuf += pg_mblen_cstr(state->prsbuf); } } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varbit.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varbit.c index 3dbbd1207f9..cbf4d70fd8a 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varbit.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varbit.c @@ -232,7 +232,7 @@ bit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("\"%.*s\" is not a valid binary digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); x >>= 1; if (x == 0) @@ -257,7 +257,7 @@ bit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("\"%.*s\" is not a valid hexadecimal digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); if (bc) { @@ -533,7 +533,7 @@ varbit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 
errmsg("\"%.*s\" is not a valid binary digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); x >>= 1; if (x == 0) @@ -558,7 +558,7 @@ varbit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("\"%.*s\" is not a valid hexadecimal digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); if (bc) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varlena.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varlena.c index 06cc9fdd41a..ac18d178234 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varlena.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/varlena.c @@ -128,6 +128,7 @@ static text *text_substring(Datum str, int32 start, int32 length, bool length_not_specified); +static int pg_mbcharcliplen_chars(const char *mbstr, int len, int limit); static text *text_overlay(text *t1, text *t2, int sp, int sl); static int text_position(text *t1, text *t2, Oid collid); static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state); @@ -797,8 +798,11 @@ text_catenate(text *t1, text *t2) * charlen_to_bytelen() * Compute the number of bytes occupied by n characters starting at *p * - * It is caller's responsibility that there actually are n characters; - * the string need not be null-terminated. + * The caller shall ensure there are n complete characters. Callers achieve + * this by deriving "n" from regmatch_t findings from searching a wchar array. + * pg_mb2wchar_with_len() skips any trailing incomplete character, so regex + * matches will end no later than the last complete character. (The string + * need not be null-terminated.) 
*/ static int charlen_to_bytelen(const char *p, int n) @@ -813,7 +817,7 @@ charlen_to_bytelen(const char *p, int n) const char *s; for (s = p; n > 0; n--) - s += pg_mblen(s); + s += pg_mblen_unbounded(s); /* caller verified encoding */ return s - p; } @@ -886,7 +890,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) int32 S = start; /* start position */ int32 S1; /* adjusted start position */ int32 L1; /* adjusted substring length */ - int32 E; /* end position */ + int32 E; /* end position, exclusive */ /* * SQL99 says S can be zero or negative, but we still must fetch from the @@ -946,6 +950,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) int32 slice_start; int32 slice_size; int32 slice_strlen; + int32 slice_len; text *slice; int32 E1; int32 i; @@ -962,14 +967,14 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) if (length_not_specified) /* special case - get length to end of * string */ - slice_size = L1 = -1; + E = slice_size = L1 = -1; else if (length < 0) { /* SQL99 says to throw an error for E < S, i.e., negative length */ ereport(ERROR, (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("negative substring length not allowed"))); - slice_size = L1 = -1; /* silence stupider compilers */ + E = slice_size = L1 = -1; /* silence stupider compilers */ } else if (pg_add_s32_overflow(S, length, &E)) { @@ -982,11 +987,11 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) else { /* - * A zero or negative value for the end position can happen if the - * start was negative or one. SQL99 says to return a zero-length - * string. + * Ending at position 1, exclusive, obviously yields an empty + * string. A zero or negative value can happen if the start was + * negative or one. SQL99 says to return a zero-length string. 
*/ - if (E < 1) + if (E <= 1) return cstring_to_text(""); /* @@ -996,11 +1001,11 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) L1 = E - S1; /* - * Total slice size in bytes can't be any longer than the start - * position plus substring length times the encoding max length. - * If that overflows, we can just use -1. + * Total slice size in bytes can't be any longer than the + * inclusive end position times the encoding max length. If that + * overflows, we can just use -1. */ - if (pg_mul_s32_overflow(E, eml, &slice_size)) + if (pg_mul_s32_overflow(E - 1, eml, &slice_size)) slice_size = -1; } @@ -1015,16 +1020,25 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) slice = (text *) DatumGetPointer(str); /* see if we got back an empty string */ - if (VARSIZE_ANY_EXHDR(slice) == 0) + slice_len = VARSIZE_ANY_EXHDR(slice); + if (slice_len == 0) { if (slice != (text *) DatumGetPointer(str)) pfree(slice); return cstring_to_text(""); } - /* Now we can get the actual length of the slice in MB characters */ - slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice), - VARSIZE_ANY_EXHDR(slice)); + /* + * Now we can get the actual length of the slice in MB characters, + * stopping at the end of the substring. Continuing beyond the + * substring end could find an incomplete character attributable + * solely to DatumGetTextPSlice() chopping in the middle of a + * character, and it would be superfluous work at best. + */ + slice_strlen = + (slice_size == -1 ? + pg_mbstrlen_with_len(VARDATA_ANY(slice), slice_len) : + pg_mbcharcliplen_chars(VARDATA_ANY(slice), slice_len, E - 1)); /* * Check that the start position wasn't > slice_strlen. 
If so, SQL99 @@ -1051,7 +1065,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) */ p = VARDATA_ANY(slice); for (i = 0; i < S1 - 1; i++) - p += pg_mblen(p); + p += pg_mblen_unbounded(p); /* hang onto a pointer to our start position */ s = p; @@ -1061,7 +1075,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) * length. */ for (i = S1; i < E1; i++) - p += pg_mblen(p); + p += pg_mblen_unbounded(p); ret = (text *) palloc(VARHDRSZ + (p - s)); SET_VARSIZE(ret, VARHDRSZ + (p - s)); @@ -1080,6 +1094,35 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) } /* + * pg_mbcharcliplen_chars - + * Mirror pg_mbcharcliplen(), except return value unit is chars, not bytes. + * + * This mirrors all the dubious historical behavior, so it's static to + * discourage proliferation. The assertions are specific to the one caller. + */ +static int +pg_mbcharcliplen_chars(const char *mbstr, int len, int limit) +{ + int nch = 0; + int l; + + Assert(len > 0); + Assert(limit > 0); + Assert(pg_database_encoding_max_length() > 1); + + while (len > 0 && *mbstr) + { + l = pg_mblen_with_len(mbstr, len); + nch++; + if (nch == limit) + break; + len -= l; + mbstr += l; + } + return nch; +} + +/* * textoverlay * Replace specified substring of first string with second * @@ -1359,6 +1402,8 @@ retry: */ if (state->is_multibyte_char_in_char) { + const char *haystack_end = state->str1 + state->len1; + /* Walk one character at a time, until we reach the match. */ /* the search should never move backwards. */ @@ -1367,7 +1412,7 @@ retry: while (state->refpoint < matchptr) { /* step to next character. 
*/ - state->refpoint += pg_mblen(state->refpoint); + state->refpoint += pg_mblen_range(state->refpoint, haystack_end); state->refpos++; /* @@ -4682,6 +4727,8 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate) } else { + const char *end_ptr; + /* * When fldsep is NULL, each character in the input string becomes a * separate element in the result set. The separator is effectively @@ -4690,10 +4737,11 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate) inputstring_len = VARSIZE_ANY_EXHDR(inputstring); start_ptr = VARDATA_ANY(inputstring); + end_ptr = start_ptr + inputstring_len; while (inputstring_len > 0) { - int chunk_len = pg_mblen(start_ptr); + int chunk_len = pg_mblen_range(start_ptr, end_ptr); CHECK_FOR_INTERRUPTS(); @@ -5524,7 +5572,7 @@ text_reverse(PG_FUNCTION_ARGS) { int sz; - sz = pg_mblen(p); + sz = pg_mblen_range(p, endp); dst -= sz; memcpy(dst, p, sz); p += sz; @@ -5685,7 +5733,7 @@ text_format(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized format() type specifier \"%.*s\"", - pg_mblen(cp), cp), + pg_mblen_range(cp, end_ptr), cp), errhint("For a single \"%%\" use \"%%%%\"."))); /* If indirect width was specified, get its value */ @@ -5806,7 +5854,7 @@ text_format(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized format() type specifier \"%.*s\"", - pg_mblen(cp), cp), + pg_mblen_range(cp, end_ptr), cp), errhint("For a single \"%%\" use \"%%%%\"."))); break; } @@ -6211,18 +6259,18 @@ unicode_normalize_func(PG_FUNCTION_ARGS) text *input = PG_GETARG_TEXT_PP(0); char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; - int size; + size_t size; pg_wchar *input_chars; pg_wchar *output_chars; unsigned char *p; text *result; - int i; + size_t i; form = unicode_norm_form_from_string(formstr); /* convert to pg_wchar */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = 
palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc_array(pg_wchar, size + 1); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { @@ -6277,20 +6325,20 @@ unicode_is_normalized(PG_FUNCTION_ARGS) text *input = PG_GETARG_TEXT_PP(0); char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; - int size; + size_t size; pg_wchar *input_chars; pg_wchar *output_chars; unsigned char *p; - int i; + size_t i; UnicodeNormalizationQC quickcheck; - int output_size; + size_t output_size; bool result; form = unicode_norm_form_from_string(formstr); /* convert to pg_wchar */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc_array(pg_wchar, size + 1); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid8funcs.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid8funcs.c index 6fbfb3a1cc2..09500e636cc 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid8funcs.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xid8funcs.c @@ -232,7 +232,7 @@ is_visible_fxid(FullTransactionId value, const pg_snapshot *snap) #ifdef USE_BSEARCH_IF_NXIP_GREATER else if (snap->nxip > USE_BSEARCH_IF_NXIP_GREATER) { - void *res; + const void *res; res = bsearch(&value, snap->xip, snap->nxip, sizeof(FullTransactionId), cmp_fxid); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xml.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xml.c index 873886adc51..92b729ceabe 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xml.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/adt/xml.c @@ -2319,8 +2319,7 @@ sqlchar_to_unicode(const char *s) char *utf8string; pg_wchar ret[2]; /* need space for 
trailing zero */ - /* note we're not assuming s is null-terminated */ - utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8); + utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8); pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_encoding_mblen(PG_UTF8, utf8string)); @@ -2373,7 +2372,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, initStringInfo(&buf); - for (p = ident; *p; p += pg_mblen(p)) + for (p = ident; *p; p += pg_mblen_cstr(p)) { if (*p == ':' && (p == ident || fully_escaped)) appendStringInfoString(&buf, "_x003A_"); @@ -2398,7 +2397,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, : !is_valid_xml_namechar(u)) appendStringInfo(&buf, "_x%04X_", (unsigned int) u); else - appendBinaryStringInfo(&buf, p, pg_mblen(p)); + appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p)); } } @@ -2421,7 +2420,7 @@ map_xml_name_to_sql_identifier(const char *name) initStringInfo(&buf); - for (p = name; *p; p += pg_mblen(p)) + for (p = name; *p; p += pg_mblen_cstr(p)) { if (*p == '_' && *(p + 1) == 'x' && isxdigit((unsigned char) *(p + 2)) @@ -2439,7 +2438,7 @@ map_xml_name_to_sql_identifier(const char *name) p += 6; } else - appendBinaryStringInfo(&buf, p, pg_mblen(p)); + appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p)); } return buf.data; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/catcache.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/catcache.c index 549454934bc..ec2b7a420fc 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/catcache.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/catcache.c @@ -1000,11 +1000,40 @@ RehashCatCacheLists(CatCache *cp) } /* + * ConditionalCatalogCacheInitializeCache + * + * Call CatalogCacheInitializeCache() if not yet done. 
+ */ +pg_attribute_always_inline +static void +ConditionalCatalogCacheInitializeCache(CatCache *cache) +{ +#ifdef USE_ASSERT_CHECKING + /* + * TypeCacheRelCallback() runs outside transactions and relies on TYPEOID + * for hashing. This isn't ideal. Since lookup_type_cache() both + * registers the callback and searches TYPEOID, reaching trouble likely + * requires OOM at an unlucky moment. + * + * InvalidateAttoptCacheCallback() runs outside transactions and likewise + * relies on ATTNUM. InitPostgres() initializes ATTNUM, so it's reliable. + */ + if (!(cache->id == TYPEOID || cache->id == ATTNUM) || + IsTransactionState()) + AssertCouldGetRelation(); + else + Assert(cache->cc_tupdesc != NULL); +#endif + + if (unlikely(cache->cc_tupdesc == NULL)) + CatalogCacheInitializeCache(cache); +} + +/* * CatalogCacheInitializeCache * * This function does final initialization of a catcache: obtain the tuple - * descriptor and set up the hash and equality function links. We assume - * that the relcache entry can be opened at this point! + * descriptor and set up the hash and equality function links. 
*/ #ifdef CACHEDEBUG #define CatalogCacheInitializeCache_DEBUG1 \ @@ -1139,8 +1168,7 @@ CatalogCacheInitializeCache(CatCache *cache) void InitCatCachePhase2(CatCache *cache, bool touch_index) { - if (cache->cc_tupdesc == NULL) - CatalogCacheInitializeCache(cache); + ConditionalCatalogCacheInitializeCache(cache); if (touch_index && cache->id != AMOID && @@ -1319,16 +1347,12 @@ SearchCatCacheInternal(CatCache *cache, dlist_head *bucket; CatCTup *ct; - /* Make sure we're in an xact, even if this ends up being a cache hit */ - Assert(IsTransactionState()); - Assert(cache->cc_nkeys == nkeys); /* * one-time startup overhead for each cache */ - if (unlikely(cache->cc_tupdesc == NULL)) - CatalogCacheInitializeCache(cache); + ConditionalCatalogCacheInitializeCache(cache); #ifdef CATCACHE_STATS cache->cc_searches++; @@ -1607,8 +1631,7 @@ GetCatCacheHashValue(CatCache *cache, /* * one-time startup overhead for each cache */ - if (cache->cc_tupdesc == NULL) - CatalogCacheInitializeCache(cache); + ConditionalCatalogCacheInitializeCache(cache); /* * calculate the hash value @@ -1659,8 +1682,7 @@ SearchCatCacheList(CatCache *cache, /* * one-time startup overhead for each cache */ - if (unlikely(cache->cc_tupdesc == NULL)) - CatalogCacheInitializeCache(cache); + ConditionalCatalogCacheInitializeCache(cache); Assert(nkeys > 0 && nkeys < cache->cc_nkeys); @@ -2279,7 +2301,8 @@ void PrepareToInvalidateCacheTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple, - void (*function) (int, uint32, Oid)) + void (*function) (int, uint32, Oid, void *), + void *context) { slist_iter iter; Oid reloid; @@ -2314,13 +2337,12 @@ PrepareToInvalidateCacheTuple(Relation relation, continue; /* Just in case cache hasn't finished initialization yet... */ - if (ccp->cc_tupdesc == NULL) - CatalogCacheInitializeCache(ccp); + ConditionalCatalogCacheInitializeCache(ccp); hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple); dbid = ccp->cc_relisshared ? 
(Oid) 0 : MyDatabaseId; - (*function) (ccp->id, hashvalue, dbid); + (*function) (ccp->id, hashvalue, dbid, context); if (newtuple) { @@ -2329,7 +2351,7 @@ PrepareToInvalidateCacheTuple(Relation relation, newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple); if (newhashvalue != hashvalue) - (*function) (ccp->id, newhashvalue, dbid); + (*function) (ccp->id, newhashvalue, dbid, context); } } } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/inval.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/inval.c index bc35da0b387..7e968ec6048 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/inval.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/inval.c @@ -94,6 +94,10 @@ * worth trying to avoid sending such inval traffic in the future, if those * problems can be overcome cheaply. * + * When making a nontransactional change to a cacheable object, we must + * likewise send the invalidation immediately, before ending the change's + * critical section. This includes inplace heap updates, relmap, and smgr. + * * When wal_level=logical, write invalidations into WAL at each command end to * support the decoding of the in-progress transactions. See * CommandEndInvalidationMessages. @@ -131,13 +135,15 @@ /* * Pending requests are stored as ready-to-send SharedInvalidationMessages. - * We keep the messages themselves in arrays in TopTransactionContext - * (there are separate arrays for catcache and relcache messages). Control - * information is kept in a chain of TransInvalidationInfo structs, also - * allocated in TopTransactionContext. (We could keep a subtransaction's - * TransInvalidationInfo in its CurTransactionContext; but that's more - * wasteful not less so, since in very many scenarios it'd be the only - * allocation in the subtransaction's CurTransactionContext.) 
+ * We keep the messages themselves in arrays in TopTransactionContext (there + * are separate arrays for catcache and relcache messages). For transactional + * messages, control information is kept in a chain of TransInvalidationInfo + * structs, also allocated in TopTransactionContext. (We could keep a + * subtransaction's TransInvalidationInfo in its CurTransactionContext; but + * that's more wasteful not less so, since in very many scenarios it'd be the + * only allocation in the subtransaction's CurTransactionContext.) For + * inplace update messages, control information appears in an + * InvalidationInfo, allocated in CurrentMemoryContext. * * We can store the message arrays densely, and yet avoid moving data around * within an array, because within any one subtransaction we need only @@ -148,7 +154,9 @@ * struct. Similarly, we need distinguish messages of prior subtransactions * from those of the current subtransaction only until the subtransaction * completes, after which we adjust the array indexes in the parent's - * TransInvalidationInfo to include the subtransaction's messages. + * TransInvalidationInfo to include the subtransaction's messages. Inplace + * invalidations don't need a concept of command or subtransaction boundaries, + * since we send them during the WAL insertion critical section. * * The ordering of the individual messages within a command's or * subtransaction's output is not considered significant, although this @@ -201,7 +209,7 @@ typedef struct InvalidationMsgsGroup /*---------------- - * Invalidation messages are divided into two groups: + * Transactional invalidation messages are divided into two groups: * 1) events so far in current command, not yet reflected to caches. 
* 2) events in previous commands of current transaction; these have * been reflected to local caches, and must be either broadcast to @@ -217,26 +225,36 @@ typedef struct InvalidationMsgsGroup *---------------- */ -typedef struct TransInvalidationInfo +/* fields common to both transactional and inplace invalidation */ +typedef struct InvalidationInfo { - /* Back link to parent transaction's info */ - struct TransInvalidationInfo *parent; - - /* Subtransaction nesting depth */ - int my_level; - /* Events emitted by current command */ InvalidationMsgsGroup CurrentCmdInvalidMsgs; + /* init file must be invalidated? */ + bool RelcacheInitFileInval; +} InvalidationInfo; + +/* subclass adding fields specific to transactional invalidation */ +typedef struct TransInvalidationInfo +{ + /* Base class */ + struct InvalidationInfo ii; + /* Events emitted by previous commands of this (sub)transaction */ InvalidationMsgsGroup PriorCmdInvalidMsgs; - /* init file must be invalidated? */ - bool RelcacheInitFileInval; + /* Back link to parent transaction's info */ + struct TransInvalidationInfo *parent; + + /* Subtransaction nesting depth */ + int my_level; } TransInvalidationInfo; static __thread TransInvalidationInfo *transInvalInfo = NULL; +static __thread InvalidationInfo *inplaceInvalInfo = NULL; + /* GUC storage */ __thread int debug_discard_caches = 0; @@ -544,9 +562,12 @@ ProcessInvalidationMessagesMulti(InvalidationMsgsGroup *group, static void RegisterCatcacheInvalidation(int cacheId, uint32 hashValue, - Oid dbId) + Oid dbId, + void *context) { - AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, + InvalidationInfo *info = (InvalidationInfo *) context; + + AddCatcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs, cacheId, hashValue, dbId); } @@ -556,10 +577,9 @@ RegisterCatcacheInvalidation(int cacheId, * Register an invalidation event for all catcache entries from a catalog. 
*/ static void -RegisterCatalogInvalidation(Oid dbId, Oid catId) +RegisterCatalogInvalidation(InvalidationInfo *info, Oid dbId, Oid catId) { - AddCatalogInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, - dbId, catId); + AddCatalogInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, catId); } /* @@ -568,10 +588,9 @@ RegisterCatalogInvalidation(Oid dbId, Oid catId) * As above, but register a relcache invalidation event. */ static void -RegisterRelcacheInvalidation(Oid dbId, Oid relId) +RegisterRelcacheInvalidation(InvalidationInfo *info, Oid dbId, Oid relId) { - AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, - dbId, relId); + AddRelcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId); /* * Most of the time, relcache invalidation is associated with system @@ -588,7 +607,7 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId) * as well. Also zap when we are invalidating whole relcache. */ if (relId == InvalidOid || RelationIdIsInInitFile(relId)) - transInvalInfo->RelcacheInitFileInval = true; + info->RelcacheInitFileInval = true; } /* @@ -598,10 +617,140 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId) * Only needed for catalogs that don't have catcaches. */ static void -RegisterSnapshotInvalidation(Oid dbId, Oid relId) +RegisterSnapshotInvalidation(InvalidationInfo *info, Oid dbId, Oid relId) +{ + AddSnapshotInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId); +} + +/* + * PrepareInvalidationState + * Initialize inval data for the current (sub)transaction. + */ +static InvalidationInfo * +PrepareInvalidationState(void) +{ + TransInvalidationInfo *myInfo; + + /* PrepareToInvalidateCacheTuple() needs relcache */ + AssertCouldGetRelation(); + /* Can't queue transactional message while collecting inplace messages. 
*/ + Assert(inplaceInvalInfo == NULL); + + if (transInvalInfo != NULL && + transInvalInfo->my_level == GetCurrentTransactionNestLevel()) + return (InvalidationInfo *) transInvalInfo; + + myInfo = (TransInvalidationInfo *) + MemoryContextAllocZero(TopTransactionContext, + sizeof(TransInvalidationInfo)); + myInfo->parent = transInvalInfo; + myInfo->my_level = GetCurrentTransactionNestLevel(); + + /* Now, do we have a previous stack entry? */ + if (transInvalInfo != NULL) + { + /* Yes; this one should be for a deeper nesting level. */ + Assert(myInfo->my_level > transInvalInfo->my_level); + + /* + * The parent (sub)transaction must not have any current (i.e., + * not-yet-locally-processed) messages. If it did, we'd have a + * semantic problem: the new subtransaction presumably ought not be + * able to see those events yet, but since the CommandCounter is + * linear, that can't work once the subtransaction advances the + * counter. This is a convenient place to check for that, as well as + * being important to keep management of the message arrays simple. + */ + if (NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs) != 0) + elog(ERROR, "cannot start a subtransaction when there are unprocessed inval messages"); + + /* + * MemoryContextAllocZero set firstmsg = nextmsg = 0 in each group, + * which is fine for the first (sub)transaction, but otherwise we need + * to update them to follow whatever is already in the arrays. + */ + SetGroupToFollow(&myInfo->PriorCmdInvalidMsgs, + &transInvalInfo->ii.CurrentCmdInvalidMsgs); + SetGroupToFollow(&myInfo->ii.CurrentCmdInvalidMsgs, + &myInfo->PriorCmdInvalidMsgs); + } + else + { + /* + * Here, we need only clear any array pointers left over from a prior + * transaction. 
+ */ + InvalMessageArrays[CatCacheMsgs].msgs = NULL; + InvalMessageArrays[CatCacheMsgs].maxmsgs = 0; + InvalMessageArrays[RelCacheMsgs].msgs = NULL; + InvalMessageArrays[RelCacheMsgs].maxmsgs = 0; + } + + transInvalInfo = myInfo; + return (InvalidationInfo *) myInfo; +} + +/* + * PrepareInplaceInvalidationState + * Initialize inval data for an inplace update. + * + * See previous function for more background. + */ +static InvalidationInfo * +PrepareInplaceInvalidationState(void) { - AddSnapshotInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, - dbId, relId); + InvalidationInfo *myInfo; + + AssertCouldGetRelation(); + /* limit of one inplace update under assembly */ + Assert(inplaceInvalInfo == NULL); + + /* gone after WAL insertion CritSection ends, so use current context */ + myInfo = (InvalidationInfo *) palloc0(sizeof(InvalidationInfo)); + + /* Stash our messages past end of the transactional messages, if any. */ + if (transInvalInfo != NULL) + SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs, + &transInvalInfo->ii.CurrentCmdInvalidMsgs); + else + { + InvalMessageArrays[CatCacheMsgs].msgs = NULL; + InvalMessageArrays[CatCacheMsgs].maxmsgs = 0; + InvalMessageArrays[RelCacheMsgs].msgs = NULL; + InvalMessageArrays[RelCacheMsgs].maxmsgs = 0; + } + + inplaceInvalInfo = myInfo; + return myInfo; +} + +/* ---------------------------------------------------------------- + * public functions + * ---------------------------------------------------------------- + */ + +void +InvalidateSystemCachesExtended(bool debug_discard) +{ + int i; + + InvalidateCatalogSnapshot(); + ResetCatalogCachesExt(debug_discard); + RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */ + + for (i = 0; i < syscache_callback_count; i++) + { + struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i; + + ccitem->function(ccitem->arg, ccitem->id, 0); + } + + for (i = 0; i < relcache_callback_count; i++) + { + struct RELCACHECALLBACK *ccitem = relcache_callback_list + i; 
+ + ccitem->function(ccitem->arg, InvalidOid); + } } /* @@ -704,36 +853,6 @@ InvalidateSystemCaches(void) InvalidateSystemCachesExtended(false); } -void -InvalidateSystemCachesExtended(bool debug_discard) -{ - int i; - - InvalidateCatalogSnapshot(); - ResetCatalogCachesExt(debug_discard); - RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */ - - for (i = 0; i < syscache_callback_count; i++) - { - struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i; - - ccitem->function(ccitem->arg, ccitem->id, 0); - } - - for (i = 0; i < relcache_callback_count; i++) - { - struct RELCACHECALLBACK *ccitem = relcache_callback_list + i; - - ccitem->function(ccitem->arg, InvalidOid); - } -} - - -/* ---------------------------------------------------------------- - * public functions - * ---------------------------------------------------------------- - */ - /* * AcceptInvalidationMessages * Read and process invalidation messages from the shared invalidation @@ -745,6 +864,12 @@ InvalidateSystemCachesExtended(bool debug_discard) void AcceptInvalidationMessages_original(void) { +#ifdef USE_ASSERT_CHECKING + /* message handlers shall access catalogs only during transactions */ + if (IsTransactionState()) + AssertCouldGetRelation(); +#endif + ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage, InvalidateSystemCaches); @@ -788,68 +913,6 @@ AcceptInvalidationMessages_original(void) } /* - * PrepareInvalidationState - * Initialize inval data for the current (sub)transaction. - */ -static void -PrepareInvalidationState(void) -{ - TransInvalidationInfo *myInfo; - - if (transInvalInfo != NULL && - transInvalInfo->my_level == GetCurrentTransactionNestLevel()) - return; - - myInfo = (TransInvalidationInfo *) - MemoryContextAllocZero(TopTransactionContext, - sizeof(TransInvalidationInfo)); - myInfo->parent = transInvalInfo; - myInfo->my_level = GetCurrentTransactionNestLevel(); - - /* Now, do we have a previous stack entry? 
*/ - if (transInvalInfo != NULL) - { - /* Yes; this one should be for a deeper nesting level. */ - Assert(myInfo->my_level > transInvalInfo->my_level); - - /* - * The parent (sub)transaction must not have any current (i.e., - * not-yet-locally-processed) messages. If it did, we'd have a - * semantic problem: the new subtransaction presumably ought not be - * able to see those events yet, but since the CommandCounter is - * linear, that can't work once the subtransaction advances the - * counter. This is a convenient place to check for that, as well as - * being important to keep management of the message arrays simple. - */ - if (NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs) != 0) - elog(ERROR, "cannot start a subtransaction when there are unprocessed inval messages"); - - /* - * MemoryContextAllocZero set firstmsg = nextmsg = 0 in each group, - * which is fine for the first (sub)transaction, but otherwise we need - * to update them to follow whatever is already in the arrays. - */ - SetGroupToFollow(&myInfo->PriorCmdInvalidMsgs, - &transInvalInfo->CurrentCmdInvalidMsgs); - SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs, - &myInfo->PriorCmdInvalidMsgs); - } - else - { - /* - * Here, we need only clear any array pointers left over from a prior - * transaction. - */ - InvalMessageArrays[CatCacheMsgs].msgs = NULL; - InvalMessageArrays[CatCacheMsgs].maxmsgs = 0; - InvalMessageArrays[RelCacheMsgs].msgs = NULL; - InvalMessageArrays[RelCacheMsgs].maxmsgs = 0; - } - - transInvalInfo = myInfo; -} - -/* * PostPrepare_Inval * Clean up after successful PREPARE. * @@ -904,7 +967,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, * after we send the SI messages. However, we need not do anything unless * we committed. 
*/ - *RelcacheInitFileInval = transInvalInfo->RelcacheInitFileInval; + *RelcacheInitFileInval = transInvalInfo->ii.RelcacheInitFileInval; /* * Collect all the pending messages into a single contiguous array of @@ -915,7 +978,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, * not new ones. */ nummsgs = NumMessagesInGroup(&transInvalInfo->PriorCmdInvalidMsgs) + - NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs); + NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs); *msgs = msgarray = (SharedInvalidationMessage *) MemoryContextAlloc(CurTransactionContext, @@ -928,7 +991,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, msgs, n * sizeof(SharedInvalidationMessage)), nmsgs += n)); - ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs, + ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs, CatCacheMsgs, (memcpy(msgarray + nmsgs, msgs, @@ -940,7 +1003,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, msgs, n * sizeof(SharedInvalidationMessage)), nmsgs += n)); - ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs, + ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs, RelCacheMsgs, (memcpy(msgarray + nmsgs, msgs, @@ -1027,7 +1090,9 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, void AtEOXact_Inval(bool isCommit) { - /* Quick exit if no messages */ + inplaceInvalInfo = NULL; + + /* Quick exit if no transactional messages */ if (transInvalInfo == NULL) return; @@ -1041,16 +1106,16 @@ AtEOXact_Inval(bool isCommit) * after we send the SI messages. However, we need not do anything * unless we committed. 
*/ - if (transInvalInfo->RelcacheInitFileInval) + if (transInvalInfo->ii.RelcacheInitFileInval) RelationCacheInitFilePreInvalidate(); AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, - &transInvalInfo->CurrentCmdInvalidMsgs); + &transInvalInfo->ii.CurrentCmdInvalidMsgs); ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs, SendSharedInvalidMessages); - if (transInvalInfo->RelcacheInitFileInval) + if (transInvalInfo->ii.RelcacheInitFileInval) RelationCacheInitFilePostInvalidate(); } else @@ -1064,6 +1129,56 @@ AtEOXact_Inval(bool isCommit) } /* + * PreInplace_Inval + * Process queued-up invalidation before inplace update critical section. + * + * Tasks belong here if they are safe even if the inplace update does not + * complete. Currently, this just unlinks a cache file, which can fail. The + * sum of this and AtInplace_Inval() mirrors AtEOXact_Inval(isCommit=true). + */ +void +PreInplace_Inval(void) +{ + Assert(CritSectionCount == 0); + + if (inplaceInvalInfo && inplaceInvalInfo->RelcacheInitFileInval) + RelationCacheInitFilePreInvalidate(); +} + +/* + * AtInplace_Inval + * Process queued-up invalidations after inplace update buffer mutation. + */ +void +AtInplace_Inval(void) +{ + Assert(CritSectionCount > 0); + + if (inplaceInvalInfo == NULL) + return; + + ProcessInvalidationMessagesMulti(&inplaceInvalInfo->CurrentCmdInvalidMsgs, + SendSharedInvalidMessages); + + if (inplaceInvalInfo->RelcacheInitFileInval) + RelationCacheInitFilePostInvalidate(); + + inplaceInvalInfo = NULL; +} + +/* + * ForgetInplace_Inval + * Alternative to PreInplace_Inval()+AtInplace_Inval(): discard queued-up + * invalidations. This lets inplace update enumerate invalidations + * optimistically, before locking the buffer. + */ +void +ForgetInplace_Inval(void) +{ + inplaceInvalInfo = NULL; +} + +/* * AtEOSubXact_Inval * Process queued-up invalidation messages at end of subtransaction. 
* @@ -1085,9 +1200,20 @@ void AtEOSubXact_Inval(bool isCommit) { int my_level; - TransInvalidationInfo *myInfo = transInvalInfo; + TransInvalidationInfo *myInfo; + + /* + * Successful inplace update must clear this, but we clear it on abort. + * Inplace updates allocate this in CurrentMemoryContext, which has + * lifespan <= subtransaction lifespan. Hence, don't free it explicitly. + */ + if (isCommit) + Assert(inplaceInvalInfo == NULL); + else + inplaceInvalInfo = NULL; - /* Quick exit if no messages. */ + /* Quick exit if no transactional messages. */ + myInfo = transInvalInfo; if (myInfo == NULL) return; @@ -1128,12 +1254,12 @@ AtEOSubXact_Inval(bool isCommit) &myInfo->PriorCmdInvalidMsgs); /* Must readjust parent's CurrentCmdInvalidMsgs indexes now */ - SetGroupToFollow(&myInfo->parent->CurrentCmdInvalidMsgs, + SetGroupToFollow(&myInfo->parent->ii.CurrentCmdInvalidMsgs, &myInfo->parent->PriorCmdInvalidMsgs); /* Pending relcache inval becomes parent's problem too */ - if (myInfo->RelcacheInitFileInval) - myInfo->parent->RelcacheInitFileInval = true; + if (myInfo->ii.RelcacheInitFileInval) + myInfo->parent->ii.RelcacheInitFileInval = true; /* Pop the transaction state stack */ transInvalInfo = myInfo->parent; @@ -1180,7 +1306,7 @@ CommandEndInvalidationMessages(void) if (transInvalInfo == NULL) return; - ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs, + ProcessInvalidationMessages(&transInvalInfo->ii.CurrentCmdInvalidMsgs, LocalExecuteInvalidationMessage); /* WAL Log per-command invalidation messages for wal_level=logical */ @@ -1188,30 +1314,28 @@ CommandEndInvalidationMessages(void) LogLogicalInvalidations(); AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, - &transInvalInfo->CurrentCmdInvalidMsgs); + &transInvalInfo->ii.CurrentCmdInvalidMsgs); } /* - * CacheInvalidateHeapTuple - * Register the given tuple for invalidation at end of command - * (ie, current command is creating or outdating this tuple). 
- * Also, detect whether a relcache invalidation is implied. - * - * For an insert or delete, tuple is the target tuple and newtuple is NULL. - * For an update, we are called just once, with tuple being the old tuple - * version and newtuple the new version. This allows avoidance of duplicate - * effort during an update. + * CacheInvalidateHeapTupleCommon + * Common logic for end-of-command and inplace variants. */ -void -CacheInvalidateHeapTuple(Relation relation, - HeapTuple tuple, - HeapTuple newtuple) +static void +CacheInvalidateHeapTupleCommon(Relation relation, + HeapTuple tuple, + HeapTuple newtuple, + InvalidationInfo *(*prepare_callback) (void)) { + InvalidationInfo *info; Oid tupleRelId; Oid databaseId; Oid relationId; + /* PrepareToInvalidateCacheTuple() needs relcache */ + AssertCouldGetRelation(); + /* Do nothing during bootstrap */ if (IsBootstrapProcessingMode()) return; @@ -1231,11 +1355,8 @@ CacheInvalidateHeapTuple(Relation relation, if (IsToastRelation(relation)) return; - /* - * If we're not prepared to queue invalidation messages for this - * subtransaction level, get ready now. - */ - PrepareInvalidationState(); + /* Allocate any required resources. */ + info = prepare_callback(); /* * First let the catcache do its thing @@ -1244,11 +1365,12 @@ CacheInvalidateHeapTuple(Relation relation, if (RelationInvalidatesSnapshotsOnly(tupleRelId)) { databaseId = IsSharedRelation(tupleRelId) ? InvalidOid : MyDatabaseId; - RegisterSnapshotInvalidation(databaseId, tupleRelId); + RegisterSnapshotInvalidation(info, databaseId, tupleRelId); } else PrepareToInvalidateCacheTuple(relation, tuple, newtuple, - RegisterCatcacheInvalidation); + RegisterCatcacheInvalidation, + (void *) info); /* * Now, is this tuple one of the primary definers of a relcache entry? See @@ -1321,7 +1443,48 @@ CacheInvalidateHeapTuple(Relation relation, /* * Yes. We need to register a relcache invalidation event. 
*/ - RegisterRelcacheInvalidation(databaseId, relationId); + RegisterRelcacheInvalidation(info, databaseId, relationId); +} + +/* + * CacheInvalidateHeapTuple + * Register the given tuple for invalidation at end of command + * (ie, current command is creating or outdating this tuple) and end of + * transaction. Also, detect whether a relcache invalidation is implied. + * + * For an insert or delete, tuple is the target tuple and newtuple is NULL. + * For an update, we are called just once, with tuple being the old tuple + * version and newtuple the new version. This allows avoidance of duplicate + * effort during an update. + */ +void +CacheInvalidateHeapTuple(Relation relation, + HeapTuple tuple, + HeapTuple newtuple) +{ + CacheInvalidateHeapTupleCommon(relation, tuple, newtuple, + PrepareInvalidationState); +} + +/* + * CacheInvalidateHeapTupleInplace + * Register the given tuple for nontransactional invalidation pertaining + * to an inplace update. Also, detect whether a relcache invalidation is + * implied. + * + * Like CacheInvalidateHeapTuple(), but for inplace updates. + * + * Just before and just after the inplace update, the tuple's cache keys must + * match those in key_equivalent_tuple. Cache keys consist of catcache lookup + * key columns and columns referencing pg_class.oid values, + * e.g. pg_constraint.conrelid, which would trigger relcache inval. 
+ */ +void +CacheInvalidateHeapTupleInplace(Relation relation, + HeapTuple key_equivalent_tuple) +{ + CacheInvalidateHeapTupleCommon(relation, key_equivalent_tuple, NULL, + PrepareInplaceInvalidationState); } /* @@ -1340,14 +1503,13 @@ CacheInvalidateCatalog(Oid catalogId) { Oid databaseId; - PrepareInvalidationState(); - if (IsSharedRelation(catalogId)) databaseId = InvalidOid; else databaseId = MyDatabaseId; - RegisterCatalogInvalidation(databaseId, catalogId); + RegisterCatalogInvalidation(PrepareInvalidationState(), + databaseId, catalogId); } /* @@ -1365,15 +1527,14 @@ CacheInvalidateRelcache(Relation relation) Oid databaseId; Oid relationId; - PrepareInvalidationState(); - relationId = RelationGetRelid(relation); if (relation->rd_rel->relisshared) databaseId = InvalidOid; else databaseId = MyDatabaseId; - RegisterRelcacheInvalidation(databaseId, relationId); + RegisterRelcacheInvalidation(PrepareInvalidationState(), + databaseId, relationId); } /* @@ -1386,9 +1547,8 @@ CacheInvalidateRelcache(Relation relation) void CacheInvalidateRelcacheAll(void) { - PrepareInvalidationState(); - - RegisterRelcacheInvalidation(InvalidOid, InvalidOid); + RegisterRelcacheInvalidation(PrepareInvalidationState(), + InvalidOid, InvalidOid); } /* @@ -1402,14 +1562,13 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple) Oid databaseId; Oid relationId; - PrepareInvalidationState(); - relationId = classtup->oid; if (classtup->relisshared) databaseId = InvalidOid; else databaseId = MyDatabaseId; - RegisterRelcacheInvalidation(databaseId, relationId); + RegisterRelcacheInvalidation(PrepareInvalidationState(), + databaseId, relationId); } /* @@ -1423,8 +1582,6 @@ CacheInvalidateRelcacheByRelid(Oid relid) { HeapTuple tup; - PrepareInvalidationState(); - tup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); if (!HeapTupleIsValid(tup)) elog(ERROR, "cache lookup failed for relation %u", relid); @@ -1614,7 +1771,7 @@ LogLogicalInvalidations(void) if (transInvalInfo == NULL) return; - 
group = &transInvalInfo->CurrentCmdInvalidMsgs; + group = &transInvalInfo->ii.CurrentCmdInvalidMsgs; nmsgs = NumMessagesInGroup(group); if (nmsgs > 0) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/lsyscache.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/lsyscache.c index aee07f817c5..2440316d5e7 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/lsyscache.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/lsyscache.c @@ -781,6 +781,44 @@ comparison_ops_are_compatible(Oid opno1, Oid opno2) return result; } +/* + * collations_agree_on_equality + * Return true if the two collations have equivalent notions of equality, + * so that a uniqueness or equality proof established under one side + * carries over to a comparison performed under the other side. + * + * Note: this is equality compatibility only. Do NOT use this to reason + * about ordering. + * + * An InvalidOid on either side denotes the absence of a collation -- that + * side's operation is not collation-sensitive (e.g. a non-collatable column + * type). Absence of a collation cannot conflict with the other side's + * collation, so we treat such pairs as agreeing on equality. This generalizes + * the asymmetric treatment in IndexCollMatchesExprColl(). + * + * Otherwise the collations have equivalent equality if they match, or if both + * are deterministic: by definition a deterministic collation treats two + * strings as equal iff they are byte-wise equal (see CREATE COLLATION), so any + * two deterministic collations share the same equality relation. A mismatch + * involving a nondeterministic collation, however, may mean the two equality + * relations disagree, and the proof is unsound. 
+ */ +bool +collations_agree_on_equality(Oid coll1, Oid coll2) +{ + if (!OidIsValid(coll1) || !OidIsValid(coll2)) + return true; + + if (coll1 == coll2) + return true; + + if (!get_collation_isdeterministic(coll1) || + !get_collation_isdeterministic(coll2)) + return false; + + return true; +} + /* ---------- AMPROC CACHES ---------- */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relcache.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relcache.c index fb3ff45d53b..438e1850369 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relcache.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relcache.c @@ -2028,6 +2028,23 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_isvalid = true; } +#ifdef USE_ASSERT_CHECKING +/* + * AssertCouldGetRelation + * + * Check safety of calling RelationIdGetRelation(). + * + * In code that reads catalogs in the event of a cache miss, call this + * before checking the cache. 
+ */ +void +AssertCouldGetRelation(void) +{ + Assert(IsTransactionState()); + AssertBufferLocksPermitCatalogRead(); +} +#endif + /* ---------------------------------------------------------------- * Relation Descriptor Lookup Interface @@ -2055,8 +2072,7 @@ RelationIdGetRelation(Oid relationId) { Relation rd; - /* Make sure we're in an xact, even if this ends up being a cache hit */ - Assert(IsTransactionState()); + AssertCouldGetRelation(); /* * first try to find reldesc in the cache diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relfilenumbermap.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relfilenumbermap.c index 85f01a18ac9..7cd9631ed30 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relfilenumbermap.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/relfilenumbermap.c @@ -132,6 +132,11 @@ InitializeRelfilenumberMap(void) * Map a relation's (tablespace, relfilenumber) to a relation's oid and cache * the result. * + * A temporary relation may share its relfilenumber with a permanent relation + * or temporary relations created in other backends. Being able to uniquely + * identify a temporary relation would require a backend's proc number, which + * we do not know about. Hence, this function ignores this case. + * * Returns InvalidOid if no relation matching the criteria could be found. 
*/ Oid @@ -211,6 +216,9 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber) { Form_pg_class classform = (Form_pg_class) GETSTRUCT(ntp); + if (classform->relpersistence == RELPERSISTENCE_TEMP) + continue; + if (found) elog(ERROR, "unexpected duplicate for tablespace %u, relfilenumber %u", diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/syscache.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/syscache.c index 74a0d5fc38b..7a92fdc2dec 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/syscache.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/cache/syscache.c @@ -39,6 +39,7 @@ #include "catalog/pg_description.h" #include "catalog/pg_enum.h" #include "catalog/pg_event_trigger.h" +#include "catalog/pg_extension.h" #include "catalog/pg_foreign_data_wrapper.h" #include "catalog/pg_foreign_server.h" #include "catalog/pg_foreign_table.h" @@ -679,6 +680,13 @@ static const struct cachedesc cacheinfo[] = { KEY(Anum_pg_user_mapping_umuser, Anum_pg_user_mapping_umserver), 2 + }, + /* intentionally out of alphabetical order, to avoid an ABI break: */ + [EXTENSIONOID] = { + ExtensionRelationId, + ExtensionOidIndexId, + KEY(Anum_pg_extension_oid), + 2 } }; @@ -956,8 +964,7 @@ SearchSysCacheLocked1(int cacheId, /* * If an inplace update just finished, ensure we process the syscache - * inval. XXX this is insufficient: the inplace updater may not yet - * have reached AtEOXact_Inval(). See test at inplace-inval.spec. + * inval. * * If a heap_update() call just released its LOCKTAG_TUPLE, we'll * probably find the old tuple and reach "tuple concurrently updated". 
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/error/elog.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/error/elog.c index 6de7ac9e941..fe595c94c4e 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/error/elog.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/error/elog.c @@ -3745,13 +3745,24 @@ write_stderr(const char *fmt,...) { va_list ap; + va_start(ap, fmt); + vwrite_stderr(fmt, ap); + va_end(ap); +} + + +/* + * Write errors to stderr (or by equal means when stderr is + * not available) - va_list version + */ +void +vwrite_stderr(const char *fmt, va_list ap) +{ #ifdef WIN32 char errbuf[2048]; /* Arbitrary size? */ #endif fmt = _(fmt); - - va_start(ap, fmt); #ifndef WIN32 /* On Unix, we just fprintf to stderr */ vfprintf(stderr, fmt, ap); @@ -3774,7 +3785,6 @@ write_stderr(const char *fmt,...) fflush(stderr); } #endif - va_end(ap); } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/hash/dynahash.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/hash/dynahash.c index fa5321e6694..0f8f22d0dd7 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/hash/dynahash.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/hash/dynahash.c @@ -758,7 +758,7 @@ init_htab(HTAB *hashp, long nelem) hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize); #ifdef HASH_DEBUG - fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n", + fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%u\n%s%x\n%s%x\n%s%ld\n", "TABLE POINTER ", hashp, "DIRECTORY SIZE ", hctl->dsize, "SEGMENT SIZE ", hctl->ssize, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/init/miscinit.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/init/miscinit.c index bbb48de7c69..e5085d65106 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/init/miscinit.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/init/miscinit.c @@ -1095,7 +1095,8 @@ EstimateClientConnectionInfoSpace(void) * Serialize MyClientConnectionInfo for use by parallel workers. */ void -SerializeClientConnectionInfo(Size maxsize, char *start_address) +SerializeClientConnectionInfo(Size maxsize PG_USED_FOR_ASSERTS_ONLY, + char *start_address) { SerializedClientConnectionInfo serialized = {0}; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/mbutils.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/mbutils.c index 9788a0aae87..376424c11a6 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/mbutils.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mb/mbutils.c @@ -38,7 +38,9 @@ #include "catalog/namespace.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" +#include "utils/memdebug.h" #include "utils/memutils.h" +#include "utils/relcache.h" #include "utils/syscache.h" #include "varatt.h" @@ -97,6 +99,13 @@ static char *perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server); static int cliplen(const char *str, int len, int limit); +pg_attribute_noreturn() +static void report_invalid_encoding_int(int encoding, const char *mbstr, + int mblen, int len); + +pg_attribute_noreturn() +static void report_invalid_encoding_db(const char *mbstr, int mblen, int len); + /* * Prepare for a future call to SetClientEncoding. 
Success should mean @@ -311,7 +320,7 @@ InitializeClientEncoding(void) { Oid utf8_to_server_proc; - Assert(IsTransactionState()); + AssertCouldGetRelation(); utf8_to_server_proc = FindDefaultConversionProc(PG_UTF8, current_server_encoding); @@ -1019,11 +1028,128 @@ pg_encoding_wchar2mb_with_len(int encoding, return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len); } -/* returns the byte length of a multibyte character */ +/* + * Returns the byte length of a multibyte character sequence in a + * null-terminated string. Raises an illegal byte sequence error if the + * sequence would hit a null terminator. + * + * The caller is expected to have checked for a terminator at *mbstr == 0 + * before calling, but some callers want 1 in that case, so this function + * continues that tradition. + * + * This must only be used for strings that have a null-terminator to enable + * bounds detection. + */ +int +pg_mblen_cstr(const char *mbstr) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + /* + * The .mblen functions return 1 when given a pointer to a terminator. + * Some callers depend on that, so we tolerate it for now. Well-behaved + * callers check the leading byte for a terminator *before* calling. + */ + for (int i = 1; i < length; ++i) + if (unlikely(mbstr[i] == 0)) + report_invalid_encoding_db(mbstr, length, i); + + /* + * String should be NUL-terminated, but checking that would make typical + * callers O(N^2), tripling Valgrind check-world time. Unless + * VALGRIND_EXPENSIVE, check 1 byte after each actual character. (If we + * found a character, not a terminator, the next byte must be a terminator + * or the start of the next character.) If the caller iterates the whole + * string, the last call will diagnose a missing terminator. 
+ */ + if (mbstr[0] != '\0') + { +#ifdef VALGRIND_EXPENSIVE + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, strlen(mbstr)); +#else + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr + length, 1); +#endif + } + + return length; +} + +/* + * Returns the byte length of a multibyte character sequence bounded by a range + * [mbstr, end) of at least one byte in size. Raises an illegal byte sequence + * error if the sequence would exceed the range. + */ +int +pg_mblen_range(const char *mbstr, const char *end) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + Assert(end > mbstr); + + if (unlikely(mbstr + length > end)) + report_invalid_encoding_db(mbstr, length, end - mbstr); + +#ifdef VALGRIND_EXPENSIVE + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, end - mbstr); +#else + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length); +#endif + + return length; +} + +/* + * Returns the byte length of a multibyte character sequence bounded by a range + * extending for 'limit' bytes, which must be at least one. Raises an illegal + * byte sequence error if the sequence would exceed the range. + */ +int +pg_mblen_with_len(const char *mbstr, int limit) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + Assert(limit >= 1); + + if (unlikely(length > limit)) + report_invalid_encoding_db(mbstr, length, limit); + +#ifdef VALGRIND_EXPENSIVE + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, limit); +#else + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length); +#endif + + return length; +} + + +/* + * Returns the length of a multibyte character sequence, without any + * validation of bounds. + * + * PLEASE NOTE: This function can only be used safely if the caller has + * already verified the input string, since otherwise there is a risk of + * overrunning the buffer if the string is invalid. A prior call to a + * pg_mbstrlen* function suffices. 
+ */ +int +pg_mblen_unbounded(const char *mbstr) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length); + + return length; +} + +/* + * Historical name for pg_mblen_unbounded(). Should not be used and will be + * removed in a later version. + */ int pg_mblen(const char *mbstr) { - return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + return pg_mblen_unbounded(mbstr); } /* returns the display length of a multibyte character */ @@ -1045,14 +1171,14 @@ pg_mbstrlen(const char *mbstr) while (*mbstr) { - mbstr += pg_mblen(mbstr); + mbstr += pg_mblen_cstr(mbstr); len++; } return len; } /* returns the length (counted in wchars) of a multibyte string - * (not necessarily NULL terminated) + * (stops at the first of "limit" or a NUL) */ int pg_mbstrlen_with_len(const char *mbstr, int limit) @@ -1065,7 +1191,7 @@ pg_mbstrlen_with_len(const char *mbstr, int limit) while (limit > 0 && *mbstr) { - int l = pg_mblen(mbstr); + int l = pg_mblen_with_len(mbstr, limit); limit -= l; mbstr += l; @@ -1135,7 +1261,7 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit) while (len > 0 && *mbstr) { - l = pg_mblen(mbstr); + l = pg_mblen_with_len(mbstr, len); nch++; if (nch > limit) break; @@ -1705,12 +1831,19 @@ void report_invalid_encoding(int encoding, const char *mbstr, int len) { int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len); + + report_invalid_encoding_int(encoding, mbstr, l, len); +} + +static void +report_invalid_encoding_int(int encoding, const char *mbstr, int mblen, int len) +{ char buf[8 * 5 + 1]; char *p = buf; int j, jlimit; - jlimit = Min(l, len); + jlimit = Min(mblen, len); jlimit = Min(jlimit, 8); /* prevent buffer overrun */ for (j = 0; j < jlimit; j++) @@ -1727,6 +1860,12 @@ report_invalid_encoding(int encoding, const char *mbstr, int len) buf))); } +static void +report_invalid_encoding_db(const char *mbstr, int mblen, int 
len) +{ + report_invalid_encoding_int(GetDatabaseEncoding(), mbstr, mblen, len); +} + /* * report_untranslatable_char: complain about untranslatable character * diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/guc_tables.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/guc_tables.c index 29116ce4724..0c40a22f0d9 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/guc_tables.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/guc_tables.c @@ -66,6 +66,7 @@ #include "replication/slot.h" #include "replication/syncrep.h" #include "storage/bufmgr.h" +#include "storage/fd.h" #include "storage/large_object.h" #include "storage/pg_shmem.h" #include "storage/predicate.h" @@ -470,6 +471,14 @@ static const struct config_enum_entry wal_compression_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry file_extend_method_options[] = { +#ifdef HAVE_POSIX_FALLOCATE + {"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false}, +#endif + {"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/ps_status.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/ps_status.c index 92fd11b38d1..82f3af21504 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/ps_status.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/misc/ps_status.c @@ -25,7 +25,9 @@ #include "utils/guc.h" #include "utils/ps_status.h" +#if !defined(WIN32) || defined(_MSC_VER) extern char **environ; +#endif /* GUC variable */ __thread bool update_process_title = DEFAULT_UPDATE_PROCESS_TITLE; @@ -52,7 +54,7 @@ __thread bool update_process_title = DEFAULT_UPDATE_PROCESS_TITLE; #define PS_USE_SETPROCTITLE_FAST #elif defined(HAVE_SETPROCTITLE) #define PS_USE_SETPROCTITLE -#elif 
defined(__linux__) || defined(_AIX) || defined(__sun) || defined(__darwin__) +#elif defined(__linux__) || defined(_AIX) || defined(__sun) || defined(__darwin__) || defined(__GNU__) #define PS_USE_CLOBBER_ARGV #elif defined(WIN32) #define PS_USE_WIN32 @@ -62,7 +64,7 @@ __thread bool update_process_title = DEFAULT_UPDATE_PROCESS_TITLE; /* Different systems want the buffer padded differently */ -#if defined(_AIX) || defined(__linux__) || defined(__darwin__) +#if defined(_AIX) || defined(__linux__) || defined(__darwin__) || defined(__GNU__) #define PS_PADDING '\0' #else #define PS_PADDING ' ' diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/aset.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/aset.c index fa39038a388..44ff0d65b1d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/aset.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/aset.c @@ -1063,7 +1063,26 @@ AllocSetFree(void *pointer) Assert(FreeListIdxIsValid(fidx)); link = GetFreeListLink(chunk); + /* + * It might seem odd that we use elevel ERROR for double-pfree but + * only WARNING for write-past-chunk-end. But the two conditions are + * not very comparable. In the double-pfree case we can prevent + * corruption before it happens; while if we let it go through, the + * result would be a corrupted freelist that allows this chunk to get + * re-allocated twice. Thus the original bug could cascade into + * hard-to-understand misbehavior that might manifest far away from + * the actual source of the problem. On the other hand, a write past + * chunk end can be relatively benign if just a few bytes too many + * were written: often, only padding or unused space gets affected. + * Moreover, whatever damage was done is already done, and we're just + * reporting after the fact with no ability to clean it up. So just + * warn, like AllocSetCheck would do if the chunk didn't get freed. 
+ */ #ifdef MEMORY_CONTEXT_CHECKING + /* Test for previously-freed chunk */ + if (unlikely(chunk->requested_size == InvalidAllocSize)) + elog(ERROR, "detected double pfree in %s %p", + set->header.name, chunk); /* Test for someone scribbling on unused space in chunk */ if (chunk->requested_size < GetChunkSizeFromFreeListIdx(fidx)) if (!sentinel_ok(pointer, chunk->requested_size)) @@ -1251,6 +1270,11 @@ AllocSetRealloc(void *pointer, Size size) oldchksize = GetChunkSizeFromFreeListIdx(fidx); #ifdef MEMORY_CONTEXT_CHECKING + /* See comments in AllocSetFree about uses of ERROR and WARNING here */ + /* Test for previously-freed chunk */ + if (unlikely(chunk->requested_size == InvalidAllocSize)) + elog(ERROR, "detected realloc of freed chunk in %s %p", + set->header.name, chunk); /* Test for someone scribbling on unused space in chunk */ if (chunk->requested_size < oldchksize) if (!sentinel_ok(pointer, chunk->requested_size)) @@ -1544,9 +1568,9 @@ AllocSetCheck(MemoryContext context) prevblock = block, block = block->next) { char *bpoz = ((char *) block) + ALLOC_BLOCKHDRSZ; - long blk_used = block->freeptr - bpoz; - long blk_data = 0; - long nchunks = 0; + Size blk_used = block->freeptr - bpoz; + Size blk_data = 0; + Size nchunks = 0; bool has_external_chunk = false; if (set->keeper == block) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/dsa.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/dsa.c index 2739169165e..079d2c8b9fa 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/dsa.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/dsa.c @@ -2136,6 +2136,8 @@ make_new_segment(dsa_area *area, size_t requested_pages) /* See if that is enough... */ if (requested_pages > usable_pages) { + size_t total_requested_pages PG_USED_FOR_ASSERTS_ONLY; + /* * We'll make an odd-sized segment, working forward from the requested * number of pages. 
@@ -2146,10 +2148,37 @@ make_new_segment(dsa_area *area, size_t requested_pages) MAXALIGN(sizeof(FreePageManager)) + usable_pages * sizeof(dsa_pointer); + /* + * We must also account for pagemap entries needed to cover the + * metadata pages themselves. The pagemap must track all pages in the + * segment, including the pages occupied by metadata. + * + * This formula uses integer ceiling division to compute the exact + * number of additional entries needed. The divisor (FPM_PAGE_SIZE - + * sizeof(dsa_pointer)) accounts for the fact that each metadata page + * consumes one pagemap entry of sizeof(dsa_pointer) bytes, leaving + * only (FPM_PAGE_SIZE - sizeof(dsa_pointer)) net bytes per metadata + * page. + */ + metadata_bytes += + ((metadata_bytes + (FPM_PAGE_SIZE - sizeof(dsa_pointer)) - 1) / + (FPM_PAGE_SIZE - sizeof(dsa_pointer))) * + sizeof(dsa_pointer); + /* Add padding up to next page boundary. */ if (metadata_bytes % FPM_PAGE_SIZE != 0) metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE); total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE; + total_requested_pages = total_size / FPM_PAGE_SIZE; + + /* + * Verify that we allocated enough pagemap entries for metadata and + * usable pages. This reverse-engineers the new calculation of + * "metadata_bytes" done based on the new "requested_pages" for an + * odd-sized segment. + */ + Assert((metadata_bytes - MAXALIGN(sizeof(dsa_segment_header)) - + MAXALIGN(sizeof(FreePageManager))) / sizeof(dsa_pointer) >= total_requested_pages); /* Is that too large for dsa_pointer's addressing scheme? 
*/ if (total_size > DSA_MAX_SEGMENT_SIZE) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/freepage.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/freepage.c index 8f9ea090faa..4410e587845 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/freepage.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/freepage.c @@ -894,14 +894,14 @@ FreePageBtreeGetRecycled(FreePageManager *fpm) } /* - * Insert an item into an internal page. + * Insert an item into an internal page (there must be room). */ static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index, Size first_page, FreePageBtree *child) { Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC); - Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE); + Assert(btp->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE); Assert(index <= btp->hdr.nused); memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index], sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index)); @@ -911,14 +911,14 @@ FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index, } /* - * Insert an item into a leaf page. + * Insert an item into a leaf page (there must be room). 
*/ static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page, Size npages) { Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC); - Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE); + Assert(btp->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE); Assert(index <= btp->hdr.nused); memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index], sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index)); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/generation.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/generation.c index 8c2ca87705d..d9c1428b2bf 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/generation.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/generation.c @@ -663,6 +663,11 @@ GenerationFree(void *pointer) } #ifdef MEMORY_CONTEXT_CHECKING + /* See comments in AllocSetFree about uses of ERROR and WARNING here */ + /* Test for previously-freed chunk */ + if (unlikely(chunk->requested_size == InvalidAllocSize)) + elog(ERROR, "detected double pfree in %s %p", + ((MemoryContext) block->context)->name, chunk); /* Test for someone scribbling on unused space in chunk */ Assert(chunk->requested_size < chunksize); if (!sentinel_ok(pointer, chunk->requested_size)) @@ -772,6 +777,11 @@ GenerationRealloc(void *pointer, Size size) set = block->context; #ifdef MEMORY_CONTEXT_CHECKING + /* See comments in AllocSetFree about uses of ERROR and WARNING here */ + /* Test for previously-freed chunk */ + if (unlikely(chunk->requested_size == InvalidAllocSize)) + elog(ERROR, "detected realloc of freed chunk in %s %p", + ((MemoryContext) set)->name, chunk); /* Test for someone scribbling on unused space in chunk */ Assert(chunk->requested_size < oldsize); if (!sentinel_ok(pointer, chunk->requested_size)) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/mcxt.c 
b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/mcxt.c index beabfec00f4..4a9ac43adbd 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/mcxt.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/mcxt.c @@ -21,6 +21,7 @@ #include "postgres.h" +#include "common/int.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "storage/proc.h" @@ -201,6 +202,9 @@ __thread MemoryContext CurTransactionContext = NULL; /* This is a transient link to the active portal's memory context: */ __thread MemoryContext PortalContext = NULL; +/* Is memory context logging currently in progress? */ +static __thread bool LogMemoryContextInProgress = false; + static void MemoryContextCallResetCallbacks(MemoryContext context); static void MemoryContextStatsInternal(MemoryContext context, int level, bool print, int max_children, @@ -209,6 +213,8 @@ static void MemoryContextStatsInternal(MemoryContext context, int level, static void MemoryContextStatsPrint(MemoryContext context, void *passthru, const char *stats_string, bool print_to_stderr); +static pg_noinline void add_size_error(Size s1, Size s2) pg_attribute_noreturn(); +static pg_noinline void mul_size_error(Size s1, Size s2) pg_attribute_noreturn(); /* * You should not do memory allocations within a critical section, because @@ -1253,25 +1259,45 @@ ProcessLogMemoryContextInterrupt(void) LogMemoryContextPending = false; /* - * Use LOG_SERVER_ONLY to prevent this message from being sent to the - * connected client. + * Exit immediately if memory context logging is already in progress. This + * prevents recursive calls, which could occur if logging is requested + * repeatedly and rapidly, potentially leading to infinite recursion and a + * crash. 
*/ - ereport(LOG_SERVER_ONLY, - (errhidestmt(true), - errhidecontext(true), - errmsg("logging memory contexts of PID %d", MyProcPid))); + if (LogMemoryContextInProgress) + return; + LogMemoryContextInProgress = true; - /* - * When a backend process is consuming huge memory, logging all its memory - * contexts might overrun available disk space. To prevent this, we limit - * the number of child contexts to log per parent to 100. - * - * As with MemoryContextStats(), we suppose that practical cases where the - * dump gets long will typically be huge numbers of siblings under the - * same parent context; while the additional debugging value from seeing - * details about individual siblings beyond 100 will not be large. - */ - MemoryContextStatsDetail(TopMemoryContext, 100, false); + PG_TRY(); + { + /* + * Use LOG_SERVER_ONLY to prevent this message from being sent to the + * connected client. + */ + ereport(LOG_SERVER_ONLY, + (errhidestmt(true), + errhidecontext(true), + errmsg("logging memory contexts of PID %d", MyProcPid))); + + /* + * When a backend process is consuming huge memory, logging all its + * memory contexts might overrun available disk space. To prevent + * this, we limit the number of child contexts to log per parent to + * 100. + * + * As with MemoryContextStats(), we suppose that practical cases where + * the dump gets long will typically be huge numbers of siblings under + * the same parent context; while the additional debugging value from + * seeing details about individual siblings beyond 100 will not be + * large. + */ + MemoryContextStatsDetail(TopMemoryContext, 100, false); + } + PG_FINALLY(); + { + LogMemoryContextInProgress = false; + } + PG_END_TRY(); } void * @@ -1629,6 +1655,132 @@ repalloc0(void *pointer, Size oldsize, Size size) } /* + * Support for safe calculation of memory request sizes + * + * These functions perform the requested calculation, but throw error if the + * result overflows. 
+ * + * An important property of these functions is that if an argument was a + * negative signed int before promotion (implying overflow in calculating it) + * we will detect that as an error. That happens because we reject results + * larger than SIZE_MAX / 2 later on, in the actual allocation step. + */ +Size +add_size(Size s1, Size s2) +{ + Size result; + + if (unlikely(pg_add_size_overflow(s1, s2, &result))) + add_size_error(s1, s2); + return result; +} + +static pg_noinline void +add_size_error(Size s1, Size s2) +{ + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("invalid memory allocation request size %zu + %zu", + s1, s2))); +} + +Size +mul_size(Size s1, Size s2) +{ + Size result; + + if (unlikely(pg_mul_size_overflow(s1, s2, &result))) + mul_size_error(s1, s2); + return result; +} + +static pg_noinline void +mul_size_error(Size s1, Size s2) +{ + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("invalid memory allocation request size %zu * %zu", + s1, s2))); +} + +/* + * palloc_mul + * Equivalent to palloc(mul_size(s1, s2)). + */ +void * +palloc_mul(Size s1, Size s2) +{ + /* inline mul_size() for efficiency */ + Size req; + + if (unlikely(pg_mul_size_overflow(s1, s2, &req))) + mul_size_error(s1, s2); + return palloc(req); +} + +/* + * palloc0_mul + * Equivalent to palloc0(mul_size(s1, s2)). + * + * This is comparable to standard calloc's behavior. + */ +void * +palloc0_mul(Size s1, Size s2) +{ + /* inline mul_size() for efficiency */ + Size req; + + if (unlikely(pg_mul_size_overflow(s1, s2, &req))) + mul_size_error(s1, s2); + return palloc0(req); +} + +/* + * palloc_mul_extended + * Equivalent to palloc_extended(mul_size(s1, s2), flags). 
+ */ +void * +palloc_mul_extended(Size s1, Size s2, int flags) +{ + /* inline mul_size() for efficiency */ + Size req; + + if (unlikely(pg_mul_size_overflow(s1, s2, &req))) + mul_size_error(s1, s2); + return palloc_extended(req, flags); +} + +/* + * repalloc_mul + * Equivalent to repalloc(p, mul_size(s1, s2)). + */ +void * +repalloc_mul(void *p, Size s1, Size s2) +{ + /* inline mul_size() for efficiency */ + Size req; + + if (unlikely(pg_mul_size_overflow(s1, s2, &req))) + mul_size_error(s1, s2); + return repalloc(p, req); +} + +/* + * repalloc_mul_extended + * Equivalent to repalloc_extended(p, mul_size(s1, s2), flags). + */ +void * +repalloc_mul_extended(void *p, Size s1, Size s2, int flags) +{ + /* inline mul_size() for efficiency */ + Size req; + + if (unlikely(pg_mul_size_overflow(s1, s2, &req))) + mul_size_error(s1, s2); + return repalloc_extended(p, req, flags); +} + +/* * MemoryContextAllocHuge * Allocate (possibly-expansive) space within the specified context. * diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/slab.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/slab.c index 718dd2ba03c..8eb0dcd26ac 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/slab.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/mmgr/slab.c @@ -616,6 +616,7 @@ SlabAlloc(MemoryContext context, Size size) MemoryChunkSetHdrMask(chunk, block, MAXALIGN(slab->chunkSize), MCTX_SLAB_ID); #ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = size; /* slab mark to catch clobber of "unused" space */ Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ)); set_sentinel(MemoryChunkGetPointer(chunk), size); @@ -663,11 +664,18 @@ SlabFree(void *pointer) slab = block->slab; #ifdef MEMORY_CONTEXT_CHECKING + /* See comments in AllocSetFree about uses of ERROR and WARNING here */ + /* Test for previously-freed chunk */ + if (unlikely(chunk->requested_size == InvalidAllocSize)) + 
elog(ERROR, "detected double pfree in %s %p", + slab->header.name, chunk); /* Test for someone scribbling on unused space in chunk */ Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ)); if (!sentinel_ok(pointer, slab->chunkSize)) elog(WARNING, "detected write past chunk end in %s %p", slab->header.name, chunk); + /* Reset requested_size to InvalidAllocSize in free chunks */ + chunk->requested_size = InvalidAllocSize; #endif /* push this chunk onto the head of the block's free list */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/sharedtuplestore.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/sharedtuplestore.c index 236be65f221..c01704d2f82 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/sharedtuplestore.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/sharedtuplestore.c @@ -325,7 +325,8 @@ sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data, /* Do we have space? 
*/ size = accessor->sts->meta_data_size + tuple->t_len; - if (accessor->write_pointer + size > accessor->write_end) + if (accessor->write_pointer == NULL || + accessor->write_pointer + size > accessor->write_end) { if (accessor->write_chunk == NULL) { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/tuplestore.c b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/tuplestore.c index f60633df241..ce405775058 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/tuplestore.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/sort/tuplestore.c @@ -673,10 +673,10 @@ grow_memtuples(Tuplestorestate *state) /* OK, do it */ FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); - state->memtupsize = newmemtupsize; state->memtuples = (void **) repalloc_huge(state->memtuples, - state->memtupsize * sizeof(void *)); + newmemtupsize * sizeof(void *)); + state->memtupsize = newmemtupsize; USEMEM(state, GetMemoryChunkSpace(state->memtuples)); if (LACKMEM(state)) elog(ERROR, "unexpected out-of-memory situation in tuplestore"); @@ -1221,7 +1221,19 @@ dumptuples(Tuplestorestate *state) if (i >= state->memtupcount) break; WRITETUP(state, state->memtuples[i]); + + /* + * Increase memtupdeleted to track the fact that we just deleted that + * tuple. Think not to remove this on the grounds that we'll reset + * memtupdeleted to zero below. We might not reach that if some later + * WRITETUP fails (e.g. due to overrunning temp_file_limit). If so, + * we'd error out leaving an effectively-corrupt tuplestore, which + * would be quite bad if it's a persistent data structure such as a + * Portal's holdStore. 
+ */ + state->memtupdeleted++; } + /* Now we can reset memtupdeleted along with memtupcount */ state->memtupdeleted = 0; state->memtupcount = 0; } @@ -1408,8 +1420,10 @@ tuplestore_trim(Tuplestorestate *state) FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i])); pfree(state->memtuples[i]); state->memtuples[i] = NULL; + /* As in dumptuples(), increment memtupdeleted synchronously */ + state->memtupdeleted++; } - state->memtupdeleted = nremove; + Assert(state->memtupdeleted == nremove); /* mark tuplestore as truncated (used for Assert crosschecks only) */ state->truncated = true; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c index 47b18b8c600..580c460b637 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c @@ -425,7 +425,7 @@ validate_compress_specification(pg_compress_specification *spec) void parse_compress_options(const char *option, char **algorithm, char **detail) { - char *sep; + const char *sep; char *endp; long result; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/encnames.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/encnames.c index 0412a8220ef..2009e514c91 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/encnames.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/encnames.c @@ -61,8 +61,9 @@ static const pg_encname pg_encname_tbl[] = * Japanese, standard OSF */ { "euckr", PG_EUC_KR - }, /* EUC-KR; Extended Unix Code for Korean , KS - * X 1001 standard */ + }, /* EUC-KR; Extended Unix Code for Korean + * precomposed (Wansung) encoding, standard KS + * X 1001 */ { "euctw", PG_EUC_TW }, /* EUC-TW; Extended Unix Code for @@ -119,8 +120,8 @@ static const pg_encname pg_encname_tbl[] = }, /* ISO-8859-9; RFC1345,KXS2 */ { "johab", PG_JOHAB - }, /* JOHAB; Extended Unix Code for 
simplified - * Chinese */ + }, /* JOHAB; Korean combining (Johab) encoding, + * standard KS X 1001 annex 3 */ { "koi8", PG_KOI8R }, /* _dirty_ alias for KOI8-R (backward @@ -189,7 +190,9 @@ static const pg_encname pg_encname_tbl[] = }, /* alias for WIN1258 */ { "uhc", PG_UHC - }, /* UHC; Korean Windows CodePage 949 */ + }, /* UHC; Unified Hangul Code, Microsoft Windows + * CodePage 949; superset of EUC-KR covering + * all 11,172 precomposed Hangul syllables */ { "unicode", PG_UTF8 }, /* alias for UTF8 */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_lzcompress.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_lzcompress.c index 20b5938e074..2e1cb94dbca 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_lzcompress.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/pg_lzcompress.c @@ -727,22 +727,33 @@ pglz_decompress(const char *source, int32 slen, char *dest, int32 len; int32 off; + /* + * A match tag is at least 2 bytes; if the length nibble is + * 0x0f the tag is 3 bytes (extended length). Verify we have + * enough source data before reading them. + */ + if (unlikely(sp + 2 > srcend)) + return -1; + len = (sp[0] & 0x0f) + 3; off = ((sp[0] & 0xf0) << 4) | sp[1]; sp += 2; if (len == 18) + { + if (unlikely(sp >= srcend)) + return -1; len += *sp++; + } /* - * Check for corrupt data: if we fell off the end of the - * source, or if we obtained off = 0, or if off is more than - * the distance back to the buffer start, we have problems. - * (We must check for off = 0, else we risk an infinite loop - * below in the face of corrupt data. Likewise, the upper - * limit on off prevents accessing outside the buffer - * boundaries.) + * Check for corrupt data: if we obtained off = 0, or if off + * is more than the distance back to the buffer start, we have + * problems. (We must check for off = 0, else we risk an + * infinite loop below in the face of corrupt data. 
Likewise, + * the upper limit on off prevents accessing outside the + * buffer boundaries.) */ - if (unlikely(sp > srcend || off == 0 || + if (unlikely(off == 0 || off > (dp - (unsigned char *) dest))) return -1; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/protocol_openssl.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/protocol_openssl.c index 089cbd33cca..12018ebc137 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/protocol_openssl.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/protocol_openssl.c @@ -114,4 +114,10 @@ SSL_CTX_set_max_proto_version(SSL_CTX *ctx, int version) return 1; /* success */ } -#endif /* !SSL_CTX_set_min_proto_version */ +#else /* !SSL_CTX_set_min_proto_version */ + +/* prevent linker complaints about empty module */ +extern __thread int protocol_openssl_dummy_variable; +__thread int protocol_openssl_dummy_variable = 0; + +#endif /* SSL_CTX_set_min_proto_version */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/psprintf.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/psprintf.c index c1d2807cea9..54a8fe93dee 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/psprintf.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/psprintf.c @@ -24,9 +24,6 @@ #include "postgres_fe.h" -/* It's possible we could use a different value for this in frontend code */ -#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */ - #endif diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c index e7e909a0c87..bc1b0240594 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/saslprep.c @@ -24,10 +24,6 @@ #include "utils/memutils.h" #else #include "postgres_fe.h" - -/* It's possible we could use a different value for this in frontend code */ -#define 
MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */ - #endif #include "common/saslprep.h" @@ -1009,15 +1005,17 @@ pg_utf8_string_len(const char *source) const unsigned char *p = (const unsigned char *) source; int l; int num_chars = 0; + size_t len = strlen(source); - while (*p) + while (len) { l = pg_utf_mblen(p); - if (!pg_utf8_islegal(p, l)) + if (len < l || !pg_utf8_islegal(p, l)) return -1; p += l; + len -= l; num_chars++; } diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/stringinfo.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/stringinfo.c index 05b22b5c53c..de4a1775848 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/stringinfo.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/stringinfo.c @@ -24,9 +24,6 @@ #include "postgres_fe.h" -/* It's possible we could use a different value for this in frontend code */ -#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */ - #endif #include "lib/stringinfo.h" diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/unicode_norm.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/unicode_norm.c index da6728605be..18e77ce2d23 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/unicode_norm.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/unicode_norm.c @@ -23,6 +23,7 @@ #include "common/unicode_norm_hashfunc.h" #include "common/unicode_normprops_table.h" #include "port/pg_bswap.h" +#include "utils/memutils.h" #else #include "common/unicode_norm_table.h" #endif @@ -420,10 +421,28 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input) /* * Calculate how many characters long the decomposed version will be. + * + * Some characters decompose to quite a few code points, so that the + * decomposed version's size could overrun MaxAllocSize, and even 32-bit + * size_t, even though the input string presumably fits in that. 
In + * frontend we want to just return NULL in that case, so monitor the sum + * and exit early once we'd need more than MaxAllocSize bytes. */ decomp_size = 0; for (p = input; *p; p++) + { decomp_size += get_decomposed_size(*p, compat); + if (unlikely(decomp_size > MaxAllocSize / sizeof(pg_wchar))) + { +#ifndef FRONTEND + /* Exit loop and let palloc() throw error below */ + break; +#else + /* Just return NULL with no explicit error */ + return NULL; +#endif + } + } decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); if (decomp_chars == NULL) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c index 82ea3a4e834..f98aa8a0c0b 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/wchar.c @@ -63,6 +63,9 @@ * subset to the ASCII routines to ensure consistency. */ +/* No error-reporting facility. Ignore incomplete trailing byte sequence. 
*/ +#define MB2CHAR_NEED_AT_LEAST(len, need) if ((len) < (need)) break + /* * SQL/ASCII */ @@ -108,22 +111,24 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte - * KANA") */ + if (*from == SS2) /* JIS X 0201 (so called "1 byte KANA") */ { + MB2CHAR_NEED_AT_LEAST(len, 2); from++; *to = (SS2 << 8) | *from++; len -= 2; } - else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */ + else if (*from == SS3) /* JIS X 0212 KANJI */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */ + else if (IS_HIGHBIT_SET(*from)) /* JIS X 0208 KANJI */ { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 8; *to |= *from++; len -= 2; @@ -235,22 +240,25 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if (*from == SS2 && len >= 3) /* code set 2 (unused?) */ + if (*from == SS2) /* code set 2 (unused?) */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS2 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */ + else if (*from == SS3) /* code set 3 (unused ?) */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */ + else if (IS_HIGHBIT_SET(*from)) /* code set 1 */ { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 8; *to |= *from++; len -= 2; @@ -267,12 +275,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) return cnt; } +/* + * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for + * EUC_CN), but mb2wchar_with_len does. Tell a coherent story for code that + * relies on agreement between mb2wchar_with_len and mblen. Invalid text + * datums (e.g. 
from shared catalogs) reach this. + */ static int pg_euccn_mblen(const unsigned char *s) { int len; - if (IS_HIGHBIT_SET(*s)) + if (*s == SS2) + len = 3; + else if (*s == SS3) + len = 3; + else if (IS_HIGHBIT_SET(*s)) len = 2; else len = 1; @@ -302,23 +320,26 @@ pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if (*from == SS2 && len >= 4) /* code set 2 */ + if (*from == SS2) /* code set 2 */ { + MB2CHAR_NEED_AT_LEAST(len, 4); from++; *to = (((uint32) SS2) << 24) | (*from++ << 16); *to |= *from++ << 8; *to |= *from++; len -= 4; } - else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ + else if (*from == SS3) /* code set 3 (unused?) */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */ + else if (IS_HIGHBIT_SET(*from)) /* code set 2 */ { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 8; *to |= *from++; len -= 2; @@ -455,8 +476,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) } else if ((*from & 0xe0) == 0xc0) { - if (len < 2) - break; /* drop trailing incomplete char */ + MB2CHAR_NEED_AT_LEAST(len, 2); c1 = *from++ & 0x1f; c2 = *from++ & 0x3f; *to = (c1 << 6) | c2; @@ -464,8 +484,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) } else if ((*from & 0xf0) == 0xe0) { - if (len < 3) - break; /* drop trailing incomplete char */ + MB2CHAR_NEED_AT_LEAST(len, 3); c1 = *from++ & 0x0f; c2 = *from++ & 0x3f; c3 = *from++ & 0x3f; @@ -474,8 +493,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) } else if ((*from & 0xf8) == 0xf0) { - if (len < 4) - break; /* drop trailing incomplete char */ + MB2CHAR_NEED_AT_LEAST(len, 4); c1 = *from++ & 0x07; c2 = *from++ & 0x3f; c3 = *from++ & 0x3f; @@ -738,28 +756,32 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if 
(IS_LC1(*from) && len >= 2) + if (IS_LC1(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 16; *to |= *from++; len -= 2; } - else if (IS_LCPRV1(*from) && len >= 3) + else if (IS_LCPRV1(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = *from++ << 16; *to |= *from++; len -= 3; } - else if (IS_LC2(*from) && len >= 3) + else if (IS_LC2(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 3); *to = *from++ << 16; *to |= *from++ << 8; *to |= *from++; len -= 3; } - else if (IS_LCPRV2(*from) && len >= 4) + else if (IS_LCPRV2(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 4); from++; *to = *from++ << 16; *to |= *from++ << 8; @@ -2126,7 +2148,7 @@ pg_encoding_set_invalid(int encoding, char *dst) const pg_wchar_tbl pg_wchar_table[] = { {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */ - {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */ + {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3}, /* PG_EUC_CN */ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/hash_xlog.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/hash_xlog.h index b93619d1a81..03ad47af435 100644 --- 
a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/hash_xlog.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/hash_xlog.h @@ -129,7 +129,7 @@ typedef struct xl_hash_split_complete * * This data record is used for XLOG_HASH_MOVE_PAGE_CONTENTS * - * Backup Blk 0: bucket page + * Backup Blk 0: primary bucket page * Backup Blk 1: page containing moved tuples * Backup Blk 2: page from which tuples will be removed */ @@ -149,12 +149,13 @@ typedef struct xl_hash_move_page_contents * * This data record is used for XLOG_HASH_SQUEEZE_PAGE * - * Backup Blk 0: page containing tuples moved from freed overflow page - * Backup Blk 1: freed overflow page - * Backup Blk 2: page previous to the freed overflow page - * Backup Blk 3: page next to the freed overflow page - * Backup Blk 4: bitmap page containing info of freed overflow page - * Backup Blk 5: meta page + * Backup Blk 0: primary bucket page + * Backup Blk 1: page containing tuples moved from freed overflow page + * Backup Blk 2: freed overflow page + * Backup Blk 3: page previous to the freed overflow page + * Backup Blk 4: page next to the freed overflow page + * Backup Blk 5: bitmap page containing info of freed overflow page + * Backup Blk 6: meta page */ typedef struct xl_hash_squeeze_page { @@ -245,7 +246,7 @@ typedef struct xl_hash_init_bitmap_page * * This data record is used for XLOG_HASH_VACUUM_ONE_PAGE * - * Backup Blk 0: bucket page + * Backup Blk 0: primary bucket page * Backup Blk 1: meta page */ typedef struct xl_hash_vacuum_one_page diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/tableam.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/tableam.h index 1c674fefe48..be44f294656 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/tableam.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/tableam.h @@ -121,7 +121,9 @@ typedef enum TU_UpdateIndexes /* * When table_tuple_update, 
table_tuple_delete, or table_tuple_lock fail * because the target tuple is already outdated, they fill in this struct to - * provide information to the caller about what happened. + * provide information to the caller about what happened. When those functions + * succeed, the contents of this struct should not be relied upon, except for + * `traversed`, which may be set in both success and failure cases. * * ctid is the target's ctid link: it is the same as the target's TID if the * target was deleted, or the location of the replacement tuple if the target @@ -137,6 +139,9 @@ typedef enum TU_UpdateIndexes * tuple); otherwise cmax is zero. (We make this restriction because * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other * transactions.) + * + * traversed indicates if an update chain was followed in order to try to lock + * the target tuple. (This may be set in both success and failure cases.) */ typedef struct TM_FailureData { @@ -1510,8 +1515,8 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, * Output parameters: * tmfd - filled in failure cases (see below) * lockmode - filled with lock mode acquired on tuple - * update_indexes - in success cases this is set to true if new index entries - * are required for this tuple + * update_indexes - in success cases this is set if new index entries + * are required for this tuple; see TU_UpdateIndexes * * Normal, successful return value is TM_Ok, which means we did actually * update it. 
Failure return codes are TM_SelfModified, TM_Updated, and @@ -1544,7 +1549,7 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * * Input parameters: * relation: relation containing tuple (caller must hold suitable lock) - * tid: TID of tuple to lock + * tid: TID of tuple to lock (updated if an update chain was followed) * snapshot: snapshot to use for visibility determinations * cid: current command ID (used for visibility test, and stored into * tuple's cmax if lock is successful) @@ -1569,8 +1574,10 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip * * In the failure cases other than TM_Invisible and TM_Deleted, the routine - * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See - * comments for struct TM_FailureData for additional info. + * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. + * Additionally, in both success and failure cases, tmfd->traversed is set if + * an update chain was followed. See comments for struct TM_FailureData for + * additional info. 
*/ static inline TM_Result table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlog.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlog.h index 98f83012fab..e1fb639c7a4 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlog.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlog.h @@ -219,6 +219,7 @@ extern bool RecoveryInProgress(void); extern RecoveryState GetRecoveryState(void); extern bool XLogInsertAllowed(void); extern XLogRecPtr GetXLogInsertRecPtr(void); +extern XLogRecPtr GetXLogInsertEndRecPtr(void); extern XLogRecPtr GetXLogWriteRecPtr(void); extern uint64 GetSystemIdentifier(void); @@ -257,6 +258,7 @@ extern void SwitchIntoArchiveRecovery(XLogRecPtr EndRecPtr, TimeLineID replayTLI extern void ReachedEndOfBackup(XLogRecPtr EndRecPtr, TimeLineID tli); extern void SetInstallXLogFileSegmentActive(void); extern bool IsInstallXLogFileSegmentActive(void); +extern void ResetInstallXLogFileSegmentActive(void); extern void XLogShutdownWalRcv(void); /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogdefs.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogdefs.h index fe794c77405..bd2607512a0 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogdefs.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogdefs.h @@ -25,7 +25,8 @@ typedef uint64 XLogRecPtr; * WAL segment, initializing the first WAL page at WAL segment size, so no XLOG * record can begin at zero. 
*/ -#define InvalidXLogRecPtr 0 +#define InvalidXLogRecPtr 0 +#define XLogRecPtrIsValid(r) ((r) != InvalidXLogRecPtr) #define XLogRecPtrIsInvalid(r) ((r) == InvalidXLogRecPtr) /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogstats.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogstats.h index 89410ce92b8..a9d587cc66f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogstats.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/access/xlogstats.h @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * xlogstats.h - * Definitions for WAL Statitstics + * Definitions for WAL Statistics * * Copyright (c) 2022-2023, PostgreSQL Global Development Group * diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/c.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/c.h index 1923c2816f3..83a4a3410c6 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/c.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/c.h @@ -166,6 +166,16 @@ #define PG_USED_FOR_ASSERTS_ONLY pg_attribute_unused() #endif +/* + * Our C and C++ compilers may have different ideas about which printf + * archetype best represents what src/port/snprintf.c can do. 
+ */ +#ifndef __cplusplus +#define PG_PRINTF_ATTRIBUTE PG_C_PRINTF_ATTRIBUTE +#else +#define PG_PRINTF_ATTRIBUTE PG_CXX_PRINTF_ATTRIBUTE +#endif + /* GCC and XLC support format attributes */ #if (defined(__GNUC__) && !defined(__clang__)) || defined(__IBMC__) #define pg_attribute_format_arg(a) __attribute__((format_arg(a))) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/catalog/dependency.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/catalog/dependency.h index ffd5e9dc82d..8993a75c06a 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/catalog/dependency.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/catalog/dependency.h @@ -221,6 +221,8 @@ extern long changeDependenciesOn(Oid refClassId, Oid oldRefObjectId, extern Oid getExtensionOfObject(Oid classId, Oid objectId); extern List *getAutoExtensionsOfObject(Oid classId, Oid objectId); +extern Oid getExtensionType(Oid extensionOid, const char *typname); + extern bool sequenceIsOwned(Oid seqId, char deptype, Oid *tableId, int32 *colId); extern List *getOwnedSequences(Oid relid); extern Oid getIdentitySequence(Oid relid, AttrNumber attnum, bool missing_ok); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/async.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/async.h index 76be04dbaef..c07a422f155 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/async.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/async.h @@ -50,4 +50,7 @@ extern void HandleNotifyInterrupt(void); /* process interrupts */ extern void ProcessNotifyInterrupt(bool flush); +/* freeze old transaction IDs in notify queue (called by VACUUM) */ +extern void AsyncNotifyFreezeXids(TransactionId newFrozenXid); + #endif /* ASYNC_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/defrem.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/defrem.h 
index 478203ed4c4..83423d3ba13 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/defrem.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/defrem.h @@ -81,7 +81,7 @@ extern void RemoveOperatorById(Oid operOid); extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt); /* commands/statscmds.c */ -extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt); +extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt, bool check_rights); extern ObjectAddress AlterStatistics(AlterStatsStmt *stmt); extern void RemoveStatisticsById(Oid statsOid); extern void RemoveStatisticsDataById(Oid statsOid, bool inh); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/extension.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/extension.h index 22e50e282bd..524167d184e 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/extension.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/extension.h @@ -50,6 +50,8 @@ extern char *get_extension_name(Oid ext_oid); extern Oid get_extension_schema(Oid ext_oid); extern bool extension_file_exists(const char *extensionName); +extern Oid get_function_sibling_type(Oid funcoid, const char *typname); + extern ObjectAddress AlterExtensionNamespace(const char *extensionName, const char *newschema, Oid *oldschema); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/trigger.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/trigger.h index de8044d1566..97dc2b9a43e 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/trigger.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/commands/trigger.h @@ -78,7 +78,9 @@ typedef struct TransitionCaptureState /* * Private data including the tuplestore(s) into which to insert tuples. 
*/ - struct AfterTriggersTableData *tcs_private; + struct AfterTriggersTableData *tcs_insert_private; + struct AfterTriggersTableData *tcs_update_private; + struct AfterTriggersTableData *tcs_delete_private; } TransitionCaptureState; /* diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/fe_memutils.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/fe_memutils.h index 89601cc778f..a6702a562ff 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/fe_memutils.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/fe_memutils.h @@ -10,6 +10,18 @@ #define FE_MEMUTILS_H /* + * Assumed maximum size for allocation requests. + * + * We don't enforce this, so the actual maximum is the platform's SIZE_MAX. + * But it's useful to have it defined in frontend builds, so that common + * code can check for oversized requests without having frontend-vs-backend + * differences. Also, some code relies on MaxAllocSize being no more than + * INT_MAX/2, so rather than setting this to SIZE_MAX, make it the same as + * the backend's value. + */ +#define MaxAllocSize ((Size) 0x3fffffff) /* 1 gigabyte - 1 */ + +/* * Flags for pg_malloc_extended and palloc_extended, deliberately named * the same as the backend flags. 
*/ @@ -30,6 +42,16 @@ extern void *pg_realloc(void *ptr, size_t size); extern void pg_free(void *ptr); /* + * Support for safe calculation of memory request sizes + */ +extern Size add_size(Size s1, Size s2); +extern Size mul_size(Size s1, Size s2); +extern void *pg_malloc_mul(Size s1, Size s2); +extern void *pg_malloc0_mul(Size s1, Size s2); +extern void *pg_malloc_mul_extended(Size s1, Size s2, int flags); +extern void *pg_realloc_mul(void *p, Size s1, Size s2); + +/* * Variants with easier notation and more type safety */ @@ -42,14 +64,15 @@ extern void pg_free(void *ptr); /* * Allocate space for "count" objects of type "type" */ -#define pg_malloc_array(type, count) ((type *) pg_malloc(sizeof(type) * (count))) -#define pg_malloc0_array(type, count) ((type *) pg_malloc0(sizeof(type) * (count))) +#define pg_malloc_array(type, count) ((type *) pg_malloc_mul(sizeof(type), count)) +#define pg_malloc0_array(type, count) ((type *) pg_malloc0_mul(sizeof(type), count)) +#define pg_malloc_array_extended(type, count, flags) ((type *) pg_malloc_mul_extended(sizeof(type), count, flags)) /* * Change size of allocation pointed to by "pointer" to have space for "count" * objects of type "type" */ -#define pg_realloc_array(pointer, type, count) ((type *) pg_realloc(pointer, sizeof(type) * (count))) +#define pg_realloc_array(pointer, type, count) ((type *) pg_realloc_mul(pointer, sizeof(type), count)) /* Equivalent functions, deliberately named the same as backend functions */ extern char *pstrdup(const char *in); @@ -59,12 +82,17 @@ extern void *palloc0(Size size); extern void *palloc_extended(Size size, int flags); extern void *repalloc(void *pointer, Size size); extern void pfree(void *pointer); +extern void *palloc_mul(Size s1, Size s2); +extern void *palloc0_mul(Size s1, Size s2); +extern void *palloc_mul_extended(Size s1, Size s2, int flags); +extern void *repalloc_mul(void *p, Size s1, Size s2); #define palloc_object(type) ((type *) palloc(sizeof(type))) #define 
palloc0_object(type) ((type *) palloc0(sizeof(type))) -#define palloc_array(type, count) ((type *) palloc(sizeof(type) * (count))) -#define palloc0_array(type, count) ((type *) palloc0(sizeof(type) * (count))) -#define repalloc_array(pointer, type, count) ((type *) repalloc(pointer, sizeof(type) * (count))) +#define palloc_array(type, count) ((type *) palloc_mul(sizeof(type), count)) +#define palloc0_array(type, count) ((type *) palloc0_mul(sizeof(type), count)) +#define palloc_array_extended(type, count, flags) ((type *) palloc_mul_extended(sizeof(type), count, flags)) +#define repalloc_array(pointer, type, count) ((type *) repalloc_mul(pointer, sizeof(type), count)) /* sprintf into a palloc'd buffer --- these are in psprintf.c */ extern char *psprintf(const char *fmt,...) pg_attribute_printf(1, 2); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/int.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/int.h index 487124473d2..1f4218bfbc5 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/int.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/common/int.h @@ -438,4 +438,71 @@ pg_mul_u64_overflow(uint64 a, uint64 b, uint64 *result) #endif } +/* + * size_t + */ +static inline bool +pg_add_size_overflow(size_t a, size_t b, size_t *result) +{ +#if defined(HAVE__BUILTIN_OP_OVERFLOW) + return __builtin_add_overflow(a, b, result); +#else + size_t res = a + b; + + if (res < a) + { + *result = 0x5EED; /* to avoid spurious warnings */ + return true; + } + *result = res; + return false; +#endif +} + +static inline bool +pg_sub_size_overflow(size_t a, size_t b, size_t *result) +{ +#if defined(HAVE__BUILTIN_OP_OVERFLOW) + return __builtin_sub_overflow(a, b, result); +#else + if (b > a) + { + *result = 0x5EED; /* to avoid spurious warnings */ + return true; + } + *result = a - b; + return false; +#endif +} + +static inline bool +pg_mul_size_overflow(size_t a, size_t b, size_t *result) +{ +#if 
defined(HAVE__BUILTIN_OP_OVERFLOW) + return __builtin_mul_overflow(a, b, result); +#else + size_t res = a * b; + + if (a != 0 && b != res / a) + { + *result = 0x5EED; /* to avoid spurious warnings */ + return true; + } + *result = res; + return false; +#endif +} + +/* + * pg_neg_size_overflow is currently omitted, to avoid having to reason about + * the portability of SSIZE_MIN/_MAX before a use case exists. + */ +/* + * static inline bool + * pg_neg_size_overflow(size_t a, ssize_t *result) + * { + * ... + * } + */ + #endif /* COMMON_INT_H */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/execExpr.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/execExpr.h index 0e67484cdd3..49d00a524cd 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/execExpr.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/execExpr.h @@ -581,6 +581,10 @@ typedef struct ExprEvalStep { bool has_nulls; bool inclause; /* true for IN and false for NOT IN */ + bool null_lhs_result; /* for non-strict lookups, we + * cache what looking up NULL + * returns. 
*/ + bool null_lhs_isnull; struct ScalarArrayOpExprHashTable *elements_tab; FmgrInfo *finfo; /* function's lookup data */ FunctionCallInfo fcinfo_data; /* arguments etc */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/executor.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/executor.h index c26c3122ebe..b02e7d14bb1 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/executor.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/executor/executor.h @@ -211,6 +211,9 @@ extern void ExecutorRewind(QueryDesc *queryDesc); extern bool ExecCheckPermissions(List *rangeTable, List *rteperminfos, bool ereport_on_violation); extern bool ExecCheckOneRelPerms(RTEPermissionInfo *perminfo); +extern void CheckValidResultRelNew(ResultRelInfo *resultRelInfo, CmdType operation, + OnConflictAction onConflictAction, + List *mergeActions); extern void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation); extern void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/SectionMemoryManager.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/SectionMemoryManager.h index 93cf9771570..aac78b5bd7e 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/SectionMemoryManager.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/SectionMemoryManager.h @@ -1,5 +1,5 @@ /* - * This is a copy LLVM source code modified by the PostgreSQL project. + * This is a copy of LLVM source code modified by the PostgreSQL project. * See SectionMemoryManager.cpp for notes on provenance and license. 
*/ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/llvmjit_backport.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/llvmjit_backport.h index 92874f7998c..04851c9b68a 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/llvmjit_backport.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/jit/llvmjit_backport.h @@ -8,7 +8,7 @@ #include <llvm/Config/llvm-config.h> /* - * LLVM's RuntimeDyld can produce code that crashes on larger memory ARM + * Pre-LLVM 22 RuntimeDyld can produce code that crashes on large memory ARM * systems, because llvm::SectionMemoryManager allocates multiple pieces of * memory that can be placed too far apart for the generated code. See * src/backend/jit/llvm/SectionMemoryManager.cpp for the patched replacement @@ -18,7 +18,7 @@ * We have adjusted it to compile against a range of LLVM versions, but not * further back than 12 for now. */ -#if defined(__aarch64__) && LLVM_VERSION_MAJOR > 11 +#if defined(__aarch64__) && LLVM_VERSION_MAJOR > 11 && LLVM_VERSION_MAJOR < 22 #define USE_LLVM_BACKPORT_SECTION_MEMORY_MANAGER #endif diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/mb/pg_wchar.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/mb/pg_wchar.h index 96d138da969..448ceb0458f 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/mb/pg_wchar.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/mb/pg_wchar.h @@ -608,7 +608,14 @@ extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2); extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n); extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n); extern size_t pg_wchar_strlen(const pg_wchar *str); +extern int pg_mblen_cstr(const char *mbstr); +extern int pg_mblen_range(const char *mbstr, const char *end); +extern int pg_mblen_with_len(const char *mbstr, int limit); +extern int pg_mblen_unbounded(const char 
*mbstr); + +/* deprecated */ extern int pg_mblen(const char *mbstr); + extern int pg_dsplen(const char *mbstr); extern int pg_mbstrlen(const char *mbstr); extern int pg_mbstrlen_with_len(const char *mbstr, int limit); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/miscadmin.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/miscadmin.h index ec0abe5206e..8f17fcedbe1 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/miscadmin.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/miscadmin.h @@ -116,7 +116,8 @@ extern void ProcessInterrupts(void); (unlikely(InterruptPending)) #else #define INTERRUPTS_PENDING_CONDITION() \ - (unlikely(UNBLOCKED_SIGNAL_QUEUE()) ? pgwin32_dispatch_queued_signals() : 0, \ + (unlikely(UNBLOCKED_SIGNAL_QUEUE()) ? \ + pgwin32_dispatch_queued_signals() : (void) 0, \ unlikely(InterruptPending)) #endif diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/execnodes.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/execnodes.h index f40cb9bb5ba..74718fa256d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/execnodes.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/execnodes.h @@ -565,15 +565,13 @@ typedef struct ResultRelInfo bool ri_RootToChildMapValid; /* - * Information needed by tuple routing target relations + * Other information needed by child result relations * - * RootResultRelInfo gives the target relation mentioned in the query, if - * it's a partitioned table. It is not set if the target relation - * mentioned in the query is an inherited table, nor when tuple routing is - * not needed. + * ri_RootResultRelInfo gives the target relation mentioned in the query. + * Used as the root for tuple routing and/or transition capture. * - * PartitionTupleSlot is non-NULL if RootToChild conversion is needed and - * the relation is a partition. 
+ * ri_PartitionTupleSlot is non-NULL if the relation is a partition to + * route tuples into and ri_RootToChildMap conversion is needed. */ struct ResultRelInfo *ri_RootResultRelInfo; TupleTableSlot *ri_PartitionTupleSlot; diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/pathnodes.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/pathnodes.h index 69d7b3d9b83..0162d59e079 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/pathnodes.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/nodes/pathnodes.h @@ -1054,9 +1054,10 @@ typedef struct RelOptInfo * IndexOptInfo * Per-index information for planning/optimization * - * indexkeys[], indexcollations[] each have ncolumns entries. - * opfamily[], and opcintype[] each have nkeycolumns entries. They do - * not contain any information about included attributes. + * indexkeys[] and canreturn[] each have ncolumns entries. + * + * indexcollations[], opfamily[], and opcintype[] each have nkeycolumns + * entries. These don't contain any information about INCLUDE columns. * * sortopfamily[], reverse_sort[], and nulls_first[] have * nkeycolumns entries, if the index is ordered; but if it is unordered, diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/pg_config-linux.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/pg_config-linux.h index 46784db5f76..dcc45a8778c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/pg_config-linux.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/pg_config-linux.h @@ -149,6 +149,10 @@ don't. */ #define HAVE_DECL_STRNLEN 1 +/* Define to 1 if you have the declaration of `timingsafe_bcmp', and to 0 if + you don't. */ +#define HAVE_DECL_TIMINGSAFE_BCMP 0 + /* Define to 1 if you have the <editline/history.h> header file. */ /* #undef HAVE_EDITLINE_HISTORY_H */ @@ -484,6 +488,9 @@ /* Define to 1 if you have the <termios.h> header file. 
*/ #define HAVE_TERMIOS_H 1 +/* Define to 1 if you have the `timingsafe_bcmp' function. */ +/* #undef HAVE_TIMINGSAFE_BCMP */ + /* Define to 1 if your compiler understands `typeof' or something similar. */ #define HAVE_TYPEOF 1 @@ -603,7 +610,7 @@ #define PACKAGE_NAME "PostgreSQL" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PostgreSQL 16.10" +#define PACKAGE_STRING "PostgreSQL 16.14" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "postgresql" @@ -612,7 +619,15 @@ #define PACKAGE_URL "https://www.postgresql.org/" /* Define to the version of this package. */ -#define PACKAGE_VERSION "16.10" +#define PACKAGE_VERSION "16.14" + +/* Define to best C++ printf format archetype, usually gnu_printf if + available. */ +#define PG_CXX_PRINTF_ATTRIBUTE gnu_printf + +/* Define to best C printf format archetype, usually gnu_printf if available. + */ +#define PG_C_PRINTF_ATTRIBUTE __syslog__ /* Define to the name of a signed 128-bit integer type. */ #define PG_INT128_TYPE __int128 @@ -631,22 +646,19 @@ #define PG_MAJORVERSION_NUM 16 /* PostgreSQL minor version number */ -#define PG_MINORVERSION_NUM 10 - -/* Define to best printf format archetype, usually gnu_printf if available. */ -#define PG_PRINTF_ATTRIBUTE gnu_printf +#define PG_MINORVERSION_NUM 14 /* Define to 1 to use <stdbool.h> to define type bool. */ #define PG_USE_STDBOOL 1 /* PostgreSQL version as a string */ -#define PG_VERSION "16.10" +#define PG_VERSION "16.14" /* PostgreSQL version as a number */ -#define PG_VERSION_NUM 160010 +#define PG_VERSION_NUM 160014 /* A string containing the version number, platform, and C compiler */ -#define PG_VERSION_STR "PostgreSQL 16.10 on x86_64-pc-linux-gnu, compiled by clang version 20.1.8, 64-bit" +#define PG_VERSION_STR "PostgreSQL 16.14 on x86_64-pc-linux-gnu, compiled by clang version 20.1.8, 64-bit" /* Define to 1 to allow profiling output to be saved separately for each process. 
*/ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/port.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/port.h index 4b4d842f6d4..726a93c3bad 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/port.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/port.h @@ -58,6 +58,7 @@ extern void make_native_path(char *filename); extern void cleanup_path(char *path); extern bool path_contains_parent_reference(const char *path); extern bool path_is_relative_and_below_cwd(const char *path); +extern bool path_is_safe_for_extraction(const char *path); extern bool path_is_prefix_of_path(const char *path1, const char *path2); extern char *make_absolute_path(const char *path); extern const char *get_progname(const char *argv0); @@ -338,7 +339,6 @@ extern bool rmtree(const char *path, bool rmtopdir); * open() and fopen() replacements to allow deletion of open files and * passing of other special options. */ -#define O_DIRECT 0x80000000 extern HANDLE pgwin32_open_handle(const char *, int, bool); extern int pgwin32_open(const char *, int,...); extern FILE *pgwin32_fopen(const char *, const char *); @@ -468,6 +468,10 @@ extern bool pg_get_user_name(uid_t user_id, char *buffer, size_t buflen); extern bool pg_get_user_home_dir(uid_t user_id, char *buffer, size_t buflen); #endif +#if !HAVE_DECL_TIMINGSAFE_BCMP +extern int timingsafe_bcmp(const void *b1, const void *b2, size_t len); +#endif + extern void pg_qsort(void *base, size_t nel, size_t elsize, int (*cmp) (const void *, const void *)); extern int pg_qsort_strcmp(const void *a, const void *b); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/atomics/generic-gcc.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/atomics/generic-gcc.h index da04e9f0dc3..f9698a4876c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/atomics/generic-gcc.h +++ 
b/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/atomics/generic-gcc.h @@ -44,12 +44,20 @@ #if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) /* acquire semantics include read barrier semantics */ -# define pg_read_barrier_impl() __atomic_thread_fence(__ATOMIC_ACQUIRE) +# define pg_read_barrier_impl() do \ +{ \ + pg_compiler_barrier_impl(); \ + __atomic_thread_fence(__ATOMIC_ACQUIRE); \ +} while (0) #endif #if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) /* release semantics include write barrier semantics */ -# define pg_write_barrier_impl() __atomic_thread_fence(__ATOMIC_RELEASE) +# define pg_write_barrier_impl() do \ +{ \ + pg_compiler_barrier_impl(); \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ +} while (0) #endif diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/win32_port.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/win32_port.h index ae2a2221d22..a6d0b011336 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/win32_port.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/port/win32_port.h @@ -350,18 +350,15 @@ extern int _pgstat64(const char *name, struct stat *buf); /* * Supplement to <fcntl.h>. - * This is the same value as _O_NOINHERIT in the MS header file. This is - * to ensure that we don't collide with a future definition. It means - * we cannot use _O_NOINHERIT ourselves. - */ -#define O_DSYNC 0x0080 - -/* - * Our open() replacement does not create inheritable handles, so it is safe to - * ignore O_CLOEXEC. (If we were using Windows' own open(), it might be - * necessary to convert this to _O_NOINHERIT.) + * + * We borrow bits from the high end when we have to, to avoid colliding with + * the system-defined values. Our open() replacement in src/port/open.c + * converts these to the equivalent CreateFile() flags, along with the ones + * from fcntl.h. 
*/ -#define O_CLOEXEC 0 +#define O_CLOEXEC 0x04000000 +#define O_DIRECT 0x80000000 +#define O_DSYNC _O_NOINHERIT /* * Supplement to <errno.h>. diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regcustom.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regcustom.h index af0fe97c796..91b5fc80ae8 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regcustom.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regcustom.h @@ -52,6 +52,8 @@ #define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM) #define FREE(p) pfree(VS(p)) #define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM) +#define MALLOC_ARRAY(type, n) palloc_array_extended(type, n, MCXT_ALLOC_NO_OOM) +#define REALLOC_ARRAY(p, type, n) repalloc_array_extended(p, type, n, MCXT_ALLOC_NO_OOM) #define INTERRUPT(re) CHECK_FOR_INTERRUPTS() #define assert(x) Assert(x) diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regguts.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regguts.h index fd69299a16d..6fb9551721d 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regguts.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/regex/regguts.h @@ -76,6 +76,14 @@ #ifndef FREE #define FREE(p) free(VS(p)) #endif +#ifndef MALLOC_ARRAY +/* we don't depend on calloc's zeroing behavior, we do need overflow check */ +#define MALLOC_ARRAY(type, n) ((type *) calloc(sizeof(type), n)) +#endif +#ifndef REALLOC_ARRAY +/* XXX this definition does not provide the desired overflow check */ +#define REALLOC_ARRAY(p, type, n) ((type *) REALLOC(p, sizeof(type) * (n))) +#endif /* interruption */ #ifndef INTERRUPT @@ -446,6 +454,11 @@ struct cnfa * (the compacted NFA and the colormap). * The scaling here is based on an empirical measurement that very large * NFAs tend to have about 4 arcs/state. 
+ * + * Do not raise this so high as to allow more than INT_MAX/8 states or arcs, + * or you risk integer overflows in various space allocation requests. + * (We could be more defensive in those places, but that's so far beyond the + * practical range of NFA sizes that it doesn't seem worth additional code.) */ #ifndef REG_MAX_COMPILE_SPACE #define REG_MAX_COMPILE_SPACE \ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/replication/slot.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/replication/slot.h index 8895a08e8fe..8199ed8e04b 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/replication/slot.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/replication/slot.h @@ -224,6 +224,8 @@ extern void ReplicationSlotMarkDirty(void); /* misc stuff */ extern void ReplicationSlotInitialize(void); extern bool ReplicationSlotValidateName(const char *name, int elevel); +extern bool ReplicationSlotValidateNameInternal(const char *name, + int *err_code, char **err_msg, char **err_hint); extern void ReplicationSlotReserveWal(void); extern void ReplicationSlotsComputeRequiredXmin(bool already_locked); extern void ReplicationSlotsComputeRequiredLSN(void); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/bufmgr.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/bufmgr.h index 8e0cc415c37..d30cb75befb 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/bufmgr.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/bufmgr.h @@ -211,6 +211,9 @@ extern Buffer ExtendBufferedRelTo(BufferManagerRelation bmr, extern void InitBufferPoolAccess(void); extern void AtEOXact_Buffers(bool isCommit); +#ifdef USE_ASSERT_CHECKING +extern void AssertBufferLocksPermitCatalogRead(void); +#endif extern void PrintBufferLeakWarning(Buffer buffer); extern void CheckPointBuffers(int flags); extern BlockNumber BufferGetBlockNumber(Buffer buffer); diff --git 
a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/fd.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/fd.h index f76e090b9d8..0f173f9a929 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/fd.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/fd.h @@ -59,12 +59,23 @@ typedef int File; #define IO_DIRECT_WAL 0x02 #define IO_DIRECT_WAL_INIT 0x04 +enum FileExtendMethod +{ +#ifdef HAVE_POSIX_FALLOCATE + FILE_EXTEND_METHOD_POSIX_FALLOCATE, +#endif + FILE_EXTEND_METHOD_WRITE_ZEROS, +}; + +/* Default to the first available file_extend_method. */ +#define DEFAULT_FILE_EXTEND_METHOD 0 /* GUC parameter */ extern __thread PGDLLIMPORT int max_files_per_process; extern __thread PGDLLIMPORT bool data_sync_retry; extern __thread PGDLLIMPORT int recovery_init_sync_method; extern __thread PGDLLIMPORT int io_direct_flags; +extern __thread PGDLLIMPORT int file_extend_method; /* * This is private to fd.c, but exported for save/restore_backend_variables() diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/lwlock.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/lwlock.h index e33d741f68f..7b6ac33536b 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/lwlock.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/lwlock.h @@ -131,6 +131,8 @@ extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode); extern void LWLockRelease(LWLock *lock); extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val); extern void LWLockReleaseAll(void); +extern void ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *), + void *context); extern bool LWLockHeldByMe(LWLock *lock); extern bool LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride); extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/proc.h 
b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/proc.h index b92d94aa81b..d29d47f20a8 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/proc.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/proc.h @@ -109,10 +109,10 @@ struct XidCache * is inserted prior to the new redo point, the corresponding data changes will * also be flushed to disk before the checkpoint can complete. (In the * extremely common case where the data being modified is in shared buffers - * and we acquire an exclusive content lock on the relevant buffers before - * writing WAL, this mechanism is not needed, because phase 2 will block - * until we release the content lock and then flush the modified data to - * disk.) + * and we acquire an exclusive content lock and MarkBufferDirty() on the + * relevant buffers before writing WAL, this mechanism is not needed, because + * phase 2 will block until we release the content lock and then flush the + * modified data to disk. See transam/README and SyncOneBuffer().) * * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2 * to phase 3. 
This is useful if we are performing a WAL-logged operation that diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/shmem.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/shmem.h index 0e1fb2006c1..030aba03278 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/shmem.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/storage/shmem.h @@ -35,8 +35,6 @@ extern void InitShmemIndex(void); extern HTAB *ShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags); extern void *ShmemInitStruct(const char *name, Size size, bool *foundPtr); -extern Size add_size(Size s1, Size s2); -extern Size mul_size(Size s1, Size s2); /* ipci.c */ extern void RequestAddinShmemSpace(Size size); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_locale.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_locale.h index 58d594d4006..787ffb165d1 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_locale.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_locale.h @@ -37,13 +37,37 @@ typedef struct /* The second argument of t_iseq() must be a plain ASCII character */ #define t_iseq(x,c) (TOUCHAR(x) == (unsigned char) (c)) -#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s)) +/* Copy multibyte character of known byte length, return byte length. */ +static inline int +ts_copychar_with_len(void *dest, const void *src, int length) +{ + memcpy(dest, src, length); + return length; +} + +/* Copy multibyte character from null-terminated string, return byte length. */ +static inline int +ts_copychar_cstr(void *dest, const void *src) +{ + return ts_copychar_with_len(dest, src, pg_mblen_cstr((const char *) src)); +} + +/* Historical macro for the above. 
*/ +#define COPYCHAR ts_copychar_cstr + +#define GENERATE_T_ISCLASS_DECL(character_class) \ +extern int t_is##character_class##_with_len(const char *ptr, int len); \ +extern int t_is##character_class##_cstr(const char *ptr); \ +extern int t_is##character_class##_unbounded(const char *ptr); \ +\ +/* deprecated */ \ +extern int t_is##character_class(const char *ptr); -extern int t_isdigit(const char *ptr); -extern int t_isspace(const char *ptr); -extern int t_isalpha(const char *ptr); -extern int t_isalnum(const char *ptr); -extern int t_isprint(const char *ptr); +GENERATE_T_ISCLASS_DECL(alnum); +GENERATE_T_ISCLASS_DECL(alpha); +GENERATE_T_ISCLASS_DECL(digit); +GENERATE_T_ISCLASS_DECL(print); +GENERATE_T_ISCLASS_DECL(space); extern char *lowerstr(const char *str); extern char *lowerstr_with_len(const char *str, int len); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_utils.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_utils.h index d3dc8bae475..48db1b800a1 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_utils.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/tsearch/ts_utils.h @@ -40,14 +40,12 @@ extern bool gettoken_tsvector(TSVectorParseState state, extern void close_tsvector_parser(TSVectorParseState state); /* phrase operator begins with '<' */ -#define ISOPERATOR(x) \ - ( pg_mblen(x) == 1 && ( *(x) == '!' || \ - *(x) == '&' || \ - *(x) == '|' || \ - *(x) == '(' || \ - *(x) == ')' || \ - *(x) == '<' \ - ) ) +#define ISOPERATOR(x) (*(x) == '!' 
|| \ + *(x) == '&' || \ + *(x) == '|' || \ + *(x) == '(' || \ + *(x) == ')' || \ + *(x) == '<') /* parse_tsquery */ diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/builtins.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/builtins.h index e9edacdab4b..3c8db9e4e8b 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/builtins.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/builtins.h @@ -65,6 +65,7 @@ extern char *pg_ultostr(char *str, uint32 value); /* oid.c */ extern oidvector *buildoidvector(const Oid *oids, int n); +extern void check_valid_oidvector(const oidvector *oidArray); extern Oid oidparse(Node *node); extern int oid_cmp(const void *p1, const void *p2); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/catcache.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/catcache.h index 3ead775acb6..c7591534312 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/catcache.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/catcache.h @@ -229,7 +229,8 @@ extern void CatCacheInvalidate(CatCache *cache, uint32 hashValue); extern void PrepareToInvalidateCacheTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple, - void (*function) (int, uint32, Oid)); + void (*function) (int, uint32, Oid, void *), + void *context); extern void PrintCatCacheLeakWarning(HeapTuple tuple); extern void PrintCatCacheListLeakWarning(CatCList *list); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/elog.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/elog.h index 430e09af4f8..4245ba760fc 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/elog.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/elog.h @@ -540,6 +540,7 @@ extern void write_jsonlog(ErrorData *edata); * safely (memory context, GUC load etc) */ extern void write_stderr(const 
char *fmt,...) pg_attribute_printf(1, 2); +extern void vwrite_stderr(const char *fmt, va_list ap) pg_attribute_printf(1, 0); /* * Write a message to STDERR using only async-signal-safe functions. This can diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/guc.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/guc.h index a46677ed377..8725b30f9d0 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/guc.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/guc.h @@ -102,7 +102,7 @@ typedef enum * will show as "default" in pg_settings. If there is a specific reason not * to want that, use source == PGC_S_OVERRIDE. * - * NB: see GucSource_Names in guc.c if you change this. + * NB: see GucSource_Names in guc_tables.c if you change this. */ typedef enum { diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/inval.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/inval.h index 05f4c28dc87..bf3511372e6 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/inval.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/inval.h @@ -28,6 +28,10 @@ extern void AcceptInvalidationMessages(void); extern void AtEOXact_Inval(bool isCommit); +extern void PreInplace_Inval(void); +extern void AtInplace_Inval(void); +extern void ForgetInplace_Inval(void); + extern void AtEOSubXact_Inval(bool isCommit); extern void PostPrepare_Inval(void); @@ -37,6 +41,8 @@ extern void CommandEndInvalidationMessages(void); extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple); +extern void CacheInvalidateHeapTupleInplace(Relation relation, + HeapTuple key_equivalent_tuple); extern void CacheInvalidateCatalog(Oid catalogId); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/lsyscache.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/lsyscache.h index 4c5869b9482..42c88a1db4b 
100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/lsyscache.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/lsyscache.h @@ -86,6 +86,7 @@ extern bool get_op_hash_functions(Oid opno, extern List *get_op_btree_interpretation(Oid opno); extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); extern bool comparison_ops_are_compatible(Oid opno1, Oid opno2); +extern bool collations_agree_on_equality(Oid coll1, Oid coll2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum, bool missing_ok); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/memutils.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/memutils.h index df710890694..9a0dcae8cec 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/memutils.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/memutils.h @@ -41,7 +41,7 @@ #define AllocSizeIsValid(size) ((Size) (size) <= MaxAllocSize) -/* Must be less than SIZE_MAX */ +/* Do not make this any bigger; see add_size() and mul_size() */ #define MaxAllocHugeSize (SIZE_MAX / 2) #define InvalidAllocSize SIZE_MAX diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/palloc.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/palloc.h index 87c868a00cd..ed107652277 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/palloc.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/palloc.h @@ -90,6 +90,18 @@ extern pg_nodiscard void *repalloc0(void *pointer, Size oldsize, Size size); extern void pfree(void *pointer); /* + * Support for safe calculation of memory request sizes + */ +extern Size add_size(Size s1, Size s2); +extern Size mul_size(Size s1, Size s2); +extern void *palloc_mul(Size s1, Size s2); +extern void *palloc0_mul(Size s1, Size s2); +extern void 
*palloc_mul_extended(Size s1, Size s2, int flags); +pg_nodiscard extern void *repalloc_mul(void *p, Size s1, Size s2); +pg_nodiscard extern void *repalloc_mul_extended(void *p, Size s1, Size s2, + int flags); + +/* * Variants with easier notation and more type safety */ @@ -102,15 +114,17 @@ extern void pfree(void *pointer); /* * Allocate space for "count" objects of type "type" */ -#define palloc_array(type, count) ((type *) palloc(sizeof(type) * (count))) -#define palloc0_array(type, count) ((type *) palloc0(sizeof(type) * (count))) +#define palloc_array(type, count) ((type *) palloc_mul(sizeof(type), count)) +#define palloc0_array(type, count) ((type *) palloc0_mul(sizeof(type), count)) +#define palloc_array_extended(type, count, flags) ((type *) palloc_mul_extended(sizeof(type), count, flags)) /* * Change size of allocation pointed to by "pointer" to have space for "count" * objects of type "type" */ -#define repalloc_array(pointer, type, count) ((type *) repalloc(pointer, sizeof(type) * (count))) -#define repalloc0_array(pointer, type, oldcount, count) ((type *) repalloc0(pointer, sizeof(type) * (oldcount), sizeof(type) * (count))) +#define repalloc_array(pointer, type, count) ((type *) repalloc_mul(pointer, sizeof(type), count)) +#define repalloc0_array(pointer, type, oldcount, count) ((type *) repalloc0(pointer, mul_size(sizeof(type), oldcount), mul_size(sizeof(type), count))) +#define repalloc_array_extended(pointer, type, count, flags) ((type *) repalloc_mul_extended(pointer, sizeof(type), count, flags)) /* * The result of palloc() is always word-aligned, so we can skip testing diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/relcache.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/relcache.h index 87619dd083c..02bfa3ba559 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/relcache.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/relcache.h @@ -37,6 +37,14 @@ typedef 
Relation *RelationPtr; /* * Routines to open (lookup) and close a relcache entry */ +#ifdef USE_ASSERT_CHECKING +extern void AssertCouldGetRelation(void); +#else +static inline void +AssertCouldGetRelation(void) +{ +} +#endif extern Relation RelationIdGetRelation(Oid relationId); extern void RelationClose(Relation relation); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/syscache.h b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/syscache.h index 1395497e198..34b3b76aa5c 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/syscache.h +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/include/utils/syscache.h @@ -113,9 +113,11 @@ enum SysCacheIdentifier TYPENAMENSP, TYPEOID, USERMAPPINGOID, - USERMAPPINGUSERSERVER + USERMAPPINGUSERSERVER, + /* intentionally out of alphabetical order, to avoid an ABI break: */ + EXTENSIONOID -#define SysCacheSize (USERMAPPINGUSERSERVER + 1) +#define SysCacheSize (EXTENSIONOID + 1) }; extern void InitCatalogCache(void); diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/port/path.c b/yql/essentials/parser/pg_wrapper/postgresql/src/port/path.c index 817f6b08348..58732b13418 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/port/path.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/port/path.c @@ -627,6 +627,23 @@ path_is_relative_and_below_cwd(const char *path) } /* + * Detect whether a path is safe for use during archive extraction. + * + * This applies canonicalize_path(), then it checks that the path does + * not contain any parent directory references. + */ +bool +path_is_safe_for_extraction(const char *path) +{ + char buf[MAXPGPATH]; + + strlcpy(buf, path, sizeof(buf)); + canonicalize_path(buf); + + return path_is_relative_and_below_cwd(buf); +} + +/* * Detect whether path1 is a prefix of path2 (including equality). 
* * This is pretty trivial, but it seems better to export a function than diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/port/timingsafe_bcmp.c b/yql/essentials/parser/pg_wrapper/postgresql/src/port/timingsafe_bcmp.c new file mode 100644 index 00000000000..288865f50d1 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/port/timingsafe_bcmp.c @@ -0,0 +1,43 @@ +/* + * src/port/timingsafe_bcmp.c + * + * $OpenBSD: timingsafe_bcmp.c,v 1.3 2015/08/31 02:53:57 guenther Exp $ + */ + +/* + * Copyright (c) 2010 Damien Miller. All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "c.h" + +#ifdef USE_SSL +#include <openssl/crypto.h> +#endif + +int +timingsafe_bcmp(const void *b1, const void *b2, size_t n) +{ +#ifdef USE_SSL + return CRYPTO_memcmp(b1, b2, n); +#else + const unsigned char *p1 = b1, + *p2 = b2; + int ret = 0; + + for (; n > 0; n--) + ret |= *p1++ ^ *p2++; + return (ret != 0); +#endif +} diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/strftime.c b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/strftime.c index 9247a34157f..3da0b4d7658 100644 --- a/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/strftime.c +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/timezone/strftime.c @@ -122,6 +122,13 @@ static char *_yconv(int a, int b, bool convert_top, bool convert_yy, char *pt, c * Convert timestamp t to string s, a caller-allocated buffer of size maxsize, * using the given format pattern. * + * Unlike standard strftime(), we guarantee to provide a null-terminated + * result even on failure, so long as maxsize > 0. If we overrun the buffer, + * return an empty string rather than risking mis-encoded multibyte output. + * (Since this module only supports C locale, you might think multibyte + * characters are impossible --- but the time zone name printed by %Z comes + * from outside and could contain such.) + * * See also timestamptz_to_str. */ size_t @@ -135,11 +142,15 @@ pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t) if (!p) { errno = EOVERFLOW; + if (maxsize > 0) + *s = '\0'; return 0; } if (p == s + maxsize) { errno = ERANGE; + if (maxsize > 0) + *s = '\0'; return 0; } *p = '\0'; |
