/* -------------------------------------------------------------------------
 *
 * predicate_internals.h
 *    POSTGRES internal predicate locking definitions.
 *
 *
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/predicate_internals.h
 *
 * -------------------------------------------------------------------------
 */
#ifndef PREDICATE_INTERNALS_H
#define PREDICATE_INTERNALS_H
#include "storage/lock/lock.h"
* Commit number.
*/
typedef uint64 SerCommitSeqNo;
/*
 * Reserved commit sequence numbers:
 *  - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
 *    used as a SerCommitSeqNo, even an invalid one
 *  - InvalidSerCommitSeqNo is used to indicate a transaction that
 *    hasn't committed yet, so use a number greater than all valid
 *    ones to make comparison do the expected thing
 *  - RecoverySerCommitSeqNo is used to refer to transactions that
 *    happened before a crash/recovery, since we restart the sequence
 *    at that point.  It's earlier than all normal sequence numbers,
 *    and is only used by recovered prepared transactions
 */
#define InvalidSerCommitSeqNo ((SerCommitSeqNo)UINT64CONST(0xFFFFFFFFFFFFFFFF))
#define RecoverySerCommitSeqNo ((SerCommitSeqNo)1)
#define FirstNormalSerCommitSeqNo ((SerCommitSeqNo)2)
* The SERIALIZABLEXACT struct contains information needed for each
* serializable database transaction to support SSI techniques.
*
* A home-grown list is maintained in shared memory to manage these.
* An entry is used when the serializable transaction acquires a snapshot.
* Unless the transaction is rolled back, this entry must generally remain
* until all concurrent transactions have completed. (There are special
* optimizations for READ ONLY transactions which often allow them to be
* cleaned up earlier.) A transaction which is rolled back is cleaned up
* as soon as possible.
*
* Eligibility for cleanup of committed transactions is generally determined
* by comparing the transaction's finishedBefore field to
* SerializableGlobalXmin.
*/
typedef struct SERIALIZABLEXACT {
VirtualTransactionId vxid;
* these. */
* We use two numbers to track the order that transactions commit. Before
* commit, a transaction is marked as prepared, and prepareSeqNo is set.
* Shortly after commit, it's marked as committed, and commitSeqNo is set.
* This doesn't give a strict commit order, but these two values together
* are good enough for us, as we can always err on the safe side and
* assume that there's a conflict, if we can't be sure of the exact
* ordering of two commits.
*
* Note that a transaction is marked as prepared for a short period during
* commit processing, even if two-phase commit is not used. But with
* two-phase commit, a transaction can stay in prepared state for some
* time.
*/
SerCommitSeqNo prepareSeqNo;
SerCommitSeqNo commitSeqNo;
union {
SerCommitSeqNo earliestOutConflictCommit;
SerCommitSeqNo lastCommitBeforeSnapshot;
} SeqNo;
SHM_QUEUE outConflicts;
SHM_QUEUE inConflicts;
SHM_QUEUE predicateLocks;
SHM_QUEUE finishedLink;
* for r/o transactions: list of concurrent r/w transactions that we could
* potentially have conflicts with, and vice versa for r/w transactions
*/
SHM_QUEUE possibleUnsafeConflicts;
TransactionId topXid;
TransactionId finishedBefore;
* serializable xids are before this. */
TransactionId xmin;
uint32 flags;
ThreadId pid;
} SERIALIZABLEXACT;
/*
 * Flag bits; each value below is a distinct single bit.  Presumably these
 * are OR'd into SERIALIZABLEXACT.flags -- TODO confirm against predicate.c.
 */
#define SXACT_FLAG_COMMITTED 0x00000001
#define SXACT_FLAG_PREPARED 0x00000002
#define SXACT_FLAG_ROLLED_BACK 0x00000004
#define SXACT_FLAG_DOOMED 0x00000008
/*
 * The following flag actually means that the flagged transaction has a
 * conflict out *to a transaction which committed ahead of it*.  It's hard
 * to get that into a name of a reasonable length.
 */
#define SXACT_FLAG_CONFLICT_OUT 0x00000010
#define SXACT_FLAG_READ_ONLY 0x00000020
#define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
#define SXACT_FLAG_RO_SAFE 0x00000080
#define SXACT_FLAG_RO_UNSAFE 0x00000100
#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
* The following types are used to provide an ad hoc list for holding
* SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
* access these by key -- there are direct pointers to these objects where
* needed. If a shared memory list is created, these types can probably be
* eliminated in favor of using the general solution.
*/
typedef struct PredXactListElementData {
SHM_QUEUE link;
SERIALIZABLEXACT sxact;
} PredXactListElementData;
typedef struct PredXactListElementData* PredXactListElement;
#define PredXactListElementDataSize ((Size)MAXALIGN(sizeof(PredXactListElementData)))
typedef struct PredXactListData {
SHM_QUEUE availableList;
SHM_QUEUE activeList;
* These global variables are maintained when registering and cleaning up
* serializable transactions. They must be global across all backends,
* but are not needed outside the predicate.c source file. Protected by
* SerializableXactHashLock.
*/
TransactionId SxactGlobalXmin;
* transactions */
int SxactGlobalXminCount;
* transactions have this xmin */
int WritableSxactCount;
* transactions are active */
SerCommitSeqNo LastSxactCommitSeqNo;
* increasing number for
* commits of serializable
* transactions */
SerCommitSeqNo CanPartialClearThrough;
* and inConflicts for
* committed transactions
* through this seq no */
SerCommitSeqNo HavePartialClearedThrough;
* seq no */
SERIALIZABLEXACT* OldCommittedSxact;
PredXactListElement element;
} PredXactListData;
typedef struct PredXactListData* PredXactList;
#define PredXactListDataSize ((Size)MAXALIGN(sizeof(PredXactListData)))
* The following types are used to provide lists of rw-conflicts between
* pairs of transactions. Since exactly the same information is needed,
* they are also used to record possible unsafe transaction relationships
* for purposes of identifying safe snapshots for read-only transactions.
*
* When a RWConflictData is not in use to record either type of relationship
* between a pair of transactions, it is kept on an "available" list. The
* outLink field is used for maintaining that list.
*/
typedef struct RWConflictData {
SHM_QUEUE outLink;
SHM_QUEUE inLink;
SERIALIZABLEXACT* sxactOut;
SERIALIZABLEXACT* sxactIn;
} RWConflictData;
typedef struct RWConflictData* RWConflict;
#define RWConflictDataSize ((Size)MAXALIGN(sizeof(RWConflictData)))
/*
 * Header for the pool of RWConflictData entries.  Entries that are not
 * recording a relationship between two transactions are kept on
 * availableList.  (Presumably "element" points at the pool's storage --
 * TODO confirm against predicate.c.)
 */
struct RWConflictPoolHeaderData {
    SHM_QUEUE availableList;
    RWConflict element;
};
typedef struct RWConflictPoolHeaderData RWConflictPoolHeaderData;

/* Pointer form of the pool header type. */
typedef RWConflictPoolHeaderData* RWConflictPoolHeader;

/* MAXALIGN'd size of the pool header. */
#define RWConflictPoolHeaderDataSize ((Size)MAXALIGN(sizeof(struct RWConflictPoolHeaderData)))
* The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
* transaction or any of its subtransactions.
*/
typedef struct SERIALIZABLEXIDTAG {
TransactionId xid;
} SERIALIZABLEXIDTAG;
* The SERIALIZABLEXID struct provides a link from a TransactionId for a
* serializable transaction to the related SERIALIZABLEXACT record, even if
* the transaction has completed and its connection has been closed.
*
* These are created as new top level transaction IDs are first assigned to
* transactions which are participating in predicate locking. This may
* never happen for a particular transaction if it doesn't write anything.
* They are removed with their related serializable transaction objects.
*
* The SubTransGetTopmostTransaction method is used where necessary to get
* from an XID which might be from a subtransaction to the top level XID.
*/
typedef struct SERIALIZABLEXID {
SERIALIZABLEXIDTAG tag;
SERIALIZABLEXACT* myXact;
} SERIALIZABLEXID;
* The PREDICATELOCKTARGETTAG struct identifies a database object which can
* be the target of predicate locks.
*
* Note that the hash function being used doesn't properly respect tag
* length -- it will go to a four byte boundary past the end of the tag.
* If you change this struct, make sure any slack space is initialized,
* so that any random bytes in the middle or at the end are not included
* in the hash.
*
* If we always use the same fields for the same type of value, we
* should rename these. Holding off until it's clear there are no exceptions.
* Since indexes are relations with blocks and tuples, it's looking likely that
* the rename will be possible. If not, we may need to divide the last field
* and use part of it for a target type, so that we know how to interpret the
* data..
*/
typedef struct PREDICATELOCKTARGETTAG {
uint32 locktag_field1;
uint32 locktag_field2;
uint32 locktag_field3;
uint32 locktag_field4;
uint32 locktag_field5;
} PREDICATELOCKTARGETTAG;
* The PREDICATELOCKTARGET struct represents a database object on which there
* are predicate locks.
*
* A hash list of these objects is maintained in shared memory. An entry is
* added when a predicate lock is requested on an object which doesn't
* already have one. An entry is removed when the last lock is removed from
* its list.
*
* Because a particular target might become obsolete, due to update to a new
* version, before the reading transaction is obsolete, we need some way to
* prevent errors from reuse of a tuple ID. Rather than attempting to clean
* up the targets as the related tuples are pruned or vacuumed, we check the
* xmin on access. This should be far less costly.
*/
typedef struct PREDICATELOCKTARGET {
PREDICATELOCKTARGETTAG tag;
SHM_QUEUE predicateLocks;
* predicate lock target */
} PREDICATELOCKTARGET;
* The PREDICATELOCKTAG struct identifies an individual predicate lock.
*
* It is the combination of predicate lock target (which is a lockable
* object) and a serializable transaction which has acquired a lock on that
* target.
*/
typedef struct PREDICATELOCKTAG {
PREDICATELOCKTARGET* myTarget;
SERIALIZABLEXACT* myXact;
} PREDICATELOCKTAG;
* The PREDICATELOCK struct represents an individual lock.
*
* An entry can be created here when the related database object is read, or
* by promotion of multiple finer-grained targets. All entries related to a
* serializable transaction are removed when that serializable transaction is
* cleaned up. Entries can also be removed when they are combined into a
* single coarser-grained lock entry.
*/
typedef struct PREDICATELOCK {
PREDICATELOCKTAG tag;
SHM_QUEUE targetLink;
* predicate locks */
SHM_QUEUE xactLink;
* predicate locks */
SerCommitSeqNo commitSeqNo;
} PREDICATELOCK;
* The LOCALPREDICATELOCK struct represents a local copy of data which is
* also present in the PREDICATELOCK table, organized for fast access without
* needing to acquire a LWLock. It is strictly for optimization.
*
* Each serializable transaction creates its own local hash table to hold a
* collection of these. This information is used to determine when a number
* of fine-grained locks should be promoted to a single coarser-grained lock.
* The information is maintained more-or-less in parallel to the
* PREDICATELOCK data, but because this data is not protected by locks and is
* only used in an optimization heuristic, it is allowed to drift in a few
* corner cases where maintaining exact data would be expensive.
*
* The hash table is created when the serializable transaction acquires its
* snapshot, and its memory is released upon completion of the transaction.
*/
typedef struct LOCALPREDICATELOCK {
PREDICATELOCKTARGETTAG tag;
bool held;
int childLocks;
} LOCALPREDICATELOCK;
/*
 * The types of predicate locks which can be acquired.
 */
typedef enum PredicateLockTargetType {
    PREDLOCKTAG_RELATION,
    PREDLOCKTAG_PAGE,
    PREDLOCKTAG_TUPLE
} PredicateLockTargetType;
* This structure is used to quickly capture a copy of all predicate
* locks. This is currently used only by the pg_lock_status function,
* which in turn is used by the pg_locks view.
*/
typedef struct PredicateLockData {
int nelements;
PREDICATELOCKTARGETTAG* locktags;
SERIALIZABLEXACT* xacts;
} PredicateLockData;
/*
 * These macros define how we map logical IDs of lockable objects into the
 * physical fields of PREDICATELOCKTARGETTAG.  Use these to set up values,
 * rather than accessing the fields directly.  Note multiple eval of target!
 */
#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag, dboid, reloid) \
    ((locktag).locktag_field1 = (dboid),                            \
        (locktag).locktag_field2 = (reloid),                        \
        (locktag).locktag_field3 = InvalidBlockNumber,              \
        (locktag).locktag_field4 = InvalidOffsetNumber,             \
        (locktag).locktag_field5 = InvalidTransactionId)

#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag, dboid, reloid, blocknum) \
    ((locktag).locktag_field1 = (dboid),                                  \
        (locktag).locktag_field2 = (reloid),                              \
        (locktag).locktag_field3 = (blocknum),                            \
        (locktag).locktag_field4 = InvalidOffsetNumber,                   \
        (locktag).locktag_field5 = InvalidTransactionId)

#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag, dboid, reloid, blocknum, offnum, xmin) \
    ((locktag).locktag_field1 = (dboid),                                                 \
        (locktag).locktag_field2 = (reloid),                                             \
        (locktag).locktag_field3 = (blocknum),                                           \
        (locktag).locktag_field4 = (offnum),                                             \
        (locktag).locktag_field5 = (xmin))

/* Accessors: read each logical ID back out of the tag's physical fields. */
#define GET_PREDICATELOCKTARGETTAG_DB(locktag) ((Oid)(locktag).locktag_field1)
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) ((Oid)(locktag).locktag_field2)
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) ((BlockNumber)(locktag).locktag_field3)
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) ((OffsetNumber)(locktag).locktag_field4)
#define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) ((TransactionId)(locktag).locktag_field5)

/*
 * Classify a tag: a valid offset number means a tuple lock; otherwise a
 * valid block number means a page lock; otherwise it is a relation lock.
 */
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)           \
    (((locktag).locktag_field4 != InvalidOffsetNumber)     \
            ? PREDLOCKTAG_TUPLE                            \
            : (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : PREDLOCKTAG_RELATION))
/*
 * Two-phase commit statefile records.  There are two types: for each
 * transaction, we generate one per-transaction record and a variable
 * number of per-predicate-lock records.
 */
typedef enum TwoPhasePredicateRecordType {
    TWOPHASEPREDICATERECORD_XACT,
    TWOPHASEPREDICATERECORD_LOCK
} TwoPhasePredicateRecordType;
* Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
* much is needed because most of it not meaningful for a recovered
* prepared transaction.
*
* In particular, we do not record the in and out conflict lists for a
* prepared transaction because the associated SERIALIZABLEXACTs will
* not be available after recovery. Instead, we simply record the
* existence of each type of conflict by setting the transaction's
* summary conflict in/out flag.
*/
typedef struct TwoPhasePredicateXactRecord {
TransactionId xmin;
uint32 flags;
} TwoPhasePredicateXactRecord;
/*
 * Per-predicate-lock two-phase record: carries only the tag of the lock
 * target.
 */
struct TwoPhasePredicateLockRecord {
    PREDICATELOCKTARGETTAG target;
};
typedef struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord;
/*
 * A two-phase record: a record type followed by a union holding either the
 * per-transaction or the per-lock payload.  (Presumably "type" tells which
 * union member is valid -- TODO confirm against predicate.c.)
 */
struct TwoPhasePredicateRecord {
    TwoPhasePredicateRecordType type;
    union {
        TwoPhasePredicateXactRecord xactRecord;
        TwoPhasePredicateLockRecord lockRecord;
    } data;
};
typedef struct TwoPhasePredicateRecord TwoPhasePredicateRecord;
/*
 * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
 */
#define InvalidSerializableXact ((SERIALIZABLEXACT*)NULL)
* Function definitions for functions needing awareness of predicate
* locking internals.
*/
extern PredicateLockData* GetPredicateLockStatusData(void);
#endif