PostgreSQL源码学习删除数据#0

database

以一条delete from test where a = 123;的SQL语句为例,跟踪删除数据的代码逻辑。(PG版本为12.2)

删除数据主要的函数是heap_delete。查看调用栈:

#0  heap_delete (relation=0x7f67ac24ca28, tid=0x7fff92b2c8ba, cid=0, 

crosscheck=0x0, wait=true, tmfd=0x7fff92b2c7d0, changingPart=false)

at heapam.c:2447

#1 0x00000000004d3bb7 in heapam_tuple_delete (relation=0x7f67ac24ca28,

tid=0x7fff92b2c8ba, cid=0, snapshot=0x2e37060, crosscheck=0x0, wait=true,

tmfd=0x7fff92b2c7d0, changingPart=false) at heapam_handler.c:314

#2 0x00000000006dac91 in table_tuple_delete (rel=0x7f67ac24ca28,

tid=0x7fff92b2c8ba, cid=0, snapshot=0x2e37060, crosscheck=0x0, wait=true,

tmfd=0x7fff92b2c7d0, changingPart=false)

at ../../../src/include/access/tableam.h:1230

#3 0x00000000006dbfcb in ExecDelete (mtstate=0x2dcd1f0,

tupleid=0x7fff92b2c8ba, oldtuple=0x0, planSlot=0x2dce5c8,

epqstate=0x2dcd2e8, estate=0x2dcce70, processReturning=true,

canSetTag=true, changingPart=false, tupleDeleted=0x0, epqreturnslot=0x0)

at nodeModifyTable.c:768

#4 0x00000000006de016 in ExecModifyTable (pstate=0x2dcd1f0)

at nodeModifyTable.c:2226

heap_delete函数(声明位于src/include/access/heapam.h,实现位于src/backend/access/heap/heapam.c;下面列出的是函数体的主要逻辑,省略了函数头和局部变量声明)

//src/include/access/heapam.h

extern TM_Result heap_delete(Relation relation, ItemPointer tid,

CommandId cid, Snapshot crosscheck, bool wait,

struct TM_FailureData *tmfd, bool changingPart);

TransactionId xid = GetCurrentTransactionId();

Assert(ItemPointerIsValid(tid));

/* 禁止并行操作时进行删除 */

if (IsInParallelMode())

ereport(ERROR,

(errcode(ERRCODE_INVALID_TRANSACTION_STATE),

errmsg("cannot delete tuples during a parallel operation")));

/* 根据元组的tid获取其所在的块,并取得块对应的page */

block = ItemPointerGetBlockNumber(tid);

buffer = ReadBuffer(relation, block);

page = BufferGetPage(buffer);

/* 在锁定buffer前,若page全可见需要将其visibility map给pin到共享内存,之后要修改 */

if (PageIsAllVisible(page))

visibilitymap_pin(relation, block, &vmbuffer);

LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

/* 如果之前没有pin住vm页,而page恰好在我们等待buffer锁的期间变成了全可见(很少见的情况),

* 那么需要先解锁buffer,pin住vm页之后再重新加锁,以避免在持有buffer锁期间执行I/O */

if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))

{

LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

visibilitymap_pin(relation, block, &vmbuffer);

LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

}

/* 从page中获取元组的ItemId */

lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));

Assert(ItemIdIsNormal(lp));

/* 获取元组的各项信息 */

tp.t_tableOid = RelationGetRelid(relation);

tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);

tp.t_len = ItemIdGetLength(lp);

tp.t_self = *tid;

/* goto跳跃点1 */

l1:

/* 检查此元组是否允许被更新 */

result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);

/* 元组对当前事务不可见,不允许删除,直接报错 */

if (result == TM_Invisible)

{

UnlockReleaseBuffer(buffer);

ereport(ERROR,

(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),

errmsg("attempted to delete invisible tuple")));

}

/* 元组正在被其它事务修改(更新、删除或加锁),并且调用者允许等待(wait为true) */

else if (result == TM_BeingModified && wait)

{

/* 之后要进行解锁,所以先把一些信息拷贝出来 */

xwait = HeapTupleHeaderGetRawXmax(tp.t_data);

infomask = tp.t_data->t_infomask;

/* 在并发的更新或删除事务结束之前进行休眠,在睡眠前获取元组锁用来保持等待的顺序 */

if (infomask & HEAP_XMAX_IS_MULTI)

{

/* 给定的MultiXactId是否与当前持有给定锁的事务冲突 */

if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,

LockTupleExclusive, &current_is_member))

{

/* 解锁buffer */

LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

/* 有需要的话获取元组锁(如果当前事务本身就是该multixact的成员则跳过,以避免死锁) */

if (!current_is_member)

heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,

LockWaitBlock, &have_tuple_lock);

/* 睡眠等待multixact,唤醒后重新锁定buffer */

MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate, infomask,

relation, &(tp.t_self), XLTW_Delete,

NULL);

LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

/* 如果睡眠的这段时间xmax相关信息发生了变化,返回跳跃点l1重新开始 */

if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||

!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),

xwait))

goto l1;

}

}

/* xmax是普通事务ID(不是MultiXact),并且不是当前事务:需要等待该事务结束 */

else if (!TransactionIdIsCurrentTransactionId(xwait))

{

/* 等待常规事务结束,但首先要获取元组锁 */

LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,

LockWaitBlock, &have_tuple_lock);

XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);

LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

/* 若等待期间xmax发生了变化,返回跳跃点l1 */

if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||

!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),

xwait))

goto l1;

/* 否则,更新元组关于xmax的提交或中止的提示位 */

UpdateXmaxHintBits(tp.t_data, buffer, xwait);

}

/* 若前面xmax中止,或提交后只锁定但没有更新,那么下面可以继续正常做delete操作 */

if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||

HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) ||

HeapTupleHeaderIsOnlyLocked(tp.t_data))

result = TM_Ok;

else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid) ||

HeapTupleHeaderIndicatesMovedPartitions(tp.t_data))

result = TM_Updated;

else

result = TM_Deleted;

}

/* 对事务快照模式RI更新执行附加检查 */

if (crosscheck != InvalidSnapshot && result == TM_Ok)

{

if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))

result = TM_Updated;

}

/* 无法执行delete,填充TM_FailureData然后return */

if (result != TM_Ok)

{

Assert(result == TM_SelfModified ||

result == TM_Updated ||

result == TM_Deleted ||

result == TM_BeingModified);

Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));

Assert(result != TM_Updated ||

!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));

tmfd->ctid = tp.t_data->t_ctid;

tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);

if (result == TM_SelfModified)

tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);

else

tmfd->cmax = InvalidCommandId;

UnlockReleaseBuffer(buffer);

if (have_tuple_lock)

UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);

if (vmbuffer != InvalidBuffer)

ReleaseBuffer(vmbuffer);

return result;

}

/* 即将执行实际的删除,首先检查序列化冲突 */

CheckForSerializableConflictIn(relation, &tp, buffer);

/* 有必要时需要将cid换为combo cid(同时表示cmin和cmax):

* 当被删除的元组是由当前事务(或其子事务)插入时,同一个字段需要同时记录插入和删除的命令号 */

HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);

/* 提取旧元组的replica identity(用于标识被删除行的副本);在进入临界区之前完成,避免内存分配失败时引发PANIC */

old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);

/* 如果这是当前事务中第一个可能涉及MultiXact的操作,则设置本后端的OldestMemberMXactId,

* 这样可以保证当前事务不会成为任何更老的MultiXactId的成员 */

MultiXactIdSetOldestMember();

/* 计算新的xmax和infomask */

compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),

tp.t_data->t_infomask, tp.t_data->t_infomask2,

xid, LockTupleExclusive, true,

&new_xmax, &new_infomask, &new_infomask2);

/* 这里开始禁止ereport(ERROR) */

START_CRIT_SECTION();

/* 如果这个事务提交,元组迟早会变为dead状态。在这里给page设置

* 一个标志,当xid低于OldestXmin时,此page会成为被修剪(prune)的候选页 */

PageSetPrunable(page, xid);

/* 清除page的全可见标志 */

if (PageIsAllVisible(page))

{

all_visible_cleared = true;

PageClearAllVisible(page);

visibilitymap_clear(relation, BufferGetBlockNumber(buffer),

vmbuffer, VISIBILITYMAP_VALID_BITS);

}

/* 存储xact删除元组的事务信息(将元组置为不可见=删除) */

tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);

tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;

tp.t_data->t_infomask |= new_infomask;

tp.t_data->t_infomask2 |= new_infomask2;

HeapTupleHeaderClearHotUpdated(tp.t_data);

HeapTupleHeaderSetXmax(tp.t_data, new_xmax);

HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);

/* 确保t_ctid不再指向后续版本的元组(让它指向元组自身) */

tp.t_data->t_ctid = tp.t_self;

/* 表明这是将元组移动到了另一个分区 */

if (changingPart)

HeapTupleHeaderSetMovedPartitions(tp.t_data);

/* 标记脏块 */

MarkBufferDirty(buffer);

/* xlog相关流程 */

if (RelationNeedsWAL(relation))

{

/* 逻辑解码需要combo cid才能正确解码系统表,

* 因此对逻辑解码可访问的表额外记录一条XLOG_HEAP2_NEW_CID记录 */

if (RelationIsAccessibleInLogicalDecoding(relation))

log_heap_new_cid(relation, &tp);

xlrec.flags = 0;

if (all_visible_cleared)

xlrec.flags |= XLH_DELETE_ALL_VISIBLE_CLEARED;

if (changingPart)

xlrec.flags |= XLH_DELETE_IS_PARTITION_MOVE;

xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,

tp.t_data->t_infomask2);

xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);

xlrec.xmax = new_xmax;

if (old_key_tuple != NULL)

{

if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)

xlrec.flags |= XLH_DELETE_CONTAINS_OLD_TUPLE;

else

xlrec.flags |= XLH_DELETE_CONTAINS_OLD_KEY;

}

/* 开始构造WAL记录,将数据添加到正在构造的WAL记录中 */

XLogBeginInsert();

XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);

XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

/* 如果有的话,记录被删除的元组的身份副本 */

if (old_key_tuple != NULL)

{

xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;

xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;

xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;

XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);

XLogRegisterData((char *) old_key_tuple->t_data

+ SizeofHeapTupleHeader,

old_key_tuple->t_len

- SizeofHeapTupleHeader);

}

/* 在记录中包含复制源(replication origin),这样按行级别根据origin过滤会更高效 */

XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);

/* 插入xlog记录 */

recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);

/* 设置page的LSN */

PageSetLSN(page, recptr);

}

END_CRIT_SECTION();

/* 解锁 */

LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

if (vmbuffer != InvalidBuffer)

ReleaseBuffer(vmbuffer);

/* 如果元组有toast项,删除它 */

if (relation->rd_rel->relkind != RELKIND_RELATION &&

relation->rd_rel->relkind != RELKIND_MATVIEW)

{

/* toast表里的数据不应该递归的toast */

Assert(!HeapTupleHasExternal(&tp));

}

else if (HeapTupleHasExternal(&tp))

toast_delete(relation, &tp, false);

/* 将该元组登记为待失效项,在下一个命令边界从系统缓存中清除;必须在释放buffer之前做,因为需要读取元组内容 */

CacheInvalidateHeapTuple(relation, &tp, NULL);

/* 释放buffer */

ReleaseBuffer(buffer);

/* 释放元组锁 */

if (have_tuple_lock)

UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);

/* 更新统计信息 */

pgstat_count_heap_delete(relation);

/* 如果有的话释放old_key_tuple */

if (old_key_tuple != NULL && old_key_copied)

heap_freetuple(old_key_tuple);

return TM_Ok;

以上是 PostgreSQL源码学习删除数据#0 的全部内容, 来源链接: utcz.com/z/534051.html

回到顶部