postgres 源码解析47 元组的删除流程 heap_delete

最新推荐文章于 2024-09-25 20:53:15 发布

原创

最新推荐文章于 2024-09-25 20:53:15 发布 · 1.7k 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#数据结构 #postgresql #c语言

文章深入解析了Postgres数据库中删除元组的流程，涉及元组的xmax设置、事务标识位更新以及可见性判断等核心步骤，包括并发控制和锁管理机制。

本文将从源码层面讲解 PostgreSQL 元组的删除流程。其实现的本质是设置元组的 xmax 及相关标识位字段，使该元组在逻辑上对外表现为已删除（不可见）。

关键数据结构

在这里插入图片描述

执行流程

在这里插入图片描述

/*
 *	heap_delete - delete a tuple
 *
 * See table_tuple_delete() for an explanation of the parameters, except that
 * this routine directly takes a tuple rather than a slot.
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
 * only for TM_SelfModified, since we cannot obtain cmax from a combo CID
 * generated by another transaction).
 */
TM_Result
heap_delete(Relation relation, ItemPointer tid,
			CommandId cid, Snapshot crosscheck, bool wait,
			TM_FailureData *tmfd, bool changingPart)
{
   
   
	TM_Result	result;
	TransactionId xid = GetCurrentTransactionId();			// 写操作事务均需要获取事务号
	ItemId		lp;
	HeapTupleData tp;
	Page		page;
	BlockNumber block;
	Buffer		buffer;
	Buffer		vmbuffer = InvalidBuffer;
	TransactionId new_xmax;
	uint16		new_infomask,
				new_infomask2;
	bool		have_tuple_lock = false;
	bool		iscombo;
	bool		all_visible_cleared = false;
	HeapTuple	old_key_tuple = NULL;	/* replica identity of the tuple */
	bool		old_key_copied = false;

	Assert(ItemPointerIsValid(tid));                        // 删除元组的 tid上层函数传入

	/*
	 * Forbid this during a parallel operation, lest it allocate a combo CID.
	 * Other workers might need that combo CID for visibility checks, and we
	 * have no provision for broadcasting it to them.
	 */
	 // 删除一条元组禁止开并行模式
	if (IsInParallelMode())
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
				 errmsg("cannot delete tuples during a parallel operation")));

	// 根据元组的 tid 确定块号，再根据块号和 relation 描述符将数据块加载至共享缓冲区（buffer），并获取页地址
	block = ItemPointerGetBlockNumber(tid);
	buffer = ReadBuffer(relation, block);
	page = BufferGetPage(buffer);

	/*
	 * Before locking the buffer, pin the visibility map page if it appears to
	 * be necessary.  Since we haven't got the lock yet, someone else might be
	 * in the middle of changing this, so we'll need to recheck after we have
	 * the lock.
	 */
	 // 由于删除元组，势必会修改VM对应标识位，如果数据页含有 allvisible标记，则需要将数据页对应的VM页加载至
	 // vmbuffer(pin住)
	if (PageIsAllVisible(page))
		visibilitymap_pin(relation, block, &vmbuffer);

	// 对 buffer施加排他锁
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

	/*
	 * If we didn't pin the visibility map page and the page has become all
	 * visible while we were busy locking the buffer, we'll have to unlock and
	 * re-lock, to avoid holding the buffer lock across an I/O.  That's a bit
	 * unfortunate, but hopefully shouldn't happen often.
	 */
	 // 在获取上述缓冲块排他锁期间，可能有其他进程将该数据页标记为 all-visible；此时需要先释放
	 // buffer 排他锁，pin 住 vmbuffer，然后再次获取 buffer 排他锁。其目的是避免在持有 buffer
	 // 排他锁时执行 I/O（将 VM page 加载至 vmbuffer）
	if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
	{
   
   
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		visibilitymap_pin(relation, block, &vmbuffer);
		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	}

	// 获取偏移量为tid的元组相关信息 
	lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
	Assert(ItemIdIsNormal(lp));

	tp.t_tableOid = RelationGetRelid(relation);
	tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
	tp.t_len = ItemIdGetLength(lp);
	tp.t_self = *tid;

l1:
	// 可见性判断：根据其结果判断元组是否满足被更新/删除
	result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
	
	// 元组不可见，报错
	if (result == TM_Invisible)
	{
   
   
		UnlockReleaseBuffer(buffer);
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("attempted to delete invisible tuple")));
	}
	// 该元组正在被更新，且等待
	else if (result == TM_BeingModified && wait)
	{
   
   
		TransactionId xwait;
		uint16		infomask;

		/* must copy state data before unlocking buffer */
		// 获取 元组的xmax和 infomask，此时并不知道 xmax是单纯的事务号还是 MultiXactId
		xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
		infomask = tp.t_data->t_infomask;

		/*
		 * Sleep until concurrent transaction ends -- except when there's a
		 * single locker and it's our own transaction.  Note we don't care
		 * which lock mode the locker has, because we need the strongest one.
		 *
		 * Before sleeping, we need to acquire tuple lock to establish our
		 * priority for the tuple (see heap_lock_tuple).  LockTuple will
		 * release us when we are next-in-line for the tuple.
		 *
		 * If we are forced to "start over" below, we keep the tuple lock;
		 * this arranges that we stay at the head of the line while rechecking
		 * tuple state.
		 */
		 // 如果是 MultiXactId
		if (infomask & HEAP_XMAX_IS_MULTI)
		{
   
   
			bool		current_is_member = false;

		// 判断 MultiXactId 中保存的锁模式是否与 LockTupleExclusive 冲突；若冲突，则先释放 buffer 锁，再等待相关事务结束
			if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
										LockTupleExclusive, &current_is_member))
			{
   
   
				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

				/*
				 * Acquire the lock, if necessary (but skip it when we're
				 * requesting a lock and already have one; avoids deadlock).
				 */
				 // 如果当前事务不属于MultiXactId成员，则需获取元组级常规锁，反之无需获取，
				 // 其目的是避免死锁
				if (!current_is_member)
					heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
										 LockWaitBlock, &have_tuple_lock);

				/* wait for multixact */
				// 等待冲突的事务完成
				MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate, infomask,
								relation, &