Tbase 源码 (九)

【事务处理】

  事务处理的主要算法是两阶段提交,但显然并不是所有操作都需要两阶段提交。是否触发两阶段提交,由 IsTwoPhaseCommitRequired(bool localWrite) 来判断。

/*
 * Returns true if 2PC is required for consistent commit: if there was write
 * activity on two or more nodes within current transaction.
 */
bool
IsTwoPhaseCommitRequired(bool localWrite)

{// #lizard forgives
    PGXCNodeAllHandles *handles = NULL;
    bool                found = localWrite;
    int                 i = 0;
#ifdef __TBASE__
    int                                     sock_fatal_count = 0;
#endif

    /* Never run 2PC on Datanode-to-Datanode connection */
    if (IS_PGXC_DATANODE)
        return false;

    if (MyXactFlags & XACT_FLAGS_ACCESSEDTEMPREL)
    {
        elog(DEBUG1, "Transaction accessed temporary objects - "
                "2PC will not be used and that can lead to data inconsistencies "
                "in case of failures");
        return false;
    }

    /*
     * If no XID assigned, no need to run 2PC since neither coordinator nor any
     * remote nodes did write operation
     */

    if (!TransactionIdIsValid(GetTopTransactionIdIfAny()))
        return false;

#ifdef __TBASE__
    handles = get_sock_fatal_handles();
    sock_fatal_count = handles->dn_conn_count + handles->co_conn_count;

    for (i = 0; i < handles->dn_conn_count; i++)
    {
        PGXCNodeHandle *conn = handles->datanode_handles[i];

        elog(LOG, "IsTwoPhaseCommitRequired, fatal_conn=%p, fatal_conn->nodename=%s, fatal_conn->sock=%d, "
            "fatal_conn->read_only=%d, fatal_conn->transaction_status=%c, "
            "fatal_conn->sock_fatal_occurred=%d, conn->backend_pid=%d, fatal_conn->error=%s", 
            conn, conn->nodename, conn->sock, conn->read_only, conn->transaction_status,
            conn->sock_fatal_occurred, conn->backend_pid,  conn->error);

    }

    for (i = 0; i < handles->co_conn_count; i++)
    {
        PGXCNodeHandle *conn = handles->coord_handles[i];

        elog(LOG, "IsTwoPhaseCommitRequired, fatal_conn=%p, fatal_conn->nodename=%s, fatal_conn->sock=%d, "
            "fatal_conn->read_only=%d, fatal_conn->transaction_status=%c, "
            "fatal_conn->sock_fatal_occurred=%d, conn->backend_pid=%d, fatal_conn->error=%s", 
            conn, conn->nodename, conn->sock, conn->read_only, conn->transaction_status,
            conn->sock_fatal_occurred, conn->backend_pid,  conn->error);
    }
    pfree_pgxc_all_handles(handles);

    if (sock_fatal_count != 0)
    {
        elog(ERROR, "IsTwoPhaseCommitRequired, Found %d sock fatal handles exist", sock_fatal_count);
    }
#endif
    /* get current transaction handles that we register when pgxc_node_begin */
    handles = get_current_txn_handles();
    for (i = 0; i < handles->dn_conn_count; i++)
    {
        PGXCNodeHandle *conn = handles->datanode_handles[i];

#ifdef __TBASE__
        elog(DEBUG5, "IsTwoPhaseCommitRequired, conn->nodename=%s, conn->sock=%d, conn->read_only=%d, conn->transaction_status=%c", 
            conn->nodename, conn->sock, conn->read_only, conn->transaction_status);
#endif

        if (conn->sock == NO_SOCKET)
        {
            elog(ERROR, "IsTwoPhaseCommitRequired, remote node %s's connection handle is invalid, backend_pid: %d",
                 conn->nodename, conn->backend_pid);

        }
        else if (!conn->read_only && conn->transaction_status == 'T')
        {
            if (found)
            {
                pfree_pgxc_all_handles(handles);
                return true; /* second found */
            }    
            else
            {
                found = true; /* first found */
            }
        }
        else if (conn->transaction_status == 'E')
        {
            elog(ERROR, "IsTwoPhaseCommitRequired, remote node %s is in error state, backend_pid: %d",
                    conn->nodename, conn->backend_pid);
        }
    }
    for (i = 0; i < handles->co_conn_count; i++)
    {
        PGXCNodeHandle *conn = handles->coord_handles[i];

#ifdef __TBASE__
        elog(DEBUG5, "IsTwoPhaseCommitRequired, conn->nodename=%s, conn->sock=%d, conn->read_only=%d, conn->transaction_status=%c", 
            conn->nodename, conn->sock, conn->read_only, conn->transaction_status);
#endif

        if (conn->sock == NO_SOCKET)
        {
            elog(ERROR, "IsTwoPhaseCommitRequired, remote node %s's connection handle is invalid, backend_pid: %d",
                 conn->nodename, conn->backend_pid);
        }
        else if (!conn->read_only && conn->transaction_status == 'T')
        {
            if (found)
            {
                pfree_pgxc_all_handles(handles);
                return true; /* second found */
            }
            else
            {
                found = true; /* first found */

            }
        }
        else if (conn->transaction_status == 'E')
        {
            elog(ERROR, "IsTwoPhaseCommitRequired, remote node %s is in error state, backend_pid: %d",
                 conn->nodename, conn->backend_pid);
        }
    }
    pfree_pgxc_all_handles(handles);

#ifdef __TBASE__
    elog(DEBUG5, "IsTwoPhaseCommitRequired return false");
#endif

    return false;
}

关于两阶段提交,下面这段注释说明了是否需要两阶段提交的判断逻辑以及具体步骤。相关代码一部分在 backend/pgxc/pool 里,另一部分在 PG 正常的事务处理流程中。

/*
 * Do pre-commit processing for remote nodes which includes Datanodes and
 * Coordinators. If more than one node is involved in the transaction write
 * activity, then we must run 2PC. For 2PC, we do the following steps:
 *
 *  1. PREPARE the transaction locally if the local node is involved in the
 *     transaction. If local node is not involved, skip this step and go to the
 *     next step
 *  2. PREPARE the transaction on all the remote nodes. If any node fails to
 *     PREPARE, directly go to step 6
 *  3. Now that all the involved nodes are PREPAREd, we can commit the
 *     transaction. We first inform the GTM that the transaction is fully
 *     PREPARED and also supply the list of the nodes involved in the
 *     transaction
 *  4. COMMIT PREPARED the transaction on all the remote nodes and then
 *     finally COMMIT PREPARED on the local node if it's involved in the
 *     transaction and start a new transaction so that normal commit processing
 *     works unchanged. Go to step 5.
 *  5. Return and let the normal commit processing resume
 *  6. Abort by ereporting the error and let normal abort-processing take
 *     charge.
 */

create table 为示例
Coordinator 节点先在自己节点上创建这个表(包括更新 pg_class 缓存,创建物理文件等),再向各节点 dispatch 命令 。

在结束事务时(finish_xact_command->CommitTransactionCommand)触发二阶段提交:

接口调用 CommitTransactionCommand =》 CommitTransaction =》PreCommit_Remote =》SetDataRowForExtParams =》IsTwoPhaseCommitRequired

【第一阶段】
prepareTransaction

1) 检查序列化冲突 (PreCommit_CheckForSerializationFailure)
2)向 Datanode 发送 “Distributed Prepare” 信息:
   AddRemoteQueryNode(stmts, queryString,
                      is_local ? EXEC_ON_NONE
                               : (is_temp ? EXEC_ON_DATANODES : EXEC_ON_ALL_NODES));
3)Datanode 收到后走 PostgresMain 的 PrepareTransaction 逻辑
执行 on commit 的操作:
关闭大对象的表
检查死锁
标记事务状态为 preparing
写 XLOG_XACT_PREPARE 的事务日志,创建状态文件 (StartPrepare->EndPrepare)
标记本进程没有事务了
释放占用的资源(buffer,cache,pgstat ,缓存的 database 的旧记录,事务id,锁,谓词锁),发送invalidate 高速缓存条目的消息
清除分配的帐户与上下文内存

释放 resource group
 回到循环的 ready for query 状态, 回复 Coordinator


【第二阶段】
RecordTransactionCommit

1) Coordinator  插入 XLOG_XACT_COMMIT 的事务日志
2)将日志刷盘 XLogFlush
3)写 clog 日志 TransactionIdCommitTree(先提交子事务,再提交总事务)
4)向 Datanode 发送 “Distributed Commit Prepared” 命令
5)Datanode 走   pgxc_node_remote_prepare逻辑,开启一个事务命令,进入 FinishPreparedTransaction : 
记录 XLOG_XACT_COMMIT_PREPARED 事务日志 (RecordTransactionCommitPrepared)
将日志刷盘 XLogFlush
写分布式 clog (SetCommittedTree)
等待mirror 节点 SyncRepWaitForLSN
从进程队列中移除此进程。
删除要删的database 目录,表文件
释放二阶段提交相关的锁 lock_twophase_postcommit
释放谓词锁

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值