数据库修仙炼气篇六——walsender

本文详细介绍了PostgreSQL中的WAL Sender模块,包括启动参数解析、am_walsender和am_db_walsender的设置,am_cascading_walsender的初始化,以及对外部接口如InitWalSender和WalSndSetState的使用。数据模型和关键数据结构如WalSnd和WalSndCtlData也进行了深入剖析。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

walsender

交互接口

全局变量

/* global state */
extern PGDLLIMPORT bool am_walsender; // true if this process is a walsender
extern PGDLLIMPORT bool am_cascading_walsender; // true if this is a cascading walsender
extern PGDLLIMPORT bool am_db_walsender; // true if connected to a database
extern PGDLLIMPORT bool wake_wal_senders; // NOTE(review): not described here; presumably a pending "wake the walsenders" request flag -- confirm

/* user-settable parameters */
extern PGDLLIMPORT int max_wal_senders; // maximum number of walsender processes
extern PGDLLIMPORT int wal_sender_timeout; // timeout for sending WAL messages
extern PGDLLIMPORT bool log_replication_commands; // NOTE(review): presumably enables logging of replication commands -- confirm
  • am_walsender和am_db_walsender

    解析启动参数replication的值进行赋值:如果值是database,就把am_walsender和am_db_walsender都设置为true;否则按布尔值解析,解析结果只赋给am_walsender(例如值为true时am_walsender为true,而am_db_walsender保持不变)。

    				if (strcmp(valptr, "database") == 0)
    				{
    					am_walsender = true;
    					am_db_walsender = true;
    				}
    				else if (!parse_bool(valptr, &am_walsender)) {
                        
                    }
    
  • am_cascading_walsender

    am_cascading_walsender在初始化walSnd的时候赋值。

    am_cascading_walsender = RecoveryInProgress();
    

    该值主要来自全局变量LocalRecoveryInProgress:当LocalRecoveryInProgress为false时直接得到false;否则根据xlogctl->SharedRecoveryState重新计算,只要共享状态不是RECOVERY_STATE_DONE,就认为仍处于恢复过程中。

    LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
    

对外接口

/*
 * Externally visible walsender entry points.
 */
/* Initialize a WalSnd for this process; done at startup when am_walsender is true. */
extern void InitWalSender(void);
/* Takes the raw query string and returns a bool; semantics of the return value are not shown here. */
extern bool exec_replication_command(const char *query_string);
extern void WalSndErrorCleanup(void);
extern void WalSndResourceCleanup(bool isCommit);
extern void WalSndSignals(void);
/* NOTE(review): presumably the shared-memory size/initialization pair for the walsender control data -- confirm. */
extern Size WalSndShmemSize(void);
extern void WalSndShmemInit(void);
extern void WalSndWakeup(void);
extern void WalSndInitStopping(void);
extern void WalSndWaitStopping(void);
extern void HandleWalSndInitStopping(void);
extern void WalSndRqstFileReload(void);
  • InitWalSender

    初始化一个walSnd。当am_walsender为true时,postgres进程在启动的时候就会初始化一个walSnd。

    初始化walSnd
    InitWalSender
    InitWalSenderSlot
    MarkPostmasterChildWalSender
    SendPostmasterSignal
    MemoryContextAllocZero

    初始化slot的时候会将全局的WalSndCtl的walsnds初始化,walsnds是一个变长数组,会根据max_wal_senders进行内存分配和初始化。每个创建的walSnd都会保存到全局的WalSndCtl的数组中。

    根据walSnd的pid是否为0来判断是否需要初始化,每个初始化的walSnd的状态为WALSNDSTATE_STARTUP。

对内接口

/* Change the walsender's state to the given value. */
extern void WalSndSetState(WalSndState state);

/*
 * Internal functions for parsing the replication grammar, in repl_gram.y and
 * repl_scanner.l
 */
extern int	replication_yyparse(void);
extern int	replication_yylex(void);
extern void replication_yyerror(const char *str) pg_attribute_noreturn();
extern void replication_scanner_init(const char *query_string);
extern void replication_scanner_finish(void);
extern bool replication_scanner_is_replication_command(void);
  • WalSndSetState

    用来更改walSnd的状态。

数据模型

  • walsender状态
/*
 * States a walsender can be in.  A newly initialized WalSnd starts out in
 * WALSNDSTATE_STARTUP (value 0).
 *
 * NOTE(review): the remaining names suggest backup, catch-up, streaming and
 * stopping phases; confirm the exact transitions against walsender.c.
 */
typedef enum WalSndState
{
	WALSNDSTATE_STARTUP = 0,
	WALSNDSTATE_BACKUP,
	WALSNDSTATE_CATCHUP,
	WALSNDSTATE_STREAMING,
	WALSNDSTATE_STOPPING
} WalSndState;
  • walsender 结构

一个进程对应一个walSnd结构。

/*
 * Per-walsender state.  There is one WalSnd per walsender process; the
 * entries are stored in the walsnds[] array of WalSndCtlData.  A slot whose
 * pid is 0 is not in use.
 */
typedef struct WalSnd
{
	pid_t		pid;			/* this walsender's PID, or 0 if not active */

	WalSndState state;			/* this walsender's state */
	XLogRecPtr	sentPtr;		/* WAL has been sent up to this point */
	bool		needreload;		/* does currently-open file need to be
								 * reloaded? */

	/*
	 * The xlog locations that have been written, flushed, and applied by
	 * standby-side. These may be invalid if the standby-side has not offered
	 * values yet.
	 */
	XLogRecPtr	write;
	XLogRecPtr	flush;
	XLogRecPtr	apply;

	/* Measured lag times, or -1 for unknown/none. */
	TimeOffset	writeLag;
	TimeOffset	flushLag;
	TimeOffset	applyLag;

	/*
	 * The priority order of the standby managed by this WALSender, as listed
	 * in synchronous_standby_names, or 0 if not-listed.
	 */
	int			sync_standby_priority;

	/* Protects shared variables shown above. */
	slock_t		mutex;

	/*
	 * Pointer to the walsender's latch. Used by backends to wake up this
	 * walsender when it has work to do. NULL if the walsender isn't active.
	 */
	Latch	   *latch;

	/*
	 * Timestamp of the last message received from standby.
	 */
	TimestampTz replyTime;
} WalSnd;
  • WalSndCtlData
/*
 * Control data shared by all walsenders: the synchronous-replication wait
 * queues and the array of per-walsender WalSnd entries.
 *
 * NOTE(review): presumably the global WalSndCtl points at one of these --
 * confirm against walsender.c.
 */
typedef struct
{
	/*
	 * Synchronous replication queue with one queue per request type.
	 * Protected by SyncRepLock.
	 */
	SHM_QUEUE	SyncRepQueue[NUM_SYNC_REP_WAIT_MODE];

	/*
	 * Current location of the head of the queue. All waiters should have a
	 * waitLSN that follows this value. Protected by SyncRepLock.
	 */
	XLogRecPtr	lsn[NUM_SYNC_REP_WAIT_MODE];
	
	/*
	 * Are any sync standbys defined?  Waiting backends can't reload the
	 * config file safely, so checkpointer updates this value as needed.
	 * Protected by SyncRepLock.
	 */
	bool		sync_standbys_defined;
	
	/*
	 * One entry per walsender.  Flexible array member: the actual length is
	 * chosen at allocation time from max_wal_senders.
	 */
	WalSnd		walsnds[FLEXIBLE_ARRAY_MEMBER];
} WalSndCtlData;
  • NodeTag
typedef enum NodeTag {
   .....
       /*
	 * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
	 */
	T_IdentifySystemCmd,
	T_BaseBackupCmd,
	T_CreateReplicationSlotCmd,
	T_DropReplicationSlotCmd,
	T_ReadReplicationSlotCmd,
	T_StartReplicationCmd,
	T_TimeLineHistoryCmd,
    ......
} NodeTag;
  • XLogReaderState

    /* A WAL position, represented as an unsigned 64-bit integer. */
    typedef uint64 XLogRecPtr;

    /*
     * State of a WAL reader: its callback table plus the boundaries of the
     * most recently read record.
     *
     * NOTE(review): this looks like an abridged excerpt; confirm the full
     * member list against xlogreader.h.
     */
    struct XLogReaderState
    {
    	/* Callbacks: page_read, segment_open and segment_close. */
    	XLogReaderRoutine routine;
    	XLogRecPtr	ReadRecPtr;		/* start of last record read */
    	XLogRecPtr	EndRecPtr;		/* end+1 of last record read */
    };
    
    • XLogReaderRoutine

      /*
       * Callback types used by the WAL reader, and the table that bundles
       * them.  XLogReaderState embeds one XLogReaderRoutine as its "routine"
       * member.
       *
       * NOTE(review): judging by the parameter names, page_read presumably
       * fills readBuf with the page at targetPagePtr, and segment_open /
       * segment_close open and close a WAL segment -- confirm against
       * xlogreader.h.
       */
      typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
      							   XLogRecPtr targetPagePtr,
      							   int reqLen,
      							   XLogRecPtr targetRecPtr,
      							   char *readBuf);
      typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
      								  XLogSegNo nextSegNo,
      								  TimeLineID *tli_p);
      typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
      typedef struct XLogReaderRoutine
      {
      	XLogPageReadCB page_read;
      	WALSegmentOpenCB segment_open;
      	WALSegmentCloseCB segment_close;
      } XLogReaderRoutine;
      
  • XLogRecoveryCtlData

    /*
     * State describing WAL recovery: hot-standby and promotion flags, the
     * position and timeline of the last replayed record, replay timestamps
     * and the recovery pause state.  Members declared above info_lck are
     * the ones that lock covers.
     */
    typedef struct XLogRecoveryCtlData
    {
    	/*
    	 * SharedHotStandbyActive indicates if we allow hot standby queries to be
    	 * run.  Protected by info_lck.
    	 */
    	bool		SharedHotStandbyActive;
    
    	/*
    	 * SharedPromoteIsTriggered indicates if a standby promotion has been
    	 * triggered.  Protected by info_lck.
    	 */
    	bool		SharedPromoteIsTriggered;
    
    	Latch		recoveryWakeupLatch;
    
    	/*
    	 * Last record successfully replayed.
    	 */
    	XLogRecPtr	lastReplayedReadRecPtr; /* start position */
    	XLogRecPtr	lastReplayedEndRecPtr;	/* end+1 position */
    	TimeLineID	lastReplayedTLI;	/* timeline */
    
    	/*
    	 * When we're currently replaying a record, ie. in a redo function,
    	 * replayEndRecPtr points to the end+1 of the record being replayed,
    	 * otherwise it's equal to lastReplayedEndRecPtr.
    	 */
    	XLogRecPtr	replayEndRecPtr;
    	TimeLineID	replayEndTLI;
    	/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
    	TimestampTz recoveryLastXTime;
    
    	/*
    	 * timestamp of when we started replaying the current chunk of WAL data,
    	 * only relevant for replication or archive recovery
    	 */
    	TimestampTz currentChunkStartTime;
    	/* Recovery pause state */
    	RecoveryPauseState recoveryPauseState;
    	ConditionVariable recoveryNotPausedCV;
    
    	slock_t		info_lck;		/* locks shared variables shown above */
    } XLogRecoveryCtlData;
    

数据发送

数据通过socket接口进行发送,最终数据出口为操作系统提供的socket接口的send函数。

/*
 * Table of function pointers through which protocol messages are sent.
 * An implementation supplies one function per member.
 *
 * NOTE(review): per-member semantics (e.g. what the int results of flush and
 * putmessage mean) are not visible here -- confirm against libpq.h.
 */
typedef struct
{
	void		(*comm_reset) (void);
	int			(*flush) (void);
	int			(*flush_if_writable) (void);
	bool		(*is_send_pending) (void);
	/* Both variants take a message type byte plus a payload of len bytes. */
	int			(*putmessage) (char msgtype, const char *s, size_t len);
	void		(*putmessage_noblock) (char msgtype, const char *s, size_t len);
} PQcommMethods;
static const PQcommMethods PqCommSocketMethods = {
	socket_comm_reset,
	socket_flush,
	socket_flush_if_writable,
	socket_is_send_pending,
	socket_putmessage,
	socket_putmessage_noblock
};

其执行流程如下:

XLogSendPhysical
pq_putmessage_noblock
socket_putmessage_noblock
socket_putmessage
internal_putbytes
internal_flush
secure_write
secure_raw_write
send
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

我在数据库世界里修仙

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值