本文简单介绍了PG插入数据部分的源码,主要内容包括exec_simple_query函数的实现逻辑,该函数位于src/backend/tcop/postgres.c文件中。
一、源码解读
exec_simple_query函数,顾名思义,执行简单“查询”(包括INSERT/UPDATE/DELETE等语句)
/*
* exec_simple_query
*
* Execute a "simple Query" protocol message.
*/
/*
输入:
query_string-SQL语句
输出:
无
*/
static void
exec_simple_query(const char *query_string)
{
CommandDest dest = whereToSendOutput;//输出到哪里的定义
MemoryContext oldcontext;//存储原内存上下文
List *parsetree_list;//分析树列表
ListCell *parsetree_item;//分析树中的ITEM
bool save_log_statement_stats = log_statement_stats;//是否保存统计信息,false
bool was_logged = false;//Log?
bool use_implicit_block;//是否使用隐式事务块
char msec_str[32];
/*
* Report query to various monitoring facilities.
*/
debug_query_string = query_string;
pgstat_report_activity(STATE_RUNNING, query_string);//统计信息
TRACE_POSTGRESQL_QUERY_START(query_string);
/*
* We use save_log_statement_stats so ShowUsage doesn't report incorrect
* results because ResetUsage wasn't called.
*/
if (save_log_statement_stats)
ResetUsage();
/*
* Start up a transaction command. All queries generated by the
* query_string will be in this same command block, *unless* we find a
* BEGIN/COMMIT/ABORT statement; we have to force a new xact command after
* one of those, else bad things will happen in xact.c. (Note that this
* will normally change current memory context.)
*/
start_xact_command();//启动事务
/*
* Zap any pre-existing unnamed statement. (While not strictly necessary,
* it seems best to define simple-Query mode as if it used the unnamed
* statement and portal; this ensures we recover any storage used by prior
* unnamed operations.)
*/
drop_unnamed_stmt();//清除匿名语句
/*
* Switch to appropriate context for constructing parsetrees.
*/
oldcontext = MemoryContextSwitchTo(MessageContext);//切换内存上下文
/*
* Do basic parsing of the query or queries (this should be safe even if
* we are in aborted transaction state!)
*/
parsetree_list = pg_parse_query(query_string);//解析输入的查询语句,获得分析树List(元素是RawStmt nodes)
/* Log immediately if dictated by log_statement */
if (check_log_statement(parsetree_list))//日志记录
{
ereport(LOG,
(errmsg("statement: %s", query_string),
errhidestmt(true),
errdetail_execute(parsetree_list)));
was_logged = true;
}
/*
* Switch back to transaction context to enter the loop.
*/
MemoryContextSwitchTo(oldcontext);//切换回原内存上下文
/*
* For historical reasons, if multiple SQL statements are given in a
* single "simple Query" message, we execute them as a single transaction,
* unless explicit transaction control commands are included to make
* portions of the list be separate transactions. To represent this
* behavior properly in the transaction machinery, we use an "implicit"
* transaction block.
*/
use_implicit_block = (list_length(parsetree_list) > 1);//如果分析树条目>1,使用隐式事务块(多条SQL语句在同一个事务中)
/*
* Run through the raw parsetree(s) and process each one.
*/
foreach(parsetree_item, parsetree_list)//对分析树中的每一个条目进行处理
{
RawStmt *parsetree = lfirst_node(RawStmt, parsetree_item);//分析树List中的元素为RawStmt指针类型
bool snapshot_set = false;//是否设置快照?
const char *commandTag;//命令标识
char completionTag[COMPLETION_TAG_BUFSIZE];//完成标记,如INSERT 0 1之类的字符串
List *querytree_list,//查询树List
*plantree_list;//执行计划List
Portal portal;//“门户”变量
DestReceiver *receiver;//目标接收端
int16 format;//
/*
* Get the command name for use in status display (it also becomes the
* default completion tag, down inside PortalRun). Set ps_status and
* do any special start-of-SQL-command processing needed by the
* destination.
*/
commandTag = CreateCommandTag(parsetree->stmt);//创建命令标记,插入数据则为INSERT
set_ps_display(commandTag, false);
BeginCommand(commandTag, dest);//do Nothing!
/*
* If we are in an aborted transaction, reject all commands except
* COMMIT/ABORT. It is important that this test occur before we try
* to do parse analysis, rewrite, or planning, since all those phases
* try to do database accesses, which may fail in abort state. (It
* might be safe to allow some additional utility commands in this
* state, but not many...)
*/
if (IsAbortedTransactionBlockState() &&
!IsTransactionExitStmt(parsetree->stmt))
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
"commands ignored until end of transaction block"),
errdetail_abort()));
/* Make sure we are in a transaction command */
start_xact_command();//确认在事务中
/*
* If using an implicit transaction block, and we're not already in a
* transaction block, start an implicit block to force this statement
* to be grouped together with any following ones. (We must do this
* each time through the loop; otherwise, a COMMIT/ROLLBACK in the
* list would cause later statements to not be grouped.)
*/
if (use_implicit_block)
BeginImplicitTransactionBlock();//隐式事务,进入事务块
/* If we got a cancel signal in parsing or prior command, quit */
CHECK_FOR_INTERRUPTS();
/*
* Set up a snapshot if parse analysis/planning will need one.
*/
if (analyze_requires_snapshot(parsetree))//是否需要快照进行分析?增删改查均需要
{
PushActiveSnapshot(GetTransactionSnapshot());//
snapshot_set = true;
}
/*
* OK to analyze, rewrite, and plan this query.
*
* Switch to appropriate context for constructing querytrees (again,
* these must outlive the execution context).
*/
oldcontext = MemoryContextSwitchTo(MessageContext);//切换内存上下文
querytree_list = pg_analyze_and_rewrite(parsetree, query_string,
NULL, 0, NULL);//根据分析树获得查询树,返回List(元素为Query)
plantree_list = pg_plan_queries(querytree_list,
CURSOR_OPT_PARALLEL_OK, NULL);//根据查询树获取计划树,返回List(元素为PlannedStmt)
/* Done with the snapshot used for parsing/planning */
if (snapshot_set)
PopActiveSnapshot();//
/* If we got a cancel signal in analysis or planning, quit */
CHECK_FOR_INTERRUPTS();
/*
* Create unnamed portal to run the query or queries in. If there
* already is one, silently drop it.
*/
portal = CreatePortal("", true, true);//创建匿名Portal变量
/* Don't display the portal in pg_cursors */
portal->visible = false;
/*
* We don't have to copy anything into the portal, because everything
* we are passing here is in MessageContext, which will outlive the
* portal anyway.
*/
PortalDefineQuery(portal,
NULL,
query_string,
commandTag,
plantree_list,
NULL);//给Portal变量赋值
/*
* Start the portal. No parameters here.
*/
PortalStart(portal, NULL, 0, InvalidSnapshot);//为PortalRun作准备
/*
* Select the appropriate output format: text unless we are doing a
* FETCH from a binary cursor. (Pretty grotty to have to do this here
* --- but it avoids grottiness in other places. Ah, the joys of
* backward compatibility...)
*/
format = 0; /* TEXT is default */
if (IsA(parsetree->stmt, FetchStmt))
{
FetchStmt *stmt = (FetchStmt *) parsetree->stmt;
if (!stmt->ismove)
{
Portal fportal = GetPortalByName(stmt->portalname);
if (PortalIsValid(fportal) &&
(fportal->cursorOptions & CURSOR_OPT_BINARY))
format = 1; /* BINARY */
}
}
PortalSetResultFormat(portal, 1, &format);//设置结果返回的格式,默认为TEXT
/*
* Now we can create the destination receiver object.
*/
receiver = CreateDestReceiver(dest);//创建目标接收器(如使用psql则为:printtup DestReceiver)
if (dest == DestRemote)
SetRemoteDestReceiverParams(receiver, portal);
/*
* Switch back to transaction context for execution.
*/
MemoryContextSwitchTo(oldcontext);//切换回原内存上下文
/*
* Run the portal to completion, and then drop it (and the receiver).
*/
(void) PortalRun(portal,
FETCH_ALL,
true, /* always top level */
true,
receiver,
receiver,
completionTag);//执行
receiver->rDestroy(receiver);//执行完毕,销毁接收器
PortalDrop(portal, false);//清除Portal中的资源&Portal
if (lnext(parsetree_item) == NULL)//所有语句已执行完毕
{
/*
* If this is the last parsetree of the query string, close down
* transaction statement before reporting command-complete. This
* is so that any end-of-transaction errors are reported before
* the command-complete message is issued, to avoid confusing
* clients who will expect either a command-complete message or an
* error, not one and then the other. Also, if we're using an
* implicit transaction block, we must close that out first.
*/
if (use_implicit_block)
EndImplicitTransactionBlock();//结束事务
finish_xact_command();//结束事务
}
else if (IsA(parsetree->stmt, TransactionStmt))//事务语句?BEGIN/COMMIT/ABORT...
{
/*
* If this was a transaction control statement, commit it. We will
* start a new xact command for the next command.
*/
finish_xact_command();
}
else
{
/*
* We need a CommandCounterIncrement after every query, except
* those that start or end a transaction block.
*/
CommandCounterIncrement();//命令+1(对应Tuple中的cid)
}
/*
* Tell client that we're done with this query. Note we emit exactly
* one EndCommand report for each raw parsetree, thus one for each SQL
* command the client sent, regardless of rewriting. (But a command
* aborted by error will not send an EndCommand report at all.)
*/
EndCommand(completionTag, dest);//命令Done
} /* end loop over parsetrees */
//所有语句结束
/*
* Close down transaction statement, if one is open. (This will only do
* something if the parsetree list was empty; otherwise the last loop
* iteration already did it.)
*/
finish_xact_command();
/*
* If there were no parsetrees, return EmptyQueryResponse message.
*/
if (!parsetree_list)
NullCommand(dest);
/*
* Emit duration logging if appropriate.
*/
switch (check_log_duration(msec_str, was_logged))
{
case 1:
ereport(LOG,
(errmsg("duration: %s ms", msec_str),
errhidestmt(true)));
break;
case 2:
ereport(LOG,
(errmsg("duration: %s ms statement: %s",
msec_str, query_string),
errhidestmt(true),
errdetail_execute(parsetree_list)));
break;
}
if (save_log_statement_stats)
ShowUsage("QUERY STATISTICS");
TRACE_POSTGRESQL_QUERY_DONE(query_string);
debug_query_string = NULL;
}
二、基础信息
exec_simple_query函数使用的数据结构、宏定义以及依赖的函数等。
数据结构/宏定义
*1、whereToSendOutput *
/* Note: whereToSendOutput is initialized for the bootstrap/standalone case */
CommandDest whereToSendOutput = DestDebug;
/* ----------------
* CommandDest is a simplistic means of identifying the desired
* destination. Someday this will probably need to be improved.
*
* Note: only the values DestNone, DestDebug, DestRemote are legal for the
* global variable whereToSendOutput. The other values may be used
* as the destination for individual commands.
* ----------------
*/
typedef enum
{
DestNone, /* results are discarded */
DestDebug, /* results go to debugging output */
DestRemote, /* results sent to frontend process */
DestRemoteExecute, /* sent to frontend, in Execute command */
DestRemoteSimple, /* sent to frontend, w/no catalog access */
DestSPI, /* results sent to SPI manager */
DestTuplestore, /* results sent to Tuplestore */
DestIntoRel, /* results sent to relation (SELECT INTO) */
DestCopyOut, /* results sent to COPY TO code */
DestSQLFunction, /* results sent to SQL-language func mgr */
DestTransientRel, /* results sent to transient relation */
DestTupleQueue /* results sent to tuple queue */
} CommandDest;
2、RawStmt
/*
* RawStmt --- container for any one statement's raw parse tree
*
* Parse analysis converts a raw parse tree headed by a RawStmt node into
* an analyzed statement headed by a Query node. For optimizable statements,
* the conversion is complex. For utility statements, the parser usually just
* transfers the raw parse tree (sans RawStmt) into the utilityStmt field of
* the Query node, and all the useful work happens at execution time.
*
* stmt_location/stmt_len identify the portion of the source text string
* containing this raw statement (useful for multi-statement strings).
*/
typedef struct RawStmt
{
NodeTag type;
Node *stmt; /* raw parse tree */
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} RawStmt;
3、Query
//在解析查询语句时再深入解析
/*****************************************************************************
* Query Tree
*****************************************************************************/
/*
* Query -
* Parse analysis turns all statements into a Query tree
* for further processing by the rewriter and planner.
*
* Utility statements (i.e. non-optimizable statements) have the
* utilityStmt field set, and the rest of the Query is mostly dummy.
*
* Planning converts a Query tree into a Plan tree headed by a PlannedStmt
* node --- the Query structure is not used by the executor.
*/
typedef struct Query
{
NodeTag type;
CmdType commandType; /* select|insert|update|delete|utility */
QuerySource querySource; /* where did I come from? */
uint64 queryId; /* query identifier (can be set by plugins) */
bool canSetTag; /* do I set the command result tag? */
Node *utilityStmt; /* non-null if commandType == CMD_UTILITY */
int resultRelation; /* rtable index of target relation for
* INSERT/UPDATE/DELETE; 0 for SELECT */
bool hasAggs; /* has aggregates in tlist or havingQual */
bool hasWindowFuncs; /* has window functions in tlist */
bool hasTargetSRFs; /* has set-returning functions in tlist */
bool hasSubLinks; /* has subquery SubLink */
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
bool hasForUpdate; /* FOR [KEY] UPDATE/SHARE was specified */
bool hasRowSecurity; /* rewriter has applied some RLS policy */
List *cteList; /* WITH list (of CommonTableExpr's) */
List *rtable; /* list of range table entries */
FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */
List *targetList; /* target list (of TargetEntry) */
OverridingKind override; /* OVERRIDING clause */
OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
List *groupingSets; /* a list of GroupingSet's if present */
Node *havingQual; /* qualifications applied to groups */
List *windowClause; /* a list of WindowClause's */
List *distinctClause; /* a list of SortGroupClause's */
List *sortClause; /* a list of SortGroupClause's */
Node *limitOffset; /* # of result tuples to skip (int8 expr) */
Node *limitCount; /* # of result tuples to return (int8 expr) */
List *rowMarks; /* a list of RowMarkClause's */
Node *setOperations; /* set-operation tree if this is top level of
* a UNION/INTERSECT/EXCEPT query */
List *constraintDeps; /* a list of pg_constraint OIDs that the query
* depends on to be semantically valid */
List *withCheckOptions; /* a list of WithCheckOption's, which are
* only added during rewrite and therefore
* are not written out as part of Query. */
/*
* The following two fields identify the portion of the source text string
* containing this query. They are typically only populated in top-level
* Queries, not in sub-queries. When not set, they might both be zero, or
* both be -1 meaning "unknown".
*/
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} Query;
4、ParseState
/*
* Function signatures for parser hooks
*/
typedef struct ParseState ParseState;
typedef Node *(*PreParseColumnRefHook) (ParseState *pstate, ColumnRef *cref);
typedef Node *(*PostParseColumnRefHook) (ParseState *pstate, ColumnRef *cref, Node *var);
typedef Node *(*ParseParamRefHook) (ParseState *pstate, ParamRef *pref);
typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param,
Oid targetTypeId, int32 targetTypeMod,
int location);
/*
* State information used during parse analysis
*
* parentParseState: NULL in a top-level ParseState. When parsing a subquery,
* links to current parse state of outer query.
*
* p_sourcetext: source string that generated the raw parsetree being
* analyzed, or NULL if not available. (The string is used only to
* generate cursor positions in error messages: we need it to convert
* byte-wise locations in parse structures to character-wise cursor
* positions.)
*
* p_rtable: list of RTEs that will become the rangetable of the query.
* Note that neither relname nor refname of these entries are necessarily
* unique; searching the rtable by name is a bad idea.
*
* p_joinexprs: list of JoinExpr nodes associated with p_rtable entries.
* This is one-for-one with p_rtable, but contains NULLs for non-join
* RTEs, and may be shorter than p_rtable if the last RTE(s) aren't joins.
*
* p_joinlist: list of join items (RangeTblRef and JoinExpr nodes) that
* will become the fromlist of the query's top-level FromExpr node.
*
* p_namespace: list of ParseNamespaceItems that represents the current
* namespace for table and column lookup. (The RTEs listed here may be just
* a subset of the whole rtable. See ParseNamespaceItem comments below.)
*
* p_lateral_active: true if we are currently parsing a LATERAL subexpression
* of this parse level. This makes p_lateral_only namespace items visible,
* whereas they are not visible when p_lateral_active is FALSE.
*
* p_ctenamespace: list of CommonTableExprs (WITH items) that are visible
* at the moment. This is entirely different from p_namespace because a CTE
* is not an RTE, rather "visibility" means you could make an RTE from it.
*
* p_future_ctes: list of CommonTableExprs (WITH items) that are not yet
* visible due to scope rules. This is used to help improve error messages.
*
* p_parent_cte: CommonTableExpr that immediately contains the current query,
* if any.
*
* p_target_relation: target relation, if query is INSERT, UPDATE, or DELETE.
*
* p_target_rangetblentry: target relation's entry in the rtable list.
*
* p_is_insert: true to process assignment expressions like INSERT, false
* to process them like UPDATE. (Note this can change intra-statement, for
* cases like INSERT ON CONFLICT UPDATE.)
*
* p_windowdefs: list of WindowDefs representing WINDOW and OVER clauses.
* We collect these while transforming expressions and then transform them
* afterwards (so that any resjunk tlist items needed for the sort/group
* clauses end up at the end of the query tlist). A WindowDef's location in
* this list, counting from 1, is the winref number to use to reference it.
*
* p_expr_kind: kind of expression we're currently parsing, as per enum above;
* EXPR_KIND_NONE when not in an expression.
*
* p_next_resno: next TargetEntry.resno to assign, starting from 1.
*
* p_multiassign_exprs: partially-processed MultiAssignRef source expressions.
*
* p_locking_clause: query's FOR UPDATE/FOR SHARE clause, if any.
*
* p_locked_from_parent: true if parent query level applies FOR UPDATE/SHARE
* to this subquery as a whole.
*
* p_resolve_unknowns: resolve unknown-type SELECT output columns as type TEXT
* (this is true by default).
*
* p_hasAggs, p_hasWindowFuncs, etc: true if we've found any of the indicated
* constructs in the query.
*
* p_last_srf: the set-returning FuncExpr or OpExpr most recently found in
* the query, or NULL if none.
*
* p_pre_columnref_hook, etc: optional parser hook functions for modifying the
* interpretation of ColumnRefs and ParamRefs.
*
* p_ref_hook_state: passthrough state for the parser hook functions.
*/
struct ParseState
{
struct ParseState *parentParseState; /* stack link */
const char *p_sourcetext; /* source text, or NULL if not available */
List *p_rtable; /* range table so far */
List *p_joinexprs; /* JoinExprs for RTE_JOIN p_rtable entries */
List *p_joinlist; /* join items so far (will become FromExpr
* node's fromlist) */
List *p_namespace; /* currently-referenceable RTEs (List of
* ParseNamespaceItem) */
bool p_lateral_active; /* p_lateral_only items visible? */
List *p_ctenamespace; /* current namespace for common table exprs */
List *p_future_ctes; /* common table exprs not yet in namespace */
CommonTableExpr *p_parent_cte; /* this query's containing CTE */
Relation p_target_relation; /* INSERT/UPDATE/DELETE target rel */
RangeTblEntry *p_target_rangetblentry; /* target rel's RTE */
bool p_is_insert; /* process assignment like INSERT not UPDATE */
List *p_windowdefs; /* raw representations of window clauses */
ParseExprKind p_expr_kind; /* what kind of expression we're parsing */
int p_next_resno; /* next targetlist resno to assign */
List *p_multiassign_exprs; /* junk tlist entries for multiassign */
List *p_locking_clause; /* raw FOR UPDATE/FOR SHARE info */
bool p_locked_from_parent; /* parent has marked this subquery
* with FOR UPDATE/FOR SHARE */
bool p_resolve_unknowns; /* resolve unknown-type SELECT outputs as
* type text */
QueryEnvironment *p_queryEnv; /* curr env, incl refs to enclosing env */
/* Flags telling about things found in the query: */
bool p_hasAggs;
bool p_hasWindowFuncs;
bool p_hasTargetSRFs;
bool p_hasSubLinks;
bool p_hasModifyingCTE;
Node *p_last_srf; /* most recent set-returning func/op found */
/*
* Optional hook functions for parser callbacks. These are null unless
* set up by the caller of make_parsestate.
*/
PreParseColumnRefHook p_pre_columnref_hook;
PostParseColumnRefHook p_post_columnref_hook;
ParseParamRefHook p_paramref_hook;
CoerceParamHook p_coerce_param_hook;
void *p_ref_hook_state; /* common passthrough link for above */
};
5、RangeTblEntry
/*--------------------
* RangeTblEntry -
* A range table is a List of RangeTblEntry nodes.
*
* A range table entry may represent a plain relation, a sub-select in
* FROM, or the result of a JOIN clause. (Only explicit JOIN syntax
* produces an RTE, not the implicit join resulting from multiple FROM
* items. This is because we only need the RTE to deal with SQL features
* like outer joins and join-output-column aliasing.) Other special
* RTE types also exist, as indicated by RTEKind.
*
* Note that we consider RTE_RELATION to cover anything that has a pg_class
* entry. relkind distinguishes the sub-cases.
*
* alias is an Alias node representing the AS alias-clause attached to the
* FROM expression, or NULL if no clause.
*
* eref is the table reference name and column reference names (either
* real or aliases). Note that system columns (OID etc) are not included
* in the column list.
* eref->aliasname is required to be present, and should generally be used
* to identify the RTE for error messages etc.
*
* In RELATION RTEs, the colnames in both alias and eref are indexed by
* physical attribute number; this means there must be colname entries for
* dropped columns. When building an RTE we insert empty strings ("") for
* dropped columns. Note however that a stored rule may have nonempty
* colnames for columns dropped since the rule was created (and for that
* matter the colnames might be out of date due to column renamings).
* The same comments apply to FUNCTION RTEs when a function's return type
* is a named composite type.
*
* In JOIN RTEs, the colnames in both alias and eref are one-to-one with
* joinaliasvars entries. A JOIN RTE will omit columns of its inputs when
* those columns are known to be dropped at parse time. Again, however,
* a stored rule might contain entries for columns dropped since the rule
* was created. (This is only possible for columns not actually referenced
* in the rule.) When loading a stored rule, we replace the joinaliasvars
* items for any such columns with null pointers. (We can't simply delete
* them from the joinaliasvars list, because that would affect the attnums
* of Vars referencing the rest of the list.)
*
* inh is true for relation references that should be expanded to include
* inheritance children, if the rel has any. This *must* be false for
* RTEs other than RTE_RELATION entries.
*
* inFromCl marks those range variables that are listed in the FROM clause.