Tbase 源码 (三)

【查询优化——生成路径】

set_base_rel_pathlists函数负责生成路径的第一个阶段:生成基本关系的访问路径——即为每一个基本关系生成一个RelOptInfo结构并生成路径,放在RelOptInfo结构的pathlist字段中。set_base_rel_pathlists函数是负责为基本关系生成访问路径的,但是基本关系的类型就有很多种,所以对于不同类型的基本关系又需要通过不同的函数去完成访问路径的生成。即set_base_rel_pathlists函数又需要调用不同的函数分别为不同类型的基本关系生成访问路径。
 

基本关系类型 | 注释 | 相应函数
继承/追加关系(append relation) | RTE的inh字段为真 | set_append_rel_pathlist
RTE_SUBQUERY | 表示当前的基本关系是一个子查询 | set_subquery_pathlist
RTE_FUNCTION | 表示当前的基本关系是一个函数 | set_function_pathlist
RTE_VALUES | 表示当前的基本关系是一个VALUES列表 | set_values_pathlist
RTE_CTE | 表示当前的基本关系是一个公共表表达式 | 如果是递归的(自引用)则调用set_worktable_pathlist,否则调用set_cte_pathlist
RTE_RELATION | 表示当前的基本关系是一个普通表 | set_plain_rel_pathlist
/*
 * set_base_rel_pathlists
 *      Finds all paths available for scanning each base-relation entry.
 *      Sequential scan and any available indices are considered.
 *      Each useful path is attached to its relation's 'pathlist' field.
 */
static void
set_base_rel_pathlists(PlannerInfo *root)

 set_scanpath_distribution函数将数据的节点分布信息分发到基本关系对象的扫描路径里。

/*
 * set_scanpath_distribution
 *      Assign distribution to the path which is a base relation scan.
 *
 * root     - planner state; used to fetch the range table entry of 'rel'
 *            and to read the command type of the query being planned.
 * rel      - the base relation being scanned.
 * pathnode - the scan path; on success its 'distribution' field is set to a
 *            freshly built Distribution node.
 *
 * Nothing is attached unless we are running on a coordinator and the
 * relation has locator info; in that case pathnode is left untouched.
 *
 * With __TBASE__ and __COLD_HOT__ defined, the function also updates the
 * has_cold_hot_table flag and the groupOids list as a side effect.
 *
 * Raises ERROR when a replicated table is read by a SELECT and no healthy
 * node can be found even after one on-demand pool ping.
 */
static void
set_scanpath_distribution(PlannerInfo *root, RelOptInfo *rel, Path *pathnode)
{// #lizard forgives
    RangeTblEntry   *rte;
    RelationLocInfo *rel_loc_info;
    Distribution    *distribution;
    ListCell        *lc;

    rte = planner_rt_fetch(rel->relid, root);
    rel_loc_info = GetRelationLocInfo(rte->relid);

    /* Only a coordinator handling a relation with locator info has work to do */
    if (!(IS_PGXC_COORDINATOR) || rel_loc_info == NULL)
        return;

#ifdef __TBASE__
    /*
     * Get the group oid which the base rel belongs to; it is used later at
     * the end of the planner.  Local tables are not included.
     */
    {
        Oid         group = InvalidOid;

        if (rel_loc_info->locatorType == LOCATOR_TYPE_SHARD ||
            rel_loc_info->locatorType == LOCATOR_TYPE_REPLICATED)
            group = GetRelGroup(rte->relid);

#ifdef __COLD_HOT__
        if (AttributeNumberIsValid(rel_loc_info->secAttrNum) ||
            OidIsValid(rel_loc_info->coldGroupId))
        {
            has_cold_hot_table = true;
        }
        else if (rel_loc_info->locatorType != LOCATOR_TYPE_REPLICATED ||
                 OidIsValid(group))
        {
            /* A replicated table without a valid group is deliberately skipped */
            groupOids = list_append_unique_oid(groupOids, group);
        }
#endif
    }
#endif

    distribution = makeNode(Distribution);
    distribution->distributionType = rel_loc_info->locatorType;

    if (root->parse->commandType == CMD_SELECT &&
        distribution->distributionType == LOCATOR_TYPE_REPLICATED)
    {
        /*
         * For LOCATOR_TYPE_REPLICATED distribution, check if all of the
         * mentioned nodes are healthy and keep only those which are.  This
         * is done only for SELECT queries.
         *
         * If no node qualifies, try an on-demand pool maintenance once to
         * see if some nodes have come back, then error out if the datanodes
         * are still down.
         */
        bool        pinged = false;

        for (;;)
        {
            bool       *healthmap;
            int         i = 0;

            /* palloc reports out-of-memory itself, so no NULL check is needed */
            healthmap = (bool *) palloc(sizeof(bool) * TBASE_MAX_DATANODE_NUMBER);

            PgxcNodeDnListHealth(rel_loc_info->rl_nodeList, healthmap);

            foreach(lc, rel_loc_info->rl_nodeList)
            {
                if (healthmap[i++] == true)
                    distribution->nodes = bms_add_member(distribution->nodes,
                                                         lfirst_int(lc));
            }

            pfree(healthmap);

            if (!bms_is_empty(distribution->nodes))
                break;

            if (pinged)
                elog(ERROR,
                     "Could not find healthy nodes for replicated table. Exiting!");

            PoolPingNodes();
            pinged = true;
        }
    }
    else
    {
        /* Every node listed for the relation takes part */
        foreach(lc, rel_loc_info->rl_nodeList)
            distribution->nodes = bms_add_member(distribution->nodes,
                                                 lfirst_int(lc));
    }

    distribution->restrictNodes = NULL;

    /*
     * Distribution expression of the base relation is a Var representing
     * the respective attribute.
     */
    distribution->distributionExpr = NULL;
    if (rel_loc_info->partAttrNum)
    {
        Var        *dist_var = NULL;

        /* Look if the Var is already in the target list */
        foreach(lc, rel->reltarget->exprs)
        {
            Var        *candidate = (Var *) lfirst(lc);

            if (IsA(candidate, Var) &&
                candidate->varno == rel->relid &&
                candidate->varattno == rel_loc_info->partAttrNum)
            {
                dist_var = candidate;
                break;
            }
        }

        /* If not found we should look up the attribute and make the Var */
        if (dist_var == NULL)
        {
            Relation    relation = heap_open(rte->relid, NoLock);
            TupleDesc   tdesc = RelationGetDescr(relation);
            Form_pg_attribute att_tup;

            /* Attribute numbers are 1-based, the attrs array is 0-based */
            att_tup = tdesc->attrs[rel_loc_info->partAttrNum - 1];
            dist_var = makeVar(rel->relid, rel_loc_info->partAttrNum,
                               att_tup->atttypid, att_tup->atttypmod,
                               att_tup->attcollation, 0);

            heap_close(relation, NoLock);
        }

        distribution->distributionExpr = (Node *) dist_var;
    }

    pathnode->distribution = distribution;
}

 调用关系(箭头由被调用函数指向调用它的函数):set_scanpath_distribution => create_xxx_paths => set_plain_rel_pathlist => set_rel_pathlist => set_base_rel_pathlists

【生成并行计划的路径】

/*
* If this relation could possibly be scanned from within a worker, then set
* its consider_parallel flag.
*/
static void
set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, ... )

/*
 * create_plain_partial_paths
 *      Build partial access paths for parallel scan of a plain relation
 */
static void
create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)

set_append_rel_pathlist 

 \src\backend\optimizer\path\Allpaths.c

/*
 * set_append_rel_pathlist
 *      Build access paths for an "append relation"
 */
static void
set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        Index rti, RangeTblEntry *rte)

 /*
 * set_subquery_pathlist
 *        Generate SubqueryScan access paths for a subquery RTE
 *
 * We don't currently support generating parameterized paths for subqueries
 * by pushing join clauses down into them; it seems too expensive to re-plan
 * the subquery multiple times to consider different alternatives.
 * (XXX that could stand to be reconsidered, now that we use Paths.)
 * So the paths made here will be parameterized if the subquery contains
 * LATERAL references, otherwise not.  As long as that's true, there's no need
 * for a separate set_subquery_size phase: just make the paths right away.
 */
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                      Index rti, RangeTblEntry *rte)

/*
 * set_function_pathlist
 *        Build the (single) access path for a function RTE
 */
static void
set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)

/*
 * set_values_pathlist
 *        Build the (single) access path for a VALUES RTE
 */
static void
set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)

/*
 * set_worktable_pathlist
 *        Build the (single) access path for a self-reference CTE RTE
 *
 * There's no need for a separate set_worktable_size phase, since we don't
 * support join-qual-parameterized paths for CTEs.
 */
static void
set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/*
 * set_cte_pathlist
 *        Build the (single) access path for a non-self-reference CTE RTE
 *
 * There's no need for a separate set_cte_size phase, since we don't
 * support join-qual-parameterized paths for CTEs.
 */
static void
set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/*
 * set_plain_rel_pathlist
 *      Generate the candidate access paths for an ordinary table
 *      (neither a subquery nor an inheritance parent).
 *
 * A sequential scan is always added; a parallel sequential scan, index
 * scans and TID scans are considered in turn.
 */
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /*
     * Join clauses are never pushed into a seqscan's quals, yet LATERAL
     * references in the tlist can still force a parameterization.
     */
    Relids      lateral_outer = rel->lateral_relids;
    Path       *seqscan;

    /* Sequential scan */
    seqscan = create_seqscan_path(root, rel, lateral_outer, 0);
    add_path(rel, seqscan);

    /* Parallel sequential scan, only when no parameterization is required */
    if (lateral_outer == NULL && rel->consider_parallel)
    {
        create_plain_partial_paths(root, rel);
    }

    /* Index scans */
    create_index_paths(root, rel);

    /* TID scans */
    create_tidscan_paths(root, rel);
}

生成索引扫描路径
当关系上涉及索引扫描时,要生成相应的索引路径。生成索引路径的函数主要是 create_index_paths函数
该函数为参数rel中指定的RelOptInfo结构添加可能的索引路径。

一个索引必须满足以下条件之一才会被考虑用于生成索引扫描路径:
能匹配一个或者多个约束条件(来自于baserestrictinfo字段中的RestrictInfo结构)
能匹配查询中的连接条件
能匹配查询的排序要求
索引带有谓词(部分索引),且该谓词能匹配查询的条件(where子句中除了连接条件的其他条件)

 \src\backend\optimizer\path\indxpath.c

/*
 * create_index_paths()
 *      Generate all interesting index paths for the given relation.
 *      Candidate paths are added to the rel's pathlist (using add_path).
 *
 * To be considered for an index scan, an index must match one or more
 * restriction clauses or join clauses from the query's qual condition,
 * or match the query's ORDER BY condition, or have a predicate that
 * matches the query's qual condition.
 *
 * There are two basic kinds of index scans.  A "plain" index scan uses
 * only restriction clauses (possibly none at all) in its indexqual,
 * so it can be applied in any context.  A "parameterized" index scan uses
 * join clauses (plus restriction clauses, if available) in its indexqual.
 * When joining such a scan to one of the relations supplying the other
 * variables used in its indexqual, the parameterized scan must appear as
 * the inner relation of a nestloop join; it can't be used on the outer side,
 * nor in a merge or hash join.  In that context, values for the other rels'
 * attributes are available and fixed during any one scan of the indexpath.
 *
 * An IndexPath is generated and submitted to add_path() for each plain or
 * parameterized index scan this routine deems potentially interesting for
 * the current query.
 *
 * 'rel' is the relation for which we want to generate index paths
 *
 * Note: check_index_predicates() must have been run previously for this rel.
 *
 * Note: in cases involving LATERAL references in the relation's tlist, it's
 * possible that rel->lateral_relids is nonempty.  Currently, we include
 * lateral_relids into the parameterization reported for each path, but don't
 * take it into account otherwise.  The fact that any such rels *must* be
 * available as parameter sources perhaps should influence our choices of
 * index quals ... but for now, it doesn't seem worth troubling over.
 * In particular, comments below about "unparameterized" paths should be read
 * as meaning "unparameterized so far as the indexquals are concerned".
 */
void
create_index_paths(PlannerInfo *root, RelOptInfo *rel)

 create_index_paths函数调用 — generate_bitmap_or_paths函数
generate_bitmap_or_paths函数
create_index_paths->generate_bitmap_or_paths函数从条件子句链表中寻找OR子句,如找到并且可以处理则生成BitmapOrPath.函数返回生成的链表BitmapOrPaths. 

 \src\backend\optimizer\path\indxpath.c

/*
 * generate_bitmap_or_paths
 *        Look through the list of clauses to find OR clauses, and generate
 *        a BitmapOrPath for each one we can handle that way.  Return a list
 *        of the generated BitmapOrPaths.
 *
 * other_clauses is a list of additional clauses that can be assumed true
 * for the purpose of generating indexquals, but are not to be searched for
 * ORs.  (See build_paths_for_OR() for motivation.)
 */
static List *
generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
                         List *clauses, List *other_clauses)

 生成TID扫描路径
 TID用来表示元组的物理地址,一个TID从逻辑上可以看成是一个三元组(表OID,块号,元组的块内偏移量)。因为TID可以让执行器快速定位到磁盘上的元组,因此通过TID来访问元组非常高效。在PostgreSQL查询中也允许在where子句中使用TID作为查询条件,对于这种情况,查询规划器将为它生成TID扫描路径。TID扫描路径的生成主要由函数create_tidscan_paths完成。该函数主要分为两大步:
(1)从表的 RelOptInfo结构的baserestrictinfo中提取TID条件
(2)根据TID条件调用create_tidscan_path函数生成TID扫描路径,并尝试用add_path加入到pathlist中。 

  \src\backend\optimizer\path\tidpath.c

/*
 * create_tidscan_paths
 *      Create paths corresponding to direct TID scans of the given rel.
 *
 *      Candidate paths are added to the rel's pathlist (using add_path).
 */
void
create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
{

【生成连接关系】

生成最终路径
所有基本关系都创建好后,就可以把它们连接起来形成最终的连接关系,最终路径的生成由make_rel_from_joinlist函数完成。make_rel_from_joinlist函数会将传入参数joinlist中的基本关系连接起来生成最终的连接关系,并且为最终的连接关系建立RelOptInfo结构,其pathlist字段就是最终路径组成的链表。该函数的实质就是选择不同的连接方式作为中间节点,选择不同的连接顺序将代表基本关系的叶子结点连接成一棵树,使其代价最小。这是一个递归生成二叉树的过程。 

 \src\backend\optimizer\path\Allpaths.c

/*
 * make_rel_from_joinlist
 *      Build access paths using a "joinlist" to guide the join path search.
 *
 * See comments for deconstruct_jointree() for definition of the joinlist
 * data structure.
 */
static RelOptInfo *
make_rel_from_joinlist(PlannerInfo *root, List *joinlist)

standard_join_search  

make_rel_from_joinlist   调用 standard_join_search 函数

/*
 * standard_join_search
 *      Bottom-up search for a way to join all jointree items of a query
 *      into one join relation.
 *
 * 'levels_needed' is the number of independent jointree items, which is
 *        also the number of levels to build.  This is > 1.
 *
 * 'initial_rels' holds one RelOptInfo per independent jointree item; these
 *        are the inputs to be joined.  Its length equals levels_needed.
 *
 * The return value is the single relation found at the final level, i.e.
 * the join of all the original relations.  ERROR is raised when no join
 * could be built at that level.
 *
 * A hook variable lets loadable plugins replace or supplement this
 * function.  Such a hook must return the same final join relation as the
 * standard code, although the implementation paths attached to it may
 * differ, and only the sub-joinrels those paths need have to exist.
 *
 * Note to plugin authors: the functions invoked from here modify
 * root->join_rel_list and root->join_rel_hash.  To run more than one
 * join-order search, save and restore those structures; geqo_eval() is an
 * example.
 */
RelOptInfo *
standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
{
    RelOptInfo *final_rel;
    int         level;

    /*
     * Recursion within one planning problem is not possible, therefore
     * join_rel_level[] must be unused on entry.
     */
    Assert(root->join_rel_level == NULL);

    /*
     * Simple dynamic programming: build all two-item joins first, then all
     * three-item joins (a two-item join plus a single item), and so on up
     * to the join of every item.
     *
     * root->join_rel_level[j] lists all rels made of j items; slot 1 is
     * seeded with the single-item relations.
     */
    root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
    root->join_rel_level[1] = initial_rels;

    level = 2;
    while (level <= levels_needed)
    {
        ListCell   *cell;
        bool        is_top_level = (level == levels_needed);

        /*
         * Work out every pair of lower-level relations that can be joined
         * at this level and build the paths for each resulting joinrel.
         */
        join_search_one_level(root, level);

        /*
         * Partition-wise join paths and gather paths are generated only
         * now, since join_search_one_level may add regular and partial
         * paths to one joinrel several times.  Once they are in place the
         * joinrel is complete and set_cheapest() can run.
         */
        foreach(cell, root->join_rel_level[level])
        {
            RelOptInfo *joinrel = (RelOptInfo *) lfirst(cell);

            /* Paths for partition-wise joins */
            generate_partition_wise_join_paths(root, joinrel);

            /*
             * Gather partial paths, except for the topmost scan/join rel,
             * which is handled once the final targetlist is known (see
             * grouping_planner).
             */
            if (!is_top_level)
            {
                generate_gather_paths(root, joinrel);
            }

            /* Record the cheapest paths of this joinrel */
            set_cheapest(joinrel);

#ifdef OPTIMIZER_DEBUG
            debug_print_rel(root, joinrel);
#endif
        }

        level++;
    }

    /* Exactly one rel is expected at the final level */
    if (root->join_rel_level[levels_needed] == NIL)
        elog(ERROR, "failed to build any %d-way joins", levels_needed);
    Assert(list_length(root->join_rel_level[levels_needed]) == 1);

    final_rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);

    /* Mark the per-level array as no longer in use */
    root->join_rel_level = NULL;

    return final_rel;
}

/*
 * join_search_one_level
 *      Consider ways to produce join relations containing exactly 'level'
 *      jointree items.  (This is one step of the dynamic-programming method
 *      embodied in standard_join_search.)  Join rel nodes for each feasible
 *      combination of lower-level rels are created and returned in a list.
 *      Implementation paths are created for each such joinrel, too.
 *
 * level: level of rels we want to make this time
 * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
 *
 * The result is returned in root->join_rel_level[level].
 */
void
join_search_one_level(PlannerInfo *root, int level)

 【查找join的分布方式】

 hashjoin_path 示例:

set_joinpath_distribution =》  create_hashjoin_path =》 try_hashjoin_path   =》 hash_inner_and_outer =》 add_paths_to_joinrel   =》populate_joinrel_with_paths
=》make_join_rel =》 join_search_one_level

create_xxx_path 中都会调用set_joinpath_distribution来生成join的分布方式。

/*
 * Analyze join parameters and set distribution of the join node.
 * If there are possible alternate distributions the respective pathes are
 * returned as a list so caller can cost all of them and choose cheapest to
 * continue.
 */
static List *
set_joinpath_distribution(PlannerInfo *root, JoinPath *pathnode)

下面这些 是可以不重分布数据的情况(具体实现方法参见set_joinpath_distribution 源码):

/*
     * If both subpaths are distributed by replication, the resulting
     * distribution will be replicated on smallest common set of nodes.
     * Catalog tables are the same on all nodes, so treat them as replicated
     * on all nodes.
     */
    /*
     * Check if we have inner replicated
     * The "both replicated" case is already checked, so if innerd
     * is replicated, then outerd is not replicated and it is not NULL.
     * This case is not acceptable for some join types. If outer relation is
     * nullable data nodes will produce joined rows with NULLs for cases when
     * matching row exists, but on other data node.
     */
         /*
     * Check if we have outer replicated
     * The "both replicated" case is already checked, so if outerd
     * is replicated, then innerd is not replicated and it is not NULL.
     * This case is not acceptable for some join types. If inner relation is
     * nullable data nodes will produce joined rows with NULLs for cases when
     * matching row exists, but on other data node.
     */
     /*
     * This join is still allowed if inner and outer paths have
     * equivalent distribution and joined along the distribution keys.
     */
    /*
     * Build cartesian product, if no hasheable restrictions is found.
     * Perform coordinator join in such cases. If this join would be a part of
     * larger join, it will be handled as replicated.
     * To do that leave join distribution NULL and place a RemoteSubPath node on
     * top of each subpath to provide access to joined result sets.
     * Do not redistribute pathes that already have NULL distribution, this is
     * possible if performing outer join on a coordinator and a datanode
     * relations.

     */

【重分布数据】

【\src\backend\optimizer\util\Pathnode.c】

/*
 * redistribute_path
 *     Redistributes the path to match desired distribution parameters.
 *
 * It's also possible to specify desired sort order using pathkeys. If the
 * subpath does not match the order, a Sort node will be added automatically.
 * This is similar to how create_merge_append_path() injects Sort nodes.
 */
static Path *
redistribute_path(PlannerInfo *root, Path *subpath, List *pathkeys,
                  char distributionType, Node* distributionExpr,
                  Bitmapset *nodes, Bitmapset *restrictNodes)

【重分布数据--nestloop_path 示例】函数调用路径   redistribute_path =》 set_joinpath_distribution=》 create_nestloop_path=》try_nestloop_path  =》match_unsorted_outer =》 add_paths_to_joinrel  =》populate_joinrel_with_paths
=》make_join_rel =》 join_search_one_level  

【重分布数据--hashjoin_path 示例】函数调用路径   redistribute_path =》 set_joinpath_distribution=》 create_hashjoin_path=》try_hashjoin_path  =》hash_inner_and_outer =》 add_paths_to_joinrel  =》populate_joinrel_with_paths
=》make_join_rel =》 join_search_one_level  

【重分布数据--mergejoin_path 示例】函数调用路径   redistribute_path =》 set_joinpath_distribution=》 create_mergejoin_path=》[ try_partial_mergejoin_path=》] try_mergejoin_path  =》match_unsorted_outer =》 add_paths_to_joinrel  =》populate_joinrel_with_paths
=》make_join_rel =》 join_search_one_level  

【计算代价最小的路径】

set_cheapest

/*
 * set_cheapest
 *      Walk a relation's pathlist once and record its cheapest paths in
 *      the rel's cheapest-path fields.
 *
 * Fields written in parent_rel:
 *   cheapest_startup_path - the unparameterized path with the lowest startup
 *        cost, or NULL when the rel has no unparameterized path.
 *   cheapest_total_path - the unparameterized path with the lowest total
 *        cost; when there is none, the best parameterized path instead.
 *   cheapest_unique_path - reset to NULL (computed only if needed).
 *   cheapest_parameterized_paths - every parameterized path of the rel,
 *        preceded by the cheapest-total unparameterized path if one exists.
 *
 * "Best parameterized path" means: a path whose required-outer set is a
 * proper subset of the current best one replaces it, equal sets are decided
 * by total cost, and for incomparable sets the earlier path is kept.
 *
 * Among unparameterized paths of identical cost, the one with the better
 * pathkeys is preferred.
 *
 * An empty pathlist raises ERROR.  This is normally called only after the
 * path list of the rel has been completely built.
 */
void
set_cheapest(RelOptInfo *parent_rel)
{// #lizard forgives
    Path       *startup_best = NULL;
    Path       *total_best = NULL;
    Path       *param_best = NULL;
    List       *param_paths = NIL;
    ListCell   *cell;

    Assert(IsA(parent_rel, RelOptInfo));

    if (parent_rel->pathlist == NIL)
    {
#ifdef __TBASE__
        /*
         * set_joinpath_distribution() may have dropped all paths of a
         * complex UPDATE/DELETE because no proper distribution was found.
         * That surfaces here first, so give a more precise message than
         * the generic one below.
         */
        if (parent_rel->resultRelLoc != RESULT_REL_NONE)
        {
#ifdef _PG_REGRESS_
            ereport(ERROR,
                    (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
                     errmsg("could not plan this distributed UPDATE/DELETE"),
                     errdetail("correlated or complex UPDATE/DELETE is currently not supported in Postgres-XL.")));
#else
            ereport(ERROR,
                    (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
                     errmsg("could not plan this distributed UPDATE/DELETE"),
                     errdetail("correlated or complex UPDATE/DELETE is currently not supported in TBase.")));
#endif
        }
#endif

        elog(ERROR, "could not devise a query plan for the given query");
    }

    foreach(cell, parent_rel->pathlist)
    {
        Path       *path = (Path *) lfirst(cell);
        int         cmp;

        if (path->param_info != NULL)
        {
            BMS_Comparison outer_cmp;

            /* Every parameterized path is collected */
            param_paths = lappend(param_paths, path);

            /*
             * Once an unparameterized cheapest-total exists, the best
             * parameterized path is no longer of interest.
             */
            if (total_best != NULL)
                continue;

            if (param_best == NULL)
            {
                param_best = path;
                continue;
            }

            /*
             * Prefer the less-parameterized path; for equal
             * parameterization keep the one with lower total cost.  When
             * the sets are incomparable, or the old path is the
             * less-parameterized one, the old path stays.
             */
            outer_cmp = bms_subset_compare(PATH_REQ_OUTER(path),
                                           PATH_REQ_OUTER(param_best));
            if (outer_cmp == BMS_SUBSET1 ||
                (outer_cmp == BMS_EQUAL &&
                 compare_path_costs(path, param_best, TOTAL_COST) < 0))
            {
                param_best = path;
            }
            continue;
        }

        /* Unparameterized path: candidate for both cheapest slots */
        if (total_best == NULL)
        {
            startup_best = path;
            total_best = path;
            continue;
        }

        /*
         * On a cost tie keep the better-sorted path.  Unrelated sort
         * orders leave the current choice in place.
         */
        cmp = compare_path_costs(startup_best, path, STARTUP_COST);
        if (cmp > 0 ||
            (cmp == 0 &&
             compare_pathkeys(startup_best->pathkeys,
                              path->pathkeys) == PATHKEYS_BETTER2))
        {
            startup_best = path;
        }

        cmp = compare_path_costs(total_best, path, TOTAL_COST);
        if (cmp > 0 ||
            (cmp == 0 &&
             compare_pathkeys(total_best->pathkeys,
                              path->pathkeys) == PATHKEYS_BETTER2))
        {
            total_best = path;
        }
    }

    if (total_best != NULL)
    {
        /* The cheapest unparameterized path goes first in the list */
        param_paths = lcons(total_best, param_paths);
    }
    else
    {
        /*
         * No unparameterized path at all: the best parameterized path
         * becomes cheapest_total_path, but never cheapest_startup_path.
         */
        total_best = param_best;
    }
    Assert(total_best != NULL);

    parent_rel->cheapest_parameterized_paths = param_paths;
    parent_rel->cheapest_unique_path = NULL;    /* computed only if needed */
    parent_rel->cheapest_total_path = total_best;
    parent_rel->cheapest_startup_path = startup_best;
}

 

【查询优化——生成计划】

生成可优化的MIN/MAX聚集计划
在生成计划时,规划器会首先处理一种比较特殊的查询:查询中含有MIN/MAX聚集函数,并且聚集函数使用的属性上建有索引或者属性恰好是 ORDER BY 子句中指定的属性。 在这种特殊情况下,可以直接从索引或者已经排序好的元组集中取到含有最大最小值的元组,从而避免了扫描全表带来的开销。规划器会先检查一个查询是否可以优化到不对全表扫描而直接读取元组,如果可以则生成可优化的MIN/MAX聚集计划,否则需要对全表扫描,生成普通计划。而负责生成可优化的MIN/MAX聚集计划的主函数是preprocess_minmax_aggregates.

preprocess_minmax_aggregates函数
preprocess_minmax_aggregates函数检查查询中是否含有可以通过索引扫描来优化的MIN/MAX聚集函数。如果有,并且所有的聚集都可以这样优化,该函数会创建一个MinMaxAggPath并把它加入到(UPPERREL_GROUP_AGG, NULL)这个上层关系中;否则该函数不生成任何路径。注意:在下面给出的源码中,该函数的返回类型是void,它生成的是路径而不是直接返回计划。 

 \src\backend\optimizer\plan\Planagg.c

/*
 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
 *
 * Check to see whether the query contains MIN/MAX aggregate functions that
 * might be optimizable via indexscans.  If it does, and all the aggregates
 * are potentially optimizable, then create a MinMaxAggPath and add it to
 * the (UPPERREL_GROUP_AGG, NULL) upperrel.
 *
 * This should be called by grouping_planner() just before it's ready to call
 * query_planner(), because we generate indexscan paths by cloning the
 * planner's state and invoking query_planner() on a modified version of
 * the query parsetree.  Thus, all preprocessing needed before query_planner()
 * must already be done.
 *
 * Note: we are passed the preprocessed targetlist separately, because it's
 * not necessarily equal to root->parse->targetList.
 */
void
preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)

函数build_minmax_path的主要流程步骤:
(1)判断聚集与索引属性是否匹配,并且确定了索引扫描方向。主要通过函数match_agg_to_index_col来判断是否匹配,如果聚集的排序操作符与索引中前向扫描的操作符类匹配则返回ForwardScanDirection,进行前向扫描;未匹配,则判断聚集的排序操作符与索引中后向扫描的操作符类是否匹配,如果匹配则返回BackwardScanDirection;如果都不匹配则返回NoMovementScanDirection,表明不进行扫描。
(2)提取约束信息即按照怎样的约束条件通过索引获取元组
(3) 创建索引访问路径。通过函数create_index_path来实现,具体实现可以看我前一篇的博客,分析了创建索引扫描路径
(4)进行代价评估,并选取所有索引扫描路径中代价最小的路径 

  \src\backend\optimizer\plan\Planagg.c

/*
 * build_minmax_path
 *        Given a MIN/MAX aggregate, try to build an indexscan Path it can be
 *        optimized with.
 *
 * If successful, stash the best path in *mminfo and return TRUE.
 * Otherwise, return FALSE.
 */
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
                  Oid eqop, Oid sortop, bool nulls_first)

生成普通计划

如果preprocess_minmax_aggregates函数没有生成可用的MinMaxAggPath(或者该路径没有被选为最优路径),则需要为普通路径生成计划。生成计划的入口函数是create_plan,这个函数为最优路径创建计划,根据路径节点的不同类型,分别调用不同的函数生成相应的计划。这些计划大致可以分为三类:扫描计划、连接计划和其他计划。

/*
 * create_plan
 *      Creates the access plan for a query by recursively processing the
 *      desired tree of pathnodes, starting at the node 'best_path'.  For
 *      every pathnode found, we create a corresponding plan node containing
 *      appropriate id, target list, and qualification information.
 *
 *      The tlists and quals in the plan tree are still in planner format,
 *      ie, Vars still correspond to the parser's numbering.  This will be
 *      fixed later by setrefs.c.
 *
 *      root is the planner state for the current query level; its
 *      curOuterRels / curOuterParams workspace fields are reset here and
 *      its plan_params list is cleared before returning.
 *
 *      best_path is the best access path
 *
 *      Returns a Plan tree.  Raises ERROR if any NestLoopParams are still
 *      unassigned once the whole path tree has been converted.
 */
Plan *
create_plan(PlannerInfo *root, Path *best_path)
{
    Plan       *plan;

    /* plan_params should not be in use in current query level */
    Assert(root->plan_params == NIL);

    /* Initialize this module's private workspace in PlannerInfo */
    root->curOuterRels = NULL;
    root->curOuterParams = NIL;
#ifdef XCP
    /*
     * XCP builds only: also clear curOuterRestrict, then hand the
     * distribution recorded in root and the distribution of best_path to
     * adjust_subplan_distribution() before any plan node is built.
     */
    root->curOuterRestrict = NULL;
    adjust_subplan_distribution(root, root->distribution,
                              best_path->distribution);
#endif

    /* Recursively process the path tree, demanding the correct tlist result */
    plan = create_plan_recurse(root, best_path, CP_EXACT_TLIST);

    /*
     * Make sure the topmost plan node's targetlist exposes the original
     * column names and other decorative info.  Targetlists generated within
     * the planner don't bother with that stuff, but we must have it on the
     * top-level tlist seen at execution time.  However, ModifyTable plan
     * nodes don't have a tlist matching the querytree targetlist.
     */
    if (!IsA(plan, ModifyTable))
        apply_tlist_labeling(plan->targetlist, root->processed_tlist);

    /*
     * Attach any initPlans created in this query level to the topmost plan
     * node.  (In principle the initplans could go in any plan node at or
     * above where they're referenced, but there seems no reason to put them
     * any lower than the topmost node for the query level.  Also, see
     * comments for SS_finalize_plan before you try to change this.)
     */
    SS_attach_initplans(root, plan);

    /* Check we successfully assigned all NestLoopParams to plan nodes */
    if (root->curOuterParams != NIL)
        elog(ERROR, "failed to assign all NestLoopParams to plan nodes");

    /*
     * Reset plan_params to ensure param IDs used for nestloop params are not
     * re-used later
     */
    root->plan_params = NIL;

    return plan;
}

   create_plan 调用 create_plan_recurse 函数,以递归的方式生成查询计划:

/*
 * create_plan_recurse
 *      Recursive guts of create_plan().
 *
 *      Dispatches on best_path->pathtype and calls the matching create_*_plan
 *      routine to build the Plan node for 'best_path'.
 *
 *      root is the planner state passed through to every creator.
 *      best_path is the path node to convert.
 *      flags is forwarded unchanged to the creators that accept it (scan,
 *      material, unique, sort, setop, lockrows and limit plans); the other
 *      creators are called without it.
 *
 *      Returns the Plan built for best_path.  An unknown pathtype raises
 *      ERROR.
 */
static Plan *
create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
{// #lizard forgives
    Plan       *plan;

    switch (best_path->pathtype)
    {
        /* All scan-type paths share one entry point, create_scan_plan() */
        case T_SeqScan:
        case T_SampleScan:
        case T_IndexScan:
        case T_IndexOnlyScan:
        case T_BitmapHeapScan:
        case T_TidScan:
        case T_SubqueryScan:
        case T_FunctionScan:
        case T_TableFuncScan:
        case T_ValuesScan:
        case T_CteScan:
        case T_WorkTableScan:
        case T_NamedTuplestoreScan:
        case T_ForeignScan:
        case T_CustomScan:
            plan = create_scan_plan(root, best_path, flags);
            break;
#ifdef XCP
        case T_RemoteSubplan:
            /*
             * remote_subplan_depth is raised only for the duration of the
             * create_remotescan_plan() call, so it reflects how many
             * RemoteSubplan nodes are currently being built around this one.
             */
            remote_subplan_depth++;
            plan = (Plan *) create_remotescan_plan(root,
                                                   (RemoteSubPath *) best_path);
            remote_subplan_depth--;
            break;
#endif
        case T_HashJoin:
        case T_MergeJoin:
        case T_NestLoop:
            plan = create_join_plan(root,
                                    (JoinPath *) best_path);
            break;
        case T_Append:
            plan = create_append_plan(root,
                                      (AppendPath *) best_path);
            break;
        case T_MergeAppend:
            plan = create_merge_append_plan(root,
                                            (MergeAppendPath *) best_path);
            break;
        case T_Result:
            /*
             * Several path node types use the T_Result pathtype, so pick the
             * creator by the path's own node tag.
             */
            if (IsA(best_path, ProjectionPath))
            {
                plan = create_projection_plan(root,
                                              (ProjectionPath *) best_path);
            }
            else if (IsA(best_path, MinMaxAggPath))
            {
                plan = (Plan *) create_minmaxagg_plan(root,
                                                      (MinMaxAggPath *) best_path);
            }
            else
            {
                Assert(IsA(best_path, ResultPath));
                plan = (Plan *) create_result_plan(root,
                                                   (ResultPath *) best_path);
            }
            break;
        case T_ProjectSet:
            plan = (Plan *) create_project_set_plan(root,
                                                    (ProjectSetPath *) best_path);
            break;
        case T_Material:
            plan = (Plan *) create_material_plan(root,
                                                 (MaterialPath *) best_path,
                                                 flags);
            break;
        case T_Unique:
            /* T_Unique covers both UpperUniquePath and UniquePath */
            if (IsA(best_path, UpperUniquePath))
            {
                plan = (Plan *) create_upper_unique_plan(root,
                                                         (UpperUniquePath *) best_path,
                                                         flags);
            }
            else
            {
                Assert(IsA(best_path, UniquePath));
                plan = create_unique_plan(root,
                                          (UniquePath *) best_path,
                                          flags);
            }
            break;
        case T_Gather:
            plan = (Plan *) create_gather_plan(root,
                                               (GatherPath *) best_path);
            break;
        case T_Sort:
            plan = (Plan *) create_sort_plan(root,
                                             (SortPath *) best_path,
                                             flags);
            break;
        case T_Group:
            plan = (Plan *) create_group_plan(root,
                                              (GroupPath *) best_path);
            break;
        case T_Agg:
            /* T_Agg covers both GroupingSetsPath and plain AggPath */
            if (IsA(best_path, GroupingSetsPath))
                plan = create_groupingsets_plan(root,
                                                (GroupingSetsPath *) best_path);
            else
            {
                Assert(IsA(best_path, AggPath));
                plan = (Plan *) create_agg_plan(root,
                                                (AggPath *) best_path);
            }
            break;
        case T_WindowAgg:
            plan = (Plan *) create_windowagg_plan(root,
                                                  (WindowAggPath *) best_path);
            break;
        case T_SetOp:
            plan = (Plan *) create_setop_plan(root,
                                              (SetOpPath *) best_path,
                                              flags);
            break;
        case T_RecursiveUnion:
            plan = (Plan *) create_recursiveunion_plan(root,
                                                       (RecursiveUnionPath *) best_path);
            break;
        case T_LockRows:
            plan = (Plan *) create_lockrows_plan(root,
                                                 (LockRowsPath *) best_path,
                                                 flags);
            break;
        case T_ModifyTable:
            plan = (Plan *) create_modifytable_plan(root,
                                                    (ModifyTablePath *) best_path);
            break;
        case T_Limit:
            /*
             * NOTE(review): the trailing constants 0 and 1 are extra
             * arguments of create_limit_plan(); their meaning is not visible
             * here -- confirm against its definition.
             */
            plan = (Plan *) create_limit_plan(root,
                                              (LimitPath *) best_path,
                                              flags, 0, 1);
            break;
        case T_GatherMerge:
            plan = (Plan *) create_gather_merge_plan(root,
                                                     (GatherMergePath *) best_path);
            break;
        default:
            elog(ERROR, "unrecognized node type: %d",
                 (int) best_path->pathtype);
            plan = NULL;        /* keep compiler quiet */
            break;
    }

    return plan;
}

 【生成分布式计划】

......

#ifdef XCP
        case T_RemoteSubplan:
            remote_subplan_depth++;
            plan = (Plan *) create_remotescan_plan(root,
                                                   (RemoteSubPath *) best_path);
            remote_subplan_depth--;
            break;
#endif

......

 create_remotescan_plan 调用 adjust_subplan_distribution函数,确认计划的分布信息和消费者接收信息是匹配的。

/*
 * adjust_subplan_distribution
 *     Make sure the distribution of the subplan is matching to the consumers.
 */
static void
adjust_subplan_distribution(PlannerInfo *root, Distribution *pathd,
                          Distribution *subd)

create_remotescan_plan 调用 make_remotesubplan 函数生成一个RemoteSubplan节点。它和PG原有的plan节点是一样的,只不过在执行的时候读取的是通过网络(而不是从内存或磁盘)得到的中间结果。在生成该计划节点的时候,需要计算源分布、结果分布和排序信息。
对于不同的操作join,scan,group by都有着不同的分布方式,不过得到的plan是一致的。 

/*
 * make_remotesubplan
 *     Create a RemoteSubplan node to execute subplan on remote nodes.
 *  lefttree - the subplan which we want to push down to remote node.
 *  resultDistribution - the distribution of the remote result. May be NULL -
 * results are coming to the invoking node
 *  execDistribution - determines how source data of the subplan are
 * distributed, where we should send the subplan and how combine results.
 *    pathkeys - the remote subplan is sorted according to these keys, executor
 *         should perform merge sort of incoming tuples
 */
RemoteSubplan *
make_remotesubplan(PlannerInfo *root,
                   Plan *lefttree,
                   Distribution *resultDistribution,
                   Distribution *execDistribution,
                   List *pathkeys)

{

......

            /*
              * calculation of parallel workers is from compute_parallel_worker
              */
            double outer_rows              = lefttree->lefttree ? lefttree->lefttree->plan_rows : lefttree->plan_rows;
            double inner_rows              = lefttree->righttree ? lefttree->righttree->plan_rows : 0;
            double rows                    = outer_rows > inner_rows ? 
                                             outer_rows : inner_rows;
            bool contain_nonparallel_hashjoin = contain_node_walker(lefttree, T_HashJoin, true);
            bool   need_parallel           = true;
            int parallel_workers           = 0;

            /* if contain nonparallel hashjoin, don't add gather plan */
            if (contain_nonparallel_hashjoin)
            {
                need_parallel = false;
            }

            /* only add gather to remote_subplan at top */
            if (need_parallel && distributionType == LOCATOR_TYPE_NONE)
            {
                if (remote_subplan_depth > 1)
                    need_parallel = false;
            }

            if (need_parallel)
            {
                /* make decision whether we indeed need parallel execution or not ? */
                switch(nodeTag(lefttree))
                {
                    case T_SeqScan:
                        if (rows >= min_parallel_rows_size * 3)
                        {
                            lefttree->parallel_aware = true;
                        }
                        break;
                    case T_HashJoin:
                        if (!lefttree->parallel_aware)
                        {
                            need_parallel = false;
                        }
                        break;
                    case T_Agg:
                        if (!lefttree->parallel_aware)
                        {
                            Agg *node = (Agg *)lefttree;

                            /* do not parallel if it's not safe */
                            if (node->aggsplit == AGGSPLIT_INITIAL_SERIAL
                                && lefttree->parallel_safe)
                            {
                                switch(node->aggstrategy)
                                {
                                    case AGG_PLAIN:
                                    case AGG_HASHED:
                                        if (IsA(lefttree->lefttree, SubqueryScan))
                                        {
                                            SubqueryScan *subscan = (SubqueryScan *)lefttree->lefttree;

                                            if (!subscan->subplan->parallel_aware)
                                            {
                                                need_parallel = false;
                                            }
                                        }
                                        else
                                        {
                                            if (!lefttree->lefttree->parallel_aware)
                                            {
                                                need_parallel = false;
                                            }
                                        }
                                        break;
                                    case AGG_SORTED:
                                        {
                                            Plan *lplan = NULL;
                                            
                                            if (IsA(lefttree->lefttree, Sort))
                                            {
                                                lplan = lefttree->lefttree->lefttree;
                                            }
                                            else
                                            {
                                                lplan = lefttree->lefttree;
                                            }

                                            if (IsA(lplan, SubqueryScan))
                                            {
                                                SubqueryScan *subscan = (SubqueryScan *)lplan;

                                                if (!subscan->subplan->parallel_aware)
                                                {
                                                    need_parallel = false;
                                                }
                                            }
                                            else
                                            {
                                                if (!lplan->parallel_aware)
                                                {
                                                    need_parallel = false;
                                                }
                                            }
                                        }
                                        break;
                                    default:
                                        need_parallel = false;
                                        break;
                                }
                            }
                            else
                            {
                                need_parallel = false;
                            }
                        }
                        break;
                    case T_Group:
                        if (!lefttree->parallel_aware)
                        {
                            need_parallel = false;
                        }
                        break;
                    case T_Limit:
                        {
                            Plan *left = lefttree->lefttree;
                            gather_parent = lefttree;

                            if (nodeTag(left) == T_Sort)
                            {
                                gather_parent = left;
                                left = left->lefttree;
                                need_sort = false;
                            }

                            if (!left->parallel_aware)
                            {
                                if (nodeTag(left) == T_Agg)
                                {
                                    gather_left = left;
                                    
                                    left = left->lefttree;

                                    if (IsA(left, SubqueryScan))
                                    {
                                        SubqueryScan *subscan = (SubqueryScan *)left;

                                        left = subscan->subplan;
                                    }

                                    if (!left->parallel_aware)
                                        need_parallel = false;
                                    else
                                    {
                                        rows = left->lefttree->plan_rows;
                                    }
                                }
                                else
                                    need_parallel = false;
                            }
                            else
                            {
                                if (nodeTag(left) == T_Agg)
                                {
                                    rows = left->lefttree->plan_rows;
                                }

                                gather_left = left;
                            }
                        }
                        break;
                    case T_Sort:
                        {
                            Plan *left = lefttree->lefttree;
                            gather_parent = lefttree;
                            need_sort = false;

                            if (!left->parallel_aware)
                            {
                                if (nodeTag(left) == T_Agg)
                                {
                                    gather_left = left;
                                    
                                    left = left->lefttree;

                                    if (IsA(left, SubqueryScan))
                                    {
                                       

       SubqueryScan *subscan = (SubqueryScan *)lplan;

                                                if (!subscan->subplan->parallel_aware)
                                                {
                                                    need_parallel = false;
                                                }
                                            }
                                            else
                                            {
                                                if (!lplan->parallel_aware)
                                                {
                                                    need_parallel = false;
                                                }
                                            }
                                        }
                                        break;
                                    default:
                                        need_parallel = false;
                                        break;
                                }
                            }
                            else
                            {
                                need_parallel = false;
                            }
                        }
                        break;
                    case T_Group:
                        if (!lefttree->parallel_aware)
                        {
                            need_parallel = false;
                        }
                        break;
                    case T_Limit:
                        {
                            Plan *left = lefttree->lefttree;
                            gather_parent = lefttree;

                            if (nodeTag(left) == T_Sort)
                            {
                                gather_parent = left;
                                left = left->lefttree;
                                need_sort = false;
                            }

                            if (!left->parallel_aware)
                            {
                                if (nodeTag(left) == T_Agg)
                                {
                                    gather_left = left;
                                    
                                    left = left->lefttree;

                                    if (IsA(left, SubqueryScan))
                                    {
                                        SubqueryScan *subscan = (SubqueryScan *)left;

                                        left = subscan->subplan;
                                    }

                                    if (!left->parallel_aware)
                                        need_parallel = false;
                                    else
                                    {
                                        rows = left->lefttree->plan_rows;
                                    }
                                }
                                else
                                    need_parallel = false;
                            }
                            else
                            {
                                if (nodeTag(left) == T_Agg)
                                {
                                    rows = left->lefttree->plan_rows;
                                }

                                gather_left = left;
                            }
                        }
                        break;
                    case T_Sort:
                        {
                            Plan *left = lefttree->lefttree;
                            gather_parent = lefttree;
                            need_sort = false;

                            if (!left->parallel_aware)
                            {
                                if (nodeTag(left) == T_Agg)
                                {
                                    gather_left = left;
                                    
                                    left = left->lefttree;

                                    if (IsA(left, SubqueryScan))
                                    {
                                        SubqueryScan *subscan = (SubqueryScan *)left;

                                        left = subscan->subplan;
                                    }

                                    if (!left->parallel_aware)
                                        need_parallel = false;
                                    else
                                    {
                                        rows = left->lefttree->plan_rows;
                                    }
                                }
                                else
                                    need_parallel = false;
                            }
                            else
                            {
                                if (nodeTag(left) == T_Agg)
                                {
                                    rows = left->lefttree->plan_rows;
                                }

                                gather_left = left;
                            }
                        }
                        break;
                    default:
                        if (!lefttree->parallel_aware)
                        {
                            need_parallel = false;
                        }
                        break;
                }
            }

......

}

[create_plan_recurse--生成扫描计划create_scan_plan]

/*
 * create_plan_recurse
 *      Recursive guts of create_plan().
 */
static Plan *
create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
{// #lizard forgives
    Plan       *plan;

    switch (best_path->pathtype)
    {
        case T_SeqScan:
        case T_SampleScan:
        case T_IndexScan:
        case T_IndexOnlyScan:
        case T_BitmapHeapScan:
        case T_TidScan:
        case T_SubqueryScan:
        case T_FunctionScan:
        case T_TableFuncScan:
        case T_ValuesScan:
        case T_CteScan:
        case T_WorkTableScan:
        case T_NamedTuplestoreScan:
        case T_ForeignScan:
        case T_CustomScan:
            plan = create_scan_plan(root, best_path, flags);
            break;

........

}

/*
 * create_scan_plan
 *     Create a scan plan for the parent relation of 'best_path'.
 */
static Plan *
create_scan_plan(PlannerInfo *root, Path *best_path, int flags)

{

......

   switch (best_path->pathtype)
    {
        case T_SeqScan:
            plan = (Plan *) create_seqscan_plan(root,
                                                best_path,
                                                tlist,
                                                scan_clauses);
            break;

        case T_SampleScan:
            plan = (Plan *) create_samplescan_plan(root,
                                                   best_path,
                                                   tlist,
                                                   scan_clauses);
            break;

        case T_IndexScan:
            plan = (Plan *) create_indexscan_plan(root,
                                                  (IndexPath *) best_path,
                                                  tlist,
                                                  scan_clauses,
                                                  false);
            break;

        case T_IndexOnlyScan:
            plan = (Plan *) create_indexscan_plan(root,
                                                  (IndexPath *) best_path,
                                                  tlist,
                                                  scan_clauses,
                                                  true);
            break;

        case T_BitmapHeapScan:
            plan = (Plan *) create_bitmap_scan_plan(root,
                                                    (BitmapHeapPath *) best_path,
                                                    tlist,
                                                    scan_clauses);
            break;

        case T_TidScan:
            plan = (Plan *) create_tidscan_plan(root,
                                                (TidPath *) best_path,
                                                tlist,
                                                scan_clauses);
            break;

        case T_SubqueryScan:
            plan = (Plan *) create_subqueryscan_plan(root,
                                                     (SubqueryScanPath *) best_path,
                                                     tlist,
                                                     scan_clauses);
            break;

        case T_FunctionScan:
            plan = (Plan *) create_functionscan_plan(root,
                                                     best_path,
                                                     tlist,
                                                     scan_clauses);
            break;

        case T_TableFuncScan:
            plan = (Plan *) create_tablefuncscan_plan(root,
                                                      best_path,
                                                      tlist,
                                                      scan_clauses);
            break;

        case T_ValuesScan:
            plan = (Plan *) create_valuesscan_plan(root,
                                                   best_path,
                                                   tlist,
                                                   scan_clauses);
            break;

        case T_CteScan:
            plan = (Plan *) create_ctescan_plan(root,
                                                best_path,
                                                tlist,
                                                scan_clauses);
            break;

        case T_NamedTuplestoreScan:
            plan = (Plan *) create_namedtuplestorescan_plan(root,
                                                            best_path,
                                                            tlist,
                                                            scan_clauses);
            break;

        case T_WorkTableScan:
            plan = (Plan *) create_worktablescan_plan(root,
                                                      best_path,
                                                      tlist,
                                                      scan_clauses);
            break;

        case T_ForeignScan:
            plan = (Plan *) create_foreignscan_plan(root,
                                                    (ForeignPath *) best_path,
                                                    tlist,
                                                    scan_clauses);
            break;

        case T_CustomScan:
            plan = (Plan *) create_customscan_plan(root,
                                                   (CustomPath *) best_path,
                                                   tlist,
                                                   scan_clauses);
            break;

       default:
            elog(ERROR, "unrecognized node type: %d",
                 (int) best_path->pathtype);
            plan = NULL;        /* keep compiler quiet */
            break;
    }

   ......

#ifdef __TBASE__
    if((rel->reloptkind == RELOPT_BASEREL || rel->reloptkind == RELOPT_OTHER_MEMBER_REL) && rel->rtekind == RTE_RELATION && rel->intervalparent && !rel->isdefault)
    {        
        /* create append plan with a list of scan.
          * If partition parent relation is target realtion of UPDATE or DELETE statement,
          * then compute pruning result and not need to expand.
          * in case of this, target relation will be expend in ModifyTable operator.
          */
        if(!bms_is_empty(rel->childs) 
            && (root->parse->resultRelation != rel->relid || root->parse->commandType == CMD_INSERT))
        {            
            List *scanlist;
            Scan *child;
            Scan **child_array = NULL;
            int arrlen;
            int arridx;
            int idx;
            Bitmapset *parts;

            arrlen = bms_num_members(rel->childs);
            parts = bms_copy(rel->childs);
            child_array = (Scan **)palloc0(arrlen*sizeof(Scan*));
            scanlist = NULL;
            child = NULL;    
            arridx = 0;
            while((idx = bms_first_member(parts)) != -1)
            {
                child = (Scan *)copyObject((void*)plan);
                child->ispartchild = true;
                child->childidx = idx;
                switch(nodeTag(child))
                {
                    case T_SeqScan:
                    case T_SampleScan:
                        break;
                    case T_IndexScan:
                        {
                            IndexScan *idxscan_child;
                            idxscan_child = (IndexScan *)child;                 
                            idxscan_child->indexid = RelationGetPartitionIndex(relation,idxscan_child->indexid,idx);
                        }
                        break;
                    case T_IndexOnlyScan:
                        {
                            IndexOnlyScan *idxonlyscan_child;
                            idxonlyscan_child = (IndexOnlyScan *)child;                 
                            idxonlyscan_child->indexid = RelationGetPartitionIndex(relation,idxonlyscan_child->indexid,idx);
                        }
                        break;
                    case T_BitmapHeapScan:
                        {
                            //BitmapHeapScan *bscan;
                            //bscan = (BitmapHeapScan *)child;
                            replace_partidx_bitmapheapscan(relation,(Node*)child->plan.lefttree,idx);
                        }
                        break;
                    case T_TidScan:
                        elog(ERROR,"cannot use tidscan on partitioned table.");
                        break;
                    default:
                        elog(ERROR,"create scan plan: unsupported scan method[%d] on partitioned table.", nodeTag(child));
                        break;
                }            

                child_array[arridx++] = child;

                //scanlist = lappend(scanlist,child);
            }
......

#endif

扫描计划

【顺序扫描(全表扫描)】

函数create_seqscan_plan主要用于生成顺序扫描计划,该函数最后会返回一个指向SeqScan结构的指针。

create_seqscan_plan函数 

 \src\backend\optimizer\plan\createplan.c

/*
 * create_seqscan_plan
 *     Returns a seqscan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static SeqScan *
create_seqscan_plan(PlannerInfo *root, Path *best_path,
                    List *tlist, List *scan_clauses)

 【抽样扫描】create_samplescan_plan

/*
 * create_samplescan_plan
 *     Returns a samplescan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static SampleScan *
create_samplescan_plan(PlannerInfo *root, Path *best_path,
                       List *tlist, List *scan_clauses)

 【索引扫描】

\src\backend\optimizer\plan\createplan.c

 create_indexscan_plan

/*
 * create_indexscan_plan
 *      Returns an indexscan plan for the base relation scanned by 'best_path'
 *      with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 *
 * We use this for both plain IndexScans and IndexOnlyScans, because the
 * qual preprocessing work is the same for both.  Note that the caller tells
 * us which to build --- we don't look at best_path->path.pathtype, because
 * create_bitmap_subplan needs to be able to override the prior decision.
 */
static Scan *
create_indexscan_plan(PlannerInfo *root,
                      IndexPath *best_path,
                      List *tlist,
                      List *scan_clauses,
                      bool indexonly)

 【位图扫描】

/*
 * create_bitmap_scan_plan
 *      Returns a bitmap scan plan for the base relation scanned by 'best_path'
 *      with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static BitmapHeapScan *
create_bitmap_scan_plan(PlannerInfo *root,
                        BitmapHeapPath *best_path,
                        List *tlist,
                        List *scan_clauses)

 【TID扫描】create_tidscan_plan

/*
 * create_tidscan_plan
 *     Returns a tidscan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static TidScan *
create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
                    List *tlist, List *scan_clauses)

【子查询扫描】    create_subqueryscan_plan

/*
 * create_subqueryscan_plan
 *     Returns a subqueryscan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static SubqueryScan *
create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path,
                         List *tlist, List *scan_clauses)

【函数扫描】create_functionscan_plan 

/*
 * create_functionscan_plan
 *     Returns a functionscan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static FunctionScan *
create_functionscan_plan(PlannerInfo *root, Path *best_path,
                         List *tlist, List *scan_clauses)

 【表函数扫描】create_tablefuncscan_plan

/*
 * create_tablefuncscan_plan
 *     Returns a tablefuncscan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static TableFuncScan *
create_tablefuncscan_plan(PlannerInfo *root, Path *best_path,
                          List *tlist, List *scan_clauses)

【数值扫描】create_valuesscan_plan

/*
 * create_valuesscan_plan
 *     Returns a valuesscan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static ValuesScan *
create_valuesscan_plan(PlannerInfo *root, Path *best_path,
                       List *tlist, List *scan_clauses)

 【CTE扫描】create_ctescan_plan

/*
 * create_ctescan_plan
 *     Returns a ctescan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static CteScan *
create_ctescan_plan(PlannerInfo *root, Path *best_path,
                    List *tlist, List *scan_clauses)

【命名元组集合扫描】create_namedtuplestorescan_plan

 /*
 * create_namedtuplestorescan_plan
 *     Returns a tuplestorescan plan for the base relation scanned by
 *    'best_path' with restriction clauses 'scan_clauses' and targetlist
 *    'tlist'.
 */
static NamedTuplestoreScan *
create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path,
                                List *tlist, List *scan_clauses)

【工作表扫描】create_worktablescan_plan

 /*
 * create_worktablescan_plan
 *     Returns a worktablescan plan for the base relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static WorkTableScan *
create_worktablescan_plan(PlannerInfo *root, Path *best_path,
                          List *tlist, List *scan_clauses)

【外部表扫描】create_foreignscan_plan 

/*
 * create_foreignscan_plan
 *     Returns a foreignscan plan for the relation scanned by 'best_path'
 *     with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 */
static ForeignScan *
create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
                        List *tlist, List *scan_clauses)

 【定制扫描】create_customscan_plan

/*
 * create_customscan_plan
 *
 * Transform a CustomPath into a Plan.
 */
static CustomScan *
create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
                       List *tlist, List *scan_clauses)

连接计划——嵌套循环连接

嵌套循环连接由函数create_nestloop_plan生成,该函数具有和前面分析过的create_seqscan_plan相同的两个参数root和best_path。create_nestloop_plan函数将会创建一个嵌套循环连接计划节点,并将子计划树(outer_plan和inner_plan)连接到该节点,最终形成嵌套循环连接计划并返回。

create_nestloop_plan     \src\backend\optimizer\plan\createplan.c

static NestLoop *
create_nestloop_plan(PlannerInfo *root,
                     NestPath *best_path)

连接计划—哈希连接

static HashJoin *
create_hashjoin_plan(PlannerInfo *root,
                     HashPath *best_path)

连接计划—归并连接

static MergeJoin *
create_mergejoin_plan(PlannerInfo *root,
                      MergePath *best_path)

 生成路径仅仅考虑基本查询语句信息,并没有保留诸如GROUP BY,ORDER BY等信息。grouping_planner函数调用create_plan生成基本计划树后,则会依据查询树相关约束信息在前面生成的普通计划之上添加相应的计划节点生成完整计划。

grouping_planner函数调用query_planner对SPJ操作进行优化,例如进行谓词下推、计算路径代价、产生最优路径等,这部分主要在query_planner函数中完成。
随后在SPJ操作的最优路径的基础上,叠加Non-SPJ操作路径,如生成基于GROUP BY、LIMIT、ORDER BY的执行路径。

 /*--------------------
 * grouping_planner
 *      Perform planning steps related to grouping, aggregation, etc.
 *
 * This function adds all required top-level processing to the scan/join
 * Path(s) produced by query_planner.
 *
 * If inheritance_update is true, we're being called from inheritance_planner
 * and should not include a ModifyTable step in the resulting Path(s).
 * (inheritance_planner will create a single ModifyTable node covering all the
 * target tables.)
 *
 * tuple_fraction is the fraction of tuples we expect will be retrieved.
 * tuple_fraction is interpreted as follows:
 *      0: expect all tuples to be retrieved (normal case)
 *      0 < tuple_fraction < 1: expect the given fraction of tuples available
 *        from the plan to be retrieved
 *      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
 *        expected to be retrieved (ie, a LIMIT specification)
 *
 * Returns nothing; the useful output is in the Paths we attach to the
 * (UPPERREL_FINAL, NULL) upperrel in *root.  In addition,
 * root->processed_tlist contains the final processed targetlist.
 *
 * Note that we have not done set_cheapest() on the final rel; it's convenient
 * to leave this to the caller.
 *--------------------
 */
static void
grouping_planner(PlannerInfo *root, bool inheritance_update,
                 double tuple_fraction)

{

...

 /* Make tuple_fraction accessible to lower-level routines */
    root->tuple_fraction = tuple_fraction;

    if (parse->setOperations)
    {

/*
         * If there's a top-level ORDER BY, assume we have to fetch all the
         * tuples.  This might be too simplistic given all the hackery below
         * to possibly avoid the sort; but the odds of accurate estimates here
         * are pretty low anyway.  XXX try to get rid of this in favor of
         * letting plan_set_operations generate both fast-start and
         * cheapest-total paths.
         */
        if (parse->sortClause)
            root->tuple_fraction = 0.0;

        /*
         * Construct Paths for set operations.  The results will not need any
         * work except perhaps a top-level sort and/or LIMIT.  Note that any
         * special work for recursive unions is the responsibility of
         * plan_set_operations.
         */
        current_rel = plan_set_operations(root);

        /*
         * We should not need to call preprocess_targetlist, since we must be
         * in a SELECT query node.  Instead, use the targetlist returned by
         * plan_set_operations (since this tells whether it returned any
         * resjunk columns!), and transfer any sort key information from the
         * original tlist.
         */
        Assert(parse->commandType == CMD_SELECT);

        tlist = root->processed_tlist;    /* from plan_set_operations */

        /* for safety, copy processed_tlist instead of modifying in-place */
        tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);

        /* Save aside the final decorated tlist */
        root->processed_tlist = tlist;

        /* Also extract the PathTarget form of the setop result tlist */
        final_target = current_rel->cheapest_total_path->pathtarget;

        /* And check whether it's parallel safe */
        final_target_parallel_safe =
            is_parallel_safe(root, (Node *) final_target->exprs);

        /* The setop result tlist couldn't contain any SRFs */
        Assert(!parse->hasTargetSRFs);
        final_targets = final_targets_contain_srfs = NIL;

        /*
         * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
         * checked already, but let's make sure).
         */
        if (parse->rowMarks)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
            /*------
              translator: %s is a SQL row locking clause such as FOR UPDATE */
                     errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
                            LCS_asString(((RowMarkClause *)
                                          linitial(parse->rowMarks))->strength))));

        /*
         * Calculate pathkeys that represent result ordering requirements
         */

Assert(parse->distinctClause == NIL);
        root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
                                                            parse->sortClause,
                                                            tlist);
    }
    else
    {

     ...

     

/* Preprocess grouping sets and GROUP BY clause, if any */
        if (parse->groupingSets)
        {
            gset_data = preprocess_grouping_sets(root);
        }
        else
        {
            /* Preprocess regular GROUP BY clause, if any */
            if (parse->groupClause)
                parse->groupClause = preprocess_groupclause(root, NIL);
        }

        /* Preprocess targetlist */
        tlist = preprocess_targetlist(root, tlist);

        if (parse->onConflict)
            parse->onConflict->onConflictSet =
                preprocess_onconflict_targetlist(parse->onConflict->onConflictSet,
                                                 parse->resultRelation,
                                                 parse->rtable);

        /*
         * We are now done hacking up the query's targetlist.  Most of the
         * remaining planning work will be done with the PathTarget
         * representation of tlists, but save aside the full representation so
         * that we can transfer its decoration (resnames etc) to the topmost
         * tlist of the finished Plan.
         */
        root->processed_tlist = tlist;

        /*
         * Collect statistics about aggregates for estimating costs, and mark
         * all the aggregates with resolved aggtranstypes.  We must do this
         * before slicing and dicing the tlist into various pathtargets, else
         * some copies of the Aggref nodes might escape being marked with the
         * correct transtypes.
         *
         * Note: currently, we do not detect duplicate aggregates here.  This
         * may result in somewhat-overestimated cost, which is fine for our
         * purposes since all Paths will get charged the same.  But at some
         * point we might wish to do that detection in the planner, rather
         * than during executor startup.
         */
        MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
        if (parse->hasAggs)
        {
            get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
                                 &agg_costs);
            get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
                                 &agg_costs);
        }

        /*
         * Locate any window functions in the tlist.  (We don't need to look
         * anywhere else, since expressions used in ORDER BY will be in there
         * too.)  Note that they could all have been eliminated by constant
         * folding, in which case we don't need to do any more work.
         */
        if (parse->hasWindowFuncs)
        {
            wflists = find_window_functions((Node *) tlist,
                                            list_length(parse->windowClause));
            if (wflists->numWindowFuncs > 0)
                activeWindows = select_active_windows(root, wflists);
            else
                parse->hasWindowFuncs = false;
        }

        /*
         * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
         * adding logic between here and the query_planner() call.  Anything
         * that is needed in MIN/MAX-optimizable cases will have to be
         * duplicated in planagg.c.
         */
        if (parse->hasAggs)
            preprocess_minmax_aggregates(root, tlist);

        /*
         * Figure out whether there's a hard limit on the number of rows that
         * query_planner's result subplan needs to return.  Even if we know a
         * hard limit overall, it doesn't apply if the query has any
         * grouping/aggregation operations, or SRFs in the tlist.
         */
        if (parse->groupClause ||
            parse->groupingSets ||
            parse->distinctClause ||
            parse->hasAggs ||
            parse->hasWindowFuncs ||
            parse->hasTargetSRFs ||
            root->hasHavingQual)
            root->limit_tuples = -1.0;
        else
            root->limit_tuples = limit_tuples;

        /* Set up data needed by standard_qp_callback */
        qp_extra.tlist = tlist;
        qp_extra.activeWindows = activeWindows;
        qp_extra.groupClause = (gset_data
                                ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
                                : parse->groupClause);

        /*
         * Generate the best unsorted and presorted paths for the scan/join
         * portion of this Query, ie the processing represented by the
         * FROM/WHERE clauses.  (Note there may not be any presorted paths.)
         * We also generate (in standard_qp_callback) pathkey representations
         * of the query's sort clause, distinct clause, etc.
         */
        current_rel = query_planner(root, tlist,
                                    standard_qp_callback, &qp_extra);

        /*
         * Convert the query's result tlist into PathTarget format.
         *
         * Note: it's desirable to not do this till after query_planner(),
         * because the target width estimates can use per-Var width numbers
         * that were obtained within query_planner().
         */
        final_target = create_pathtarget(root, tlist);
        final_target_parallel_safe =
            is_parallel_safe(root, (Node *) final_target->exprs);

        /*
         * If ORDER BY was given, consider whether we should use a post-sort
         * projection, and compute the adjusted target for preceding steps if
         * so.
         */
        if (parse->sortClause)
        {
            sort_input_target = make_sort_input_target(root,
                                                       final_target,
                                                       &have_postponed_srfs);
            sort_input_target_parallel_safe =
                is_parallel_safe(root, (Node *) sort_input_target->exprs);
        }
        else
        {
            sort_input_target = final_target;
             

            sort_input_target_parallel_safe = final_target_parallel_safe;
        }

        /*
         * If we have window functions to deal with, the output from any
         * grouping step needs to be what the window functions want;
         * otherwise, it should be sort_input_target.
         */
        if (activeWindows)
        {
            grouping_target = make_window_input_target(root,
                                                       final_target,
                                                       activeWindows);
            grouping_target_parallel_safe =
                is_parallel_safe(root, (Node *) grouping_target->exprs);
        }
        else
        {
            grouping_target = sort_input_target;
            grouping_target_parallel_safe = sort_input_target_parallel_safe;
        }

        /*
         * If we have grouping or aggregation to do, the topmost scan/join
         * plan node must emit what the grouping step wants; otherwise, it
         * should emit grouping_target.
         */
        have_grouping = (parse->groupClause || parse->groupingSets ||
                         parse->hasAggs || root->hasHavingQual);
        if (have_grouping)
        {
            scanjoin_target = make_group_input_target(root, final_target);
            scanjoin_target_parallel_safe =
                is_parallel_safe(root, (Node *) grouping_target->exprs);
        }
        else
        {
            scanjoin_target = grouping_target;
            scanjoin_target_parallel_safe = grouping_target_parallel_safe;
        }

        /*
         * If there are any SRFs in the targetlist, we must separate each of
         * these PathTargets into SRF-computing and SRF-free targets.  Replace
         * each of the named targets with a SRF-free version, and remember the
         * list of additional projection steps we need to add afterwards.
         */
        if (parse->hasTargetSRFs)
        {
            /* final_target doesn't recompute any SRFs in sort_input_target */
            split_pathtarget_at_srfs(root, final_target, sort_input_target,
                                     &final_targets,
                                     &final_targets_contain_srfs);
            final_target = (PathTarget *) linitial(final_targets);
            Assert(!linitial_int(final_targets_contain_srfs));
            /* likewise for sort_input_target vs. grouping_target */
            split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
                                     &sort_input_targets,
                                     &sort_input_targets_contain_srfs);
            sort_input_target = (PathTarget *) linitial(sort_input_targets);
            Assert(!linitial_int(sort_input_targets_contain_srfs));
            /* likewise for grouping_target vs. scanjoin_target */
            split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
                                     &grouping_targets,
                                     &grouping_targets_contain_srfs);
            grouping_target = (PathTarget *) linitial(grouping_targets);
            Assert(!linitial_int(grouping_targets_contain_srfs));
            /* scanjoin_target will not have any SRFs precomputed for it */
            split_pathtarget_at_srfs(root, scanjoin_target, NULL,
                                     &scanjoin_targets,
                                     &scanjoin_targets_contain_srfs);
            scanjoin_target = (PathTarget *) linitial(scanjoin_targets);
            Assert(!linitial_int(scanjoin_targets_contain_srfs));
        }
        else
        {
            /* initialize lists, just to keep compiler quiet */
            final_targets = final_targets_contain_srfs = NIL;
            sort_input_targets = sort_input_targets_contain_srfs = NIL;
            grouping_targets = grouping_targets_contain_srfs = NIL;
            scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
        }

        /*
         * Generate Gather or Gather Merge paths for the topmost scan/join
         * relation.  Once that's done, we must re-determine which paths are
         * cheapest.  (The previously-cheapest path might even have been
         * pfree'd!)
         */
        generate_gather_paths(root, current_rel);
        set_cheapest(current_rel);

        /*
         * Forcibly apply SRF-free scan/join target to all the Paths for the
         * scan/join rel.
         *
         * In principle we should re-run set_cheapest() here to identify the
         * cheapest path, but it seems unlikely that adding the same tlist
         * eval costs to all the paths would change that, so we don't bother.
         * Instead, just assume that the cheapest-startup and cheapest-total
         * paths remain so.  (There should be no parameterized paths anymore,
         * so we needn't worry about updating cheapest_parameterized_paths.)
         */
        foreach(lc, current_rel->pathlist)
        {
            Path       *subpath = (Path *) lfirst(lc);
            Path       *path;

            Assert(subpath->param_info == NULL);
            path = apply_projection_to_path(root, current_rel,
                                            subpath, scanjoin_target);
            /* If we had to add a Result, path is different from subpath */
            if (path != subpath)
            {
                lfirst(lc) = path;
                if (subpath == current_rel->cheapest_startup_path)
                    current_rel->cheapest_startup_path = path;
                if (subpath == current_rel->cheapest_total_path)
                    current_rel->cheapest_total_path = path;
            }
        }

        /*
         * Upper planning steps which make use of the top scan/join rel's
         * partial pathlist will expect partial paths for that rel to produce
         * the same output as complete paths ... and we just changed the
         * output for the complete paths, so we'll need to do the same thing
         * for partial paths.  But only parallel-safe expressions can be
         * computed by partial paths.
         */
        if (current_rel->partial_pathlist && scanjoin_target_parallel_safe)
        {
            /* Apply the scan/join target to each partial path */
            foreach(lc, current_rel->partial_pathlist)
            {
                Path       *subpath = (Path *) lfirst(lc);
                Path       *newpath;

                /* Shouldn't have any parameterized paths anymore */
                Assert(subpath->param_info == NULL);

                /*
                 * Don't use apply_projection_to_path() here, because there
                 * could be other pointers to these paths, and therefore we
                 * mustn't modify them in place.
                 */
                newpath = (Path *) create_projection_path(root,
                                                          current_rel,
                                                          subpath,
                                                          scanjoin_target);
                lfirst(lc) = newpath;
            }
        }
        else
        {
            /*
             * In the unfortunate event that scanjoin_target is not
             * parallel-safe, we can't apply it to the partial paths; in that
             * case, we'll need to forget about the partial paths, which
             * aren't valid input for upper planning steps.
             */
            current_rel->partial_pathlist = NIL;
        }

        /* Now fix things up if scan/join target contains SRFs */
        if (parse->hasTargetSRFs)
            adjust_paths_for_srfs(root, current_rel,
                                  scanjoin_targets,
                                  scanjoin_targets_contain_srfs);

        /*
         * Save the various upper-rel PathTargets we just computed into
         * root->upper_targets[].  The core code doesn't use this, but it
         * provides a convenient place for extensions to get at the info.  For
         * consistency, we save all the intermediate targets, even though some
         * of the corresponding upperrels might not be needed for this query.
         */
        root->upper_targets[UPPERREL_FINAL] = final_target;
        root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
        root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;

        /*
         * If we have grouping and/or aggregation, consider ways to implement
         * that.  We build a new upperrel representing the output of this
         * phase.
         */
        if (have_grouping)
        {
            current_rel = create_grouping_paths(root,
                                                current_rel,
                                                grouping_target,
                                                grouping_target_parallel_safe,
                                                &agg_costs,
                                                gset_data);
            /* Fix things up if grouping_target contains SRFs */
            if (parse->hasTargetSRFs)
                adjust_paths_for_srfs(root, current_rel,
                                      grouping_targets,
                                      grouping_targets_contain_srfs);
        }

        /*
         * If we have window functions, consider ways to implement those.  We
         * build a new upperrel representing the output of this phase.
         */
        if (activeWindows)
        {
            current_rel = create_window_paths(root,
                                              current_rel,
                                              grouping_target,
                                              sort_input_target,
                                              sort_input_target_parallel_safe,
                                              tlist,
                                              wflists,
                                              activeWindows);
            /* Fix things up if sort_input_target contains SRFs */
            if (parse->hasTargetSRFs)
                adjust_paths_for_srfs(root, current_rel,
                                      sort_input_targets,
                                      sort_input_targets_contain_srfs);
        }

        /*
         * If there is a DISTINCT clause, consider ways to implement that. We
         * build a new upperrel representing the output of this phase.
         */
        if (parse->distinctClause)
        {
            current_rel = create_distinct_paths(root,
                                                current_rel);
        }
    }                            /* end of if (setOperations) */

    /*
     * If ORDER BY was given, consider ways to implement that, and generate a
     * new upperrel containing only paths that emit the correct ordering and
     * project the correct final_target.  We can apply the original
     * limit_tuples limit in sort costing here, but only if there are no
     * postponed SRFs.
     */
    if (parse->sortClause)
    {
        current_rel = create_ordered_paths(root,
                                           current_rel,
                                           final_target,
                                           final_target_parallel_safe,
                                           have_postponed_srfs ? -1.0 :
                                           limit_tuples);

       /* Fix things up if final_target contains SRFs */
        if (parse->hasTargetSRFs)
            adjust_paths_for_srfs(root, current_rel,
                                  final_targets,
                                  final_targets_contain_srfs);
    }

    /*
     * Now we are prepared to build the final-output upperrel.
     */
    final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);

    /*
     * If the input rel is marked consider_parallel and there's nothing that's
     * not parallel-safe in the LIMIT clause, then the final_rel can be marked
     * consider_parallel as well.  Note that if the query has rowMarks or is
     * not a SELECT, consider_parallel will be false for every relation in the
     * query.
     */
    if (current_rel->consider_parallel &&
        is_parallel_safe(root, parse->limitOffset) &&
        is_parallel_safe(root, parse->limitCount))
        final_rel->consider_parallel = true;

    /*
     * If the current_rel belongs to a single FDW, so does the final_rel.
     */
    final_rel->serverid = current_rel->serverid;
    final_rel->userid = current_rel->userid;
    final_rel->useridiscurrent = current_rel->useridiscurrent;
    final_rel->fdwroutine = current_rel->fdwroutine;

    /*
     * Generate paths for the final_rel.  Insert all surviving paths, with
     * LockRows, Limit, and/or ModifyTable steps added if needed.
     */
    foreach(lc, current_rel->pathlist)
    {
        Path       *path = (Path *) lfirst(lc);

        /*
         * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
         * (Note: we intentionally test parse->rowMarks not root->rowMarks
         * here.  If there are only non-locking rowmarks, they should be
         * handled by the ModifyTable node instead.  However, root->rowMarks
         * is what goes into the LockRows node.)
         */
        if (parse->rowMarks)
        {
            path = (Path *) create_lockrows_path(root, final_rel, path,
                                                 root->rowMarks,
                                                 SS_assign_special_param(root));
        }

        /*
         * If there is a LIMIT/OFFSET clause, add the LIMIT node.
         */
        if (limit_needed(parse))
        {
            /* If needed, add a LimitPath on top of a RemoteSubplan. */
            if (path->distribution)
            {
                /*
                 * Try to push down LIMIT clause to the remote node, in order
                 * to limit the number of rows that get shipped over network.
                 * This can be done even if there is an ORDER BY clause, as
                 * long as we fetch at least (limit + offset) rows from all the
                 * nodes and then do a local sort and apply the original limit.
                 *
                 * We can only push down the LIMIT clause when it's a constant.
                 * Similarly, if the OFFSET is specified, then it must be constant
                 * too.
                 *
                 * Simple expressions get folded into constants by the time we come
                 * here. So this works well in case of constant expressions such as
                 *
                 *     SELECT .. LIMIT (1024 * 1024);
                 */
                if (parse->limitCount && IsA(parse->limitCount, Const) &&
                    ((parse->limitOffset == NULL) || IsA(parse->limitOffset, Const)))
                {
                    Node *limitCount = (Node *) makeConst(INT8OID, -1,
                                                   InvalidOid,
                                                   sizeof(int64),
                                       Int64GetDatum(offset_est + count_est),
                                                   false, FLOAT8PASSBYVAL);

               

                    path = (Path *) create_limit_path(root, final_rel, path,
                                              NULL,
                                              limitCount, /* LIMIT + OFFSET */
                                              0, offset_est + count_est);
                }

                path = create_remotesubplan_path(root, path, NULL);
            }

            path = (Path *) create_limit_path(root, final_rel, path,
                                              parse->limitOffset,
                                              parse->limitCount,
                                              offset_est, count_est);
        }

        /*
         * If this is an INSERT/UPDATE/DELETE, and we're not being called from
         * inheritance_planner, add the ModifyTable node.
         */
        if (parse->commandType != CMD_SELECT && !inheritance_update)
        {
            List       *withCheckOptionLists;
            List       *returningLists;
            List       *rowMarks;

            /*
             * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
             * needed.
             */
            if (parse->withCheckOptions)
                withCheckOptionLists = list_make1(parse->withCheckOptions);
            else
                withCheckOptionLists = NIL;

            if (parse->returningList)
                returningLists = list_make1(parse->returningList);
            else
                returningLists = NIL;

            /*
             * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
             * will have dealt with fetching non-locked marked rows, else we
             * need to have ModifyTable do that.
             */
            if (parse->rowMarks)
                rowMarks = NIL;
            else
                rowMarks = root->rowMarks;

            /*
             * Adjust path by injecting a remote subplan, if appropriate, so
             * that the ModifyTablePath gets properly distributed data.
             */
            path = adjust_path_distribution(root, parse, path);

#ifdef __TBASE__
            /*
             * unshippable triggers found on target relation, we have to do DML
             * on coordinator.
             */
            if (parse->hasUnshippableTriggers)
            {
                if (path->distribution)
                {
                    path = adjust_modifytable_subpath(root, parse, path);
                }
            }
#endif

            path = (Path *)
                create_modifytable_path(root, final_rel,
                                        parse->commandType,
                                        parse->canSetTag,
                                        parse->resultRelation,
                                        NIL,
                                        false,

                                        list_make1_int(parse->resultRelation),
                                        list_make1(path),
                                        list_make1(root),
                                        withCheckOptionLists,
                                        returningLists,
                                        rowMarks,
                                        parse->onConflict,
                                        SS_assign_special_param(root));
        }
        else
            /* Adjust path by injecting a remote subplan, if appropriate. */
            path = adjust_path_distribution(root, parse, path);

        /* And shove it into final_rel */
        add_path(final_rel, path);
    }

    /*
     * Generate partial paths for final_rel, too, if outer query levels might
     * be able to make use of them.
     */
    if (final_rel->consider_parallel && root->query_level > 1 &&
        !limit_needed(parse))
    {
        Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
        foreach(lc, current_rel->partial_pathlist)
        {
            Path       *partial_path = (Path *) lfirst(lc);

            add_partial_path(final_rel, partial_path);
        }
    }

    /*
     * If there is an FDW that's responsible for all baserels of the query,
     * let it consider adding ForeignPaths.
     */

    if (final_rel->fdwroutine &&
        final_rel->fdwroutine->GetForeignUpperPaths)
        final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
                                                    current_rel, final_rel);

    /* Let extensions possibly add some more paths */
    if (create_upper_paths_hook)
        (*create_upper_paths_hook) (root, UPPERREL_FINAL,
                                    current_rel, final_rel);

    /* Note: currently, we leave it to callers to do set_cheapest() */
}

 grouping_planner调用  preprocess_targetlist(PlannerInfo *root, List *tlist) 函数

【\src\backend\optimizer\prep】 在 preprocess_targetlist(PlannerInfo *root, List *tlist) 函数中,用 #ifdef XCP 包围的代码就是处理数据节点(data node)分布的代码,其中会设置两个关键变量:distribution->nodes(该表涉及到的节点)和 distribution->restrictNodes(在 insert 和 update 中可以根据主键过滤掉部分节点)。

/*
 * preprocess_targetlist
 *      Driver for preprocessing the parse tree targetlist.
 *
 *      Returns the new targetlist.
 */
List *
preprocess_targetlist(PlannerInfo *root, List *tlist)

谓词下推

将约束条件(包括连接条件和过滤条件)下推到 RelOptInfo,即下推到底层的扫描路径上。

生成完整计划

创建聚集计划节点

通过调用函数make_agg,生成聚集计划节点,函数返回一个类型为Agg的结构

\src\backend\optimizer\plan\createplan.c

Agg *
make_agg(List *tlist, List *qual,
         AggStrategy aggstrategy, AggSplit aggsplit,
         int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
         List *groupingSets, List *chain,
         double dNumGroups, Plan *lefttree)

 make_agg函数调用——copy_plan_costsize函数——获得左子树代价

\src\backend\optimizer\plan\createplan.c

/*
 * Copy cost and size info from a lower plan node to an inserted node.
 * (Most callers alter the info after copying it.)
 */
static void
copy_plan_costsize(Plan *dest, Plan *src)

 make_agg函数调用——cost_agg函数——计算聚集代价

 \src\backend\optimizer\path\costsize.c

/*
 * cost_agg
 *        Determines and returns the cost of performing an Agg plan node,
 *        including the cost of its input.
 *
 * aggcosts can be NULL when there are no actual aggregate functions (i.e.,
 * we are using a hashed Agg node just to do grouping).
 *
 * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
 * are for appropriately-sorted input.
 */
void
cost_agg(Path *path, PlannerInfo *root,
         AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
         int numGroupCols, double numGroups,
         Cost input_startup_cost, Cost input_total_cost,
         double input_tuples)

 make_agg函数调用——cost_qual_eval函数——计算Having子句代价

\src\backend\optimizer\path\costsize.c

/*
 * cost_qual_eval
 *        Estimate the CPU costs of evaluating a WHERE clause.
 *        The input can be either an implicitly-ANDed list of boolean
 *        expressions, or a list of RestrictInfo nodes.  (The latter is
 *        preferred since it allows caching of the results.)
 *        The result includes both a one-time (startup) component,
 *        and a per-evaluation component.
 */
void
cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root)

创建排序计划节点

make_sort函数用于生成排序计划节点,函数返回一个类型为Sort的结构

make_sort函数    \src\backend\optimizer\plan\createplan.c

/*
 * make_sort --- basic routine to build a Sort plan node
 *
 * Caller must have built the sortColIdx, sortOperators, collations, and
 * nullsFirst arrays already.
 */
static Sort *
make_sort(Plan *lefttree, int numCols,
          AttrNumber *sortColIdx, Oid *sortOperators,
          Oid *collations, bool *nullsFirst)

 make_sort函数调用——cost_sort函数——计算排序代价

 \src\backend\optimizer\path\costsize.c

/*
 * cost_sort
 *      Determines and returns the cost of sorting a relation, including
 *      the cost of reading the input data.
 *
 * If the total volume of data to sort is less than sort_mem, we will do
 * an in-memory sort, which requires no I/O and about t*log2(t) tuple
 * comparisons for t tuples.
 *
 * If the total volume exceeds sort_mem, we switch to a tape-style merge
 * algorithm.  There will still be about t*log2(t) tuple comparisons in
 * total, but we will also need to write and read each tuple once per
 * merge pass.  We expect about ceil(logM(r)) merge passes where r is the
 * number of initial runs formed and M is the merge order used by tuplesort.c.
 * Since the average initial run should be about sort_mem, we have
 *        disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
 *        cpu = comparison_cost * t * log2(t)
 *
 * If the sort is bounded (i.e., only the first k result tuples are needed)
 * and k tuples can fit into sort_mem, we use a heap method that keeps only
 * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
 *
 * The disk traffic is assumed to be 3/4ths sequential and 1/4th random
 * accesses (XXX can't we refine that guess?)
 *
 * By default, we charge two operator evals per tuple comparison, which should
 * be in the right ballpark in most cases.  The caller can tweak this by
 * specifying nonzero comparison_cost; typically that's used for any extra
 * work that has to be done to prepare the inputs to the comparison operators.
 *
 * 'pathkeys' is a list of sort keys
 * 'input_cost' is the total cost for reading the input data
 * 'tuples' is the number of tuples in the relation
 * 'width' is the average tuple width in bytes
 * 'comparison_cost' is the extra cost per comparison, if any
 * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
 * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
 *
 * NOTE: some callers currently pass NIL for pathkeys because they
 * can't conveniently supply the sort keys.  Since this routine doesn't
 * currently do anything with pathkeys anyway, that doesn't matter...
 * but if it ever does, it should react gracefully to lack of key data.
 * (Actually, the thing we'd most likely be interested in is just the number
 * of sort keys, which all callers *could* supply.)
 */
void
cost_sort(Path *path, PlannerInfo *root,
          List *pathkeys, Cost input_cost, double tuples, int width,
          Cost comparison_cost, int sort_mem,
          double limit_tuples)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值