/* Commit 58e61205, created on 2023-03-06 (commit history) */
/* ---------------------------------------------------------------------------------------
 *
 * pgxcpath_single.cpp
 *	  Routines to find possible remote query paths for various relations and
 *	  their costs.
 *
 * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 * Portions Copyright (c) 2012 Postgres-XC Development Group
 *
 * IDENTIFICATION
 *	  src/gausskernel/optimizer/path/pgxcpath_single.cpp
 *
 * ---------------------------------------------------------------------------------------
 */
#include "postgres.h"
#include "commands/tablecmds.h"
#include "nodes/makefuncs.h"
#include "optimizer/cost.h"
#include "optimizer/paths.h"
#include "optimizer/pathnode.h"
#include "optimizer/pgxcship.h"
#include "optimizer/restrictinfo.h"
#include "parser/parsetree.h"
#include "pgxc/pgxc.h"
#include "optimizer/pgxcplan.h"

/*
 * The static helpers declared below are only called from the
 * ENABLE_MULTIPLE_NODES branches of the exported functions in this file.
 * When that macro is not defined they have no callers, which is presumably
 * why the unused-function diagnostic is disabled here.
 */
#pragma GCC diagnostic ignored "-Wunused-function"

/* Returns the RemoteQuery path found in rel->pathlist, or NULL if there is none. */
static RemoteQueryPath* pgxc_find_remotequery_path(RelOptInfo* rel);
/* Builds a RemoteQueryPath for a base rel or a join rel and computes its cost. */
static RemoteQueryPath* create_remotequery_path(PlannerInfo* root, RelOptInfo* rel, ExecNodes* exec_nodes,
    RemoteQueryPath* leftpath, RemoteQueryPath* rightpath, JoinType jointype, List* join_restrictlist);
/*
 * create_remotequery_path
 *	  Build a RemoteQueryPath for the given RelOptInfo (a base rel or a join
 *	  rel), i.e. a path whose result is obtained by querying the datanode/s
 *	  listed in exec_nodes.
 *
 *	  For a join rel, leftpath and rightpath are the RemoteQuery paths of the
 *	  left and right inputs, jointype is the kind of JOIN and
 *	  join_restrictlist is the restrictinfo list of the JOIN; both sub-paths
 *	  are mandatory. For a base rel the sub-paths and the restrict list are
 *	  expected to be NULL.
 *
 *	  The returned path is flagged with:
 *	    - rqhas_temp_rel: a temporary relation is involved in the path;
 *	    - rqhas_unshippable_qual: the applicable quals are not shippable;
 *	    - rqhas_unshippable_tlist: the rel's target expressions are not
 *	      shippable.
 *
 *	  Raises ERROR for a join rel missing a sub-path, for a base rel whose
 *	  range table entry is not a plain relation, and for any other kind of rel.
 */
static RemoteQueryPath* create_remotequery_path(PlannerInfo* root, RelOptInfo* rel, ExecNodes* exec_nodes,
    RemoteQueryPath* leftpath, RemoteQueryPath* rightpath, JoinType jointype, List* join_restrictlist)
{
    RemoteQueryPath* remote_path = makeNode(RemoteQueryPath);
    bool has_unshippable_qual = false;

    /* A join can only be described in terms of its two remote inputs. */
    if (rel->reloptkind == RELOPT_JOINREL && (leftpath == NULL || rightpath == NULL))
        elog(ERROR, "a join rel requires both the left path and right path");

    /* Generic Path fields. */
    remote_path->path.pathtype = T_RemoteQuery;
    remote_path->path.parent = rel;
    remote_path->path.pathtarget = rel->reltarget;
    /* PGXC_TODO: do we want to care about it */
    remote_path->path.param_info = NULL;
    /* result is always unordered */
    remote_path->path.pathkeys = NIL;

    /* RemoteQuery specific fields. */
    remote_path->rqpath_en = exec_nodes;
    remote_path->leftpath = leftpath;
    remote_path->rightpath = rightpath;
    remote_path->jointype = jointype;
    remote_path->join_restrictlist = join_restrictlist;

    if (rel->reloptkind == RELOPT_BASEREL || rel->reloptkind == RELOPT_OTHER_MEMBER_REL) {
        /* Base relation: inspect its range table entry and its own restrictions. */
        RangeTblEntry* rte = rt_fetch(rel->relid, root->parse->rtable);
        List* base_quals = NIL;

        if (rte->rtekind != RTE_RELATION)
            elog(ERROR, "can not create remote path for ranges of type %d", rte->rtekind);
        remote_path->rqhas_temp_rel = IsTempTable(rte->relid);
        base_quals = extract_actual_clauses(rel->baserestrictinfo, false);
        has_unshippable_qual = !pgxc_is_expr_shippable((Expr*)base_quals, NULL);
    } else if (rel->reloptkind == RELOPT_JOINREL) {
        /* Join relation: inherit the temp-rel flag from either side, check the join quals. */
        List* join_clauses = NIL;

        remote_path->rqhas_temp_rel = leftpath->rqhas_temp_rel || rightpath->rqhas_temp_rel;
        join_clauses = extract_actual_clauses(join_restrictlist, false);
        has_unshippable_qual = !pgxc_is_expr_shippable((Expr*)join_clauses, NULL);
    } else {
        elog(ERROR, "can not create remote path for relation of type %d", rel->reloptkind);
    }

    remote_path->rqhas_unshippable_qual = has_unshippable_qual;
    remote_path->rqhas_unshippable_tlist = !pgxc_is_expr_shippable((Expr*)rel->reltarget->exprs, NULL);

    /* set cost properly */
    cost_remotequery(remote_path, root, rel);

    return remote_path;
}

/*
 * create_plainrel_rqpath
 * Create a RemoteQuery path for a plain relation residing on datanode/s and add
 * it to the pathlist of the corresponding RelOptInfo.
 *
 * root - planner state of the query being planned
 * rel  - RelOptInfo of the base relation
 * rte  - range table entry of the base relation
 *
 * Returns true if a remote query path was created and added, false otherwise;
 * the caller can decide whether to add the scan paths depending upon the
 * return value. When ENABLE_MULTIPLE_NODES is not defined no remote path is
 * ever created and the function always returns false.
 */
extern bool create_plainrel_rqpath(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte)
{
#ifndef ENABLE_MULTIPLE_NODES
    return false;
#else
    List* quals = NIL;
    ExecNodes* exec_nodes = NULL;

    /*
     * If we are on the Coordinator, we always want to use
     * the remote query path unless relation is local to coordinator or the
     * query is to be entirely executed on coordinator.
     */
    if (!IS_PGXC_COORDINATOR || IsConnFromCoord() || root->parse->is_local)
        return false;

    /* Determine the target nodes from the relation's own restriction clauses. */
    quals = extract_actual_clauses(rel->baserestrictinfo, false);
    exec_nodes = GetRelationNodesByQuals(rte->relid, rel->relid, (Node*)quals, RELATION_ACCESS_READ);
    if (!exec_nodes)
        return false;

    /* For a relation distributed by value, record its distribution Var. */
    if (IsExecNodesDistributedByValue(exec_nodes)) {
        Var* dist_var = pgxc_get_dist_var(rel->relid, rte, rel->reltarget->exprs);
        exec_nodes->en_dist_vars = list_make1(dist_var);
    }

    /*
     * We don't have subpaths for a plain base relation. The join type is only
     * a placeholder here: JOIN_INNER is the zero-valued JoinType, and unlike a
     * bare integer literal it is a valid argument for an enum parameter in C++.
     */
    add_path(rel, (Path*)create_remotequery_path(root, rel, exec_nodes, NULL, NULL, JOIN_INNER, NIL));
    return true;
#endif
}

/*
 * pgxc_find_remotequery_path
 * Scan the pathlist of the given rel and hand back the first RemoteQuery path
 * encountered, or NULL when the list holds none. Only one RemoteQuery path is
 * expected per rel, but that is not verified here.
 */
static RemoteQueryPath* pgxc_find_remotequery_path(RelOptInfo* rel)
{
    RemoteQueryPath* found = NULL;
    ListCell* lc = NULL;

    foreach (lc, rel->pathlist) {
        Path* candidate = (Path*)lfirst(lc);

        if (!IsA(candidate, RemoteQueryPath))
            continue;

        /* First match wins; stop scanning. */
        found = (RemoteQueryPath*)candidate;
        break;
    }

    return found;
}

/*
 * create_joinrel_rqpath
 * If both rels being joined have RemoteQuery paths, check whether the join
 * itself can be shipped to the datanodes and, if so, add a RemoteQuery path
 * for the join to joinrel.
 *
 * root         - planner state of the query being planned
 * joinrel      - RelOptInfo of the join being built
 * outerrel     - RelOptInfo of the outer side of the join
 * innerrel     - RelOptInfo of the inner side of the join
 * restrictlist - restrictinfo list applicable to the join
 * jointype     - kind of JOIN
 * sjinfo       - not used by this function
 *
 * When ENABLE_MULTIPLE_NODES is not defined the function does nothing.
 */
extern void create_joinrel_rqpath(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel,
    List* restrictlist, JoinType jointype, SpecialJoinInfo* sjinfo)
{
#ifndef ENABLE_MULTIPLE_NODES
    return;
#else
    RemoteQueryPath* inner_rqpath = NULL;
    RemoteQueryPath* outer_rqpath = NULL;
    ExecNodes* merged_nodes = NULL;
    List* join_clauses = NIL;
    List* other_clauses = NIL;

    /* If GUC does not allow remote join optimization, so be it */
    if (!enable_remotejoin)
        return;

    inner_rqpath = pgxc_find_remotequery_path(innerrel);
    outer_rqpath = pgxc_find_remotequery_path(outerrel);

    /*
     * The join can not be shipped to the datanodes unless both sides have a
     * RemoteQuery path.
     */
    if (inner_rqpath == NULL || outer_rqpath == NULL)
        return;

    /*
     * An unshippable qual on either side has to be evaluated before the two
     * relations are joined, hence such a JOIN is not shippable.
     * PGXC_TODO: In case of INNER join this condition can be relaxed by
     * attaching the unshippable qual to the join itself, and thus shipping the
     * join but evaluating the qual on the join result. Not attempted for now.
     */
    if (inner_rqpath->rqhas_unshippable_qual || outer_rqpath->rqhas_unshippable_qual)
        return;

    if (inner_rqpath->rqpath_en == NULL || outer_rqpath->rqpath_en == NULL)
        elog(ERROR, "No node list provided for remote query path");

    /*
     * Collect quals from restrictions so as to check the shippability of a JOIN
     * between distributed relations.
     */
    extract_actual_join_clauses(restrictlist, &join_clauses, &other_clauses);

    if (jointype == JOIN_INNER) {
        /*
         * For INNER JOIN there is no distinction between JOIN and non-JOIN
         * clauses, so let the JOIN reduction algorithm take all of them into
         * consideration to decide whether a JOIN is reducible or not based on
         * quals (if required).
         */
        join_clauses = list_concat(join_clauses, other_clauses);
    } else if (!pgxc_is_expr_shippable((Expr*)join_clauses, NULL)) {
        /*
         * The joining qual is not shippable and this is not an INNER JOIN:
         * shipping the JOIN would impact its result.
         */
        return;
    }

    /*
     * If the nodelists on both the sides of JOIN can be merged, the JOIN is
     * shippable.
     */
    merged_nodes = pgxc_is_join_shippable(inner_rqpath->rqpath_en,
        outer_rqpath->rqpath_en,
        inner_rqpath->rqhas_unshippable_tlist,
        outer_rqpath->rqhas_unshippable_tlist,
        jointype,
        (Node*)join_clauses);
    if (merged_nodes == NULL)
        return;

    /* The outer path becomes the left input and the inner path the right input. */
    add_path(joinrel,
        (Path*)create_remotequery_path(
            root, joinrel, merged_nodes, outer_rqpath, inner_rqpath, jointype, restrictlist));
#endif
}