/* -------------------------------------------------------------------------
 *
 * selfuncs.h
 *    Selectivity functions and index cost estimation functions for
 *    standard operators and index access methods.
 *
 *
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/utils/selfuncs.h
 *
 * -------------------------------------------------------------------------
 */
#ifndef SELFUNCS_H
#define SELFUNCS_H
#include "fmgr.h"
#include "access/htup.h"
#include "nodes/relation.h"
#include "optimizer/nodegroups.h"
#include "parser/parse_oper.h"
#include "catalog/pg_operator.h"
/*
 * Note: the default selectivity estimates are not chosen entirely at random.
 * We want them to be small enough to ensure that indexscans will be used if
 * available, for typical table densities of ~100 tuples/page.  Thus, for
 * example, 0.01 is not quite small enough, since that makes it appear that
 * nearly all pages will be hit anyway.  Also, since we sometimes estimate
 * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
 * 1/DEFAULT_EQ_SEL.
 */
#define DEFAULT_EQ_SEL 0.005
#define DEFAULT_INEQ_SEL 0.3333333333333333
#define DEFAULT_RANGE_INEQ_SEL 0.005
#define DEFAULT_MATCH_SEL 0.005
#define DEFAULT_NUM_DISTINCT 200
#define DEFAULT_NUM_ROWS 10
#define DEFAULT_SPECIAL_EXPR_DISTINCT 10
#define DEFAULT_SPECIAL_EXPR_BIASE (pow(u_sess->pgxc_cxt.NumDataNodes, (double)1 / 2) / u_sess->pgxc_cxt.NumDataNodes)
#define DEFAULT_UNK_SEL 0.005
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
#define MIN_NEQ_ANTI_SEL 0.05
#define MAX_NEQ_SEMI_SEL (1.0 - MIN_NEQ_ANTI_SEL)
#define SELECTIVITY_THRESHOLD_TO_USE_POISSON 0.95
/*
 * Poisson-style distinct-count estimators.
 *
 * NUM_DISTINCT_SELECTIVITY_FOR_POISSON(distinct, input_rows, selectivity)
 *     distinct * (1 - exp(-(input_rows * selectivity / distinct)))
 *
 * NUM_DISTINCT_GTL_FOR_POISSON(gdistinct, input_rows, num_datanodes, dop)
 *     the same formula with selectivity = 1.0 / num_datanodes / dop
 *
 * NUM_PARALLEL_DISTINCT_GTL_FOR_POISSON(dn_distinct, dn_rows, dop)
 *     the same formula with selectivity = 1.0 / dop
 *
 * Every macro argument is parenthesized in the expansion, so compound
 * expressions such as "a + b" may be passed safely.  The distinct-count
 * argument is evaluated twice, so it must be free of side effects, and it
 * must be non-zero.  exp() has to be declared wherever the macros expand.
 */
#define NUM_DISTINCT_SELECTIVITY_FOR_POISSON(distinct, input_rows, selectivity) \
    (double)((distinct) * (1 - exp(-((input_rows) * (selectivity) / (distinct)))))
#define NUM_DISTINCT_GTL_FOR_POISSON(gdistinct, input_rows, num_datanodes, dop) \
    (double)(NUM_DISTINCT_SELECTIVITY_FOR_POISSON(gdistinct, input_rows, 1.0 / (num_datanodes) / (dop)))
#define NUM_PARALLEL_DISTINCT_GTL_FOR_POISSON(dn_distinct, dn_rows, dop) \
    (double)(NUM_DISTINCT_SELECTIVITY_FOR_POISSON(dn_distinct, dn_rows, 1.0 / (dop)))
/*
 * Clamp a computed probability estimate (which may suffer from roundoff or
 * estimation errors) to valid range.  Argument must be a float variable.
 */
/*
 * Force p into [0.0, 1.0] in place: values below 0.0 become 0.0, values above
 * 1.0 become 1.0, anything else (including NaN, which fails both comparisons)
 * is left untouched.  p must be an assignable floating-point lvalue; it is
 * evaluated more than once.
 */
#define CLAMP_PROBABILITY(p)      \
    do {                          \
        if ((p) < 0.0) {          \
            (p) = 0.0;            \
        } else if ((p) > 1.0) {   \
            (p) = 1.0;            \
        }                         \
    } while (0)
/*
 * VariableStatData - statistics lookup record for one variable/expression.
 *
 * Passed by pointer to examine_variable(), get_restriction_variable(),
 * get_join_variables() and get_variable_numdistinct() declared in this
 * header.  When statsTuple is a valid tuple, ReleaseVariableStats() passes it
 * to freefunc.
 *
 * NOTE(review): the per-field notes marked "presumably" are inferred from
 * field names and types only; confirm against the code that fills the struct.
 */
typedef struct VariableStatData {
/* presumably the Var or expression tree the statistics describe */
Node* var;
/* presumably the relation var belongs to */
RelOptInfo* rel;
/* statistics tuple; tested with HeapTupleIsValid() by ReleaseVariableStats() */
HeapTuple statsTuple;
/* callback ReleaseVariableStats() invokes on statsTuple */
void (*freefunc)(HeapTuple tuple);
/* presumably the type OID exposed by the expression */
Oid vartype;
/* presumably the type OID of the underlying attribute */
Oid atttype;
/* presumably the typmod of the underlying attribute */
int32 atttypmod;
/* presumably true when var is known to be unique */
bool isunique;
/* presumably whether the Poisson estimate may be used ("Possion" is the actual field spelling) */
bool enablePossion;
/* presumably the outcome of a permission check on the statistics -- cf. statistic_proc_security_check() */
bool acl_ok;
/* planner context; presumably the one var was examined under */
PlannerInfo *root;
/* two distinct-value counts; presumably indexed by STATS_EST_TYPE (GLOBAL, LOCAL) -- TODO confirm */
double numDistinct[2];
/* presumably set when numDistinct was estimated rather than read from statistics */
bool isEstimated;
/* base* fields: presumably the originating planner context / var / rel when var was traced through a subquery -- TODO confirm */
PlannerInfo *baseRoot;
Node *baseVar;
RelOptInfo *baseRel;
/* presumably requests a later adjustment of the distinct estimate -- TODO confirm */
bool needAdjust;
} VariableStatData;
/*
 * Release the statistics tuple held by a VariableStatData (passed by value
 * expression, not by pointer): if vardata.statsTuple is a valid tuple, call
 * vardata.freefunc on it.  Nothing happens otherwise.  The fields are not
 * reset afterwards.
 */
#define ReleaseVariableStats(vardata)                       \
    do {                                                    \
        if (HeapTupleIsValid((vardata).statsTuple)) {       \
            (vardata).freefunc((vardata).statsTuple);       \
        }                                                   \
    } while (0)
/*
 * Kind of pattern accepted by pattern_fixed_prefix().
 * NOTE(review): the "_IC" suffix presumably means case-insensitive -- confirm.
 */
typedef enum {
    Pattern_Type_Like,
    Pattern_Type_Like_IC,
    Pattern_Type_Regex,
    Pattern_Type_Regex_IC
} Pattern_Type;
/*
 * Result code of pattern_fixed_prefix().
 * NOTE(review): presumably None = no fixed prefix, Partial = fixed prefix
 * followed by more pattern, Exact = the pattern is a fixed string -- confirm.
 */
typedef enum {
    Pattern_Prefix_None,
    Pattern_Prefix_Partial,
    Pattern_Prefix_Exact
} Pattern_Prefix_Status;
/*
 * Selects which statistics an estimate is based on; used as the eType
 * argument of get_variable_numdistinct() and estimate_num_groups(), both of
 * which default it to STATS_TYPE_GLOBAL.
 * NOTE(review): presumably GLOBAL = whole cluster, LOCAL = one datanode.
 */
typedef enum {
    STATS_TYPE_GLOBAL,
    STATS_TYPE_LOCAL
} STATS_EST_TYPE;
/*
 * Helper routine for estimate_num_groups: add an item to a list of
 * GroupVarInfos, but only if it's not known equal to any of the existing
 * entries.  GroupVarInfo, declared next, is the element type of that list.
 */
/*
 * GroupVarInfo - one grouping expression tracked while estimating the number
 * of groups.
 *
 * NOTE(review): field notes marked "presumably" are inferred from names and
 * types; the code that fills this struct is not in this header.
 */
typedef struct {
/* presumably the grouping expression (a Node, so not necessarily a plain Var) */
Node* var;
/* presumably the relation var belongs to */
RelOptInfo* rel;
/* presumably the estimated number of distinct values of var */
double ndistinct;
/* presumably true when ndistinct is a default rather than statistics-based -- cf. the isdefault out-parameter of get_variable_numdistinct() */
bool isdefault;
/* presumably true when extended (multi-column) statistics were used -- TODO confirm */
bool es_is_used;
/* presumably the attribute numbers covered by those extended statistics -- TODO confirm */
Bitmapset* es_attnums;
} GroupVarInfo;
/*
 * genericcostestimate is a general-purpose estimator that can be used for
 * most index types.  In some cases we use genericcostestimate as the base
 * code and then incorporate additional index-type-specific knowledge in
 * the type-specific calling function.  To avoid code duplication, we make
 * genericcostestimate return a number of intermediate values as well as
 * its preliminary estimates of the output cost values.  The GenericCosts
 * struct includes all these values.
 *
 * Callers should initialize all fields of GenericCosts to zero.  In addition,
 * they can set numIndexTuples to some positive value if they have a better
 * than default way of estimating the number of leaf index tuples visited.
 */
/*
 * GenericCosts - output cost estimates plus intermediate values.
 *
 * The first four fields have the same names as the out-parameters of
 * genericcostestimate() and btcostestimate_internal() declared in this
 * header.
 *
 * NOTE(review): notes marked "presumably" are inferred from field names.
 */
typedef struct {
/* index-related startup cost */
Cost indexStartupCost;
/* total index-related cost */
Cost indexTotalCost;
/* selectivity of the index scan */
Selectivity indexSelectivity;
/* presumably the correlation between index order and heap order */
double indexCorrelation;
/* presumably the number of index pages visited */
double numIndexPages;
/* presumably the number of index tuples visited */
double numIndexTuples;
/* presumably the random_page_cost of the index's tablespace -- TODO confirm */
double spc_random_page_cost;
/* presumably the number of index scans caused by ScalarArrayOp clauses -- TODO confirm */
double num_sa_scans;
} GenericCosts;
/*
 * Row-count and distinct-count helpers.
 *
 * NOTE(review): only the signatures are visible here; behaviour is not
 * documented beyond what the parameter names and types state.
 */
extern void set_local_rel_size(PlannerInfo* root, RelOptInfo* rel);
extern double get_join_ratio(VariableStatData* vardata, SpecialJoinInfo* sjinfo);
extern double get_multiple_by_distkey(PlannerInfo* root, List* distkey, double rows);
/*
 * Two overloads that differ only in the third parameter (Plan* vs Path*).
 * Overloading means this header must be compiled as C++.
 */
extern double estimate_agg_num_distinct(PlannerInfo* root, List* group_exprs, Plan* plan, const double* numGroups);
extern double estimate_agg_num_distinct(PlannerInfo* root, List* group_exprs, Path* path, const double* numGroups);
extern void output_noanalyze_rellist_to_log(int lev);
extern void set_noanalyze_rellist(Oid relid, AttrNumber attid);
/* global_distinct and isdefault are pointer parameters, presumably outputs -- TODO confirm */
extern double estimate_local_numdistinct(PlannerInfo* root, Node* hashkey, Path* path, SpecialJoinInfo* sjinfo,
double* global_distinct, bool* isdefault, VariableStatData* vardata);
/* pgset may be omitted; it defaults to NULL (C++ default argument) */
extern void get_num_distinct(PlannerInfo* root, List* groupExprs, double local_rows, double global_rows,
unsigned int num_datanodes, double* numdistinct, List** pgset = NULL);
/* get_local_rows() is what the *_LOCAL_ROWS and *_LOCAL_FIELD macros in this header expand to */
extern double get_local_rows(double global_rows, double multiple, bool replicate, unsigned int num_data_nodes);
extern double get_global_rows(double local_rows, double multiple, unsigned int num_data_nodes);
/*
 * Convenience wrappers around get_local_rows(global_rows, multiple,
 * replicate, num_data_nodes).  Each macro evaluates its struct argument
 * several times, so pass side-effect-free expressions.
 */
/* path->rows as global_rows; replicate = IsLocatorReplicated(path->locator_type) */
#define PATH_LOCAL_ROWS(path) \
get_local_rows( \
(path)->rows, (path)->multiple, IsLocatorReplicated((path)->locator_type), ng_get_dest_num_data_nodes(path))
/* plan->plan_rows as global_rows; replicate is true whenever exec_type is not EXEC_ON_DATANODES */
#define PLAN_LOCAL_ROWS(plan) \
get_local_rows( \
(plan)->plan_rows, (plan)->multiple, (plan)->exec_type != EXEC_ON_DATANODES, ng_get_dest_num_data_nodes(plan))
/* fldname is pasted as a member name of rel, e.g. RELOPTINFO_LOCAL_FIELD(root, rel, rows) reads rel->rows */
#define RELOPTINFO_LOCAL_FIELD(root, rel, fldname) \
get_local_rows((rel)->fldname, \
(rel)->multiple, \
IsLocatorReplicated((rel)->locator_type), \
ng_get_dest_num_data_nodes((root), (rel)))
/* same as above for a member of idx; multiple and locator_type are taken from idx->rel */
#define IDXOPTINFO_LOCAL_FIELD(root, idx, fldname) \
get_local_rows((idx)->fldname, \
(idx)->rel->multiple, \
IsLocatorReplicated((idx)->rel->locator_type), \
ng_get_dest_num_data_nodes((root), (idx)->rel))
#ifndef ENABLE_MULTIPLE_NODES
/*
 * Small arithmetic helpers, only defined when ENABLE_MULTIPLE_NODES is not.
 *
 * Every argument and every expansion is fully parenthesized so the macros
 * keep their meaning inside larger expressions (e.g. 100 / MID(2, 4) is 33,
 * and 2.0 * CHECK_DISTINCT_HIST(x) scales the clamped value).
 *
 * MID(low, high)            - (low + high) / 2; may overflow for large ints.
 * PREVIOUS_BOUND(i)         - i - 2.
 * NEXT_BOUND(i)             - i + 1.
 * CHECK_DISTINCT_HIST(d)    - d raised to at least 1.0; d is evaluated twice.
 */
#define MID(low, high) (((low) + (high)) / 2)
#define PREVIOUS_BOUND(i) ((i) - 2)
#define NEXT_BOUND(i) ((i) + 1)
#define CHECK_DISTINCT_HIST(distinct) ((distinct) < 1.0 ? 1.0 : (distinct))
#endif
/*
 * Variable examination and statistics-based selectivity helpers.
 *
 * NOTE(review): only signatures are visible here.  Pointer parameters such
 * as vardata, other, varonleft and join_is_reversed are presumably outputs.
 */
extern void examine_variable(PlannerInfo* root, Node* node, int varRelid, VariableStatData* vardata);
extern bool statistic_proc_security_check(const VariableStatData *vardata, Oid func_oid);
extern bool get_restriction_variable(
PlannerInfo* root, List* args, int varRelid, VariableStatData* vardata, Node** other, bool* varonleft);
extern void get_join_variables(PlannerInfo* root, List* args, SpecialJoinInfo* sjinfo, VariableStatData* vardata1,
VariableStatData* vardata2, bool* join_is_reversed);
/*
 * Defaults (C++ default arguments): adjust_rows = true, join_ratio = 1.0,
 * sjinfo = NULL, eType = STATS_TYPE_GLOBAL, isJoinVar = false.
 */
extern double get_variable_numdistinct(VariableStatData* vardata, bool* isdefault, bool adjust_rows = true,
double join_ratio = 1.0, SpecialJoinInfo* sjinfo = NULL, STATS_EST_TYPE eType = STATS_TYPE_GLOBAL,
bool isJoinVar = false);
/* lastcommonp may be omitted; it defaults to NULL */
extern double mcv_selectivity(VariableStatData* vardata, FmgrInfo* opproc, Datum constval, bool varonleft,
double* sumcommonp, Oid equaloperator, bool* inmcv, double* lastcommonp = NULL);
extern double histogram_selectivity(VariableStatData* vardata, FmgrInfo* opproc, Datum constval, bool varonleft,
int min_hist_size, int n_skip, int* hist_size);
/* returns a Pattern_Prefix_Status; prefix and rest_selec are pointer parameters, presumably outputs */
extern Pattern_Prefix_Status pattern_fixed_prefix(
Const* patt, Pattern_Type ptype, Oid collation, Const** prefix, Selectivity* rest_selec);
extern Const* make_greater_string(const Const* str_const, FmgrInfo* ltproc, Oid collation);
/*
 * fmgr-callable estimators: each takes PG_FUNCTION_ARGS and returns a Datum.
 *
 * NOTE(review): by naming convention the "...sel" functions are presumably
 * restriction-clause estimators and the "...joinsel" functions their
 * join-clause counterparts; the "ic" prefix presumably means
 * case-insensitive.  Confirm against the operator catalog entries.
 */
extern Datum eqsel(PG_FUNCTION_ARGS);
extern Datum neqsel(PG_FUNCTION_ARGS);
extern Datum scalarltsel(PG_FUNCTION_ARGS);
/* directly callable variant with explicit arguments; returns float8 instead of a Datum */
extern float8 scalarltsel_internal(PlannerInfo* root, Oid opera, List* args, int varRelid);
extern Datum scalargtsel(PG_FUNCTION_ARGS);
extern Datum regexeqsel(PG_FUNCTION_ARGS);
extern Datum icregexeqsel(PG_FUNCTION_ARGS);
extern Datum likesel(PG_FUNCTION_ARGS);
extern Datum iclikesel(PG_FUNCTION_ARGS);
extern Datum regexnesel(PG_FUNCTION_ARGS);
extern Datum icregexnesel(PG_FUNCTION_ARGS);
extern Datum nlikesel(PG_FUNCTION_ARGS);
extern Datum icnlikesel(PG_FUNCTION_ARGS);
extern Datum eqjoinsel(PG_FUNCTION_ARGS);
extern Datum neqjoinsel(PG_FUNCTION_ARGS);
extern Datum scalarltjoinsel(PG_FUNCTION_ARGS);
extern Datum scalargtjoinsel(PG_FUNCTION_ARGS);
extern Datum regexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum likejoinsel(PG_FUNCTION_ARGS);
extern Datum iclikejoinsel(PG_FUNCTION_ARGS);
extern Datum regexnejoinsel(PG_FUNCTION_ARGS);
extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
/*
 * Estimators called directly with planner structures (not through fmgr);
 * each returns a Selectivity for one kind of test or clause node.
 */
extern Selectivity booltestsel(
PlannerInfo* root, BoolTestType booltesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo);
extern Selectivity nulltestsel(
PlannerInfo* root, NullTestType nulltesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo);
extern Selectivity nantestsel(
PlannerInfo* root, NanTestType nantesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo);
extern Selectivity infinitetestsel(
PlannerInfo* root, InfiniteTestType infinitetesttype, Node* arg, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo);
extern Selectivity scalararraysel(PlannerInfo* root, ScalarArrayOpExpr* clause, bool is_join_clause, int varRelid,
JoinType jointype, SpecialJoinInfo* sjinfo);
extern int estimate_array_length(Node* arrayexpr);
extern Selectivity rowcomparesel(
PlannerInfo* root, RowCompareExpr* clause, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo);
/* the four Selectivity* parameters are presumably outputs -- TODO confirm */
extern void mergejoinscansel(PlannerInfo* root, Node* clause, Oid opfamily, int strategy, bool nulls_first,
Selectivity* leftstart, Selectivity* leftend, Selectivity* rightstart, Selectivity* rightend);
/* eType defaults to STATS_TYPE_GLOBAL and pgset to NULL (C++ default arguments) */
extern double estimate_num_groups(PlannerInfo* root, List* groupExprs, double input_rows, unsigned int num_datanodes,
STATS_EST_TYPE eType = STATS_TYPE_GLOBAL, List** pgset = NULL);
extern Selectivity estimate_hash_bucketsize(
PlannerInfo* root, Node* hashkey, double nbuckets, Path* inner_path, SpecialJoinInfo* sjinfo, double* distinctnum);
/*
 * Index cost estimators.  The "...costestimate" functions are fmgr-callable;
 * NOTE(review): presumably one per index access method (bt, ubt, hash, gist,
 * spg, gin, psort) -- confirm against the access-method catalog.
 */
extern Datum btcostestimate(PG_FUNCTION_ARGS);
/* directly callable variant; the cost, selectivity and correlation pointers are presumably outputs */
extern void btcostestimate_internal(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost,
Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation);
extern Datum ubtcostestimate(PG_FUNCTION_ARGS);
extern Datum hashcostestimate(PG_FUNCTION_ARGS);
extern Datum gistcostestimate(PG_FUNCTION_ARGS);
extern Datum spgcostestimate(PG_FUNCTION_ARGS);
extern Datum gincostestimate(PG_FUNCTION_ARGS);
extern Datum psortcostestimate(PG_FUNCTION_ARGS);
/* Array containment estimators. */
extern Selectivity scalararraysel_containment(
PlannerInfo* root, Node* leftop, Node* rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid);
extern Datum arraycontsel(PG_FUNCTION_ARGS);
extern Datum arraycontjoinsel(PG_FUNCTION_ARGS);
/* Distinguishes a ratio produced by a filter from one produced by a join. */
typedef enum {
    RatioType_Filter,
    RatioType_Join
} RatioType;
typedef struct VarRatio {
RatioType ratiotype;
Node* var;
double ratio;
others it means selectivity of filter. */
Relids joinrelids;
} VarRatio;
typedef struct VarEqRatio {
Var* var;
double ratio;
others it means selectivity of filter. */
Relids joinrelids;
} VarEqRatio;
/*
 * Remaining helpers.
 *
 * NOTE(review): only signatures are visible here; behaviour beyond the
 * parameter names is not documented in this header.
 */
/* takes a RatioType and a ratio value; presumably records them for vardata after a selectivity was computed */
extern void set_varratio_after_calc_selectivity(
VariableStatData* vardata, RatioType type, double ratio, SpecialJoinInfo* sjinfo);
extern double get_windowagg_selectivity(PlannerInfo* root, WindowClause* wc, WindowFunc* wfunc, List* partitionExprs,
int32 constval, double tuples, unsigned int num_datanodes);
extern bool contain_single_col_stat(List* stat_list);
extern double convert_timevalue_to_scalar(Datum value, Oid typid);
/* general-purpose index cost estimator; the Cost*, Selectivity* and double* parameters are presumably outputs */
extern void genericcostestimate(PlannerInfo* root, IndexPath* path, double loop_count, double numIndexTuples,
Cost* indexStartupCost, Cost* indexTotalCost, Selectivity* indexSelectivity, double* indexCorrelation);
#endif