/*
 * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 *
 * openGauss is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 *
 * The function generate_unique_queryid() needs to be called when generating the queryid
 * and the function normalized_unique_querystring() is called when generating the query text.
 * -------------------------------------------------------------------------
 *
 * unique_query.cpp
 *    functions for generating the unique query id and the normalized query text
 *
 * IDENTIFICATION
 *    src/gausskernel/cbb/instruments/utils/unique_query.cpp
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"
#include "securec.h"
#include <math.h>
#include <sys/stat.h>
#include "miscadmin.h"
#include "nodes/nodes.h"
#include "nodes/pg_list.h"
#include "nodes/parsenodes.h"
#include "nodes/primnodes.h"
#include "c.h"
#include <unistd.h>
#include "access/hash.h"
#include "parser/scanner.h"
#include "mb/pg_wchar.h"
#include "instruments/unique_query.h"
#include "instruments/instr_slow_query.h"
#include "pgstat.h"
/* Size in bytes of the jumble buffer allocated by generate_jstate() and filled by AppendJumble(). */
const int JUMBLE_SIZE = 1024;
/* Initial number of pgssLocationLen slots; RecordConstLocation() doubles the array when it is full. */
const int CLOCATIONS_BUF_SIZE = 32;
/*
 * Struct for tracking locations/lengths of constants during normalization
 */
typedef struct pgssLocationLen {
    int location; /* start offset of the constant in the query string, in bytes */
    int length;   /* length in bytes; stays -1 until fill_in_constant_lengths() sets it */
} pgssLocationLen;
/*
 * Working state for computing a query jumble and producing a normalized
 * query string
 */
typedef struct pgssJumbleState {
    unsigned char* jumble;       /* jumble buffer of JUMBLE_SIZE bytes, allocated by generate_jstate() */
    Size jumble_len;             /* number of valid bytes currently in jumble[] */
    pgssLocationLen* clocations; /* array of constant locations recorded by RecordConstLocation() */
    int clocations_buf_size;     /* allocated capacity of clocations[], in entries */
    int clocations_count;        /* number of entries of clocations[] currently in use */
} pgssJumbleState;
/* Forward declarations of the helpers defined later in this file. */
namespace UniqueSql {
/* append raw bytes to the jumble buffer, folding it into a hash whenever it is full */
void AppendJumble(pgssJumbleState* jstate, const unsigned char* item, Size size);
/* serialize the significant parts of a (non-utility) query tree into the jumble */
void JumbleQuery(pgssJumbleState* jstate, Query* query);
/* serialize a range table into the jumble */
void JumbleRangeTable(pgssJumbleState* jstate, List* rtable);
/* serialize an expression tree into the jumble */
void JumbleExpr(pgssJumbleState* jstate, Node* node);
/* remember the location of a constant so it can be replaced during normalization */
void RecordConstLocation(pgssJumbleState* jstate, int location);
/* hash a NUL-terminated string with hash_any() */
uint32 pgss_hash_string(const char* str);
/* build the query text with every recorded constant replaced by '?' */
char* generate_normalized_query(pgssJumbleState* jstate, const char* query, int* query_len_p, int encoding);
/* sort the recorded constant locations and fill in the textual length of each */
void fill_in_constant_lengths(pgssJumbleState* jstate, const char* query);
/* qsort comparator ordering pgssLocationLen entries by location */
int comp_location(const void* a, const void* b);
/* allocate the jumble state buffers and jumble the given query */
void generate_jstate(pgssJumbleState* jstate, Query* query);
}
/*
 * A utility statement type that is always reported with one fixed unique SQL
 * text and id instead of its own query text.
 */
typedef struct BuiltinUniqueSQL {
    NodeTag type;           /* node tag of the utility statement this entry matches */
    const char *unique_sql; /* fixed unique SQL text reported for that statement */
    uint32 unique_sql_id;   /* hash of unique_sql, filled in by init_builtin_unique_sql() */
    uint32 unique_sql_len;  /* strlen(unique_sql), filled in by init_builtin_unique_sql() */
} BuiltinUniqueSQL;
/*
 * Table of built-in unique SQL entries. The id and length columns start as 0
 * and are computed by init_builtin_unique_sql().
 */
static BuiltinUniqueSQL BuiltinUniqueSQLArray[] = {
    {T_BarrierStmt, "CREATE BARRIER", 0, 0},
    {T_DeallocateStmt, "DEALLOCATE", 0, 0}
};
void init_builtin_unique_sql()
{
for (uint32 i = 0; i < (sizeof(BuiltinUniqueSQLArray) / sizeof(BuiltinUniqueSQLArray[0])); i++) {
if (BuiltinUniqueSQLArray[i].unique_sql != NULL) {
BuiltinUniqueSQLArray[i].unique_sql_len = strlen(BuiltinUniqueSQLArray[i].unique_sql);
BuiltinUniqueSQLArray[i].unique_sql_id = UniqueSql::pgss_hash_string(BuiltinUniqueSQLArray[i].unique_sql);
if (BuiltinUniqueSQLArray[i].unique_sql_id == 0) {
BuiltinUniqueSQLArray[i].unique_sql_id = 1;
}
}
}
}
/*
 * Look up the built-in unique SQL entry whose node tag matches the utility
 * statement of the given query.
 *
 * Returns NULL when query is NULL, when it has no utility statement, when no
 * entry matches, or when the matching entry carries no unique SQL text.
 */
static const BuiltinUniqueSQL *find_builtin_unqiue_sql(const Query *query)
{
    if (query == NULL || query->utilityStmt == NULL) {
        return NULL;
    }

    const NodeTag stmt_tag = nodeTag(query->utilityStmt);
    const BuiltinUniqueSQL* const table_end =
        BuiltinUniqueSQLArray + (sizeof(BuiltinUniqueSQLArray) / sizeof(BuiltinUniqueSQLArray[0]));

    for (const BuiltinUniqueSQL* entry = BuiltinUniqueSQLArray; entry != table_end; entry++) {
        if (entry->type != stmt_tag) {
            continue;
        }
        /* the first entry with a matching tag decides the result */
        return (entry->unique_sql != NULL) ? entry : NULL;
    }
    return NULL;
}
/*
 * create unique queryid
 *   query          query tree
 *   query_string   query text
 * returns the queryid
 */
uint32 generate_unique_queryid(Query* query, const char* query_string)
{
    /*
     * Without a query tree there is nothing to identify. 0 is otherwise
     * never returned, because a zero hash is remapped to 1 below.
     */
    if (query == NULL) {
        return 0;
    }

    /* utility statements listed in BuiltinUniqueSQLArray use their pre-computed id */
    const BuiltinUniqueSQL *builtin_unique_sql = find_builtin_unqiue_sql(query);
    if (builtin_unique_sql != NULL) {
        return builtin_unique_sql->unique_sql_id;
    }

    uint32 queryid = 0;

    /*
     * For utility statements, we just hash the query string directly.
     * SELECT/INSERT/UPDATE/DELETE statements call JumbleQuery to generate
     * the queryid.
     */
    if (query->utilityStmt != NULL) {
        /* pgss_hash_string() calls strlen(), so a missing text cannot be hashed */
        if (query_string == NULL) {
            return 0;
        }
        queryid = UniqueSql::pgss_hash_string(query_string);
    } else {
        pgssJumbleState jstate;
        errno_t rc = memset_s(&jstate, sizeof(jstate), 0, sizeof(jstate));
        securec_check(rc, "\0", "\0");

        UniqueSql::generate_jstate(&jstate, query);
        queryid = hash_any(jstate.jumble, jstate.jumble_len);

        /* the buffers allocated by generate_jstate() are not needed once the hash is taken */
        pfree_ext(jstate.jumble);
        pfree_ext(jstate.clocations);
    }

    /* never hand out 0 as the id of a real query */
    if (queryid == 0) {
        queryid = 1;
    }
    return queryid;
}
/*
 * Shift the recorded constant locations by multi_sql_offset.
 *
 * Every location that is at or beyond the offset has the offset subtracted
 * from it; smaller locations are left untouched. Nothing happens when jstate
 * is NULL or the offset is not positive.
 */
static void update_multi_sql_location(pgssJumbleState* jstate, int32 multi_sql_offset)
{
    if (jstate == NULL || multi_sql_offset <= 0) {
        return;
    }

    for (int idx = 0; idx < jstate->clocations_count; idx++) {
        pgssLocationLen* entry = &jstate->clocations[idx];
        if (entry->location >= multi_sql_offset) {
            entry->location -= multi_sql_offset;
        }
    }
}
/*
 * The caller must provide a buffer no smaller than strlen(query_string);
 * the function returns true if it completes successfully.
 * input:
 *   query             query tree
 *   query_string      the query text
 *   unique_buf        returned unique query text
 *   buf_len           the size of unique_buf
 *   multi_sql_offset  offset subtracted from the recorded constant locations
 *                     that lie at or beyond it (0 means no adjustment)
 */
bool normalized_unique_querystring(Query* query, const char* query_string, char* unique_buf, int buf_len,
    uint32 multi_sql_offset)
{
    if (query == NULL || query_string == NULL || unique_buf == NULL) {
        return false;
    }

    bool result = true;
    char *norm_query = NULL, *mask_str = NULL;
    int encoding = GetDatabaseEncoding();
    int query_len;
    pgssJumbleState jstate;
    errno_t rc = memset_s(&jstate, sizeof(jstate), 0, sizeof(jstate));
    securec_check(rc, "\0", "\0");

    query_len = strlen(query_string);
    if (query->utilityStmt == NULL) {
        /* walk the query tree to collect the locations of its constants */
        UniqueSql::generate_jstate(&jstate, query);
        if (jstate.clocations_count > 0) {
            update_multi_sql_location(&jstate, multi_sql_offset);
            /* on success query_len is updated to the length of norm_query */
            norm_query = UniqueSql::generate_normalized_query(&jstate, query_string, &query_len, encoding);
            if (norm_query == NULL) {
                result = false;
            }
        }
        /* the jumble state is only needed to build norm_query; release its buffers */
        pfree_ext(jstate.jumble);
        pfree_ext(jstate.clocations);
    } else {
        /* NOTE(review): maskPassword() presumably hides passwords in utility statement text - confirm */
        mask_str = maskPassword(query_string);
        if (mask_str != NULL) {
            query_string = mask_str;
            query_len = strlen(mask_str);
        }
    }

    if (result) {
        if (norm_query != NULL) {
            rc = memcpy_s(unique_buf, buf_len, norm_query, query_len);
            securec_check(rc, "\0", "\0");
            pfree(norm_query);
        } else {
            /* statements listed in BuiltinUniqueSQLArray are reported with their fixed text */
            const BuiltinUniqueSQL *builtin_unique_sql = find_builtin_unqiue_sql(query);
            if (builtin_unique_sql != NULL) {
                query_string = builtin_unique_sql->unique_sql;
                query_len = builtin_unique_sql->unique_sql_len;
            }
            /*
             * Encoding-aware truncation so that no multibyte character is cut in half.
             * NOTE(review): the limit is pgstat_track_activity_query_size - 1, not
             * buf_len; this assumes callers size unique_buf accordingly - confirm.
             */
            if (query_len > buf_len) {
                query_len = pg_encoding_mbcliplen(encoding, query_string, query_len,
                    g_instance.attr.attr_common.pgstat_track_activity_query_size - 1);
            }
            rc = memcpy_s(unique_buf, buf_len, query_string, query_len);
            securec_check(rc, "\0", "\0");
        }
    }

    pfree_ext(mask_str);
    return result;
}
/*
 * The function generate_jstate() is used to generate jumble for query
 */
void UniqueSql::generate_jstate(pgssJumbleState* jstate, Query* query)
{
    /* empty jumble buffer of JUMBLE_SIZE bytes */
    jstate->jumble_len = 0;
    jstate->jumble = (unsigned char*)palloc(JUMBLE_SIZE);

    /* empty constant-location array with the initial capacity */
    jstate->clocations_count = 0;
    jstate->clocations_buf_size = CLOCATIONS_BUF_SIZE;
    jstate->clocations = (pgssLocationLen*)palloc(CLOCATIONS_BUF_SIZE * sizeof(pgssLocationLen));

    /* fill both by walking the query tree; the caller owns the two buffers afterwards */
    UniqueSql::JumbleQuery(jstate, query);
}
/*
 * Given an arbitrarily long query string, produce a hash for the purposes of
 * identifying the query, without normalizing constants. Used when hashing
 * utility statements.
 */
uint32 UniqueSql::pgss_hash_string(const char* str)
{
    /* hash the bytes of the string, excluding the terminating NUL */
    const unsigned char* bytes = (const unsigned char*)str;
    const size_t byte_count = strlen(str);

    return hash_any(bytes, byte_count);
}
/*
 * AppendJumble: Append a value that is substantive in a given query to
 * the current jumble.
 */
void UniqueSql::AppendJumble(pgssJumbleState* jstate, const unsigned char* item, Size size)
{
    /* work on local copies; jumble_len is written back once at the end */
    unsigned char* jumble = jstate->jumble;
    Size jumble_len = jstate->jumble_len;
    int rc;

    /*
     * Whenever the jumble buffer is full, we hash the current contents and
     * reset the buffer to contain just that hash value, thus relying on the
     * hash to summarize everything so far.
     */
    while (size > 0) {
        Size part_size;

        if (jumble_len >= JUMBLE_SIZE) {
            uint32 start_hash = hash_any(jumble, JUMBLE_SIZE);

            rc = memcpy_s(jumble, JUMBLE_SIZE, &start_hash, sizeof(start_hash));
            securec_check(rc, "\0", "\0");
            jumble_len = sizeof(start_hash);
        }

        /* copy as much of the item as fits into the remaining space */
        part_size = Min(size, JUMBLE_SIZE - jumble_len);
        rc = memcpy_s(jumble + jumble_len, JUMBLE_SIZE - jumble_len, item, part_size);
        securec_check(rc, "\0", "\0");
        jumble_len += part_size;
        item += part_size;
        size -= part_size;
    }
    jstate->jumble_len = jumble_len;
}
/*
 * Wrappers around AppendJumble to encapsulate details of serialization
 * of individual local variable elements.
 */
/* Append the sizeof(item) raw bytes of an lvalue; expects a variable named jstate in scope. */
#define APP_JUMB(item) UniqueSql::AppendJumble(jstate, (const unsigned char*)&(item), sizeof(item))
/* Append a C string including its terminating NUL byte; expects a variable named jstate in scope. */
#define APP_JUMB_STRING(str) UniqueSql::AppendJumble(jstate, (const unsigned char*)(str), strlen(str) + 1)
/*
 * JumbleQuery: Selectively serialize the query tree, appending significant
 * data to the "query jumble" while ignoring nonsignificant data.
 * Rule of thumb for what to include is that we should ignore anything not
 * semantically significant (such as alias names) as well as anything that can
 * be deduced from child nodes (else we'd just be double-hashing that piece
 * of information).
 */
void UniqueSql::JumbleQuery(pgssJumbleState* jstate, Query* query)
{
    Assert(IsA(query, Query));
    Assert(query->utilityStmt == NULL);

    /* command type, WITH list and range table come first */
    APP_JUMB(query->commandType);
    UniqueSql::JumbleExpr(jstate, (Node*)query->cteList);
    UniqueSql::JumbleRangeTable(jstate, query->rtable);

    /*
     * The remaining clauses are all plain expression trees. They are jumbled
     * in this fixed order, because the order is part of the resulting hash.
     */
    Node* const clauses[] = {
        (Node*)query->jointree,
        (Node*)query->targetList,
        (Node*)query->returningList,
        (Node*)query->groupClause,
        (Node*)query->groupingSets,
        query->havingQual,
        (Node*)query->windowClause,
        (Node*)query->distinctClause,
        (Node*)query->sortClause,
        query->limitOffset,
        query->limitCount,
        query->setOperations
    };
    const size_t clause_count = sizeof(clauses) / sizeof(clauses[0]);

    for (size_t idx = 0; idx < clause_count; idx++) {
        UniqueSql::JumbleExpr(jstate, clauses[idx]);
    }
}
/*
 * Jumble a range table
 */
void UniqueSql::JumbleRangeTable(pgssJumbleState* jstate, List* rtable)
{
    ListCell* lc = NULL;

    foreach (lc, rtable) {
        RangeTblEntry* rte = (RangeTblEntry*)lfirst(lc);

        Assert(IsA(rte, RangeTblEntry));
        /* the entry kind is always significant; the rest depends on the kind */
        APP_JUMB(rte->rtekind);
        switch (rte->rtekind) {
            case RTE_RELATION:
                /*
                 * For a partitioned relation that names a valid partition or
                 * subpartition, that oid is jumbled instead of the relation oid.
                 */
                if (rte->ispartrel) {
                    if (rte->isContainPartition && OidIsValid(rte->partitionOid)) {
                        APP_JUMB(rte->partitionOid);
                    } else if (rte->isContainSubPartition && OidIsValid(rte->subpartitionOid)) {
                        APP_JUMB(rte->subpartitionOid);
                    } else {
                        APP_JUMB(rte->relid);
                    }
                } else {
                    APP_JUMB(rte->relid);
                }
                break;
            case RTE_SUBQUERY:
                UniqueSql::JumbleQuery(jstate, rte->subquery);
                break;
            case RTE_JOIN:
                APP_JUMB(rte->jointype);
                break;
            case RTE_FUNCTION:
                UniqueSql::JumbleExpr(jstate, rte->funcexpr);
                break;
            case RTE_VALUES:
                UniqueSql::JumbleExpr(jstate, (Node*)rte->values_lists);
                break;
            case RTE_CTE:
                /*
                 * Depending on the CTE name here isn't ideal, but it's the
                 * only info we have to identify the referenced WITH item.
                 */
                APP_JUMB_STRING(rte->ctename);
                APP_JUMB(rte->ctelevelsup);
                break;
            case RTE_RESULT:
                /* nothing beyond the kind itself */
                break;
            default:
                elog(ERROR, "unrecognized RTE kind: %d", (int)rte->rtekind);
                break;
        }
    }
}
/*
 * Jumble an expression tree
 *
 * In general this function should handle all the same node types that
 * expression_tree_walker() does, and therefore it's coded to be as parallel
 * to that function as possible. However, since we are only invoked on
 * queries immediately post-parse-analysis, we need not handle node types
 * that only appear in planning.
 *
 * Note: the reason we don't simply use expression_tree_walker() is that the
 * point of that function is to support tree walkers that don't care about
 * most tree node types, but here we care about all types. We should complain
 * about any unrecognized node type.
 */
void UniqueSql::JumbleExpr(pgssJumbleState* jstate, Node* node)
{
    ListCell* temp = NULL;

    if (node == NULL) {
        return;
    }

    /* this function recurses; guard against running off the end of the stack */
    check_stack_depth();

    /*
     * We always emit the node's NodeTag, then any additional fields that are
     * considered significant, and then we recurse to any child nodes.
     */
    APP_JUMB(node->type);
    switch (nodeTag(node)) {
        case T_Var: {
            Var* var = (Var*)node;
            APP_JUMB(var->varno);
            APP_JUMB(var->varattno);
            APP_JUMB(var->varlevelsup);
            break;
        }
        case T_Const: {
            Const* c = (Const*)node;
            /* only the constant's type is jumbled, not its value */
            APP_JUMB(c->consttype);
            /* its location is recorded so the text can be normalized later */
            UniqueSql::RecordConstLocation(jstate, c->location);
            break;
        }
        case T_Param: {
            Param* p = (Param*)node;
            APP_JUMB(p->paramkind);
            APP_JUMB(p->paramid);
            APP_JUMB(p->paramtype);
            break;
        }
        case T_Aggref: {
            Aggref* expr = (Aggref*)node;
            APP_JUMB(expr->aggfnoid);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->args);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->aggorder);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->aggdistinct);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->aggfilter);
            break;
        }
        case T_GroupingFunc: {
            GroupingFunc* grpnode = (GroupingFunc*)node;
            UniqueSql::JumbleExpr(jstate, (Node*)grpnode->refs);
            break;
        }
        case T_WindowFunc: {
            WindowFunc* expr = (WindowFunc*)node;
            APP_JUMB(expr->winfnoid);
            APP_JUMB(expr->winref);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->args);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->keep_args);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->winkporder);
            break;
        }
        case T_InitList: {
            foreach (temp, (List*)node) {
                APP_JUMB(lfirst_int(temp));
            }
            break;
        }
        case T_ArrayRef: {
            ArrayRef* aref = (ArrayRef*)node;
            UniqueSql::JumbleExpr(jstate, (Node*)aref->refupperindexpr);
            UniqueSql::JumbleExpr(jstate, (Node*)aref->reflowerindexpr);
            UniqueSql::JumbleExpr(jstate, (Node*)aref->refexpr);
            UniqueSql::JumbleExpr(jstate, (Node*)aref->refassgnexpr);
            break;
        }
        case T_FuncExpr: {
            FuncExpr* expr = (FuncExpr*)node;
            APP_JUMB(expr->funcid);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->args);
            break;
        }
        case T_NamedArgExpr: {
            NamedArgExpr* nae = (NamedArgExpr*)node;
            APP_JUMB(nae->argnumber);
            UniqueSql::JumbleExpr(jstate, (Node*)nae->arg);
            break;
        }
        case T_OpExpr:
        case T_DistinctExpr:
        case T_NullIfExpr: {
            /* all three tags are handled through the OpExpr view of the node */
            OpExpr* expr = (OpExpr*)node;
            APP_JUMB(expr->opno);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->args);
            break;
        }
        case T_ScalarArrayOpExpr: {
            ScalarArrayOpExpr* expr = (ScalarArrayOpExpr*)node;
            APP_JUMB(expr->opno);
            APP_JUMB(expr->useOr);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->args);
            break;
        }
        case T_BoolExpr: {
            BoolExpr* expr = (BoolExpr*)node;
            APP_JUMB(expr->boolop);
            UniqueSql::JumbleExpr(jstate, (Node*)expr->args);
            break;
        }
        case T_SubLink: {
            SubLink* sublink = (SubLink*)node;
            APP_JUMB(sublink->subLinkType);
            UniqueSql::JumbleExpr(jstate, (Node*)sublink->testexpr);
            UniqueSql::JumbleQuery(jstate, (Query*)sublink->subselect);
            break;
        }
        case T_FieldSelect: {
            FieldSelect* fs = (FieldSelect*)node;
            APP_JUMB(fs->fieldnum);
            UniqueSql::JumbleExpr(jstate, (Node*)fs->arg);
            break;
        }
        case T_FieldStore: {
            FieldStore* fstore = (FieldStore*)node;
            UniqueSql::JumbleExpr(jstate, (Node*)fstore->arg);
            UniqueSql::JumbleExpr(jstate, (Node*)fstore->newvals);
            break;
        }
        case T_RelabelType: {
            RelabelType* rt = (RelabelType*)node;
            APP_JUMB(rt->resulttype);
            UniqueSql::JumbleExpr(jstate, (Node*)rt->arg);
            break;
        }
        case T_CoerceViaIO: {
            CoerceViaIO* cio = (CoerceViaIO*)node;
            APP_JUMB(cio->resulttype);
            UniqueSql::JumbleExpr(jstate, (Node*)cio->arg);
            break;
        }
        case T_ArrayCoerceExpr: {
            ArrayCoerceExpr* acexpr = (ArrayCoerceExpr*)node;
            APP_JUMB(acexpr->resulttype);
            UniqueSql::JumbleExpr(jstate, (Node*)acexpr->arg);
            break;
        }
        case T_ConvertRowtypeExpr: {
            ConvertRowtypeExpr* crexpr = (ConvertRowtypeExpr*)node;
            APP_JUMB(crexpr->resulttype);
            UniqueSql::JumbleExpr(jstate, (Node*)crexpr->arg);
            break;
        }
        case T_CollateExpr: {
            CollateExpr* ce = (CollateExpr*)node;
            APP_JUMB(ce->collOid);
            UniqueSql::JumbleExpr(jstate, (Node*)ce->arg);
            break;
        }
        case T_CaseExpr: {
            CaseExpr* caseexpr = (CaseExpr*)node;
            UniqueSql::JumbleExpr(jstate, (Node*)caseexpr->arg);
            /* the WHEN clauses are walked here directly: condition, then result */
            foreach (temp, caseexpr->args) {
                CaseWhen* when = (CaseWhen*)lfirst(temp);
                Assert(IsA(when, CaseWhen));
                UniqueSql::JumbleExpr(jstate, (Node*)when->expr);
                UniqueSql::JumbleExpr(jstate, (Node*)when->result);
            }
            UniqueSql::JumbleExpr(jstate, (Node*)caseexpr->defresult);
            break;
        }
        case T_CaseTestExpr: {
            CaseTestExpr* ct = (CaseTestExpr*)node;
            APP_JUMB(ct->typeId);
            break;
        }
        case T_ArrayExpr:
            UniqueSql::JumbleExpr(jstate, (Node*)((ArrayExpr*)node)->elements);
            break;
        case T_RowExpr:
            UniqueSql::JumbleExpr(jstate, (Node*)((RowExpr*)node)->args);
            break;
        case T_RowCompareExpr: {
            RowCompareExpr* rcexpr = (RowCompareExpr*)node;
            APP_JUMB(rcexpr->rctype);
            UniqueSql::JumbleExpr(jstate, (Node*)rcexpr->largs);
            UniqueSql::JumbleExpr(jstate, (Node*)rcexpr->rargs);
            break;
        }
        case T_CoalesceExpr:
            UniqueSql::JumbleExpr(jstate, (Node*)((CoalesceExpr*)node)->args);
            break;
        case T_MinMaxExpr: {
            MinMaxExpr* mmexpr = (MinMaxExpr*)node;
            APP_JUMB(mmexpr->op);
            UniqueSql::JumbleExpr(jstate, (Node*)mmexpr->args);
            break;
        }
        case T_XmlExpr: {
            XmlExpr* xexpr = (XmlExpr*)node;
            APP_JUMB(xexpr->op);
            UniqueSql::JumbleExpr(jstate, (Node*)xexpr->named_args);
            UniqueSql::JumbleExpr(jstate, (Node*)xexpr->args);
            break;
        }
        case T_NullTest: {
            NullTest* nt = (NullTest*)node;
            APP_JUMB(nt->nulltesttype);
            UniqueSql::JumbleExpr(jstate, (Node*)nt->arg);
            break;
        }
        case T_NanTest: {
            NanTest* nt = (NanTest*)node;
            APP_JUMB(nt->nantesttype);
            UniqueSql::JumbleExpr(jstate, (Node*)nt->arg);
            break;
        }
        case T_InfiniteTest: {
            InfiniteTest* it = (InfiniteTest*)node;
            APP_JUMB(it->infinitetesttype);
            UniqueSql::JumbleExpr(jstate, (Node*)it->arg);
            break;
        }
        case T_BooleanTest: {
            BooleanTest* bt = (BooleanTest*)node;
            APP_JUMB(bt->booltesttype);
            UniqueSql::JumbleExpr(jstate, (Node*)bt->arg);
            break;
        }
        case T_CoerceToDomain: {
            CoerceToDomain* cd = (CoerceToDomain*)node;
            APP_JUMB(cd->resulttype);
            UniqueSql::JumbleExpr(jstate, (Node*)cd->arg);
            break;
        }
        case T_CoerceToDomainValue: {
            CoerceToDomainValue* cdv = (CoerceToDomainValue*)node;
            APP_JUMB(cdv->typeId);
            break;
        }
        case T_SetToDefault: {
            SetToDefault* sd = (SetToDefault*)node;
            APP_JUMB(sd->typeId);
            break;
        }
        case T_CurrentOfExpr: {
            CurrentOfExpr* ce = (CurrentOfExpr*)node;
            APP_JUMB(ce->cvarno);
            /* the cursor name may be absent; APP_JUMB_STRING needs a valid string */
            if (ce->cursor_name) {
                APP_JUMB_STRING(ce->cursor_name);
            }
            APP_JUMB(ce->cursor_param);
            break;
        }
        case T_TargetEntry: {
            TargetEntry* tle = (TargetEntry*)node;
            APP_JUMB(tle->resno);
            APP_JUMB(tle->ressortgroupref);
            UniqueSql::JumbleExpr(jstate, (Node*)tle->expr);
            break;
        }
        case T_RangeTblRef: {
            RangeTblRef* rtr = (RangeTblRef*)node;
            APP_JUMB(rtr->rtindex);
            break;
        }
        case T_JoinExpr: {
            JoinExpr* join = (JoinExpr*)node;
            APP_JUMB(join->jointype);
            APP_JUMB(join->isNatural);
            APP_JUMB(join->rtindex);
            UniqueSql::JumbleExpr(jstate, join->larg);
            UniqueSql::JumbleExpr(jstate, join->rarg);
            UniqueSql::JumbleExpr(jstate, join->quals);
            break;
        }
        case T_FromExpr: {
            FromExpr* from = (FromExpr*)node;
            UniqueSql::JumbleExpr(jstate, (Node*)from->fromlist);
            UniqueSql::JumbleExpr(jstate, from->quals);
            break;
        }
        case T_List:
            foreach (temp, (List*)node) {
                UniqueSql::JumbleExpr(jstate, (Node*)lfirst(temp));
            }
            break;
        case T_IntList:
            foreach (temp, (List*)node) {
                APP_JUMB(lfirst_int(temp));
            }
            break;
        case T_SortGroupClause: {
            SortGroupClause* sgc = (SortGroupClause*)node;
            APP_JUMB(sgc->tleSortGroupRef);
            APP_JUMB(sgc->eqop);
            APP_JUMB(sgc->sortop);
            APP_JUMB(sgc->nulls_first);
            break;
        }
        case T_GroupingSet: {
            GroupingSet* gsnode = (GroupingSet*)node;
            UniqueSql::JumbleExpr(jstate, (Node*)gsnode->content);
            break;
        }
        case T_WindowClause: {
            WindowClause* wc = (WindowClause*)node;
            APP_JUMB(wc->winref);
            APP_JUMB(wc->frameOptions);
            UniqueSql::JumbleExpr(jstate, (Node*)wc->partitionClause);
            UniqueSql::JumbleExpr(jstate, (Node*)wc->orderClause);
            UniqueSql::JumbleExpr(jstate, wc->startOffset);
            UniqueSql::JumbleExpr(jstate, wc->endOffset);
            break;
        }
        case T_CommonTableExpr: {
            CommonTableExpr* cte = (CommonTableExpr*)node;
            APP_JUMB_STRING(cte->ctename);
            UniqueSql::JumbleQuery(jstate, (Query*)cte->ctequery);
            break;
        }
        case T_SetOperationStmt: {
            SetOperationStmt* setop = (SetOperationStmt*)node;
            APP_JUMB(setop->op);
            APP_JUMB(setop->all);
            UniqueSql::JumbleExpr(jstate, setop->larg);
            UniqueSql::JumbleExpr(jstate, setop->rarg);
            break;
        }
        case T_PrefixKey: {
            PrefixKey* pkey = (PrefixKey*)node;
            UniqueSql::JumbleExpr(jstate, (Node*)pkey->arg);
            APP_JUMB(pkey->length);
            break;
        }
        default:
            /* unknown node types contribute only their tag (emitted above) and a debug message */
            elog(DEBUG1, "unrecognized node type: %d", (int)nodeTag(node));
            break;
    }
}
/*
 * Record location of constant within query string of query tree
 * that is currently being walked.
 */
void UniqueSql::RecordConstLocation(pgssJumbleState* jstate, int location)
{
    const int GROWTH_FACTOR = 2;

    /* negative locations are not recorded */
    if (location < 0) {
        return;
    }

    /* grow the array geometrically once every slot is in use */
    if (jstate->clocations_count >= jstate->clocations_buf_size) {
        jstate->clocations_buf_size *= GROWTH_FACTOR;
        const Size new_bytes = jstate->clocations_buf_size * sizeof(pgssLocationLen);
        jstate->clocations = (pgssLocationLen*)repalloc(jstate->clocations, new_bytes);
    }

    pgssLocationLen* slot = &jstate->clocations[jstate->clocations_count];
    slot->location = location;
    /* the length is not known yet; -1 marks it as unset */
    slot->length = -1;
    jstate->clocations_count++;
}
/*
 * Generate a normalized version of the query string that will be used to
 * represent all similar queries.
 *
 * Note that the normalized representation may well vary depending on
 * just which "equivalent" query is used to create the hashtable entry.
 * We assume this is OK.
 *
 * *query_len_p contains the input string length, and is updated with
 * the result string length (which cannot be longer) on exit.
 *
 * Returns a palloc'd string, which is not necessarily null-terminated.
 */
char* UniqueSql::generate_normalized_query(pgssJumbleState* jstate, const char* query, int* query_len_p, int encoding)
{
    char* norm_query = NULL;
    int query_len = *query_len_p;
    int max_output_len, i, rc;
    int len_to_wrt;      /* Length (in bytes) to write */
    int quer_loc = 0;    /* Source query byte location */
    int n_quer_loc = 0;  /* Normalized query byte location */
    int last_off = 0;    /* Offset from start for previous tok */
    int last_tok_len = 0; /* Length (in bytes) of that tok */

    /*
     * Get constants' lengths (core system only gives us locations). Note
     * this also ensures the items are sorted by location.
     */
    UniqueSql::fill_in_constant_lengths(jstate, query);

    /* the output never exceeds the input length or the activity-query size limit */
    max_output_len = Min(query_len, g_instance.attr.attr_common.pgstat_track_activity_query_size - 1);
    norm_query = (char*)palloc(max_output_len);

    for (i = 0; i < jstate->clocations_count; i++) {
        /*
         * off: Offset from start for cur tok
         * tok_len: Length (in bytes) of that tok
         */
        int off, tok_len;

        off = jstate->clocations[i].location;
        tok_len = jstate->clocations[i].length;
        /* entries whose length was never filled in are skipped */
        if (tok_len < 0) {
            continue;
        }

        /* length of the text between the previous constant and this one, or as much as fits */
        len_to_wrt = off - last_off;
        len_to_wrt -= last_tok_len;
        len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);

        /*
         * query string can't be matched(location in Query is bigger
         * than query string)
         * - delete from plan_table where statement_id='test statement_id',
         *   for sql 'delete plan_table', transformDeleteStmt method will
         *   modify Query member.
         */
        if (len_to_wrt <= 0) {
            break;
        }

        rc = memcpy_s(norm_query + n_quer_loc, max_output_len - n_quer_loc, query + quer_loc, len_to_wrt);
        securec_check(rc, "\0", "\0");
        n_quer_loc += len_to_wrt;

        /* the constant itself is replaced by a single '?' */
        if (n_quer_loc < max_output_len) {
            norm_query[n_quer_loc++] = '?';
        }

        quer_loc = off + tok_len;
        last_off = off;
        last_tok_len = tok_len;

        /* once the output is full there is no point in iterating further */
        if (n_quer_loc >= max_output_len) {
            break;
        }
    }

    /*
     * We've copied up until the last ignorable constant. Copy over the
     * remaining bytes of the original query string, or at least as much as
     * will fit.
     */
    len_to_wrt = query_len - quer_loc;
    len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
    if (len_to_wrt > 0) {
        rc = memcpy_s(norm_query + n_quer_loc, max_output_len - n_quer_loc, query + quer_loc, len_to_wrt);
        securec_check(rc, "\0", "\0");
        n_quer_loc += len_to_wrt;
    }

    /*
     * If we ran out of space, we need to do an encoding-aware truncation,
     * just to make sure we don't have an incomplete character at the end.
     */
    if (n_quer_loc >= max_output_len) {
        query_len = pg_encoding_mbcliplen(
            encoding, norm_query, n_quer_loc, g_instance.attr.attr_common.pgstat_track_activity_query_size - 1);
    } else {
        query_len = n_quer_loc;
    }

    *query_len_p = query_len;
    return norm_query;
}
/*
 * Given a valid SQL string and an array of constant-location records,
 * fill in the textual lengths of those constants.
 *
 * The constants may use any allowed constant syntax, such as float literals,
 * bit-strings, single-quoted strings and dollar-quoted strings. This is
 * accomplished by using the public API for the core scanner.
 *
 * It is the caller's job to ensure that the string is a valid SQL statement
 * with constants at the indicated locations. Since in practice the string
 * has already been parsed, and the locations that the caller provides will
 * have originated from within the authoritative parser, this should not be
 * a problem.
 *
 * Duplicate constant pointers are possible, and will have their lengths
 * marked as '-1', so that they are later ignored. (Actually, we assume the
 * lengths were initialized as -1 to start with, and don't change them here.)
 *
 * N.B. There is an assumption that a '-' character at a Const location begins
 * a negative numeric constant. This precludes there ever being another
 * reason for a constant to start with a '-'.
 */
void UniqueSql::fill_in_constant_lengths(pgssJumbleState* jstate, const char* query)
{
    pgssLocationLen* locs = NULL;
    core_yyscan_t yyscanner;
    core_yy_extra_type yyextra;
    core_YYSTYPE yylval;
    YYLTYPE yylloc;
    int last_loc = -1;
    int i;

    /*
     * Sort the records by location so that we can process them in order while
     * scanning the query text.
     */
    if (jstate->clocations_count > 1) {
        qsort(jstate->clocations, jstate->clocations_count, sizeof(pgssLocationLen), UniqueSql::comp_location);
    }
    locs = jstate->clocations;

    /* set up the core scanner over the query text */
    yyscanner = scanner_init(query, &yyextra, &ScanKeywords, ScanKeywordTokens);

    /* a session-level lexer hook, when installed, replaces core_yylex */
    void* coreYYlex = u_sess->hook_cxt.coreYYlexHook ? u_sess->hook_cxt.coreYYlexHook : (void*)core_yylex;

    for (i = 0; i < jstate->clocations_count; i++) {
        int loc = locs[i].location;
        int tok;

        Assert(loc >= 0);

        /* a location at or before the last one handled is a duplicate; its length stays -1 */
        if (loc <= last_loc) {
            continue;
        }

        /* lex tokens until we reach the desired constant */
        for (;;) {
            tok = ((coreYYlexFunc)coreYYlex)(&yylval, &yylloc, yyscanner);
            /* end of input reached before the constant was found */
            if (tok == 0) {
                break;
            }

            /*
             * We should find the token position exactly, but if we somehow
             * run past it, work with that.
             */
            if (yylloc >= loc) {
                if (query[loc] == '-') {
                    /*
                     * It's a negative value - this is the one and only case
                     * where we replace more than a single token.
                     *
                     * Do not compensate for the core system's special-case
                     * adjustment of location to that of the leading '-'
                     * operator in the event of a negative constant. It is
                     * also useful for our purposes to start from the minus
                     * symbol. In this way, queries like "select * from foo
                     * where bar = 1" and "select * from foo where bar = -2"
                     * will have identical normalized query strings.
                     */
                    tok = ((coreYYlexFunc)coreYYlex)(&yylval, &yylloc, yyscanner);
                    if (tok == 0) {
                        break;
                    }
                }

                /*
                 * We now rely on the assumption that flex has placed a zero
                 * byte after the text of the current token in scanbuf.
                 */
                locs[i].length = strlen(yyextra.scanbuf + loc);
                break;
            }
        }

        /* if we hit end of input, give up and leave the remaining lengths at -1 */
        if (tok == 0) {
            break;
        }

        last_loc = loc;
    }

    scanner_finish(yyscanner);
}
/*
 * comp_location: comparator for qsorting pgssLocationLen structs by location
 */
int UniqueSql::comp_location(const void* a, const void* b)
{
    const int lhs = ((const pgssLocationLen*)a)->location;
    const int rhs = ((const pgssLocationLen*)b)->location;

    /* yields -1, 0 or +1 without the overflow risk of a plain subtraction */
    return (lhs > rhs) - (lhs < rhs);
}