/*
 * Source: oGRAC/pkg/src/ogsql/parser/scan.l (code preview) - oGRAC: a multi-master
 * relational database project based on the openGauss ecosystem - AtomGit
 *
 * Nerifish: Support bison parser default enablement
 */
%{
/* -------------------------------------------------------------------------
 *
 * scan.l
 *      lexical scanner for PostgreSQL
 *
 * NOTE NOTE NOTE:
 *
 * The rules in this file must be kept in sync with psql's lexer!!!
 *
 * The rules are designed so that the scanner never has to backtrack,
 * in the sense that there is always a rule that can match the input
 * consumed so far (the rule action may internally throw back some input
 * with yyless(), however).  As explained in the flex manual, this makes
 * for a useful speed increase --- about a third faster than a plain -CF
 * lexer, in simple testing.  The extra complexity is mostly in the rules
 * for handling float numbers and continued string literals.  If you change
 * the lexical rules, verify that you haven't broken the no-backtrack
 * property by running flex with the "-b" option and checking that the
 * resulting "lex.backup" file says that no backing up is needed.  (As of
 * Postgres 9.2, this check is made automatically by the Makefile.)
 *
 *
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *      src/backend/parser/scan.l
 *
 * -------------------------------------------------------------------------
 */

/* Please note that the following line will be replaced with the contents of given file name even if with starting with a comment */
/*$$include "scan-dialect-prologue-top.l.h"*/

#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>

#include "scanner.h"
#include "scansup.h"
#include "ogsql_self_func.h"

/*$$exclude in dialect begin*/
/*
 * X-macro expansion: while kwlist.h is included below, every
 * OG_KEYWORD(kwname, value, category) entry it contains expands to just
 * "value,".  ScanKeywordTokens[] therefore holds one uint16 token value per
 * entry, in the order the entries appear in kwlist.h.
 */
#define OG_KEYWORD(kwname, value, category) value,

const uint16 ScanKeywordTokens[] = {
#include "kwlist.h"
};

/* Remove the helper again so OG_KEYWORD can be redefined for other expansions. */
#undef OG_KEYWORD
/*$$exclude in dialect end*/

/*
 * Set the type of YYSTYPE.
 */
#define YYSTYPE core_YYSTYPE

/*
 * Define core_yylex for flex >= 2.6.
 *
 * The version is compared as a (major, minor) pair.  Testing
 * "major >= 2 && minor >= 6" would wrongly skip the definition for a newer
 * major release with a small minor number (for example 3.0).
 */
#if FLEX_MAJOR_VERSION > 2 || (FLEX_MAJOR_VERSION == 2 && FLEX_MINOR_VERSION >= 6)
#define YY_DECL int core_yylex \
               (YYSTYPE * yylval_param, YYLTYPE * yylloc_param , yyscan_t yyscanner)
#endif

/*
 * Set the type of yyextra.  All state variables used by the scanner should
 * be in yyextra, *not* statically allocated.
 */
#define YY_EXTRA_TYPE core_yy_extra_type *

/*
 * Scanner start-up hook: reset the token location to byte offset 0,
 * line 1, column 1.
 */
#define YY_USER_INIT                \
    do {                            \
        yylloc->offset = 0;         \
        yylloc->loc.line = 1;       \
        yylloc->loc.column = 1;     \
    } while (0)

/*
 * Each call to yylex must set yylloc to the location of the found token
 * (expressed as a byte offset from the start of the input text).
 * When we parse a token that requires multiple lexer rules to process,
 * this should be done in the first such rule, else yylloc will point
 * into the middle of the token.
 *
 * The location is computed by update_current_location() from the current
 * match (yytext), the start of the scan buffer and the multi_line_sql flag.
 */
#define SET_YYLLOC()  update_current_location(yytext, yyextra->scanbuf, yylloc, yyextra->multi_line_sql)

/*
 * True when yyextra->is_hint_str or yyextra->include_ora_comment is set.
 * The C-style comment rules then accumulate the comment text and return it
 * as a COMMENTSTRING token instead of discarding it.
 */
#define COMMENT_NOT_IGNORED()  (yyextra->is_hint_str || yyextra->include_ora_comment)
/*
 * Move yylloc forward by `delta` bytes: the byte offset and the column both
 * grow by `delta`, the line number is left alone.
 */
#define ADVANCE_YYLLOC(delta)               \
    do {                                    \
        yylloc->offset += (delta);          \
        yylloc->loc.column += (delta);      \
    } while (0)

/*
 * Copy the NUL-terminated string `src` into newly allocated memory and store
 * the copy in `dest`.  The buffer is taken from the statement's pl_context
 * when that is set, otherwise from the statement's context.  If the
 * allocation fails, yyerror() reports it and returns from the enclosing
 * function.
 */
#define FLEX_MEM_STRDUP(dest, src)                                                              \
do {                                                                                            \
    size_t flex_dup_size = strlen(src) + 1;                                                     \
    int flex_dup_failed;                                                                        \
    if (SECUREC_UNLIKELY(og_yyget_extra(yyscanner)->core_yy_extra.stmt->pl_context == NULL)) {  \
        flex_dup_failed = (sql_alloc_mem(og_yyget_extra(yyscanner)->core_yy_extra.stmt->context, \
            flex_dup_size, (void **)&(dest)) != OG_SUCCESS);                                    \
    } else {                                                                                    \
        flex_dup_failed = (pl_alloc_mem(og_yyget_extra(yyscanner)->core_yy_extra.stmt->pl_context, \
            flex_dup_size, (void **)&(dest)) != OG_SUCCESS);                                    \
    }                                                                                           \
    if (flex_dup_failed) {                                                                      \
        yyerror("alloc mem failed ");                                                           \
    }                                                                                           \
    errno_t flex_dup_rc = memcpy_s((dest), flex_dup_size, (src), flex_dup_size - 1);            \
    knl_securec_check(flex_dup_rc);                                                             \
    (dest)[flex_dup_size - 1] = '\0';                                                           \
} while (0)

/*
 * Same as FLEX_MEM_STRDUP, except that the buffer for the copy is obtained
 * with sql_stack_alloc() on the current statement.  An allocation failure is
 * reported through yyerror(), which returns from the enclosing function.
 */
#define FLEX_STACK_STRDUP(dest, src)                                                    \
do {                                                                                    \
    size_t flex_dup_size = strlen(src) + 1;                                             \
    int flex_dup_failed = (sql_stack_alloc(og_yyget_extra(yyscanner)->core_yy_extra.stmt, \
        flex_dup_size, (void **)&(dest)) != OG_SUCCESS);                                \
    if (flex_dup_failed) {                                                              \
        yyerror("alloc mem failed ");                                                   \
    }                                                                                   \
    errno_t flex_dup_rc = memcpy_s((dest), flex_dup_size, (src), flex_dup_size - 1);    \
    knl_securec_check(flex_dup_rc);                                                     \
    (dest)[flex_dup_size - 1] = '\0';                                                   \
} while (0)

/*
 * Literal-buffer helpers.  startlit() resets the accumulated literal length
 * to zero; the rules below then build a literal with addlit()/addlitchar()
 * and fetch the result with litbufdup() or litbuf_udeescape().
 */
#define startlit()  ( yyextra->literallen = 0 )
static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
static char *litbufdup(core_yyscan_t yyscanner);
static status_t litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner, char **res);
static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
/* Numeric literal processing */
static int process_integer_literal(const char *token, YYSTYPE *lval, core_yyscan_t yyscanner);
static status_t process_binary_float_double(char *token, int len, core_yyscan_t yyscanner, YYSTYPE *lval);
/* Unicode escape support (UTF-16 surrogate handling and UTF-8 output) */
static bool is_utf16_surrogate_first(uint32 c);
static bool is_utf16_surrogate_second(uint32 c);
static uint32 surrogate_pair_to_codepoint(uint32 first, uint32 second);
static unsigned char* unicode_to_utf8(uint32 c, unsigned char* utf8string);
static status_t addunicode(uint32 c, yyscan_t yyscanner);
static int pg_mblen(const char* mbstr);
/* NOTE(review): the only helper declared without `static` - confirm external linkage is intended */
status_t handle_float_overflow(char *token, core_yyscan_t yyscanner, char **result);
/* Location tracking (used by SET_YYLLOC), memory allocation and identifier filtering */
static void update_current_location(char *cur_pos, char *origin_str, lex_location_t *loc, bool8 multi_line_sql);
static void flex_alloc_mem(void *context, yy_size_t size, void **buf);
static bool32 need_ignore_for_func(sql_stmt_t *stmt, char* ident, int len);

/*
 * Report a lexical error through scanner_yyerror() and make the enclosing
 * function return LEX_ERROR_TOKEN.  Because the macro expands to a `return`,
 * nothing written after a yyerror() call on the same path is executed; it
 * needs `yyscanner` to be in scope at the point of use.
 */
#define yyerror(msg)                    \
do {                                    \
    scanner_yyerror(msg, yyscanner);    \
    return LEX_ERROR_TOKEN;             \
} while (0)

/*
 * flex 2.5.35 generates the two column accessors below but forgets to emit
 * declarations for them, which triggers -Wmissing-prototypes warnings.
 * Declaring them here is harmless even with a flex version that has the
 * bug fixed.
 */
int core_yyget_column(yyscan_t yyscanner);
void core_yyset_column(int column_no, yyscan_t yyscanner);

/* Some extensions, e.g., shark,
 * may include the original PG lexer in addition to their own lexer.
 * This macro makes it possible to add extra logic only at the beginning of the PG lexer.
 * You can undef the macro in your own lexer-related extension
 * to exclude the PG-specific logic.
 */
#define CT_YYLEX

/* Please note that the following line will be replaced with the contents of given file name even if with starting with a comment */
/*$$include "scan-dialect-prologue.l.h"*/

%}

/*
 * Scanner generation options:
 *  - reentrant, bison-bridge, bison-locations: generate a reentrant scanner
 *    whose yylex receives yylval and yylloc, for use with a bison parser.
 *  - 8bit: accept 8-bit input characters.
 *  - never-interactive: input is never read interactively.
 *  - nodefault: no default rule; every input must be matched by an explicit rule.
 *  - noinput, nounput: do not generate the input() and unput() routines.
 *  - noyywrap: do not call yywrap() at end of input.
 *  - noyyalloc, noyyrealloc, noyyfree: the memory routines are not generated by
 *    flex and must be supplied by the project (not visible in this part of the file).
 *  - warn: enable flex warnings.
 *  - prefix: generated symbols are named core_yy* instead of yy*.
 */
%option reentrant
%option bison-bridge
%option bison-locations
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option noyyalloc
%option noyyrealloc
%option noyyfree
%option warn
%option prefix="core_yy"

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xbq> delimited identifiers (backquoted identifiers)
 *  <xh> hexadecimal numeric string
 *  <xq> standard quoted strings
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xdolq> $foo$ quoted strings
 *  <xui> quoted identifier with Unicode escapes
 *  <xus> quoted string with Unicode escapes
 *  <xeu> Unicode surrogate pair in extended quoted string
 */

/*
 * Exclusive start conditions used by the rules below.  xbq is entered on a
 * backquote and scans a backquoted identifier.
 */
%x xc
%x xd
%x xh
%x xe
%x xq
%x xdolq
%x xbq
%x xui
%x xus
%x xeu

/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  Comments that start with -- and extend to the next
 * newline are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix scanner_isspace()
 * to agree, and see also the plpgsql lexer.
 */

/* Single blank characters, and a "--" comment running up to (not including) the newline */
space            [ \t\n\r\f]
horiz_space        [ \t\f]
newline            [\n\r]
non_newline        [^\n\r]
comment            ("--"{non_newline}*)

/*
 * {whitespace} accepts either a run of blanks or one "--" comment, while
 * {whitespace_only} is a run of blanks only.  The rules section matches
 * {whitespace_only} and {comment} separately so that a comment can be
 * returned as a token when requested.
 */
whitespace        ({space}+|{comment})
whitespace_only    ({space}+)

/*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

/* Whitespace that contains at least one newline outside of any "--" comment */
special_whitespace        ({space}+|{comment}{newline})
horiz_whitespace        ({horiz_space}|{comment})
whitespace_with_newline    ({horiz_whitespace}*{newline}{special_whitespace}*)

/*
 * To ensure that {quotecontinue} can be scanned without having to back up
 * if the full pattern isn't matched, we include trailing whitespace in
 * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 * except for {quote} followed by whitespace and just one "-" (not two,
 * which would start a {comment}).  To cover that we have {quotefail}.
 * The actions for {quotestop} and {quotefail} must throw back characters
 * beyond the quote proper.
 */
/*
 * quotestop:     closing quote plus any trailing whitespace/comments
 * quotecontinue: closing quote, whitespace containing a newline, opening quote
 *                (two adjacent literals that are to be concatenated)
 * quotefail:     closing quote, optional whitespace, then a single "-"
 */
quote            '
quotestop        {quote}{whitespace}*
quotecontinue    {quote}{whitespace_with_newline}{quote}
quotefail        {quote}{whitespace}*"-"

/* National character */
xnstart            [nN]{quote}

/* Quoted string that allows backslash escapes */
xestart            [eE]{quote}
xeinside        [^\\']+
xeescape        [\\][^0-7]
xeoctesc        [\\][0-7]{1,3}
xehexesc        [\\]x[0-9A-Fa-f]{1,2}
xeunicode        [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
/* a \u or \U escape with fewer hex digits than required (0-3 resp. 0-7) */
xeunicodefail    [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})

/* Extended quote
 * xqdouble implements embedded quote, ''''
 */
xqstart            {quote}
xqdouble        {quote}{quote}
xqinside        [^']+

/* Hexadecimal number */
/* NOTE(review): only an upper-case X opens a hexadecimal string; x'...' is not matched by xhstart */
xhstart			X{quote}
/* NOTE(review): unlike xqinside this pattern uses "*" and so can also match the empty string */
xhinside		[^']*
/* Hexadecimal number, start with 0x */
xhnumber		0x[0-9A-Fa-f]+

/* $foo$ style quotes ("dollar quoting")
 * The quoted string starts with $foo$ where "foo" is an optional string
 * in the form of an identifier, except that it may not contain "$",
 * and extends to the first occurrence of an identical string.
 * There is *no* processing of the quoted text.
 *
 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 * fails to match its trailing "$".
 */
/* dolqdelim is "$$" or "$tag$"; dolqfailed is "$tag" with the closing "$" missing */
dolq_start        [A-Za-z\200-\377_]
dolq_cont        [A-Za-z\200-\377_0-9]
dolqdelim        \$({dolq_start}{dolq_cont}*)?\$
dolqfailed        \${dolq_start}{dolq_cont}*
dolqinside        [^$]+

/* Double quote
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote            \"
xdstart            {dquote}
xdstop            {dquote}
xddouble        {dquote}{dquote}
xdinside        [^"]+

/*
 * backquote quote
 * Allows embedded spaces and other special characters into identifiers.
 * Note that, unlike the double-quote definitions, there is no pattern for a
 * doubled backquote inside the identifier.
 */
bquote			\`
xbqstart		{bquote}
xbqstop			{bquote}
xbqinside		[^`]+

/* Unicode escapes */
/* uescape: the keyword UESCAPE (any letter case), optional whitespace, then one quoted character */
uescape            [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
/* error rule to avoid backup */
uescapefail        ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])

/* Quoted identifier with Unicode escapes */
/* xuistop1: closing quote without a complete UESCAPE clause; xuistop2: closing quote followed by a complete one */
xuistart        [uU]&{dquote}
xuistop1        {dquote}{whitespace}*{uescapefail}?
xuistop2        {dquote}{whitespace}*{uescape}

/* Quoted string with Unicode escapes */
/* xusstop1 / xusstop2: same split as xuistop1 / xuistop2, for single-quoted strings */
xusstart        [uU]&{quote}
xusstop1        {quote}{whitespace}*{uescapefail}?
xusstop2        {quote}{whitespace}*{uescape}

/* error rule to avoid backup */
xufailed        [uU]&


/* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
 * a longer match --- remember lex will prefer a longer match!  Also, if we
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
 * The solution is two-fold:
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 *    {operator} would. Then the tie-breaker (first matching rule of same
 *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *    in case it contains a star-slash that should terminate the comment.
 * 2. In the operator rule, check for slash-star within the operator, and
 *    if found throw it back with yyless().  This handles the plus-slash-star
 *    problem.
 * Dash-dash comments have similar interactions with the operator rule.
 */
/* xcstart: slash-star plus any operator characters that directly follow it; xcstop: one or more stars, then a slash */
xcstart            \/\*{op_chars}*
xcstop            \*+\/
xcinside        [^*/]+

/* Identifiers: may start with a letter, a high-bit byte, "_" or "#"; may continue with those, digits and "$" */
digit            [0-9]
ident_start        [A-Za-z\200-\377_\#]
ident_cont        [A-Za-z\200-\377_0-9\$\#]

identifier        {ident_start}{ident_cont}*

/* Fixed multi-character tokens */
typecast        "::"
plus_join        "(+)"
dot_dot            \.\.
colon_equals    ":="
para_equals    "=>"

/* "@@" followed by zero or more identifier characters */
set_ident_start    "@@"
set_ident_cont    [A-Za-z\200-\377_0-9\$\#]
set_identifier    {set_ident_start}{set_ident_cont}*

/*
 * "self" is the set of chars that should be returned as single-character
 * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=\@\&\|]
op_chars        [\~\!\#\^\&\|\?\+\-\*\/\%\<\>\=\@]
operator        {op_chars}+

/* we no longer allow unary minus in numbers.
 * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
 *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */

/* Plain integers and decimals; {decimalfail} is an integer directly followed by ".." */
integer            {digit}+
decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
decimalfail        {digit}+\.\.
/*
 * Integer or decimal with a trailing f/F or d/D suffix.
 * NOTE(review): presumably single/double precision float literals; the rules
 * that use these definitions are outside this part of the file.
 */
decimalf        ({integer}|{decimal})[fF]
decimald        ({integer}|{decimal})[dD]
/* Numbers with an exponent, plus the incomplete forms that avoid scanner backup */
real            ({integer}|{decimal})[Ee][-+]?{digit}+
realbadexp      ({real})[Ee]
realfail1        ({integer}|{decimal})[Ee]
realfail2        ({integer}|{decimal})[Ee][-+]
realf           ({real})[fF]
reald           ({real})[dD]

/*
 * An integer directly followed by one unit letter (b, k, m, g, t, p, e; either case).
 * NOTE(review): presumably byte-size literals - confirm against the rules using them.
 * NOTE(review): size_eb matches the same text as the {integer}[Ee] case of
 * realfail1, so which one wins depends on the order of the rules.
 */
size_b           ({integer})[bB]
size_kb          ({integer})[kK]
size_mb          ({integer})[mM]
size_gb          ({integer})[gG]
size_tb          ({integer})[tT]
size_pb          ({integer})[pP]
size_eb          ({integer})[eE]

/* "$" followed by an integer */
param            \${integer}

/* ":" followed by an identifier or an integer */
newParam        :({identifier}|{integer})

/* Same as newParam, followed by optional blanks and a closing "]" */
newArray        :({identifier}|{integer}){space}*\]

/*
 * "@" followed by a bare name or by a name in single, double or back quotes.
 * NOTE(review): set_user_cont lists only \377, not the \200-\377 range used
 * by the other identifier classes - confirm this is intended.
 */
set_user_cont            [A-Za-z\377_0-9\$\.]
set_user_escape_quote        [^']
set_user_escape_dquote        [^"]
set_user_escape_bquote        [^`]    
setUserIdentifier        @(({set_user_cont}+)|(\'{set_user_escape_quote}+\')|(\"{set_user_escape_dquote}+\")|(\`{set_user_escape_bquote}+\`))

/* Any single character except newline */
other            .

/*
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 * Other quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string.
 * Note that xcstart must appear before operator, as explained above!
 *  Also whitespace (comment) must appear before operator.
 */

/* Please note that the following line will be replaced with the contents of given file name even if with starting with a comment */
/*$$include "scan-dialect-decl.l"*/

%%

 /* Please note that the following line will be replaced with the contents of given file name even if with starting with a comment */
 /*$$include "scan-dialect-rule.l"*/

{whitespace_only}    {
                    /* blank space carries no token: ignore */
                }

{comment}        {
                    /*
                     * A "--" comment is normally discarded.  When
                     * include_ora_comment is set, its text is returned to the
                     * parser as a COMMENTSTRING token instead.
                     */
                    if (yyextra->include_ora_comment)
                    {
                        SET_YYLLOC();
                        /*
                         * Begin a fresh literal before collecting the text.
                         * Every other rule resets the buffer with startlit()
                         * before its first addlit(); without the reset the
                         * returned string would start with whatever the
                         * previous literal left in the buffer.
                         */
                        startlit();
                        addlit(yytext, yyleng, yyscanner);
                        yylval->str = litbufdup(yyscanner);
                        return COMMENTSTRING;
                    }
                    /* ignore */
                }

{xcstart}    {
                    /*
                     * Start of a C-style comment.  The location is recorded
                     * first so that a syntax error inside the comment can be
                     * reported at the right place.
                     */
                    SET_YYLLOC();
                    BEGIN(xc);
                    yyextra->xcdepth = 0;
                    /* Keep only the two opening characters; the rest is rescanned inside <xc> */
                    yyless(2);
                    if (COMMENT_NOT_IGNORED()) {
                        /* comment text is wanted: open a new literal with the opener */
                        startlit();
                        addlit(yytext, yyleng, yyscanner);
                    }
        }

<xc>{xcstart}    {
                    /* A comment opener inside a comment: one more nesting level */
                    yyextra->xcdepth += 1;
                    /* Keep only the two opening characters; the rest is rescanned */
                    yyless(2);
                    if (COMMENT_NOT_IGNORED()) {
                        addlit(yytext, yyleng, yyscanner);
                    }
        }

<xc>{xcstop}    {
                    /*
                     * A comment terminator closes one nesting level.  The
                     * terminator text is always added to the collected
                     * comment, but the COMMENTSTRING token is only returned
                     * once the outermost comment is closed.  Returning it for
                     * an inner terminator would hand out a truncated comment
                     * while the scanner is still inside <xc>.
                     */
                    if (COMMENT_NOT_IGNORED())
                    {
                        addlit(yytext, yyleng, yyscanner);
                    }

                    if (yyextra->xcdepth > 0)
                    {
                        /* closed a nested comment: stay in <xc> and keep collecting */
                        (yyextra->xcdepth)--;
                    }
                    else
                    {
                        BEGIN(INITIAL);
                        if (COMMENT_NOT_IGNORED())
                        {
                            yylval->str = litbufdup(yyscanner);
                            /* report the length of the whole collected comment */
                            yyleng = yyextra->literallen;
                            yyextra->is_hint_str = false;
                            return COMMENTSTRING;
                        }
                    }
        }

<xc>{xcinside}    |
<xc>{op_chars}    |
<xc>\*+        {
                    /*
                     * Comment body: ordinary text, a single operator character,
                     * or a run of stars not followed by a slash.  All three are
                     * collected when the comment text is wanted and dropped
                     * otherwise.
                     */
                    if (COMMENT_NOT_IGNORED()) {
                        addlit(yytext, yyleng, yyscanner);
                    }
        }

<xc><<EOF>>        {
                    /* input ended inside a comment; yyerror() returns the error token */
                    yyerror("unterminated /* comment");
                }

{xnstart}        {
                    /*
                     * National character literal.  Only the leading 'n' is
                     * consumed here, so the quoted part is scanned afterwards
                     * as a normal character string, preceded by an
                     * internally generated "NCHAR" keyword.
                     */
                    int nchar_kw;

                    SET_YYLLOC();
                    yyless(1);                /* eat only 'n' this time */

                    nchar_kw = ScanKeywordLookup("nchar", yyextra->keywordlist);
                    if (nchar_kw < 0) {
                        /* NCHAR is not a keyword: return the identifier "n" */
                        FLEX_MEM_STRDUP(yylval->str, "n");
                        yyextra->origin_str = NULL;
                        yyextra->ident_quoted = false;
                        yyextra->is_hint_str = false;
                        return IDENT;
                    }

                    yyextra->is_hint_str = false;
                    yylval->keyword = GetScanKeyword(nchar_kw, yyextra->keywordlist);
                    return yyextra->keyword_tokens[nchar_kw];
                }

{xqstart}        {
                    /* opening quote of a standard string literal */
                    SET_YYLLOC();
                    yyextra->saw_non_ascii = false;
                    yyextra->warn_on_first_escape = true;
                    startlit();
                    BEGIN(xq);
                }
{xestart}        {
                    /* E'...' literal: backslash escapes are processed in <xe> */
                    SET_YYLLOC();
                    yyextra->saw_non_ascii = false;
                    yyextra->warn_on_first_escape = false;
                    startlit();
                    BEGIN(xe);
                }
{xusstart}        {
                    /* U&'...' literal: Unicode escapes are resolved when it is closed */
                    SET_YYLLOC();
                    startlit();
                    BEGIN(xus);
                }
<xq,xe>{quotestop}    |
<xq,xe>{quotefail} {
                    /*
                     * End of the literal.  Everything matched after the closing
                     * quote is given back to be scanned again.
                     */
                    BEGIN(INITIAL);
                    yyless(1);
                    yylval->str = litbufdup(yyscanner);
                    yyextra->is_hint_str = false;
                    /* length reported for the token: literal contents plus two */
                    yyleng = yyextra->literallen + 2;
                    return SCONST;
                }
<xus>{xusstop1} {
                    /* No complete UESCAPE clause: backslash is the escape character */
                    BEGIN(INITIAL);
                    /* throw back all but the quote */
                    yyless(1);
                    if (litbuf_udeescape('\\', yyscanner, &yylval->str) != OG_SUCCESS) {
                        yyerror("invalid Unicode character");
                    }
                    yyextra->is_hint_str = false;
                    yyleng += yyextra->literallen + 3;
                    return SCONST;
        }
<xus>{xusstop2} {
                    /*
                     * UESCAPE clause present: the escape character is the one
                     * between the last two quotes of the match.
                     */
                    unsigned char escape_char = yytext[yyleng-2];

                    BEGIN(INITIAL);
                    if (litbuf_udeescape(escape_char, yyscanner, &yylval->str) != OG_SUCCESS) {
                        yyerror("invalid Unicode character");
                    }
                    yyextra->is_hint_str = false;
                    yyleng += yyextra->literallen + 3;
                    return SCONST;
        }
<xq,xe,xus>{xqdouble} {
                    /* two adjacent quotes stand for one quote character */
                    addlitchar('\'', yyscanner);
                }
<xq,xus>{xqinside}  |
<xe>{xeinside}  {
                    /* ordinary literal text is copied as is */
                    addlit(yytext, yyleng, yyscanner);
                }
<xe>{xeunicode} {
                    /* \uXXXX or \UXXXXXXXX: the hex digits start after the two-character prefix */
                    uint32 codepoint = strtoul(yytext+2, NULL, 16);

                    if (is_utf16_surrogate_first(codepoint)) {
                        /* first half of a surrogate pair: wait for the second half in <xeu> */
                        yyextra->utf16_first_part = codepoint;
                        BEGIN(xeu);
                    } else if (is_utf16_surrogate_second(codepoint)) {
                        yyerror("invalid Unicode surrogate pair");
                    } else if (addunicode(codepoint, yyscanner) != OG_SUCCESS) {
                        yyerror("invalid Unicode escape value");
                    }
                }
<xeu>{xeunicode} {
                    /* second escape of a surrogate pair; the first half is in utf16_first_part */
                    uint32 second_half = strtoul(yytext+2, NULL, 16);
                    uint32 combined;

                    if (!is_utf16_surrogate_second(second_half)) {
                        yyerror("invalid Unicode surrogate pair");
                    }

                    combined = surrogate_pair_to_codepoint(yyextra->utf16_first_part, second_half);
                    if (addunicode(combined, yyscanner) != OG_SUCCESS) {
                        yyerror("invalid Unicode escape value");
                    }

                    BEGIN(xe);
                }
<xeu>.            |
<xeu>\n            {
                    /* any other input after the first half of a surrogate pair is an error */
                    yyerror("invalid Unicode surrogate pair");
                }
<xeu><<EOF>>    {
                    yyerror("invalid Unicode surrogate pair");
                }
<xe,xeu>{xeunicodefail}    {
                    /*
                     * A \u or \U escape without the required 4 or 8 hex
                     * digits.  Report it instead of silently dropping the
                     * matched text from the literal (and, in <xeu>, waiting
                     * for more input after an unfinished surrogate pair).
                     */
                    yyerror("invalid Unicode escape");
                }
<xe>{xeescape}  {
                    /* backslash followed by a non-octal character */
                    unsigned char unescaped = unescape_single_char(yytext[1], yyscanner);

                    addlitchar(unescaped, yyscanner);
                }
<xe>{xeoctesc}  {
                    /* backslash followed by one to three octal digits */
                    unsigned char octal_value = strtoul(yytext+1, NULL, 8);

                    addlitchar(octal_value, yyscanner);
                    if (octal_value == '\0' || IS_HIGHBIT_SET(octal_value)) {
                        yyextra->saw_non_ascii = true;
                    }
                }
<xe>{xehexesc}  {
                    /* backslash, 'x', then one or two hex digits */
                    unsigned char hex_value = strtoul(yytext+2, NULL, 16);

                    addlitchar(hex_value, yyscanner);
                    if (hex_value == '\0' || IS_HIGHBIT_SET(hex_value)) {
                        yyextra->saw_non_ascii = true;
                    }
                }
<xq,xe,xus>{quotecontinue} {
                    /* two literals separated by whitespace with a newline are joined: nothing to add */
                }
<xe>.            {
                    /* This is only needed for \ just before EOF */
                    addlitchar(yytext[0], yyscanner);
                }
<xq,xe,xus><<EOF>>        {
                    /* input ended inside the literal; yyerror() returns the error token */
                    yyerror("unterminated quoted string");
                }

<xh>{xhinside}  {
                    /* hexadecimal digits (any text up to the next quote) */
                    addlit(yytext, yyleng, yyscanner);
                }
<xh>{quotecontinue} {
                        /* continued literal: the pieces are simply joined */
                    }
{xhstart}   {
                /*
                 * Start of a hexadecimal string.  The opening X and quote are
                 * kept as the beginning of the collected literal.
                 */
                SET_YYLLOC();
                startlit();
                addlit(yytext, yyleng, yyscanner);
                BEGIN(xh);
            }
<xh>{quotestop} |
<xh>{quotefail} {
                    /*
                     * End of the hexadecimal string.  The match may extend
                     * past the closing quote (trailing whitespace, comments,
                     * and for {quotefail} a "-"), so everything after the
                     * quote is given back and scanned as separate tokens.
                     * Without this a "-" following the literal would be lost.
                     */
                    yyless(1);
                    BEGIN(INITIAL);
                    addlitchar('\'', yyscanner);
                    yylval->str = litbufdup(yyscanner);
                    /*
                     * The literal buffer holds the whole token (X, both quotes
                     * and the digits), so its length is the token length, as
                     * reported by the other multi-rule tokens.
                     */
                    yyleng = yyextra->literallen;
                    yyextra->is_hint_str = false;
                    return XCONST;
                }
<xh><<EOF>> {
                    /* input ended inside the literal; yyerror() returns the error token */
                    yyerror("unterminated hexadecimal string literal");
                }
{dolqdelim}        {
                    /*
                     * Opening delimiter of a dollar-quoted string.  The exact
                     * delimiter text is remembered in yyextra->dolqstart so
                     * that the <xdolq> rules can compare later delimiters
                     * against it.
                     */
                    SET_YYLLOC();
                    /* FLEX_MEM_STRDUP returns from the lexer through yyerror() if the allocation fails */
                    FLEX_MEM_STRDUP(yyextra->dolqstart, yytext);
                    BEGIN(xdolq);
                    startlit();
                }
{dolqfailed}    {
                    /* "$tag" without a closing "$": not a dollar quote after all */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    /* throw back all but the initial "$" */
                    yyless(1);
                    /* and treat it as {other} */
                    return yytext[0];
                }
<xdolq>{dolqdelim} {
                    if (strcmp(yytext, yyextra->dolqstart) == 0)
                    {
                        /*
                         * Matching closing delimiter.  dolqstart was obtained
                         * with FLEX_MEM_STRDUP, i.e. from sql_alloc_mem() or
                         * pl_alloc_mem() on the statement's context, not from
                         * malloc(), so it must not be handed to free().  Only
                         * the reference is dropped here.
                         * NOTE(review): the buffer is presumably released
                         * together with its owning context - confirm.
                         */
                        yyextra->dolqstart = NULL;
                        BEGIN(INITIAL);
                        yylval->str = litbufdup(yyscanner);
                        yyextra->is_hint_str = false;
                        return SCONST;
                    }
                    else
                    {
                        /*
                         * When we fail to match $...$ to dolqstart, transfer
                         * the $... part to the output, but put back the final
                         * $ for rescanning.  Consider $delim$...$junk$delim$
                         */
                        addlit(yytext, yyleng-1, yyscanner);
                        yyless(yyleng-1);
                    }
                }
<xdolq>{dolqinside} |
<xdolq>{dolqfailed} {
                    /* text without "$", or "$tag" with no closing "$": copied verbatim */
                    addlit(yytext, yyleng, yyscanner);
                }
<xdolq>.        {
                    /* This is only needed for $ inside the quoted text */
                    addlitchar(yytext[0], yyscanner);
                }
<xdolq><<EOF>>    {
                    /* input ended inside the literal; yyerror() returns the error token */
                    yyerror("unterminated dollar-quoted string");
                }

{xdstart}        {
                    /* opening double quote of a delimited identifier */
                    SET_YYLLOC();
                    startlit();
                    BEGIN(xd);
                }
{xuistart}        {
                    /* U&"..." identifier with Unicode escapes */
                    SET_YYLLOC();
                    startlit();
                    BEGIN(xui);
                }
{xbqstart}      {
                    /* opening backquote of a delimited identifier */
                    SET_YYLLOC();
                    startlit();
                    BEGIN(xbq);
                }
<xd>{xdstop}    {
                    /* closing double quote: the collected text becomes a quoted identifier */
                    BEGIN(INITIAL);
                    if (yyextra->literallen == 0) {
                        yyerror("zero-length delimited identifier");
                    }
                    yylval->str = litbufdup(yyscanner);
                    yyextra->origin_str = NULL;
                    yyextra->ident_quoted = true;
                    yyextra->is_hint_str = false;
                    /* length reported for the token: identifier text plus the two quotes */
                    yyleng = yyextra->literallen + 2;
                    return IDENT;
                }
<xui>{xuistop1}    {
                    /* No complete UESCAPE clause: backslash is the escape character */
                    char           *unescaped;

                    BEGIN(INITIAL);
                    if (yyextra->literallen == 0) {
                        yyerror("zero-length delimited identifier");
                    }
                    if (litbuf_udeescape('\\', yyscanner, &unescaped) != OG_SUCCESS) {
                        yyerror("invalid Unicode character");
                    }
                    yylval->str = unescaped;
                    /* throw back all but the quote */
                    yyless(1);
                    yyleng += yyextra->literallen + 3;
                    yyextra->origin_str = NULL;
                    yyextra->ident_quoted = false;
                    yyextra->is_hint_str = false;
                    return IDENT;
                }
<xui>{xuistop2}    {
                    /*
                     * UESCAPE clause present: the escape character is the one
                     * between the last two quotes of the match.
                     */
                    char           *unescaped;

                    BEGIN(INITIAL);
                    if (yyextra->literallen == 0) {
                        yyerror("zero-length delimited identifier");
                    }
                    if (litbuf_udeescape(yytext[yyleng - 2], yyscanner, &unescaped) != OG_SUCCESS) {
                        yyerror("invalid Unicode character");
                    }
                    yylval->str = unescaped;
                    yyleng += yyextra->literallen + 3;
                    yyextra->origin_str = NULL;
                    yyextra->ident_quoted = false;
                    yyextra->is_hint_str = false;
                    return IDENT;
                }
<xbq>{xbqstop}  {
                /*
                 * Closing backquote: the collected text becomes a quoted
                 * identifier.  As in the double-quoted identifier rule, the
                 * reported token length is the identifier text plus the two
                 * quote characters, and origin_str is cleared so that no
                 * value left over from an earlier identifier is attached
                 * to this token.
                 */
                char *ident;

                BEGIN(INITIAL);
                if (yyextra->literallen == 0)
                    yyerror("zero-length delimited identifier");
                ident = litbufdup(yyscanner);
                yylval->str = ident;
                yyleng = yyextra->literallen + 2;
                yyextra->ident_quoted = true;
                yyextra->is_hint_str = false;
                yyextra->origin_str = NULL;
                return IDENT;
            }
<xd,xui>{xddouble}    {
                    /* two adjacent double quotes stand for one double quote */
                    addlitchar('"', yyscanner);
                }
<xd,xui>{xdinside}    |
<xbq>{xbqinside}    {
                        /* ordinary identifier text is copied as is */
                        addlit(yytext, yyleng, yyscanner);
                    }
<xd,xui><<EOF>>        { yyerror("unterminated quoted identifier"); return 0;}

{xufailed}    {
                    /*
                     * Fallback for text that started like a Unicode-escaped
                     * literal but did not turn out to be one: only the leading
                     * letter is kept and returned as a plain identifier.
                     */
                    char           *ident;

                    SET_YYLLOC();
                    /* throw back all but the initial u/U */
                    yyless(1);
                    /*
                     * and treat it as {identifier}: fold it with the same
                     * helper the {identifier} rule uses (upper case), so the
                     * name resolves identically whichever rule matched it.
                     */
                    ident = upcase_truncate_identifier(yytext, yyleng, yyextra->warnOnTruncateIdent);
                    yylval->str = ident;
                    yyextra->ident_quoted = false;
                    yyextra->is_hint_str = false;
                    yyextra->origin_str = NULL;
                    return IDENT;
                }

{typecast}        {
                    /* record the token position, leave hint mode, return TYPECAST */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return TYPECAST;
                }

{plus_join}    {
                    /* record the token position, leave hint mode, return ORA_JOINOP */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return ORA_JOINOP;
                }

{dot_dot}        {
                    /* record the token position, leave hint mode, return DOT_DOT */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return DOT_DOT;
                }

{colon_equals}    {
                    /* record the token position, leave hint mode, return COLON_EQUALS */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return COLON_EQUALS;
                }

{para_equals}    {
                    /* record the token position, leave hint mode, return PARA_EQUALS */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return PARA_EQUALS;
                }

{self}            {
                    SET_YYLLOC();
                    /*
                     * Single-character token, returned as its own character
                     * code.  While dolqstart is unset, the parenthesis nesting
                     * depth is tracked in paren_depth: '(' increments it and
                     * ')' decrements it, never below zero.
                     *
                     * NOTE(review): an earlier version of this comment spoke
                     * of recording top-level semicolons in a location list;
                     * no such bookkeeping is done in this action.
                     */
                    if (yyextra->dolqstart == NULL)
                    {
                        if (yytext[0] == '(')
                            yyextra->paren_depth++;
                        else if (yytext[0] == ')' && yyextra->paren_depth > 0)
                            yyextra->paren_depth--;
                    }
                    yyextra->is_hint_str = false;
                    return yytext[0];
                }

{operator}        {
                    /*
                     * Check for embedded slash-star or dash-dash; those
                     * are comment starts, so operator must stop there.
                     * Note that slash-star or dash-dash at the first
                     * character will match a prior rule, not this one.
                     */
                    int        nchars = yyleng;
                    char   *slashstar = strstr(yytext, "/*");
                    char   *dashdash = strstr(yytext, "--");

                    if (slashstar && dashdash)
                    {
                        /* if both appear, take the first one */
                        if (slashstar > dashdash)
                            slashstar = dashdash;
                    }
                    else if (!slashstar)
                        slashstar = dashdash;
                    if (slashstar)
                        nchars = slashstar - yytext;

                    /*
                     * A question mark is a parameter placeholder and always
                     * ends the operator.  Only the first nchars characters
                     * are searched: anything at or after an embedded comment
                     * start belongs to the comment, so a question mark found
                     * there must neither turn the token into a PARAM nor push
                     * the token length back past the comment start.
                     */
                    char *qMark = (char *)memchr(yytext, '?', (size_t)nchars);
                    if (qMark != NULL) {
                        if (qMark == yytext) {
                            /*
                             * Begins with ?: keep just that character and hand
                             * everything after it (including a comment start)
                             * back to the scanner.
                             */
                            if (yyleng > 1)
                                yyless(1);
                            SET_YYLLOC();
                            yylval->ival = 1;
                            yyextra->is_hint_str = false;
                            return PARAM;
                        }
                        /*
                         * Not at the start: cut the operator right before the
                         * question mark.  The yyless() call happens below in
                         * the 'nchars < yyleng' branch.
                         */
                        nchars = qMark - yytext;
                    }

                    /*
                     * For SQL compatibility, '+' and '-' cannot be the
                     * last char of a multi-char operator unless the operator
                     * contains chars that are not in SQL operators.
                     * The idea is to lex '=-' as two operators, but not
                     * to forbid operator names like '?-' that could not be
                     * sequences of SQL operators.
                     */
                    while (nchars > 1 &&
                           (yytext[nchars-1] == '+' ||
                            yytext[nchars-1] == '-'))
                    {
                        /* two bars followed by a sign: keep the bars, give the sign back */
                        if (nchars == 3 && yytext[0] == '|' && yytext[1] == '|') {
                            nchars--;
                            break;
                        }
                        int        ic;

                        for (ic = nchars-2; ic >= 0; ic--)
                        {
                            if (strchr("~!#^&|?%", yytext[ic]))
                                break;
                        }
                        if (ic >= 0)
                            break; /* found a char that makes it OK */
                        nchars--; /* else remove the +/-, and check again */
                    }

                    SET_YYLLOC();

                    if (nchars < (int)yyleng)
                    {
                        /* Strip the unwanted chars from the token */
                        yyless(nchars);
                        /*
                         * If what we have left is only one char, and it's
                         * one of the characters matching "self", then
                         * return it as a character token the same way
                         * that the "self" rule would have.
                         */
                        if (nchars == 1 &&
                            strchr(",()[].;:+-*/%^<>=@|&", yytext[0]))
                        {
                            yyextra->is_hint_str = false;
                            return yytext[0];
                        }
                    }

                    /*
                     * Complain if operator is too long.  Unlike the case
                     * for identifiers, we make this an error not a notice-
                     * and-truncate, because the odds are we are looking at
                     * a syntactic mistake anyway.
                     */
                    if (nchars >= NAMEDATALEN)
                        yyerror("operator too long");

                    /* Convert "!=" operator to "<>" for compatibility */
                    if (strcmp(yytext, "!=") == 0 || strcmp(yytext, "^=") == 0)
                    {
                        FLEX_MEM_STRDUP(yylval->str, "<>");
                        yyextra->is_hint_str = false;
                        return CmpOp;
                    }
                    else if (strcmp(yytext, ">=") == 0 || strcmp(yytext, "<=") == 0 || strcmp(yytext, "<>") == 0)
                    {
                        FLEX_MEM_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return CmpOp;
                    }
                    else if (strcmp(yytext, "||") == 0)
                    {
                        FLEX_MEM_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return OPER_CAT;
                    }
                    else if (strcmp(yytext, "<<") == 0)
                    {
                        FLEX_MEM_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return OPER_LSHIFT;
                    }
                    else if (strcmp(yytext, ">>") == 0)
                    {
                        FLEX_MEM_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return OPER_RSHIFT;
                    }
                    else
                    {
                        /*
                         * Any other operator spelling is rejected.
                         * NOTE(review): no token is returned on this path, so
                         * unless yyerror itself leaves the action the scanner
                         * goes on to the next token -- confirm that is intended.
                         */
                        yyerror("invalid operator");
                    }
                }
{newArray}        {
                    /* keep only the first matched character and return it as a character token */
                    yyless(1);
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return yytext[0];
                }
{param}            {
                    /* PARAM token; its value is the length of the matched text */
                    SET_YYLLOC();
                    yylval->ival = strlen(yytext);
                    yyextra->is_hint_str = false;
                    return PARAM;
                }
{newParam}        {
                    /* PARAM token; its value is the length of the matched text */
                    SET_YYLLOC();
                    yylval->ival = strlen(yytext);
                    yyextra->is_hint_str = false;
                    return PARAM;
                }
{xhnumber}  {
                /* XCONST token carrying a copy of the matched text */
                SET_YYLLOC();
                yyextra->is_hint_str = false;
                FLEX_MEM_STRDUP(yylval->str, yytext);
                return XCONST;
            }
{integer}        {
                    /*
                     * process_integer_literal() picks the token: ICONST,
                     * I64CONST, or FCONST when the value does not fit in
                     * 64 bits.
                     */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return process_integer_literal(yytext, yylval, yyscanner);
                }
{decimal}        {
                    /* FCONST token carrying a copy of the matched text */
                    SET_YYLLOC();
                    FLEX_MEM_STRDUP(yylval->str, yytext);
                    yyextra->is_hint_str = false;
                    return FCONST;
                }
{decimalfail}    {
                    /* throw back the .., and treat as integer */
                    yyless(yyleng-2);
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return process_integer_literal(yytext, yylval, yyscanner);
                }
{decimalf}      {
                    /*
                     * process_binary_float_double() drops the last matched
                     * character, range-checks the rest and stores it in
                     * yylval->str; it also clears is_hint_str.
                     * NOTE(review): on failure yylval->str is not assigned,
                     * yet FCONST_F is still returned after yyerror.
                     */
                    SET_YYLLOC();
                    if (process_binary_float_double(yytext, yyleng, yyscanner, yylval) != OG_SUCCESS) {
                        yyerror("number overflow");
                    }
                    return FCONST_F;
                }
{realf}         {
                    /* same handling as the decimalf rule; returns FCONST_F */
                    SET_YYLLOC();
                    if (process_binary_float_double(yytext, yyleng, yyscanner, yylval) != OG_SUCCESS) {
                        yyerror("number overflow");
                    }
                    return FCONST_F;
                }
{decimald}      {
                    /* same handling as the decimalf rule, but returns FCONST_D */
                    SET_YYLLOC();
                    if (process_binary_float_double(yytext, yyleng, yyscanner, yylval) != OG_SUCCESS) {
                        yyerror("number overflow");
                    }
                    return FCONST_D;
                }
{reald}         {
                    /* same handling as the decimalf rule, but returns FCONST_D */
                    SET_YYLLOC();
                    if (process_binary_float_double(yytext, yyleng, yyscanner, yylval) != OG_SUCCESS) {
                        yyerror("number overflow");
                    }
                    return FCONST_D;
                }
{real}            {
                    /* FCONST token carrying a copy of the matched text */
                    SET_YYLLOC();
                    FLEX_MEM_STRDUP(yylval->str, yytext);
                    yyextra->is_hint_str = false;
                    return FCONST;
                }
{realbadexp}      {
                    /* malformed number: raise ERR_INVALID_NUMBER at the token and return the error token */
                    SET_YYLLOC();
                    OG_SRC_THROW_ERROR(yylloc->loc, ERR_INVALID_NUMBER, "");
                    return LEX_ERROR_TOKEN;
                }
{realfail1}        {
                    /*
                     * throw back the [Ee], and treat as {decimal}.  Note
                     * that it is possible the input is actually {integer},
                     * but since this case will almost certainly lead to a
                     * syntax error anyway, we don't bother to distinguish.
                     */
                    yyless(yyleng-1);
                    SET_YYLLOC();
                    FLEX_MEM_STRDUP(yylval->str, yytext);
                    yyextra->is_hint_str = false;
                    return FCONST;
                }
{realfail2}        {
                    /* throw back the [Ee][+-], and proceed as above */
                    yyless(yyleng-2);
                    SET_YYLLOC();
                    FLEX_MEM_STRDUP(yylval->str, yytext);
                    yyextra->is_hint_str = false;
                    return FCONST;
                }
{size_b}            {
                        /*
                         * Size literal: the matched text is copied into
                         * yylval->str and SIZE_B is returned.
                         * NOTE(review): the size rules copy with
                         * FLEX_STACK_STRDUP while the numeric rules use
                         * FLEX_MEM_STRDUP -- confirm the difference in
                         * lifetime is intended.
                         */
                        SET_YYLLOC();
                        FLEX_STACK_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return SIZE_B;
                }
{size_kb}            {
                        /* size literal: copy the matched text, return SIZE_KB */
                        SET_YYLLOC();
                        FLEX_STACK_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return SIZE_KB;
                }
{size_mb}            {
                        /* size literal: copy the matched text, return SIZE_MB */
                        SET_YYLLOC();
                        FLEX_STACK_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return SIZE_MB;
                }
{size_gb}            {
                        /* size literal: copy the matched text, return SIZE_GB */
                        SET_YYLLOC();
                        FLEX_STACK_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return SIZE_GB;
                }
{size_tb}            {
                        /* size literal: copy the matched text, return SIZE_TB */
                        SET_YYLLOC();
                        FLEX_STACK_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return SIZE_TB;
                }
{size_pb}            {
                        /* size literal: copy the matched text, return SIZE_PB */
                        SET_YYLLOC();
                        FLEX_STACK_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return SIZE_PB;
                }
{size_eb}            {
                        /* size literal: copy the matched text, return SIZE_EB */
                        SET_YYLLOC();
                        FLEX_STACK_STRDUP(yylval->str, yytext);
                        yyextra->is_hint_str = false;
                        return SIZE_EB;
                }

{set_identifier} {
                    /*
                     * Keep only the first two matched characters, push the
                     * rest back, and return those two characters as an Op
                     * token whose value is a copy of that text.
                     */
                    SET_YYLLOC();
                    yyless(2);
                    FLEX_MEM_STRDUP(yylval->str, yytext);
                    yyextra->is_hint_str = false;
                    return Op;
                }

{identifier}    {
                    /*
                     * Unquoted word: either a keyword token or an IDENT
                     * folded to upper case.
                     */
                    int kwnum;
                    char *ident;

                    SET_YYLLOC();

                    /*
                     * connect_by_root is recognised here by a case-insensitive
                     * comparison, before the keyword table is consulted.
                     */
                    if (yyleng == (int)(sizeof("connect_by_root") - 1) &&
                        cm_strcmpni(yytext, "connect_by_root", sizeof("connect_by_root") - 1) == 0) {
                        yyextra->is_hint_str = false;
                        yylval->keyword = "connect_by_root";
                        return CONNECT_BY_ROOT;
                    }

                    /* Is it a keyword? */
                    kwnum = ScanKeywordLookup(yytext, yyextra->keywordlist);

                    yyextra->is_hint_str = false;
                    /*
                     * need_ignore_for_func() can veto the keyword match, in
                     * which case the word falls through and becomes an IDENT.
                     */
                    if (kwnum >= 0 && !need_ignore_for_func(yyextra->stmt, yytext, yyleng))
                    {
                        yylval->keyword = GetScanKeyword(kwnum, yyextra->keywordlist);
                        uint16 token = yyextra->keyword_tokens[kwnum];

                        /*
                         * Remember that a CREATE keyword was seen; a later
                         * TRIGGER keyword clears the flag again.
                         */
                        if (token == CREATE)
                        {
                            yyextra->is_createstmt = true;
                        }
                        else if (token == TRIGGER && yyextra->is_createstmt)
                        {
                            /* Create trigger don't need set dolqstart */
                            yyextra->is_createstmt = false;
                        }
                        /*
                         * After these statement keywords is_hint_str is set,
                         * so that it is true while the next token is scanned.
                         */
                        if (token == SELECT || token == UPDATE || token == INSERT || token == DELETE_P ||
                            token == MERGE)
                        {
                            yyextra->is_hint_str = true;
                        }

                        return token;
                    }

                    /*
                     * No.  Convert the identifier to upper case, and truncate
                     * if necessary.
                     */
                    ident = upcase_truncate_identifier(yytext, yyleng, yyextra->warnOnTruncateIdent);
                    /*
                     * The folded name is the token value; the spelling as
                     * written is kept separately in origin_str.
                     */
                    yylval->str = ident;
                    yyextra->ident_quoted = false;
                    FLEX_MEM_STRDUP(yyextra->origin_str, yytext);
                    return IDENT;
                }

{setUserIdentifier}    {
                    /*
                     * Keep only the first matched character and return it as
                     * a character token; yylval->str receives a copy of that
                     * one-character text.
                     */
                    SET_YYLLOC();
                    yyless(1);
                    FLEX_MEM_STRDUP(yylval->str, yytext);
                    yyextra->is_hint_str = false;
                    return yytext[0];
                }

{other}            {
                    /* anything not matched above is returned as its own character code */
                    SET_YYLLOC();
                    yyextra->is_hint_str = false;
                    return yytext[0];
                }

<<EOF>>            {
                    /* end of input: record the position and finish scanning */
                    SET_YYLLOC();
                    yyterminate();
                }

%%

/*
 * Arrange access to yyextra for subroutines of the main yylex() function.
 * We expect each subroutine to have a yyscanner parameter.  Rather than
 * use the yyget_xxx functions, which might or might not get inlined by the
 * compiler, we cheat just a bit and cast yyscanner to the right type.
 */
#undef yyextra
#define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)

/* Likewise for a couple of other things we need. */
#undef yylloc
#define yylloc  (((struct yyguts_t *) yyscanner)->yylloc_r)
#undef yyleng
#define yyleng  (((struct yyguts_t *) yyscanner)->yyleng_r)


/* Please note that the following line will be replaced with the contents of given file name even if with starting with a comment */
/*$$include "scan-dialect-epilogue.l.c"*/

/*$$exclude in dialect begin*/
/*
 * Tell whether ch counts as part of a word for the error-reporting helpers:
 * an ASCII letter, an ASCII digit, or one of '_', '$', '#'.
 */
static bool32 scanner_is_error_word_char(char ch)
{
    const int is_upper = (ch >= 'A' && ch <= 'Z');
    const int is_lower = (ch >= 'a' && ch <= 'z');
    const int is_digit = (ch >= '0' && ch <= '9');
    const int is_symbol = (ch == '_' || ch == '$' || ch == '#');

    return (is_upper || is_lower || is_digit || is_symbol);
}

/*
 * Describe the run of characters that starts at offset in the scan buffer
 * and extends up to (not including) the first NUL, blank, tab, CR, LF,
 * ';', '(', ')' or ',' -- or up to the end of the buffer.
 *
 * word is filled in as a view into the scan buffer (nothing is copied).
 * Returns OG_TRUE when the run is non-empty, OG_FALSE when it is empty,
 * when word is NULL, or when offset lies outside the buffer.
 */
static bool32 scanner_token_text(core_yyscan_t yyscanner, int offset, text_t *word)
{
    uint32 stop;

    if (word == NULL) {
        return OG_FALSE;
    }
    if (offset < 0 || offset >= (int)yyextra->scanbuflen) {
        return OG_FALSE;
    }

    for (stop = (uint32)offset; stop < yyextra->scanbuflen; stop++) {
        const char ch = yyextra->scanbuf[stop];

        /* NUL is tested separately so the delimiter list stays explicit */
        if (ch == '\0' || strchr(" \t\r\n;(),", ch) != NULL) {
            break;
        }
    }

    word->str = yyextra->scanbuf + offset;
    word->len = stop - (uint32)offset;
    if (word->len > 0) {
        return OG_TRUE;
    }
    return OG_FALSE;
}

/*
 * Locate the word that ends just before offset in the scan buffer.
 *
 * Blanks, tabs, CRs and LFs immediately before offset are skipped; the
 * character found there must be a word character (see
 * scanner_is_error_word_char), otherwise OG_FALSE is returned.  On success
 * word becomes a view of the whole run of word characters ending there.
 */
static bool32 scanner_prev_word(core_yyscan_t yyscanner, int offset, text_t *word)
{
    int last;
    int first;

    if (word == NULL) {
        return OG_FALSE;
    }
    if (offset <= 0 || offset > (int)yyextra->scanbuflen) {
        return OG_FALSE;
    }

    /* step back over the whitespace that separates the two words */
    for (last = offset - 1; last >= 0; last--) {
        const char ch = yyextra->scanbuf[last];

        if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n') {
            break;
        }
    }
    if (last < 0) {
        return OG_FALSE;
    }
    if (!scanner_is_error_word_char(yyextra->scanbuf[last])) {
        return OG_FALSE;
    }

    /* extend to the left for as long as word characters continue */
    first = last;
    while (first > 0 && scanner_is_error_word_char(yyextra->scanbuf[first - 1])) {
        first--;
    }

    word->str = yyextra->scanbuf + first;
    word->len = (uint32)(last - first + 1);
    return OG_TRUE;
}

/*
 * Tell whether the stand-alone word CASE (any letter case) occurs somewhere
 * in the scan buffer before offset.
 *
 * A candidate is rejected when the character before it, or the character
 * after it, is a word character.  The search is purely textual.
 * NOTE(review): it therefore also matches CASE inside string literals or
 * comments -- confirm that is acceptable for the diagnostic it feeds.
 */
static bool32 scanner_has_case_word_before(core_yyscan_t yyscanner, int offset)
{
    text_t candidate;
    uint32 limit;
    uint32 start;

    if (offset <= 0) {
        return OG_FALSE;
    }

    /* never look at or beyond the end of the buffer */
    limit = yyextra->scanbuflen;
    if (offset < (int)yyextra->scanbuflen) {
        limit = (uint32)offset;
    }

    for (start = 0; start + 4 <= limit; start++) {
        if (start > 0 && scanner_is_error_word_char(yyextra->scanbuf[start - 1])) {
            continue;   /* glued to a preceding word character */
        }
        if (start + 4 < yyextra->scanbuflen && scanner_is_error_word_char(yyextra->scanbuf[start + 4])) {
            continue;   /* glued to a following word character */
        }

        candidate.str = yyextra->scanbuf + start;
        candidate.len = 4;
        if (cm_text_str_equal_ins(&candidate, "CASE")) {
            return OG_TRUE;
        }
    }
    return OG_FALSE;
}

/*
 * Map word to the canonical spelling "ELSE", "WHEN" or "END" when it equals
 * one of them ignoring case; return NULL for any other word.
 */
static const char *scanner_case_bad_word(text_t *word)
{
    static const char *const reported[] = { "ELSE", "WHEN", "END" };
    unsigned int idx;

    for (idx = 0; idx < sizeof(reported) / sizeof(reported[0]); idx++) {
        if (cm_text_str_equal_ins(word, reported[idx])) {
            return reported[idx];
        }
    }
    return NULL;
}

/*
 * Try to replace a bare "syntax error" with a more specific diagnostic.
 *
 * The specific error is raised only when all of the following hold:
 *   - message is exactly "syntax error";
 *   - the text at the current token location is ELSE, WHEN or END;
 *   - the word right before it is END;
 *   - a stand-alone CASE occurs earlier in the buffer.
 *
 * Returns OG_TRUE when the specific error was raised, OG_FALSE when the
 * caller still has to report the error itself.
 */
static bool32 scanner_try_report_case_end_extra(const char *message, core_yyscan_t yyscanner)
{
    text_t current;
    text_t previous;
    const char *offending;

    if (message == NULL) {
        return OG_FALSE;
    }
    if (strcmp(message, "syntax error") != 0) {
        return OG_FALSE;
    }

    if (!scanner_token_text(yyscanner, yylloc->offset, &current)) {
        return OG_FALSE;
    }
    offending = scanner_case_bad_word(&current);
    if (offending == NULL) {
        return OG_FALSE;
    }

    if (!scanner_prev_word(yyscanner, yylloc->offset, &previous)) {
        return OG_FALSE;
    }
    if (!cm_text_str_equal_ins(&previous, "END")) {
        return OG_FALSE;
    }
    if (!scanner_has_case_word_before(yyscanner, yylloc->offset)) {
        return OG_FALSE;
    }

    OG_SRC_THROW_ERROR_EX(yylloc->loc, ERR_SQL_SYNTAX_ERROR, "the word \"%s\" is not correct", offending);
    return OG_TRUE;
}

/*
 * scanner_yyerror
 *        Report a lexer or grammar error.
 *
 * The error is attached to whatever location YYLLOC currently holds: the
 * start of the current token when called from inside yylex(), or the most
 * recently lexed token when called from the grammar.  That is adequate for
 * Bison syntax errors, since Bison reports an error as soon as it reaches
 * the first token it cannot parse.  For any other use the position may be
 * misleading.
 *
 * A more specific diagnostic is attempted first (see
 * scanner_try_report_case_end_extra); the generic ERR_SQL_SYNTAX_ERROR with
 * the given message is raised only when that attempt declines.
 */
void
scanner_yyerror(const char *message, core_yyscan_t yyscanner)
{
    if (!scanner_try_report_case_end_extra(message, yyscanner)) {
        OG_SRC_THROW_ERROR(yylloc->loc, ERR_SQL_SYNTAX_ERROR, message);
    }
}
/*$$exclude in dialect end*/

/*
 * Called before any actual parsing is done.
 *
 * Allocates the scanner state (struct yyguts_t) through flex_alloc_mem()
 * using the statement's context, zeroes it and runs yy_init_globals().
 *
 * ptr_yy_globals: receives the new scanner handle; set to NULL when the
 *                 allocation fails.
 * stmt:           statement whose context supplies the memory.
 *
 * Returns 0 on success.  Returns 1 with errno set to EINVAL when
 * ptr_yy_globals is NULL, or to ENOMEM when no memory could be obtained;
 * otherwise returns whatever yy_init_globals() returns.
 */
static int ct_yylex_init(yyscan_t* ptr_yy_globals, sql_stmt_t *stmt)
{
    if (ptr_yy_globals == NULL){
        errno = EINVAL;
        return 1;
    }

    /*
     * Callers may hand in an uninitialized handle.  Clear it first so the
     * NULL test below is meaningful even if the allocator fails without
     * storing anything through the output pointer.
     */
    *ptr_yy_globals = NULL;
    flex_alloc_mem(stmt->context, sizeof(struct yyguts_t), (void **)ptr_yy_globals);

    if (*ptr_yy_globals == NULL){
        errno = ENOMEM;
        return 1;
    }

    /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */
    memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));

    return yy_init_globals(*ptr_yy_globals);
}

/*$$exclude in dialect begin*/
/*
 * scanner_init
 *        Set up a scanner for one SQL text.
 *
 * sql:            text to scan; sql->len bytes starting at sql->str.
 * yyext:          caller-provided scanner state, (re)initialized here.
 * keywordlist:    keyword table used for keyword lookup.
 * keyword_tokens: token codes, indexed like keywordlist.
 * stmt:           statement whose context supplies memory.
 *
 * The text is copied into a private buffer that carries the two
 * terminating bytes flex requires.  The buffer comes from malloc() when it
 * is larger than one page of the context's memory pool (scanner_finish()
 * releases it), and from the statement context otherwise.
 *
 * Returns the scanner handle, or NULL on failure; a malloc'ed scan buffer
 * is released again on the failure paths.
 */
core_yyscan_t
scanner_init(const sql_text_t *sql,
             core_yy_extra_type *yyext,
             const ScanKeywordList *keywordlist,
             const uint16 *keyword_tokens,
             sql_stmt_t *stmt)
{
    size_t        slen = sql->len;
    const char *str = sql->str;
    yyscan_t    scanner;

    if (ct_yylex_init(&scanner, stmt) != 0) {
        return NULL;
    }

    core_yyset_extra(yyext, scanner);

    yyext->keywordlist = keywordlist;
    yyext->keyword_tokens = keyword_tokens;
    yyext->in_slash_proc_body = false;
    yyext->paren_depth = 0;
    yyext->is_createstmt = false;
    yyext->dolqstart = NULL;
    yyext->is_hint_str = false;
    yyext->include_ora_comment = false;
    yyext->func_param_begin = 0;
    yyext->func_param_end = 0;
    yyext->return_pos_end = 0;

    /*
     * Make a scan buffer with special termination needed by flex.
     */
    size_t scanbuf_size = slen + 2;
    yyext->scanbuf_malloced = false;
    if (scanbuf_size > stmt->context->ctrl.memory->pool->page_size) {
        yyext->scanbuf = (char *)malloc(scanbuf_size);
        if (yyext->scanbuf == NULL) {
            OG_THROW_ERROR(ERR_ALLOC_MEMORY, (uint64)scanbuf_size, "scanner buffer");
            return NULL;
        }
        yyext->scanbuf_malloced = true;
    } else {
        if (sql_alloc_mem(stmt->context, (uint32)scanbuf_size, (void **)&yyext->scanbuf) != OG_SUCCESS) {
            return NULL;
        }
    }
    yyext->scanbuflen = slen;
    yyext->stmt = stmt;
    yyext->pending_prev_cte = NULL;
    if (sql_create_array(stmt->context, &yyext->ssa, "SUB-SELECT", OG_MAX_SUBSELECT_EXPRS) != OG_SUCCESS) {
        goto fail;
    }
    memcpy(yyext->scanbuf, str, slen);
    yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
    if (stmt->parser_text_valid) {
        /* expose the private copy as the text being parsed */
        stmt->parser_text.str = yyext->scanbuf;
        stmt->parser_text.len = (uint32)slen;
        stmt->parser_text.loc = sql->loc;
        stmt->parser_text.implicit = sql->implicit;
    }
    yy_scan_buffer(yyext->scanbuf, slen + 2, scanner);

    /* initialize literal buffer to a reasonable but expansible size */
    yyext->literalalloc = 1024;
    yyext->literalbuf = (char *)core_yyalloc(yyext->literalalloc, scanner);
    if (yyext->literalbuf == NULL) {
        goto fail;
    }
    yyext->literallen = 0;
    yyext->warnOnTruncateIdent = true;

    /* plpgsql keyword params */
    yyext->isPlpgsqlKeyWord = false;
    yyext->is_delimiter_name = false;
    yyext->is_last_colon = false;
    yyext->is_proc_end = false;
    /*
     * The text is length-delimited, so search exactly slen bytes; a
     * NUL-terminated string search could run past the end of the text.
     */
    yyext->multi_line_sql = (slen > 0 && memchr(str, '\n', slen) != NULL);

    return scanner;

fail:
    /* only a malloc'ed scan buffer needs explicit release */
    if (yyext->scanbuf_malloced) {
        free(yyext->scanbuf);
        yyext->scanbuf = NULL;
        yyext->scanbuf_malloced = false;
    }
    return NULL;
}


/*
 * Called after parsing is done to clean up after scanner_init().
 *
 * Only a scan buffer obtained with malloc() is released here; the pointer
 * and the ownership flag are reset afterwards.
 */
void
scanner_finish(core_yyscan_t yyscanner)
{
    core_yy_extra_type *extra = &og_yyget_extra(yyscanner)->core_yy_extra;

    if (!extra->scanbuf_malloced) {
        return;
    }
    free(extra->scanbuf);
    extra->scanbuf = NULL;
    extra->scanbuf_malloced = false;
}
/*$$exclude in dialect end*/

/*
 * Append yleng bytes starting at ytext to the scanner's literal buffer.
 *
 * The buffer capacity is doubled until it is strictly larger than the new
 * length, so one spare byte is always left after the data.
 *
 * NOTE(review): the result of core_yyrealloc() is used without a NULL
 * check -- confirm the allocator cannot return NULL here.
 */
static void
addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
{
    if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
    {
        /* grow geometrically until the data fits with room to spare */
        while ((yyextra->literallen + yleng) >= yyextra->literalalloc)
        {
            yyextra->literalalloc *= 2;
        }

        yyextra->literalbuf = (char *) core_yyrealloc(yyextra->literalbuf,
                                                      yyextra->literalalloc,
                                                      yyscanner);
    }

    memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
    yyextra->literallen += yleng;
}


/*
 * Append the single byte ychar to the scanner's literal buffer, doubling
 * the buffer capacity once when the new length would reach it.
 *
 * NOTE(review): the result of core_yyrealloc() is used without a NULL
 * check -- confirm the allocator cannot return NULL here.
 */
static void
addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
{
    if ((yyextra->literallen + 1) >= yyextra->literalalloc)
    {
        yyextra->literalalloc *= 2;
        yyextra->literalbuf = (char *) core_yyrealloc(yyextra->literalbuf,
                                                      yyextra->literalalloc,
                                                      yyscanner);
    }

    yyextra->literalbuf[yyextra->literallen++] = ychar;
}

/*
 * Return a NUL-terminated copy of the current literal buffer contents.
 *
 * The copy is obtained with sql_alloc_mem() from the context of the
 * statement being scanned.  Returns NULL when that allocation fails.
 */
static char *
litbufdup(core_yyscan_t yyscanner)
{
    const int   len = yyextra->literallen;
    char       *copy = NULL;
    errno_t     rc;

    if (sql_alloc_mem(og_yyget_extra(yyscanner)->core_yy_extra.stmt->context,
        len + 1, (void **)&copy) != OG_SUCCESS) {
        return NULL;
    }

    rc = memcpy_s(copy, len + 1, yyextra->literalbuf, len);
    knl_securec_check(rc);
    copy[len] = '\0';
    return copy;
}

/*
 * Classify a decimal integer literal and store its value in lval.
 *
 * Returns ICONST (value in lval->ival) when the number fits in 32 bits,
 * I64CONST (value in lval->ival64) when it only fits in 64 bits, and
 * FCONST (text copied into lval->str) when it does not fit in 64 bits or
 * is followed by unparsed characters.
 */
static int
process_integer_literal(const char *token, YYSTYPE *lval, core_yyscan_t yyscanner)
{
    char       *stop;
    int64        parsed;

    errno = 0;
    parsed = strtoll(token, &stop, 10);
    if (errno == ERANGE || *stop != '\0') {
        /* too large for int64: hand the digits on as a generic number */
        FLEX_MEM_STRDUP(lval->str, token);
        return FCONST;
    }

    if (parsed == (int64)((int32)parsed)) {
        lval->ival = parsed;
        return ICONST;
    }

    lval->ival64 = parsed;
    return I64CONST;
}

/*
 * Prepare the value of a suffixed floating-point literal.
 *
 * token/len: matched text and its length; the last character is dropped
 *            before the value is checked.
 * lval:      on success lval->str receives a copy of the remaining text.
 *
 * Returns OG_SUCCESS, or OG_ERROR when the copy cannot be allocated or
 * handle_float_overflow() rejects the value.  lval->str is left untouched
 * on failure.  Also clears is_hint_str.
 */
static status_t
process_binary_float_double(char *token, int len, core_yyscan_t yyscanner, YYSTYPE *lval)
{
    yyextra->is_hint_str = false;
    startlit();
    addlit(token, len-1, yyscanner);
    char *val = litbufdup(yyscanner);
    if (val == NULL) {
        /* allocation failed; the range check must not see a NULL string */
        return OG_ERROR;
    }
    if (handle_float_overflow(val, yyscanner, &val) != OG_SUCCESS) {
        return OG_ERROR;
    }
    lval->str = val;
    return OG_SUCCESS;
}

/*$$exclude in dialect begin*/
/*
 * Check in advance whether a floating-point constant is out of range.
 *
 * token is parsed with strtod().  OG_ERROR is returned when strtod()
 * reports ERANGE and the result is zero or at least HUGE_VAL in magnitude;
 * any other outcome is OG_SUCCESS.  *result is always set to token.
 */
status_t 
handle_float_overflow(char *token, core_yyscan_t yyscanner, char **result)
{
    char       *stop;
    double         parsed;

    *result = token;

    errno = 0;
    parsed = strtod(token, &stop);
    if (errno != ERANGE) {
        return OG_SUCCESS;
    }

    if (parsed == 0.0 || parsed >= HUGE_VAL || parsed <= -HUGE_VAL) {
        return OG_ERROR;
    }
    return OG_SUCCESS;
}
/*$$exclude in dialect end*/

/*
 * Numeric value (0..15) of the hexadecimal digit c.  Characters that are
 * not hexadecimal digits yield 0; callers are expected to have validated
 * the digit beforehand.
 */
static uint32
hexval(unsigned char c)
{
    unsigned char folded;

    if (c >= '0' && c <= '9') {
        return c - '0';
    }

    /* setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves 'a'..'f' alone */
    folded = (unsigned char)(c | 0x20);
    if (folded >= 'a' && folded <= 'f') {
        return folded - 'a' + 0xA;
    }
    return 0; /* not reached */
}

/*
 * Accept only code points up to 0x7F.
 *
 * For anything larger the error location is advanced to the offending
 * escape (loc is a position inside the literal buffer) and OG_ERROR is
 * returned.
 */
static status_t 
check_unicode_value(uint32 c, const char *loc, core_yyscan_t yyscanner)
{
    if (c <= 0x7F) {
        return OG_SUCCESS;
    }

    ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3);   /* 3 for U&" */
    return OG_ERROR;
}

/* True when c lies in the range 0xD800..0xDBFF (first half of a UTF-16 surrogate pair). */
static bool
is_utf16_surrogate_first(uint32 c)
{
    /* all values in that range share the bits above the low ten */
    return ((c & 0xFFFFFC00) == 0xD800);
}

/* True when c lies in the range 0xDC00..0xDFFF (second half of a UTF-16 surrogate pair). */
static bool
is_utf16_surrogate_second(uint32 c)
{
    /* all values in that range share the bits above the low ten */
    return ((c & 0xFFFFFC00) == 0xDC00);
}

/*
 * Combine the two halves of a UTF-16 surrogate pair into one code point:
 * the low ten bits of first become the high part, the low ten bits of
 * second the low part, and 0x10000 is added.
 */
static uint32
surrogate_pair_to_codepoint(uint32 first, uint32 second)
{
    const uint32 high_bits = (first & 0x3FF) << 10;
    const uint32 low_bits = second & 0x3FF;

    return high_bits + 0x10000 + low_bits;
}

/*
 * Encode code point c as UTF-8 into utf8string and return utf8string.
 *
 * One to four bytes are written depending on the magnitude of c; no
 * terminator is appended, so the caller's buffer must hold at least four
 * bytes.
 */
static unsigned char* unicode_to_utf8(uint32 c, unsigned char* utf8string)
{
    unsigned char *dst = utf8string;

    if (c <= 0x7F) {
        *dst = c;
        return utf8string;
    }

    if (c <= 0x7FF) {
        *dst++ = 0xC0 | ((c >> 6) & 0x1F);
        *dst = 0x80 | (c & 0x3F);
        return utf8string;
    }

    if (c <= 0xFFFF) {
        *dst++ = 0xE0 | ((c >> 12) & 0x0F);
        *dst++ = 0x80 | ((c >> 6) & 0x3F);
        *dst = 0x80 | (c & 0x3F);
        return utf8string;
    }

    *dst++ = 0xF0 | ((c >> 18) & 0x07);
    *dst++ = 0x80 | ((c >> 12) & 0x3F);
    *dst++ = 0x80 | ((c >> 6) & 0x3F);
    *dst = 0x80 | (c & 0x3F);
    return utf8string;
}

/*
 * Length in bytes of the character that starts at mbstr, judged by its
 * first byte only and assuming UTF-8:
 *   0xxxxxxx -> 1,  110xxxxx -> 2,  1110xxxx -> 3,  11110xxx -> 4.
 * Any other first byte (a continuation byte or an invalid lead byte) is
 * counted as a single byte so callers always make progress.
 *
 * A fixed answer of 1 would make a caller that appends one encoded
 * character keep only the lead byte of a multi-byte sequence.
 */
static int pg_mblen(const char* mbstr)
{
    const unsigned char lead = (unsigned char)mbstr[0];

    if ((lead & 0x80) == 0x00) {
        return 1;
    }
    if ((lead & 0xE0) == 0xC0) {
        return 2;
    }
    if ((lead & 0xF0) == 0xE0) {
        return 3;
    }
    if ((lead & 0xF8) == 0xF0) {
        return 4;
    }
    return 1;
}

/*
 * Append code point c, encoded as UTF-8, to the literal buffer.
 *
 * Returns OG_ERROR for zero and for values above 0x10FFFF.  Values above
 * 0x7F set the scanner's saw_non_ascii flag.  The number of bytes appended
 * is whatever pg_mblen() reports for the encoded buffer.
 */
static status_t addunicode(uint32 c, core_yyscan_t yyscanner)
{
    char encoded[8];
    int nbytes;

    if (c == 0) {
        return OG_ERROR;
    }
    if (c > 0x10FFFF) {
        return OG_ERROR;
    }

    if (c >= 0x80) {
        yyextra->saw_non_ascii = true;
    }

    unicode_to_utf8(c, (unsigned char *) encoded);
    nbytes = pg_mblen(encoded);
    addlit(encoded, nbytes, yyscanner);
    return OG_SUCCESS;
}

/*
 * Expand the Unicode escapes contained in the literal accumulated in
 * yyextra->literalbuf and hand back the result in a newly allocated buffer.
 *
 * escape    - character that introduces an escape sequence
 * yyscanner - scanner state; supplies literalbuf/literallen and the statement
 *             context used for the allocation
 * res       - receives the allocated, NUL-terminated, de-escaped string
 *
 * With E standing for the escape character, the recognized forms are:
 *   EE        a single literal E
 *   EXXXX     the code point given by four hex digits
 *   E+XXXXXX  the code point given by six hex digits
 *
 * Returns OG_SUCCESS on success.  Returns OG_ERROR when the escape character
 * itself is not allowed, the allocation fails, an escape is malformed,
 * check_unicode_value() rejects a code point, or surrogate halves are not
 * correctly paired.  Most error paths call ADVANCE_YYLLOC with the offset of
 * the offending input first.  When an error occurs after the allocation,
 * *res is left pointing at the partially filled buffer.
 */
static status_t litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner, char **res)
{
    char *litbuf, *in, *out;
    uint32 pair_first = 0;      /* pending first surrogate half; 0 means none */

    /*
     * Reject escape characters that would be ambiguous with the escape
     * syntax itself: hex digits, '+', quotes and whitespace.
     */
    if (isxdigit(escape)
        || escape == '+'
        || escape == '\''
        || escape == '"'
        || scanner_isspace(escape))
    {
        ADVANCE_YYLLOC(yyextra->literallen + yyleng + 1);
        return OG_ERROR;
    }

    /* Make literalbuf null-terminated to simplify the scanning loop */
    litbuf = yyextra->literalbuf;
    litbuf[yyextra->literallen] = '\0';

    /*
     * This relies on the subtle assumption that a UTF-8 expansion
     * cannot be longer than its escaped representation.
     */
    if (sql_alloc_mem(og_yyget_extra(yyscanner)->core_yy_extra.stmt->context,
        yyextra->literallen + 1, (void **)res) != OG_SUCCESS) {
        return OG_ERROR;
    }
    char *newm = *res;

    /* in walks the escaped input, out walks the de-escaped output */
    in = litbuf;
    out = newm;
    while (*in)
    {
        if (in[0] == escape)
        {
            if (in[1] == escape)
            {
                /* doubled escape character: emit one literal copy */
                if (pair_first)
                {
                    /* a first surrogate half must be followed by an escape giving the second half */
                    ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
                    return OG_ERROR;
                }
                *out++ = escape;
                in += 2;
            }
            /*
             * The isxdigit() tests short-circuit, so a NUL terminator stops
             * the chain before any byte past the end of the string is read.
             */
            else if (isxdigit((unsigned char) in[1]) &&
                     isxdigit((unsigned char) in[2]) &&
                     isxdigit((unsigned char) in[3]) &&
                     isxdigit((unsigned char) in[4]))
            {
                /* four-digit form: EXXXX */
                uint32 unicode;

                unicode = (hexval(in[1]) << 12) +
                    (hexval(in[2]) << 8) +
                    (hexval(in[3]) << 4) +
                    hexval(in[4]);
                /*
                 * NOTE(review): check_unicode_value() as defined in this file
                 * rejects every value above 0x7F, so surrogate values
                 * (0xD800 and up) cannot get past this call and the pairing
                 * logic below is not exercised with that definition.
                 */
                if (check_unicode_value(unicode, in, yyscanner) != OG_SUCCESS) {
                    return OG_ERROR;
                }
                if (pair_first)
                {
                    if (is_utf16_surrogate_second(unicode))
                    {
                        /* pair complete: merge both halves into one code point */
                        unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                        pair_first = 0;
                    }
                    else
                    {
                        ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
                        return OG_ERROR;
                    }
                }
                else if (is_utf16_surrogate_second(unicode))
                    /* second half without a preceding first half; location is not advanced here */
                    return OG_ERROR;

                if (is_utf16_surrogate_first(unicode))
                    /* remember the first half and wait for the next escape */
                    pair_first = unicode;
                else
                {
                    unicode_to_utf8(unicode, (unsigned char *) out);
                    /* pg_mblen() in this file always returns 1, so out advances by one byte */
                    out += pg_mblen(out);
                }
                in += 5;    /* escape character + 4 hex digits */
            }
            else if (in[1] == '+' &&
                     isxdigit((unsigned char) in[2]) &&
                     isxdigit((unsigned char) in[3]) &&
                     isxdigit((unsigned char) in[4]) &&
                     isxdigit((unsigned char) in[5]) &&
                     isxdigit((unsigned char) in[6]) &&
                     isxdigit((unsigned char) in[7]))
            {
                /* six-digit form: E+XXXXXX; handled the same way as the four-digit form */
                uint32 unicode;

                unicode = (hexval(in[2]) << 20) +
                    (hexval(in[3]) << 16) +
                    (hexval(in[4]) << 12) +
                    (hexval(in[5]) << 8) +
                    (hexval(in[6]) << 4) +
                    hexval(in[7]);
                if (check_unicode_value(unicode, in, yyscanner) != OG_SUCCESS) {
                    return OG_ERROR;
                }
                if (pair_first)
                {
                    if (is_utf16_surrogate_second(unicode))
                    {
                        unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                        pair_first = 0;
                    }
                    else
                    {
                        ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
                        return OG_ERROR;
                    }
                }
                else if (is_utf16_surrogate_second(unicode))
                    /* second half without a preceding first half; location is not advanced here */
                    return OG_ERROR;

                if (is_utf16_surrogate_first(unicode))
                    pair_first = unicode;
                else
                {
                    unicode_to_utf8(unicode, (unsigned char *) out);
                    /* pg_mblen() in this file always returns 1, so out advances by one byte */
                    out += pg_mblen(out);
                }
                in += 8;    /* escape character + '+' + 6 hex digits */
            }
            else
            {
                /* escape character not followed by any recognized form */
                ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
                return OG_ERROR;
            }
        }
        else
        {
            /* ordinary character: not allowed while a surrogate pair is pending */
            if (pair_first)
            {
                ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
                return OG_ERROR;
            }
            *out++ = *in++;
        }
    }

    /* unfinished surrogate pair? */
    if (pair_first)
    {
        ADVANCE_YYLLOC(in - litbuf + 3);            /* 3 for U&" */
        return OG_ERROR;
    }

    *out = '\0';
    return OG_SUCCESS;
}

/*
 * Translate the character that follows a backslash into the character it
 * stands for.
 *
 * 'b', 'f', 'n', 'r' and 't' map to the corresponding control characters.
 * Every other character is returned unchanged; if it is NUL or has its high
 * bit set, yyextra->saw_non_ascii is raised first.
 */
static unsigned char
unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
{
    if (c == 'b') {
        return '\b';
    }
    if (c == 'f') {
        return '\f';
    }
    if (c == 'n') {
        return '\n';
    }
    if (c == 'r') {
        return '\r';
    }
    if (c == 't') {
        return '\t';
    }

    /* backslash followed by NUL or a non-7-bit-ASCII byte */
    if (c == '\0' || IS_HIGHBIT_SET(c)) {
        yyextra->saw_non_ascii = true;
    }
    return c;
}

/*
 * Interface functions to make flex allocate through sql_alloc_mem() on the
 * statement's context instead of calling malloc().
 * It'd be better to make core_yyalloc/core_yyrealloc/core_yyfree static,
 * but flex insists otherwise.
 */

/*
 * Allocate `size` usable bytes for flex from `context`, preceded by a
 * flex_mem_header that records the magic number and the requested size.
 *
 * context - memory context passed through to sql_alloc_mem()
 * size    - number of bytes the caller wants
 * buf     - receives the pointer just past the header, or NULL on failure
 *
 * *buf is NULL when the total size overflows or exceeds OG_MAX_UINT32 (an
 * ERR_ALLOC_MEMORY error is raised in that case) or when sql_alloc_mem()
 * does not succeed.
 */
static void flex_alloc_mem(void *context, yy_size_t size, void **buf)
{
    void *raw = NULL;
    yy_size_t total = size + FLEX_MEM_HEADER_SIZE;
    flex_mem_header *hdr = NULL;

    /* start from the failure result; only overwritten on success */
    *buf = NULL;

    /* total < size means the addition wrapped; sql_alloc_mem() is called with a uint32 size */
    if (SECUREC_UNLIKELY(total < size || total > OG_MAX_UINT32)) {
        OG_THROW_ERROR(ERR_ALLOC_MEMORY, (uint64)size, "bison scanner");
        return;
    }

    if (sql_alloc_mem(context, (uint32)total, &raw) != OG_SUCCESS) {
        return;
    }

    /* stamp the header so core_yyrealloc() can recover the old size later */
    hdr = (flex_mem_header *)raw;
    hdr->bytes = size;
    hdr->magic_number = FLEX_MEM_MAGIC_NUMBER;

    *buf = FLEX_MEM_GET_POINTER(raw);
}

/*$$exclude in dialect begin*/
/*
 * Allocation hook for flex: obtain `bytes` bytes from the context of the
 * statement attached to the scanner.  Returns NULL when flex_alloc_mem()
 * could not provide the memory.
 */
void *
core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
{
    void *mem = NULL;

    flex_alloc_mem(og_yyget_extra(yyscanner)->core_yy_extra.stmt->context, bytes, &mem);
    return mem;
}

/*
 * Reallocation hook for flex.
 *
 * A fresh block of `bytes` bytes is obtained from the statement context and
 * the contents of the old block are copied into it.  The old block is not
 * released here.
 *
 * ptr       - block previously returned by core_yyalloc()/core_yyrealloc(),
 *             or NULL
 * bytes     - requested new size
 * yyscanner - scanner state
 *
 * Returns the new block, or NULL when the allocation failed.
 *
 * Only min(old size, new size) bytes are copied.  Copying the full old size
 * made memcpy_s() fail whenever the new block was smaller than the old one,
 * which produced a spurious "realloc failed" error on a shrinking request.
 */
void *
core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
{
    void *buf = NULL;
    flex_alloc_mem(og_yyget_extra(yyscanner)->core_yy_extra.stmt->context, bytes, (void **)&buf);
    if (buf == NULL) {
        return NULL;
    }

    if (ptr) {
        /* the header written by flex_alloc_mem() records the old block's size */
        flex_mem_header *old_header = FLEX_MEM_GET_HEADER(ptr);
        CM_ASSERT(old_header->magic_number == FLEX_MEM_MAGIC_NUMBER);

        /* never copy more than the new block can hold */
        yy_size_t copy_len = (old_header->bytes < bytes) ? old_header->bytes : bytes;

        /* nothing to preserve when either block is empty */
        if (copy_len > 0) {
            errno_t err = memcpy_s(buf, bytes, ptr, copy_len);
            if (err != EOK) {
                /* as before, the error is reported and the new block is still returned */
                scanner_yyerror("realloc failed", yyscanner);
            }
        }
    }
    return buf;
}

/*
 * Free hook for flex.  Deliberately does nothing: neither argument is used
 * and no memory is released by this function.
 */
void
core_yyfree(void *ptr, core_yyscan_t yyscanner)
{
    (void)ptr;
    (void)yyscanner;
}

/*
 * Return the yyleng_r field of the scanner state behind `yyscanner`.
 */
int ct_yyget_leng(core_yyscan_t yyscanner)
{
    struct yyguts_t *guts = (struct yyguts_t *) yyscanner;

    return guts->yyleng_r;
}
/*$$exclude in dialect end*/

/*
 * Fill `loc` with the position of `cur_pos` inside `origin_str`.
 *
 * cur_pos        - current scan position
 * origin_str     - start of the SQL text
 * loc            - receives offset, line and column
 * multi_line_sql - false when the caller knows the text contains no '\n'
 *
 * The offset is always cur_pos - origin_str.  Line and column are 1-based.
 * For single-line text the column is simply offset + 1.  Otherwise the text
 * is rescanned from the beginning, counting newlines.
 */
static void update_current_location(char *cur_pos, char *origin_str, lex_location_t *loc, bool8 multi_line_sql)
{
    const char *scan;

    loc->offset = cur_pos - origin_str;

    /* fast path: no '\n' in the text, so the line is 1 and only the column varies */
    if (!multi_line_sql) {
        loc->loc.column = loc->offset + 1;
        loc->loc.line = 1;
        return;
    }

    /* slow path: recount from the start of the text on every call */
    loc->loc.column = 1;
    loc->loc.line = 1;
    for (scan = origin_str; scan < cur_pos; scan++) {
        if (*scan != '\n') {
            loc->loc.column++;
            continue;
        }
        /* a newline starts the next line at column 1 */
        loc->loc.line++;
        loc->loc.column = 1;
    }

    /*
     * The scan stopped on the terminating NUL: report the start of the
     * following line (example from the original comment: 'insert into\n;').
     */
    if (*scan == '\0') {
        loc->loc.line++;
        loc->loc.column = 1;
    }
}

/*
 * Decide the result for identifier `ident` (length `len`) in the context of
 * `stmt`.
 *
 * Returns OG_FALSE when the identifier is "connect_by_root" (compared with
 * cm_strcmpni over the keyword's length).  For any other identifier, returns
 * what sql_self_func_configed() reports for the session's db_user and the
 * identifier.
 */
static bool32 need_ignore_for_func(sql_stmt_t *stmt, char* ident, int len)
{
    const int root_len = (int)(sizeof("connect_by_root") - 1);

    /* connect_by_root is never looked up */
    if (len == root_len &&
        cm_strcmpni(ident, "connect_by_root", sizeof("connect_by_root") - 1) == 0) {
        return OG_FALSE;
    }

    /* wrap the session user and the identifier as text_t for the lookup */
    text_t user = { .str = stmt->session->db_user, .len = (uint32)strlen(stmt->session->db_user) };
    text_t name;

    name.str = ident;
    name.len = len;
    return sql_self_func_configed(&user, &name);
}