%{
#include "parser/parse_hint.h"
#include "parser/scansup.h"
#include "utils/memutils.h"
#include "parser/parser.h"				/* only needed for GUC variables */
#include "mb/pg_wchar.h"


#include "parser/gramparse.h"

#undef yyextra
#define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)

/* Likewise for a couple of other things we need. */
#undef yylloc
#define yylloc  (((struct yyguts_t *) yyscanner)->yylloc_r)
#undef yyleng
#define yyleng  (((struct yyguts_t *) yyscanner)->yyleng_r)

#define startlit()  ( yyextra->literallen = 0 )

#define YY_EXTRA_TYPE hint_yy_extra_type *

/*
 * Keyword table for the hint scanner: each entry pairs a keyword spelling
 * (the "name" field) with the token code handed to the grammar (the "value"
 * field).  HintKeywordLookup() walks this array front to back and compares
 * names case-insensitively, returning the first entry that matches.
 */
static const hintKeyword parsers[] =
{
	{HINT_NESTLOOP, NestLoop_P},
	{HINT_MERGEJOIN, MergeJoin_P},
	{HINT_HASHJOIN, HashJoin_P},
	{HINT_NO, No_P},
	{HINT_LEADING, Leading_P},
	{HINT_ROWS, Rows_P},
	{HINT_BROADCAST, Broadcast_P},
	{HINT_REDISTRIBUTE, Redistribute_P},
	{HINT_BLOCKNAME, BlockName_P},
	{HINT_TABLESCAN, TableScan_P},
	{HINT_INDEXSCAN, IndexScan_P},
	{HINT_INDEXONLYSCAN, IndexOnlyScan_P},
	{HINT_SKEW, Skew_P},
	{HINT_MULTI_NODE, HINT_MULTI_NODE_P},
	{HINT_NULL, NULL_P},
	{HINT_TRUE, TRUE_P},
	{HINT_FALSE, FALSE_P},
	{HINT_PRED_PUSH, Predpush_P},
	{HINT_PRED_PUSH_SAME_LEVEL, PredpushSameLevel_P},
	{HINT_REWRITE, Rewrite_P},
	{HINT_GATHER, Gather_P},
	{HINT_SET, Set_P},
	{HINT_CPLAN, USE_CPLAN_P},
	{HINT_GPLAN, USE_GPLAN_P},
	{HINT_NO_EXPAND, No_expand_P},
    {HINT_CHOOSE_ADAPTIVE_GPLAN, CHOOSE_ADAPTIVE_GPLAN_P},
	{HINT_NO_GPC, NO_GPC_P},
	{HINT_SQL_IGNORE, SQL_IGNORE_P},
};

static const hintKeyword* HintKeywordLookup(const char *str);
static int process_integer_literal(const char *token, YYSTYPE *lval);
static char * litbufdup(yyscan_t yyscanner);
static void addlit(const char *ytext, int yleng, yyscan_t yyscanner);
static void addlitchar(unsigned char ychar, yyscan_t yyscanner);
extern void hint_scanner_yyerror(const char *msg, yyscan_t yyscanner);
static void hint_scanner_yyerror_emit(const char *msg, yyscan_t yyscanner);
static char *litbuf_udeescape(unsigned char escape, yyscan_t yyscanner);
static unsigned int hexval(unsigned char c, yyscan_t yyscanner);
static unsigned char unescape_single_char(unsigned char c, yyscan_t yyscanner);
static bool is_utf16_surrogate_first(pg_wchar c);
static bool is_utf16_surrogate_second(pg_wchar c);
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
static void addunicode(pg_wchar c, yyscan_t yyscanner);
static void check_string_escape_warning(unsigned char ychar, yyscan_t yyscanner);
static void check_escape_warning(yyscan_t yyscanner);
static void check_unicode_value(pg_wchar c, char *loc, yyscan_t yyscanner);
extern void output_hint_warning(List* warning, int lev);

%}

%option 8bit
%option noyywrap
%option noyyalloc
%option noyyrealloc
%option noyyfree
%option reentrant
%option bison-bridge
%option noinput
%option nounput
%option never-interactive
%option warn
%option yylineno

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> bit string literal
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xh> hexadecimal numeric string
 *  <xq> standard quoted strings
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xdolq> $foo$ quoted strings
 *  <xui> quoted identifier with Unicode escapes
 *  <xus> quoted string with Unicode escapes
 *  <xeu> Unicode surrogate pair in extended quoted string
 */

%x xb
%x xd
%x xh
%x xe
%x xq
%x xdolq
%x xui
%x xus
%x xeu

digit			[0-9]
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$\#]

identifier      {ident_start}{ident_cont}*
integer			{digit}+
decimal		(({digit}*\.{digit}+)|({digit}+\.{digit}*))
decimalfail 	{digit}+\.\.
real			({integer}|{decimal})[Ee][-+]?{digit}+
realfail1		({integer}|{decimal})[Ee]
realfail2		({integer}|{decimal})[Ee][-+]

space			[ \t\n\r\f]
self			[,()\[\].';\:\+\-\*\/\\\%\^\<\>\=\~\!\@\#\^\&\|\`\?]

horiz_space		[ \t\f]
newline			[\n\r]
non_newline		[^\n\r]

comment			("--"{non_newline}*)

whitespace		({space}+|{comment})

/*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

special_whitespace		({space}+|{comment}{newline})
horiz_whitespace		({horiz_space}|{comment})
whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)

/*
 * To ensure that {quotecontinue} can be scanned without having to back up
 * if the full pattern isn't matched, we include trailing whitespace in
 * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 * except for {quote} followed by whitespace and just one "-" (not two,
 * which would start a {comment}).  To cover that we have {quotefail}.
 * The actions for {quotestop} and {quotefail} must throw back characters
 * beyond the quote proper.
 */
quote			'
quotestop		{quote}{whitespace}*
quotecontinue	{quote}{whitespace_with_newline}{quote}
quotefail		{quote}{whitespace}*"-"

/* Bit string
 * It is tempting to scan the string for only those characters
 * which are allowed. However, this leads to silently swallowed
 * characters if illegal characters are included in the string.
 * For example, if xbinside is [01] then B'ABCD' is interpreted
 * as a zero-length string, and the ABCD' is lost!
 * Better to pass the string forward and let the input routines
 * validate the contents.
 */
xbstart			[bB]{quote}
xbinside		[^']*

/* Hexadecimal number */
xhstart			[xX]{quote}
xhinside		[^']*

/* National character */
xnstart			[nN]{quote}

/* Quoted string that allows backslash escapes */
xestart			[eE]{quote}
xeinside		[^\\']+
xeescape		[\\][^0-7]
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}
xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
xeunicodefail	[\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})

/* Extended quote
 * xqdouble implements embedded quote, ''''
 */
xqstart			{quote}
xqdouble		{quote}{quote}
xqinside		[^']+

/* $foo$ style quotes ("dollar quoting")
 * The quoted string starts with $foo$ where "foo" is an optional string
 * in the form of an identifier, except that it may not contain "$",
 * and extends to the first occurrence of an identical string.
 * There is *no* processing of the quoted text.
 *
 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 * fails to match its trailing "$".
 */
dolq_start		[A-Za-z\200-\377_]
dolq_cont		[A-Za-z\200-\377_0-9]
dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
dolqfailed		\${dolq_start}{dolq_cont}*
dolqinside		[^$]+

/* Double quote
 * Allows embedded spaces and other special characters into identifiers.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
xddouble		{dquote}{dquote}
xdinside		[^"]+

/* Unicode escapes */
uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
/* error rule to avoid backup */
uescapefail		("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])

/* Quoted identifier with Unicode escapes */
xuistart		[uU]&{dquote}
xuistop1		{dquote}{whitespace}*{uescapefail}?
xuistop2		{dquote}{whitespace}*{uescape}

/* Quoted string with Unicode escapes */
xusstart		[uU]&{quote}
xusstop1		{quote}{whitespace}*{uescapefail}?
xusstop2		{quote}{whitespace}*{uescape}

/* error rule to avoid backup */
xufailed		[uU]&

%%
{whitespace}	{
					/* ignore */
				}
{xbstart}		{
					/* Binary bit type.
					 * At some point we should simply pass the string
					 * forward to the parser and label it there.
					 * In the meantime, place a leading "b" on the string
					 * to mark it for the input routine as a binary string.
					 */
					BEGIN(xb);
					startlit();
					addlitchar('b', yyscanner);
				}
<xb>{quotestop}	|
<xb>{quotefail} {
					yyless(1);
					BEGIN(INITIAL);
					yylval->str = litbufdup(yyscanner);
					yyextra->is_hint_str = true;
					return BCONST;
				}
<xh>{xhinside}	|
<xb>{xbinside}	{
					addlit(yytext, yyleng, yyscanner);
				}
<xh>{quotecontinue}	|
<xb>{quotecontinue}	{
					/* ignore */
				}
<xb><<EOF>>		{ hint_scanner_yyerror_emit("unterminated bit string literal", yyscanner); return 0;}

{xhstart}		{
					/* Hexadecimal bit type.
					 * At some point we should simply pass the string
					 * forward to the parser and label it there.
					 * In the meantime, place a leading "x" on the string
					 * to mark it for the input routine as a hex string.
					 */
					BEGIN(xh);
					startlit();
					addlitchar('x', yyscanner);
				}
<xh>{quotestop}	|
<xh>{quotefail} {
					yyless(1);
					BEGIN(INITIAL);
					yylval->str = litbufdup(yyscanner);
					yyextra->is_hint_str = true;
					return XCONST;
				}
<xh><<EOF>>		{ hint_scanner_yyerror_emit("unterminated hexadecimal string literal", yyscanner); return 0;}

{xnstart}		{
					/* National character.
					 * We will pass this along as a normal character string,
					 * but preceded with an internally-generated "NCHAR".
					 */
					int kwnum;

					yyless(1);				/* eat only 'n' this time */

					kwnum = ScanKeywordLookup("nchar",
											  yyextra->keywordlist);
					if (kwnum >= 0)
					{
						yyextra->is_hint_str = true;
						return yyextra->keyword_tokens[kwnum];
					}
					else
					{
						/* If NCHAR isn't a keyword, just return "n" */
						yylval->str = pstrdup("n");
						yyextra->ident_quoted = false;
						yyextra->is_hint_str = true;
						return IDENT;
					}
				}

{xqstart}		{
					yyextra->warn_on_first_escape = true;
					yyextra->saw_non_ascii = false;
					if (u_sess->attr.attr_sql.standard_conforming_strings)
						BEGIN(xq);
					else
						BEGIN(xe);
					startlit();
				}
{xestart}		{
					yyextra->warn_on_first_escape = false;
					yyextra->saw_non_ascii = false;
					BEGIN(xe);
					startlit();
				}
{xusstart}		{
					if (!u_sess->attr.attr_sql.standard_conforming_strings)
						hint_scanner_yyerror("unsafe use of string constant with Unicode escapes. String constants with Unicode escapes cannot be used when standard_conforming_strings is off.", yyscanner);

					BEGIN(xus);
					startlit();
				}
<xq,xe>{quotestop}	|
<xq,xe>{quotefail} {
					yyless(1);
					BEGIN(INITIAL);
					/*
					 * check that the data remains valid if it might have been
					 * made invalid by unescaping any chars.
					 */
					if (yyextra->saw_non_ascii)
						pg_verifymbstr(yyextra->literalbuf,
									   yyextra->literallen,
									   false);
					yylval->str = litbufdup(yyscanner);
					yyextra->is_hint_str = true;
					return SCONST;
				}
<xus>{xusstop1} {
					/* throw back all but the quote */
					yyless(1);
					BEGIN(INITIAL);
					yylval->str = litbuf_udeescape('\\', yyscanner);
					yyextra->is_hint_str = true;
					return SCONST;
		}
<xus>{xusstop2} {
					BEGIN(INITIAL);
					yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner);
					yyextra->is_hint_str = true;
					return SCONST;
		}
<xq,xe,xus>{xqdouble} {
					addlitchar('\'', yyscanner);
				}
<xq,xus>{xqinside}  {
					addlit(yytext, yyleng, yyscanner);
				}
<xe>{xeinside}  {
					addlit(yytext, yyleng, yyscanner);
				}
<xe>{xeunicode} {
					pg_wchar c = strtoul(yytext+2, NULL, 16);

					check_escape_warning(yyscanner);

					if (is_utf16_surrogate_first(c))
					{
						yyextra->utf16_first_part = c;
						BEGIN(xeu);
					}
					else if (is_utf16_surrogate_second(c))
						hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);
					else
						addunicode(c, yyscanner);
				}
<xeu>{xeunicode} {
					pg_wchar c = strtoul(yytext+2, NULL, 16);

					if (!is_utf16_surrogate_second(c))
						hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);

					c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c);

					addunicode(c, yyscanner);

					BEGIN(xe);
				}
<xeu>.			{ hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner); }
<xeu>\n			{ hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner); }
<xeu><<EOF>>	{ hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner); return 0; }
<xe,xeu>{xeunicodefail}	{
						hint_scanner_yyerror("invalid Unicode escape. Unicode escapes must be \\uXXXX or \\UXXXXXXXX.", yyscanner);
				}
<xe>{xeescape}  {
					if (yytext[1] == '\'')
					{
						if (u_sess->attr.attr_sql.backslash_quote == BACKSLASH_QUOTE_OFF ||
							(u_sess->attr.attr_sql.backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
							 PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
							hint_scanner_yyerror("unsafe use of \\' in a string literal. Use '' to write quotes in strings. \\' is insecure in client-only encodings.", yyscanner);
					}
					check_string_escape_warning(yytext[1], yyscanner);
					addlitchar(unescape_single_char(yytext[1], yyscanner),
							   yyscanner);
				}
<xe>{xeoctesc}  {
					unsigned char c = strtoul(yytext+1, NULL, 8);

					check_escape_warning(yyscanner);
					addlitchar(c, yyscanner);
					if (c == '\0' || IS_HIGHBIT_SET(c))
						yyextra->saw_non_ascii = true;
				}
<xe>{xehexesc}  {
					unsigned char c = strtoul(yytext+2, NULL, 16);

					check_escape_warning(yyscanner);
					addlitchar(c, yyscanner);
					if (c == '\0' || IS_HIGHBIT_SET(c))
						yyextra->saw_non_ascii = true;
				}
<xq,xe,xus>{quotecontinue} {
					/* ignore */
				}
<xe>.			{
					/* This is only needed for \ just before EOF */
					addlitchar(yytext[0], yyscanner);
				}
<xq,xe,xus><<EOF>>		{ hint_scanner_yyerror_emit("unterminated quoted string", yyscanner); return 0; }

{dolqdelim}		{
					yyextra->dolqstart = pstrdup(yytext);
					BEGIN(xdolq);
					startlit();
				}
{dolqfailed}	{
					/* throw back all but the initial "$" */
					yyless(1);
					/* and treat it as {other} */
					yyextra->is_hint_str = true;
					return yytext[0];
				}
<xdolq>{dolqdelim} {
					if (strcmp(yytext, yyextra->dolqstart) == 0)
					{
						pfree(yyextra->dolqstart);
						yyextra->dolqstart = NULL;
						BEGIN(INITIAL);
						yylval->str = litbufdup(yyscanner);
						yyextra->is_hint_str = true;
						return SCONST;
					}
					else
					{
						/*
						 * When we fail to match $...$ to dolqstart, transfer
						 * the $... part to the output, but put back the final
						 * $ for rescanning.  Consider $delim$...$junk$delim$
						 */
						addlit(yytext, yyleng-1, yyscanner);
						yyless(yyleng-1);
					}
				}
<xdolq>{dolqinside} {
					addlit(yytext, yyleng, yyscanner);
				}
<xdolq>{dolqfailed} {
					addlit(yytext, yyleng, yyscanner);
				}
<xdolq>.		{
					/* This is only needed for $ inside the quoted text */
					addlitchar(yytext[0], yyscanner);
				}
<xdolq><<EOF>>	{ hint_scanner_yyerror_emit("unterminated dollar-quoted string", yyscanner); return 0; }

{xdstart}		{
					BEGIN(xd);
					startlit();
				}
{xuistart}		{
					BEGIN(xui);
					startlit();
				}
<xd>{xdstop}	{
					char		   *ident;

					BEGIN(INITIAL);
					if (yyextra->literallen == 0)
						hint_scanner_yyerror("zero-length delimited identifier", yyscanner);
					ident = litbufdup(yyscanner);
					if (yyextra->literallen >= NAMEDATALEN)
						truncate_identifier(ident, yyextra->literallen, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					yyextra->ident_quoted = true;
					yyextra->is_hint_str = true;
					return IDENT;
				}
<xui>{xuistop1}	{
					char		   *ident;
					int             identlen;

					BEGIN(INITIAL);
					if (yyextra->literallen == 0)
						hint_scanner_yyerror("zero-length delimited identifier", yyscanner);
					ident = litbuf_udeescape('\\', yyscanner);
					identlen = strlen(ident);
					if (identlen >= NAMEDATALEN)
						truncate_identifier(ident, identlen, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					/* throw back all but the quote */
					yyless(1);
					yyextra->ident_quoted = false;
					yyextra->is_hint_str = true;
					return IDENT;
				}
<xui>{xuistop2}	{
					char		   *ident;
					int             identlen;

					BEGIN(INITIAL);
					if (yyextra->literallen == 0)
						hint_scanner_yyerror("zero-length delimited identifier", yyscanner);
					ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
					identlen = strlen(ident);
					if (identlen >= NAMEDATALEN)
						truncate_identifier(ident, identlen, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					yyextra->ident_quoted = false;
					yyextra->is_hint_str = true;
					return IDENT;
				}
<xd,xui>{xddouble}	{
					addlitchar('"', yyscanner);
				}
<xd,xui>{xdinside}	{
					addlit(yytext, yyleng, yyscanner);
				}
<xd,xui><<EOF>>		{ hint_scanner_yyerror_emit("unterminated quoted identifier", yyscanner); return 0; }

{xufailed}	{
					char		   *ident;

					/* throw back all but the initial u/U */
					yyless(1);
					/* and treat it as {identifier} */
					ident = downcase_truncate_identifier(yytext, yyleng, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					yyextra->ident_quoted = false;
					yyextra->is_hint_str = true;
					return IDENT;
				}
{identifier}  {
			/*
			 * An unquoted word is first looked up (case-insensitively) in
			 * the parsers[] keyword table; if it is not a hint keyword it
			 * is handed to the grammar as a plain identifier.
			 */
			const hintKeyword *keyword = NULL;
			char             *ident;


			keyword = HintKeywordLookup(yytext);

			if (keyword != NULL)
			{
				/* hint keyword: return its token code, yylval is not set */
				return keyword->value;
			}
			else
			{
				/*
				 * NOTE(review): unlike the other IDENT-returning rules in
				 * this file, this branch does not assign
				 * yyextra->ident_quoted or yyextra->is_hint_str, and it
				 * passes a literal "true" rather than
				 * yyextra->warnOnTruncateIdent -- confirm this is intended.
				 */
				ident = downcase_truncate_identifier(yytext, yyleng, true);
				yylval->str = ident;
				return IDENT;
			}
	      }
{integer}		{
			return process_integer_literal(yytext, yylval);
	      	}
{decimal}		{
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{decimalfail}	{
					/* throw back the .., and treat as integer */
					yyless(yyleng-2);
					yyextra->is_hint_str = true;
					return process_integer_literal(yytext, yylval);
				}
{real}			{
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{realfail1}		{
					/*
					 * throw back the [Ee], and treat as {decimal}.  Note
					 * that it is possible the input is actually {integer},
					 * but since this case will almost certainly lead to a
					 * syntax error anyway, we don't bother to distinguish.
					 */
					yyless(yyleng-1);
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{realfail2}		{
					/* throw back the [Ee][+-], and proceed as above */
					yyless(yyleng-2);
					yylval->str = pstrdup(yytext);
					return FCONST;
				}
{self}		{
			return yytext[0];
		}

%%

/*
 * Allocation hook for the generated scanner (the file sets
 * %option noyyalloc): requests are served by palloc().  The scanner
 * handle is accepted only to satisfy the expected signature.
 */
void *
yyalloc(yy_size_t bytes, yyscan_t yyscanner)
{
	void	   *chunk = palloc(bytes);

	return chunk;
}

/*
 * Reallocation hook for the generated scanner (%option noyyrealloc).
 * A NULL pointer is treated as a request for a fresh allocation;
 * anything else is resized with repalloc().
 */
void *
yyrealloc(void *ptr, yy_size_t bytes, yyscan_t yyscanner)
{
	if (ptr == NULL)
		return palloc(bytes);

	return repalloc(ptr, bytes);
}

/*
 * Release hook for the generated scanner (%option noyyfree).
 * NULL pointers are ignored; everything else goes to pfree().
 */
void
yyfree(void *ptr, yyscan_t yyscanner)
{
	if (ptr == NULL)
		return;

	pfree(ptr);
}


/*
 * Create a reentrant scanner that reads the hint text in "str".
 *
 * str    NUL-terminated hint text; it is copied, the caller keeps ownership.
 * yyext  per-scanner state; its scan buffer and literal buffer are
 *        allocated here and attached to the new scanner.
 *
 * Returns the scanner handle.  Reports an ERROR if yylex_init() fails.
 */
yyscan_t
hint_scanner_init(const char *str, hint_yy_extra_type *yyext)
{
	yyscan_t	scanner;
	size_t		slen = strlen(str);
	size_t		buflen = slen + 2;	/* text plus two end-of-buffer marks */
	errno_t		rc = EOK;

	if (yylex_init(&scanner) != 0)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
				 errmsg("yylex_init() failed: %m")));
	}

	yyset_extra(yyext, scanner);

	/*
	 * Make a scan buffer with special termination needed by flex.
	 */
	yyext->scanbuf = (char *) palloc(buflen);
	yyext->scanbuflen = slen;
	rc = memcpy_s(yyext->scanbuf, buflen, str, slen);
	securec_check_c(rc, "\0", "\0");

	yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
	yyext->scanbuf[slen] = YY_END_OF_BUFFER_CHAR;
	yy_scan_buffer(yyext->scanbuf, buflen, scanner);

	/* initialize literal buffer to a reasonable but expansible size */
	yyext->literalalloc = 1024;
	yyext->literalbuf = (char *) palloc(yyext->literalalloc);
	yyext->literallen = 0;

	return scanner;
}

/*
 * Find the hint keyword whose name equals "str", ignoring case.
 *
 * Returns a pointer to the first matching entry of parsers[], or NULL
 * when the word is not a hint keyword.
 */
static const hintKeyword*
HintKeywordLookup(const char *str)
{
	const int	nkeywords = (int) lengthof(parsers);
	int			idx = 0;

	while (idx < nkeywords)
	{
		const hintKeyword *entry = &parsers[idx];

		if (strcasecmp(entry->name, str) == 0)
			return entry;

		idx++;
	}

	return NULL;
}

/*
 * Tear down a scanner created by hint_scanner_init(): free the scan
 * buffer and the literal buffer held in the scanner's extra data, then
 * destroy the flex scanner itself.
 */
void
hint_scanner_destroy(yyscan_t yyscanner)
{
	hint_yy_extra_type *extra = yyextra;

	/* copied from scanner_finish */
	pfree(extra->scanbuf);
	pfree(extra->literalbuf);

	yylex_destroy(yyscanner);
}

/*
 * Record a scanner problem as a pending hint warning.
 *
 * The message is formatted together with the current line number and the
 * current token text and appended to u_sess->parser_cxt.hint_warning.
 * Note that this function returns normally to its caller; it does not
 * raise an error itself.
 */
void
hint_scanner_yyerror(const char *msg, yyscan_t yyscanner)
{
	StringInfoData text;
	List	   *pending = u_sess->parser_cxt.hint_warning;

	initStringInfo(&text);
	appendStringInfo(&text, "LINE %d: %s at '%s'",
					 yyget_lineno(yyscanner), msg, yyget_text(yyscanner));

	pending = lappend(pending, makeString(text.data));
	u_sess->parser_cxt.hint_warning = pending;
}

/*
 * Report an unrecoverable scanner problem.
 *
 * The hint warnings collected so far are first passed to
 * output_hint_warning() at WARNING level, then a syntax error carrying
 * the line number, message and current token text is reported at ERROR
 * level.
 */
static void
hint_scanner_yyerror_emit(const char* msg, yyscan_t yyscanner)
{
	List	   *pending = u_sess->parser_cxt.hint_warning;

	output_hint_warning(pending, WARNING);

	ereport(ERROR,
			(errcode(ERRCODE_SYNTAX_ERROR),
			 errmsg("LINE %d: %s at '%s'",
					yyget_lineno(yyscanner), msg, yyget_text(yyscanner))));
}

/*
 * Convert the digit string "token" into a token value.
 *
 * If the text parses completely as a base-10 number that fits in an int4,
 * the value is stored in lval->ival and ICONST is returned.  Otherwise a
 * copy of the text is stored in lval->str and FCONST is returned.
 */
static int
process_integer_literal(const char *token, YYSTYPE *lval)
{
	char	   *endptr;
	long		val;
	bool		fits_int4;

	errno = 0;
	val = strtol(token, &endptr, 10);

	fits_int4 = (*endptr == '\0' && errno != ERANGE);
#ifdef HAVE_LONG_INT_64
	/* if long > 32 bits, check for overflow of int4 */
	if (fits_int4 && val != (long) ((int32) val))
		fits_int4 = false;
#endif

	if (fits_int4)
	{
		lval->ival = val;
		return ICONST;
	}

	/* integer too large, treat it as a float */
	lval->str = pstrdup(token);
	return FCONST;
}


/*
 * Return a freshly palloc'd, NUL-terminated copy of the bytes currently
 * held in the literal buffer.
 */
static char *
litbufdup(yyscan_t yyscanner)
{
	const int	nbytes = yyextra->literallen;
	char	   *copy = (char *) palloc(nbytes + 1);

	memcpy(copy, yyextra->literalbuf, nbytes);
	copy[nbytes] = '\0';

	return copy;
}


/*
 * Append "yleng" bytes starting at "ytext" to the literal buffer,
 * doubling the buffer's allocation as often as needed so that the new
 * length stays strictly below the allocated size.
 */
static void
addlit(const char *ytext, int yleng, yyscan_t yyscanner)
{
	bool		grew = false;

	/* enlarge buffer if needed */
	while ((yyextra->literallen + yleng) >= yyextra->literalalloc)
	{
		yyextra->literalalloc *= 2;
		grew = true;
	}

	if (grew)
	{
		/*
		 * Once the text passes 512M, doubling takes the allocation past 1G,
		 * hence repalloc_huge rather than repalloc.
		 */
		yyextra->literalbuf = (char *) repalloc_huge(yyextra->literalbuf,
													 yyextra->literalalloc);
	}

	/* append new data */
	memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
	yyextra->literallen += yleng;
}


/*
 * Append the single byte "ychar" to the literal buffer, doubling the
 * buffer's allocation first when the new length would reach it.
 */
static void
addlitchar(unsigned char ychar, yyscan_t yyscanner)
{
	/* enlarge buffer if needed */
	if ((yyextra->literallen + 1) >= yyextra->literalalloc)
	{
		yyextra->literalalloc *= 2;

		/*
		 * Use the same allocator as addlit(): both functions grow the one
		 * shared buffer, and a doubling that addlit() is allowed to perform
		 * must not fail merely because the last byte arrived through this
		 * function instead.
		 */
		yyextra->literalbuf = (char *) repalloc_huge(yyextra->literalbuf,
													 yyextra->literalalloc);
	}
	/* append new data */
	yyextra->literalbuf[yyextra->literallen] = ychar;
	yyextra->literallen += 1;
}

/*
 * Return the numeric value (0..15) of the hexadecimal digit "c".
 *
 * For any other character an "invalid hexadecimal digit" problem is
 * recorded through hint_scanner_yyerror() and 0 is returned.
 */
static unsigned int
hexval(unsigned char c, yyscan_t yyscanner)
{
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 0xA;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 0xA;
	if (c >= '0' && c <= '9')
		return c - '0';

	hint_scanner_yyerror("invalid hexadecimal digit", yyscanner);

	/* hint_scanner_yyerror() returns, so callers do see this value */
	return 0;
}

/*
 * Record a problem when a Unicode escape names a code point above 007F
 * while the database encoding is not UTF8.  The "loc" argument is not
 * used.
 */
static void
check_unicode_value(pg_wchar c, char *loc, yyscan_t yyscanner)
{
	if (GetDatabaseEncoding() != PG_UTF8 && c > 0x7F)
		hint_scanner_yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8", yyscanner);
}

/*
 * True when "c" lies in D800..DBFF, the range this file treats as the
 * first half of a UTF-16 surrogate pair.
 */
static bool
is_utf16_surrogate_first(pg_wchar c)
{
	if (c < 0xD800)
		return false;

	return c <= 0xDBFF;
}

/*
 * True when "c" lies in DC00..DFFF, the range this file treats as the
 * second half of a UTF-16 surrogate pair.
 */
static bool
is_utf16_surrogate_second(pg_wchar c)
{
	if (c < 0xDC00)
		return false;

	return c <= 0xDFFF;
}

/*
 * Combine the two halves of a surrogate pair into one code point: the low
 * ten bits of "first" become bits 10..19, 0x10000 is added, and the low
 * ten bits of "second" fill bits 0..9.
 */
static pg_wchar
surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
{
	pg_wchar	high_bits = (first & 0x3FF) << 10;
	pg_wchar	low_bits = second & 0x3FF;

	return high_bits + 0x10000 + low_bits;
}

/*
 * Append the UTF-8 encoding of code point "c" to the literal buffer.
 *
 * A problem is recorded, and nothing is appended, when "c" is zero or
 * above 0x10FFFF, or when it is above 007F while the database encoding is
 * not UTF8.  hint_scanner_yyerror() returns to its caller, so each
 * rejection must leave the function explicitly; otherwise the rejected
 * value would still be encoded and added to the literal.
 */
static void
addunicode(pg_wchar c, yyscan_t yyscanner)
{
	char buf[8];

	if (c == 0 || c > 0x10FFFF)
	{
		hint_scanner_yyerror("invalid Unicode escape value", yyscanner);
		return;
	}
	if (c > 0x7F)
	{
		if (GetDatabaseEncoding() != PG_UTF8)
		{
			hint_scanner_yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8", yyscanner);
			return;
		}
		/* the finished literal must be re-verified as multibyte text */
		yyextra->saw_non_ascii = true;
	}
	unicode_to_utf8(c, (unsigned char *) buf);
	addlit(buf, pg_mblen(buf), yyscanner);
}

/*
 * Build a palloc'd copy of the literal buffer with Unicode escapes
 * expanded.
 *
 * escape  the escape character in effect.  A doubled escape character
 *         yields one literal escape character; escape followed by four hex
 *         digits, or by '+' and six hex digits, yields the UTF-8 encoding
 *         of that code point.  Surrogate halves are combined when a first
 *         half is directly followed by a second half.
 *
 * Problems are recorded through hint_scanner_yyerror(), which returns to
 * its caller, so every error path in the loop below must still move "in"
 * forward.  The result is checked with pg_verifymbstr() before it is
 * returned.
 */
static char *
litbuf_udeescape(unsigned char escape, yyscan_t yyscanner)
{
	char *newm;
	char *litbuf, *in, *out;
	pg_wchar pair_first = 0;

	if (isxdigit(escape)
		|| escape == '+'
		|| escape == '\''
		|| escape == '"'
		|| scanner_isspace(escape))
	{
		hint_scanner_yyerror("invalid Unicode escape character", yyscanner);
	}

	/* Make literalbuf null-terminated to simplify the scanning loop */
	litbuf = yyextra->literalbuf;
	litbuf[yyextra->literallen] = '\0';

	/*
	 * This relies on the subtle assumption that a UTF-8 expansion
	 * cannot be longer than its escaped representation.
	 */
	newm = (char *)palloc(yyextra->literallen + 1);

	in = litbuf;
	out = newm;
	while (*in)
	{
		if (in[0] == escape)
		{
			if (in[1] == escape)
			{
				/* doubled escape character stands for itself */
				if (pair_first)
				{
					hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);
				}
				*out++ = escape;
				in += 2;
			}
			else if (isxdigit((unsigned char) in[1]) &&
					 isxdigit((unsigned char) in[2]) &&
					 isxdigit((unsigned char) in[3]) &&
					 isxdigit((unsigned char) in[4]))
			{
				/* four-digit form: escape XXXX */
				pg_wchar unicode;

				unicode = (hexval(in[1], yyscanner) << 12) +
					(hexval(in[2], yyscanner) << 8) +
					(hexval(in[3], yyscanner) << 4) +
					hexval(in[4], yyscanner);
				check_unicode_value(unicode, in, yyscanner);
				if (pair_first)
				{
					if (is_utf16_surrogate_second(unicode))
					{
						unicode = surrogate_pair_to_codepoint(pair_first, unicode);
						pair_first = 0;
					}
					else
					{
						hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);
					}
				}
				else if (is_utf16_surrogate_second(unicode))
					hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);

				/* a first half is held back until its partner arrives */
				if (is_utf16_surrogate_first(unicode))
					pair_first = unicode;
				else
				{
					unicode_to_utf8(unicode, (unsigned char *) out);
					out += pg_mblen(out);
				}
				in += 5;
			}
			else if (in[1] == '+' &&
					 isxdigit((unsigned char) in[2]) &&
					 isxdigit((unsigned char) in[3]) &&
					 isxdigit((unsigned char) in[4]) &&
					 isxdigit((unsigned char) in[5]) &&
					 isxdigit((unsigned char) in[6]) &&
					 isxdigit((unsigned char) in[7]))
			{
				/* six-digit form: escape +XXXXXX */
				pg_wchar unicode;

				unicode = (hexval(in[2], yyscanner) << 20) +
					(hexval(in[3], yyscanner) << 16) +
					(hexval(in[4], yyscanner) << 12) +
					(hexval(in[5], yyscanner) << 8) +
					(hexval(in[6], yyscanner) << 4) +
					hexval(in[7], yyscanner);
				check_unicode_value(unicode, in, yyscanner);
				if (pair_first)
				{
					if (is_utf16_surrogate_second(unicode))
					{
						unicode = surrogate_pair_to_codepoint(pair_first, unicode);
						pair_first = 0;
					}
					else
					{
						hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);
					}
				}
				else if (is_utf16_surrogate_second(unicode))
					hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);

				/* a first half is held back until its partner arrives */
				if (is_utf16_surrogate_first(unicode))
					pair_first = unicode;
				else
				{
					unicode_to_utf8(unicode, (unsigned char *) out);
					out += pg_mblen(out);
				}
				in += 8;
			}
			else
			{
				hint_scanner_yyerror("invalid Unicode escape value", yyscanner);

				/*
				 * The error routine only records the problem and returns.
				 * Copy the escape character through and step past it;
				 * without this the loop would look at the same byte
				 * forever, adding a warning on every pass.
				 */
				*out++ = *in++;
			}
		}
		else
		{
			/* ordinary byte: copy through unchanged */
			if (pair_first)
			{
				hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);
			}
			*out++ = *in++;
		}
	}

	/* unfinished surrogate pair? */
	if (pair_first)
	{
		hint_scanner_yyerror("invalid Unicode surrogate pair", yyscanner);
	}

	*out = '\0';
	/*
	 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
	 * codes; but it's probably not worth the trouble, since this isn't
	 * likely to be a performance-critical path.
	 */
	pg_verifymbstr(newm, out - newm, false);
	return newm;
}

/*
 * Translate the character following a backslash into the byte it stands
 * for: b, f, n, r and t map to the matching control character, any other
 * character stands for itself.  When that character is NUL or has its
 * high bit set, saw_non_ascii is raised in the scanner's extra data.
 */
static unsigned char
unescape_single_char(unsigned char c, yyscan_t yyscanner)
{
	if (c == 'b')
		return '\b';
	if (c == 'f')
		return '\f';
	if (c == 'n')
		return '\n';
	if (c == 'r')
		return '\r';
	if (c == 't')
		return '\t';

	/* check for backslash followed by non-7-bit-ASCII */
	if (c == '\0' || IS_HIGHBIT_SET(c))
		yyextra->saw_non_ascii = true;

	return c;
}

/*
 * Record the "nonstandard use of ..." notice that fits the escaped
 * character "ychar": one wording for \', one for \\, and the generic one
 * (through check_escape_warning) for everything else.  A notice is only
 * recorded while warn_on_first_escape is set and escape_string_warning is
 * enabled; the flag is cleared afterwards so each string warns once.
 */
static void
check_string_escape_warning(unsigned char ychar, yyscan_t yyscanner)
{
	const char *notice;

	if (ychar == '\'')
		notice = "nonstandard use of \\' in a string literal. Use '' to write quotes in strings, or use the escape string syntax (E'...').";
	else if (ychar == '\\')
		notice = "nonstandard use of \\\\ in a string literal. Use the escape string syntax for backslashes, e.g., E'\\\\'.";
	else
	{
		check_escape_warning(yyscanner);
		return;
	}

	if (yyextra->warn_on_first_escape && u_sess->attr.attr_sql.escape_string_warning)
		hint_scanner_yyerror(notice, yyscanner);

	yyextra->warn_on_first_escape = false;	/* warn only once per string */
}

/*
 * Record the generic "nonstandard use of escape" notice, provided
 * warn_on_first_escape is still set and escape_string_warning is enabled,
 * then clear the flag so that each string warns at most once.
 */
static void
check_escape_warning(yyscan_t yyscanner)
{
	bool		must_warn = yyextra->warn_on_first_escape &&
		u_sess->attr.attr_sql.escape_string_warning;

	if (must_warn)
	{
		hint_scanner_yyerror("nonstandard use of escape in a string literal. Use the escape string syntax for escapes, e.g., E'\\r\\n'.", yyscanner);
	}

	yyextra->warn_on_first_escape = false;	/* warn only once per string */
}