/* -------------------------------------------------------------------------
 *  This file is part of the oGRAC project.
 * Copyright (c) 2024 Huawei Technologies Co.,Ltd.
 *
 * oGRAC is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 * -------------------------------------------------------------------------
 *
 * kwlookup.c
 *
 *
 * IDENTIFICATION
 * src/ogsql/parser/kwlookup.c
 *
 * -------------------------------------------------------------------------
 */
#include <ctype.h>
#include <stddef.h>
#include <string.h>

#include "cm_types.h"
#include "kwlookup.h"

/*
 * ScanKeywordLookup - see if a given word is a keyword
 *
 * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
 *
 * The match is done case-insensitively.  Note that we deliberately use a
 * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
 * even if we are in a locale where tolower() would produce more or different
 * translations.  This is to conform to the SQL99 spec, which says that
 * keywords are to be matched in this way even though non-keyword identifiers
 * receive a different case-normalization mapping.
 */
int ScanKeywordLookup(const char *str, const ScanKeywordList *keywords)
{
    size_t len;
    int h;
    const char *kw;

    /*
     * Reject immediately if too long to be any keyword.  This saves useless
     * hashing and downcasing work on long strings.
     */
    len = strlen(str);
    if (len > (size_t)keywords->max_kw_len) {
        return -1;
    }

    /*
     * Compute the hash function.  We assume it was generated to produce
     * case-insensitive results.  Since it's a perfect hash, we need only
     * match to the specific keyword it identifies.
     */
    h = keywords->hash(str, len);
    /* An out-of-range result implies no match */
    if (h < 0 || h >= keywords->num_keywords) {
        return -1;
    }

    /*
     * Compare character-by-character to see if we have a match, applying an
     * ASCII-only downcasing to the input characters.  We must not use
     * tolower() since it may produce the wrong translation in some locales
     * (eg, Turkish).
     */
    kw = GetScanKeyword(h, keywords);
    while (*str != '\0') {
        char ch = *str++;
        if (ch >= 'A' && ch <= 'Z') {
            ch += 'a' - 'A';
        }
        if (ch != *kw++) {
            return -1;
        }
    }
    if (*kw != '\0') {
        return -1;
    }

    /* Success! */
    return h;
}