* Copyright (c) 2021-2025 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "keywordsUtil.h"
#include "generated/keywords.h"
#include "lexer/lexer.h"
#include "unicode/uchar.h"
#include "util/enumbitops.h"
namespace ark::es2panda::lexer {
// Classification bits stored per ASCII code point in the lookup table used by
// KeywordsUtil::IsIdentifierStart / KeywordsUtil::IsIdentifierPart.
enum class AsciiFlags : uint32_t {
    NONE = 0U,
    // Tested by IsIdentifierStart().
    ID_START = 1U << 0U,
    // Tested by IsIdentifierPart().
    ID_CONTINUE = 1U << 1U,
};

// Combines two flag sets; usable in constant expressions so the table entries
// can be spelled as `ID_START | ID_CONTINUE`.
constexpr AsciiFlags operator|(AsciiFlags a, AsciiFlags b)
{
    using Utype = std::underlying_type_t<AsciiFlags>;
    return static_cast<AsciiFlags>(static_cast<Utype>(a) | static_cast<Utype>(b));
}

// Intersects two flag sets and returns the raw underlying integer, so callers
// can compare the result against 0 directly. Declared constexpr (like
// operator|) so that flag tests may also be evaluated at compile time.
constexpr std::underlying_type_t<AsciiFlags> operator&(AsciiFlags a, AsciiFlags b)
{
    using Utype = std::underlying_type_t<AsciiFlags>;
    return static_cast<Utype>(static_cast<Utype>(a) & static_cast<Utype>(b));
}
constexpr std::array<AsciiFlags, 128> ASCII_FLAGS = {{
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE
}};
// Reports whether the code point `cp` may begin an identifier.
// Code points below LEX_ASCII_MAX_BITS are answered from the ASCII_FLAGS
// table; all others are delegated to ICU's UCHAR_ID_START binary property.
bool KeywordsUtil::IsIdentifierStart(char32_t cp)
{
    if (cp >= LEX_ASCII_MAX_BITS) {
        return u_hasBinaryProperty(static_cast<UChar32>(cp), UCHAR_ID_START);
    }

    const auto startBit = ASCII_FLAGS[cp] & AsciiFlags::ID_START;
    return startBit != 0;
}
// Reports whether the code point `cp` may appear after the first character of
// an identifier. Code points below LEX_ASCII_MAX_BITS are answered from the
// ASCII_FLAGS table. Any other code point qualifies when ICU reports the
// UCHAR_ID_CONTINUE property for it, or when it equals LEX_CHAR_ZWNJ or
// LEX_CHAR_ZWJ.
bool KeywordsUtil::IsIdentifierPart(char32_t cp)
{
    if (cp >= LEX_ASCII_MAX_BITS) {
        if (u_hasBinaryProperty(static_cast<UChar32>(cp), UCHAR_ID_CONTINUE)) {
            return true;
        }
        return cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ;
    }

    const auto continueBit = ASCII_FLAGS[cp] & AsciiFlags::ID_CONTINUE;
    return continueBit != 0;
}
// Begins scanning an identifier whose first code point is `cp`.
// An invalid start character is reported through the lexer, but scanning
// still proceeds. `cp` is remembered in cp_, and the keyword candidates that
// `kws` returns for `cp` are handed to ScanIdContinueMaybeKeyword().
void KeywordsUtil::ScanIdentifierStart(const Keywords *kws, char32_t cp)
{
    const bool validStart = KeywordsUtil::IsIdentifierStart(cp);
    if (!validStart) {
        lexer_->LogError(diagnostic::UNEXPECTED_TOKEN_ID);
    }

    cp_ = cp;

    const auto candidates = kws->KeywordMap(cp);
    ScanIdContinueMaybeKeyword(kws, candidates);
}
void KeywordsUtil::ScanIdContinue()
{
util::UString ident(lexer_->Allocator());
size_t startPos = lexer_->GetToken().Start().index;
if (HasEscape()) {
ident.Append(cp_);
startPos = Iterator().Index();
}
auto escapeEnd = startPos;
while (true) {
if (Iterator().Peek() == LEX_CHAR_BACKSLASH) {
ident.Append(lexer_->SourceView(escapeEnd, Iterator().Index()));
auto cp = ScanUnicodeEscapeSequence();
if (!IsIdentifierPart(cp)) {
lexer_->LogError(diagnostic::INVALID_IDENTIFIER_PART);
}
escapeEnd = Iterator().Index();
ident.Append(cp);
continue;
}
size_t cpSize {};
auto cp = Iterator().PeekCp(&cpSize);
if (!IsIdentifierPart(cp) &&
(cp != LEX_CHAR_PERCENT || (Flags() & NextTokenFlags::CHAR_PERCENT_ALLOWED) == 0)) {
break;
}
Iterator().Forward(cpSize);
}
lexer_->GetToken().type_ = TokenType::LITERAL_IDENT;
lexer_->GetToken().keywordType_ = TokenType::LITERAL_IDENT;
if (HasEscape()) {
ident.Append(lexer_->SourceView(escapeEnd, Iterator().Index()));
lexer_->GetToken().src_ = ident.View();
} else {
lexer_->GetToken().src_ = lexer_->SourceView(startPos, Iterator().Index());
}
}
void KeywordsUtil::ScanIdContinueMaybeKeyword(const Keywords *kws, Span<const KeywordString> map)
{
ScanIdContinue();
if (!HasEscape() || map.empty()) {
return;
}
const auto &str = lexer_->GetToken().Ident().Utf8();
size_t start = 0;
size_t end = map.size();
size_t middle = end / 2;
while (true) {
const auto &kw = map[middle];
int relation = str.compare(kw.Str());
if (relation == 0) {
kws->HandlePotentialEscapedKeyword(kw);
return;
}
if (relation > 0) {
start = middle + 1;
} else {
end = middle;
}
middle = (start + end) / 2U;
if (start >= end) {
return;
}
}
}
// Decodes an escape sequence inside an identifier. The iterator must be
// positioned on the backslash. The current token is marked HAS_ESCAPE and the
// backslash is consumed. If the next character is LEX_CHAR_LOWERCASE_U, the
// decoding is delegated to the lexer; otherwise INVALID_CP is returned.
char32_t KeywordsUtil::ScanUnicodeEscapeSequence()
{
    ES2PANDA_ASSERT(Iterator().Peek() == LEX_CHAR_BACKSLASH);

    lexer_->GetToken().flags_ |= lexer::TokenFlags::HAS_ESCAPE;

    // Step over the backslash.
    Iterator().Forward(1);

    if (Iterator().Peek() == LEX_CHAR_LOWERCASE_U) {
        return lexer_->ScanUnicodeEscapeSequence();
    }

    return util::StringView::Iterator::INVALID_CP;
}
}