* Copyright (c) 2021-2022 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "keywordsUtil.h"
#include <gen/keywords.h>
#include <unicode/uchar.h>
namespace panda::es2panda::lexer {
/**
 * Classification bits kept for each entry of the ASCII lookup table.
 * The values are single bits so that several can be combined with operator|.
 */
enum class AsciiFlags : uint8_t {
    NONE = 0,              // neither bit set
    ID_START = 1 << 0,     // tested by KeywordsUtil::IsIdentifierStart
    ID_CONTINUE = 1 << 1,  // tested by KeywordsUtil::IsIdentifierPart
};

/**
 * Combines two flag sets.
 *
 * @return an AsciiFlags value holding every bit set in `a` or in `b`
 */
constexpr AsciiFlags operator|(AsciiFlags a, AsciiFlags b)
{
    using utype = std::underlying_type_t<AsciiFlags>;
    return static_cast<AsciiFlags>(static_cast<utype>(a) | static_cast<utype>(b));
}

/**
 * Masks one flag set with another.
 *
 * The result is the raw underlying integer (not an AsciiFlags) so that callers
 * can compare it against zero directly. Declared constexpr, like operator|, so
 * that the test can also be evaluated in constant expressions.
 *
 * @return the bits that are set in both `a` and `b`
 */
constexpr std::underlying_type_t<AsciiFlags> operator&(AsciiFlags a, AsciiFlags b)
{
    using utype = std::underlying_type_t<AsciiFlags>;
    return static_cast<utype>(static_cast<utype>(a) & static_cast<utype>(b));
}
constexpr std::array<AsciiFlags, 128> ASCII_FLAGS = {{
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::ID_START | AsciiFlags::ID_CONTINUE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE,
AsciiFlags::NONE
}};
/**
 * Reports whether `cp` may be the first code point of an identifier.
 *
 * @param cp code point to classify
 * @return for code points below LEX_ASCII_MAX_BITS, whether the ASCII table marks
 *         them ID_START; otherwise ICU's answer for the UCHAR_ID_START property
 */
bool KeywordsUtil::IsIdentifierStart(char32_t cp)
{
    // Anything outside the ASCII table is classified by ICU.
    if (cp >= LEX_ASCII_MAX_BITS) {
        return u_hasBinaryProperty(static_cast<UChar32>(cp), UCHAR_ID_START);
    }

    const auto startBit = ASCII_FLAGS[cp] & AsciiFlags::ID_START;
    return startBit != 0;
}
bool KeywordsUtil::IsIdentifierPart(char32_t cp)
{
if (cp < LEX_ASCII_MAX_BITS) {
return (ASCII_FLAGS[cp] & AsciiFlags::ID_CONTINUE) != 0;
}
* u_isIDPart or Other_ID_Continue characters or ZWJ/ZWNJ.
* Unicode {xxxxx} may consist of 4 bytes information and cannot be forcibly converted to 2 bytes
*/
auto uchar = static_cast<UChar32>(cp);
return (u_hasBinaryProperty(uchar, UCHAR_ID_CONTINUE) || cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ);
}
/**
 * Begins scanning an identifier whose first code point is `cp`.
 *
 * Reports "Expected an identifier" through the lexer when `cp` cannot start an
 * identifier, records `cp` in cp_, then scans the rest of the identifier against
 * the keyword list that KeywordsMap::Map returns for `cp`.
 *
 * @param cp first code point of the identifier
 */
void KeywordsUtil::ScanIdentifierStart(char32_t cp)
{
    const bool startsIdentifier = KeywordsUtil::IsIdentifierStart(cp);
    if (!startsIdentifier) {
        lexer_->ThrowError("Expected an identifier");
    }

    cp_ = cp;
    ScanIdContinueMaybeKeyword(KeywordsMap::Map(cp));
}
void KeywordsUtil::ScanIdContinue()
{
util::UString ident(lexer_->Allocator());
size_t startPos = lexer_->GetToken().Start().index;
if (HasEscape()) {
ident.Append(cp_);
startPos = Iterator().Index();
}
auto escapeEnd = startPos;
do {
if (Iterator().Peek() == LEX_CHAR_BACKSLASH) {
ident.Append(lexer_->SourceView(escapeEnd, Iterator().Index()));
auto cp = ScanUnicodeEscapeSequence();
if (!IsIdentifierPart(cp)) {
lexer_->ThrowError("Invalid identifier part");
}
escapeEnd = Iterator().Index();
ident.Append(cp);
continue;
}
size_t cpSize {};
auto cp = Iterator().PeekCp(&cpSize);
if (!IsIdentifierPart(cp)) {
break;
}
Iterator().Forward(cpSize);
} while (true);
lexer_->GetToken().type_ = TokenType::LITERAL_IDENT;
lexer_->GetToken().keywordType_ = TokenType::EOS;
if (HasEscape()) {
ident.Append(lexer_->SourceView(escapeEnd, Iterator().Index()));
lexer_->GetToken().src_ = ident.View();
} else {
lexer_->GetToken().src_ = lexer_->SourceView(startPos, Iterator().Index());
}
}
/**
 * Scans the rest of an identifier and, if it contained an escape, checks whether
 * the resulting text matches one of the candidate keywords.
 *
 * Nothing further is done when the identifier had no escape or when `map` is empty.
 * Otherwise `map` is binary-searched for the token text and Keywords::SetKeyword is
 * invoked for the matching entry.
 *
 * @param map candidate keywords; assumed to be sorted in ascending order of `str`
 *            with no duplicates, since the search relies on it — TODO confirm
 *            against the keyword table generator
 */
void KeywordsUtil::ScanIdContinueMaybeKeyword(Span<const KeywordString> map)
{
    ScanIdContinue();

    if (!HasEscape() || map.empty()) {
        return;
    }

    const auto &str = lexer_->GetToken().Ident().Utf8();

    // Half-open search window [start, end).
    int start = 0;
    int end = static_cast<int>(map.size());

    while (start < end) {
        const int middle = start + (end - start) / 2;
        const auto &kws = map[middle];

        const int relation = str.compare(kws.str);
        if (relation == 0) {
            Keywords::SetKeyword(this, kws);
            // The match has been handled; there is nothing left to search for.
            return;
        }

        if (relation > 0) {
            start = middle + 1;
        } else {
            end = middle;
        }
    }
}
/**
 * Consumes a backslash escape inside an identifier.
 *
 * The iterator must be positioned on LEX_CHAR_BACKSLASH. The current token is
 * flagged HAS_ESCAPE and the backslash is skipped before the next character is
 * inspected.
 *
 * @return the value produced by the lexer's ScanUnicodeEscapeSequence when the
 *         backslash is followed by LEX_CHAR_LOWERCASE_U; otherwise
 *         util::StringView::Iterator::INVALID_CP
 */
char32_t KeywordsUtil::ScanUnicodeEscapeSequence()
{
    ASSERT(Iterator().Peek() == LEX_CHAR_BACKSLASH);

    // Flag the token first: this happens even if the escape turns out to be malformed.
    lexer_->GetToken().flags_ |= lexer::TokenFlags::HAS_ESCAPE;

    Iterator().Forward(1);

    if (Iterator().Peek() == LEX_CHAR_LOWERCASE_U) {
        return lexer_->ScanUnicodeEscapeSequence();
    }

    return util::StringView::Iterator::INVALID_CP;
}
}