/*
* Copyright (c) 2021-2024 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
#define ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
#include "ecmascript/base/builtins_base.h"
#include "ecmascript/builtins/builtins_string.h"
#include "ecmascript/ecma_runtime_call_info.h"
#include "ecmascript/js_tagged_value.h"
#include "ecmascript/regexp/regexp_parser.h"
#include "ecmascript/tagged_array-inl.h"
namespace panda::ecmascript::builtins {
class BuiltinsRegExp : public base::BuiltinsBase {
public:
enum RegExpSymbol {
SPLIT,
SEARCH,
MATCH,
MATCHALL,
REPLACE,
UNKNOWN
};
static JSTaggedValue RegExpConstructor(EcmaRuntimeCallInfo *argv);
static JSTaggedValue Exec(EcmaRuntimeCallInfo *argv);
static JSTaggedValue Test(EcmaRuntimeCallInfo *argv);
static JSTaggedValue ToString(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetFlags(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetGlobal(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetIgnoreCase(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetMultiline(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetDotAll(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetSource(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetSticky(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetUnicode(EcmaRuntimeCallInfo *argv);
static JSTaggedValue GetSpecies(EcmaRuntimeCallInfo *argv);
static JSTaggedValue Match(EcmaRuntimeCallInfo *argv);
static JSTaggedValue MatchAll(EcmaRuntimeCallInfo *argv);
static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv);
static JSTaggedValue Search(EcmaRuntimeCallInfo *argv);
static JSTaggedValue Split(EcmaRuntimeCallInfo *argv);
static JSTaggedValue RegExpCreate(JSThread *thread, const JSHandle<JSTaggedValue> &pattern,
const JSHandle<JSTaggedValue> &flags);
static JSTaggedValue RegExpCreateWithRawFlags(JSThread *thread, const JSHandle<JSTaggedValue> &pattern,
const JSHandle<JSTaggedValue> &flags);
static JSTaggedValue FlagsBitsToString(JSThread *thread, uint8_t flags);
static JSTaggedValue RegExpExec(JSThread *thread, const JSHandle<JSTaggedValue> ®exp,
const JSHandle<JSTaggedValue> &inputString, bool useCache,
bool isIntermediateResult = false);
static int64_t AdvanceStringIndex(const JSThread *thread, const JSHandle<JSTaggedValue> &inputStr, int64_t index,
bool unicode);
static JSTaggedValue GetHasIndices(EcmaRuntimeCallInfo *argv);
static JSTaggedValue ReplaceInternal(JSThread *thread,
JSHandle<JSTaggedValue> thisObj,
JSHandle<JSTaggedValue> string,
JSHandle<JSTaggedValue> inputReplaceValue);
static JSTaggedValue GetAllFlagsInternal(JSThread *thread, JSHandle<JSTaggedValue> &thisObj);
static bool IsFastRegExp(JSThread *thread, JSTaggedValue regexp,
RegExpSymbol symbolTag = RegExpSymbol::UNKNOWN);
static bool GetFlag(JSThread *thread, const JSHandle<JSTaggedValue> regexp, uint32_t flag, bool isFastPath);
static bool GetOriginalFlag(JSThread *thread, const JSHandle<JSTaggedValue> regexp, uint32_t flag);
static void SetLastIndex(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
JSTaggedValue lastIndex, bool isFastPath);
static int64_t GetLastIndex(JSThread *thread, const JSHandle<JSTaggedValue> regexp, bool isFastPath);
static JSTaggedValue RegExpBuiltinExecWithoutResult(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
const JSHandle<JSTaggedValue> inputStr,
bool isFastPath, uint32_t lastIndex, bool useCache);
static JSTaggedValue RegExpBuiltinExec(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
const JSHandle<JSTaggedValue> inputStr,
bool isFastPath, bool useCache, bool isIntermediateResult = false);
static JSTaggedValue RegExpSearch(JSThread *thread,
const JSHandle<JSTaggedValue> regexp,
const JSHandle<JSTaggedValue> string);
static JSTaggedValue RegExpSearchFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
const JSHandle<JSTaggedValue> string);
static JSTaggedValue RegExpSplit(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
JSHandle<JSTaggedValue> jsString, JSHandle<JSTaggedValue> limit,
bool isFastPath);
static JSTaggedValue GetExecResultIndex(JSThread *thread, const JSHandle<JSTaggedValue> &execResults,
bool isFastPath);
static JSTaggedValue GetExecResultGroups(JSThread *thread, const JSHandle<JSTaggedValue> &execResults,
bool isFastPath);
static JSTaggedValue RegExpMatch(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
const JSHandle<JSTaggedValue> string, bool isFastPath);
static JSTaggedValue RegExpMatchAll(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
const JSHandle<EcmaString> string, bool isFastPath);
#define SET_GET_CAPTURE(index) \
static JSTaggedValue GetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj); \
static bool SetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj, \
const JSHandle<JSTaggedValue> &value, bool mayThrow);
SET_GET_CAPTURE(1)
SET_GET_CAPTURE(2)
SET_GET_CAPTURE(3)
SET_GET_CAPTURE(4)
SET_GET_CAPTURE(5)
SET_GET_CAPTURE(6)
SET_GET_CAPTURE(7)
SET_GET_CAPTURE(8)
SET_GET_CAPTURE(9)
#undef SET_GET_CAPTURE
#define REGEXP_SYMBOL_FUNCTION_LIST(V) \
V(SPLIT, Split) \
V(SEARCH, Search) \
V(MATCH, Match) \
V(MATCHALL, MatchAll) \
V(REPLACE, Replace)
private:
enum class StringSource {
ONHEAP_STRING,
OFFHEAP_STRING,
};
static constexpr uint32_t MIN_REGEXP_PATTERN_LENGTH_EXECUTE_WITH_OFFHEAP_STRING = 4000;
static constexpr uint32_t MIN_REPLACE_STRING_LENGTH = 1000;
static constexpr uint32_t MAX_SPLIT_LIMIT = 0xFFFFFFFFu;
static constexpr uint32_t REGEXP_GLOBAL_ARRAY_SIZE = 9;
static constexpr uint32_t LAST_INDEX_OFFSET = 0;
static constexpr uint32_t EXEC_RESULT_INDEX_OFFSET = 1;
static constexpr uint32_t EXEC_RESULT_INPUT_OFFSET = 2;
static constexpr uint32_t EXEC_RESULT_GROUPS_OFFSET = 3;
static constexpr uint32_t REPLACE_RESULT_VAL = 2;
static constexpr unsigned REPLACE_LENGTH_BITS = 30;
static constexpr unsigned REPLACE_POSITION_BITS = 30;
using ReplaceLengthField = BitField<uint32_t, 0, REPLACE_LENGTH_BITS>;
using ReplacePositionField = ReplaceLengthField::NextField<uint32_t, REPLACE_POSITION_BITS>;
static bool Matcher(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
const uint8_t *buffer, size_t length, int32_t lastindex,
bool isUtf16, StringSource source, uint32_t extraFlags);
static JSTaggedValue GetFlagsInternal(JSThread *thread, const JSHandle<JSTaggedValue> &obj,
const JSHandle<JSTaggedValue> &constructor, const uint8_t mask);
static JSTaggedValue RegExpAlloc(JSThread *thread, const JSHandle<JSTaggedValue> &newTarget);
static uint32_t UpdateExpressionFlags(JSThread *thread, const CString &checkStr);
template <bool needFlagsTransition = true>
static JSTaggedValue RegExpInitialize(JSThread *thread, const JSHandle<JSTaggedValue> &obj,
const JSHandle<JSTaggedValue> &pattern, const JSHandle<JSTaggedValue> &flags);
static EcmaString *EscapeRegExpPattern(JSThread *thread, const JSHandle<JSTaggedValue> &src,
const JSHandle<JSTaggedValue> &flags);
static JSTaggedValue RegExpReplaceFast(JSThread *thread, JSHandle<JSTaggedValue> regexp,
JSHandle<EcmaString> inputString, uint32_t inputLength);
static JSTaggedValue GetLastIndex(JSThread *thread, JSHandle<JSTaggedValue> regexp,
uint32_t &lastIndex);
static bool ShouldUseCache(JSThread *thread, JSHandle<EcmaString> inputString);
static JSTaggedValue MatchAndReplace(JSThread *thread, JSHandle<JSTaggedValue> regexp,
JSHandle<EcmaString> inputString, uint32_t &flags,
uint32_t lastIndex, uint32_t inputLength,
std::string &resultString);
static JSTaggedValue RegExpTestFast(JSThread *thread, JSHandle<JSTaggedValue> regexp,
const JSHandle<JSTaggedValue> inputString, bool useCache);
static JSTaggedValue RegExpExecForTestFast(JSThread *thread, JSHandle<JSTaggedValue> regexp,
const JSHandle<JSTaggedValue> inputStr, bool useCache);
static JSHandle<JSTaggedValue> MakeMatchIndicesIndexPairArray(JSThread* thread,
const std::vector<std::pair<JSTaggedValue, JSTaggedValue>>& indices,
const std::vector<JSHandle<JSTaggedValue>>& groupNames, bool hasGroups);
static bool RegExpExecInternal(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
JSHandle<EcmaString> inputString, int32_t lastIndex, uint32_t extraFlags = 0);
static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
JSHandle<JSTaggedValue> string, uint32_t limit, bool useCache);
static JSHandle<EcmaString> CreateStringFromResultArray(JSThread *thread,
const CVector<JSHandle<JSTaggedValue>> &resultArray,
const std::vector<uint64_t> &resultLengthArray, JSHandle<EcmaString> srcString,
uint32_t resultStrLength, bool isUtf8);
};
/**
 * TaggedArray subclass used as the cache table for RegExp execution results.
 *
 * Slots 0 .. CACHE_TABLE_HEADER_SIZE - 1 hold bookkeeping values that are read and written through
 * the small accessors defined below. The PATTERN_INDEX .. EXTEND_INDEX constants name positions
 * relative to one cache entry (presumably ENTRY_SIZE slots wide - the code that walks entries is
 * not part of this header).
 */
class RegExpExecResultCache : public TaggedArray {
public:
    // Kind of cached result; passed to FindCachedResult, AddResultInCache, UpdateResultArray and Match.
    enum CacheType {
        REPLACE_TYPE,
        SPLIT_TYPE,
        MATCH_TYPE,
        EXEC_TYPE,
        INTERMEDIATE_REPLACE_TYPE,
        TEST_TYPE,
        SEARCH_TYPE,
    };

    // Reinterprets a TaggedObject pointer as a cache table; no check is performed here.
    static RegExpExecResultCache *Cast(TaggedObject *object)
    {
        return reinterpret_cast<RegExpExecResultCache *>(object);
    }

    // ---- Declared here, defined elsewhere -------------------------------------------------------
    static JSTaggedValue CreateCacheTable(JSThread *thread);
    static void ShrinkCacheTable(JSThread *thread, JSHandle<RegExpExecResultCache> table);
    static void ClearCache(JSThread* thread, JSHandle<JSTaggedValue> cache);
    template <RBMode mode = RBMode::DEFAULT_RB>
    JSTaggedValue FindCachedResult(JSThread *thread, const JSHandle<JSTaggedValue> input,
                                   CacheType type, const JSHandle<JSTaggedValue> regexp,
                                   JSTaggedValue lastIndexInput, JSHandle<JSTaggedValue> extend,
                                   bool isIntermediateResult = false);
    static void AddResultInCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache,
                                 const JSHandle<JSTaggedValue> regexp,
                                 const JSHandle<JSTaggedValue> input, const JSHandle<JSTaggedValue> resultArray,
                                 CacheType type, uint32_t lastIndexInput, uint32_t lastIndex,
                                 const JSHandle<JSTaggedValue> extend,
                                 bool isIntermediateResult = false);
    static void GrowRegexpCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache);
    void ClearEntry(JSThread *thread, int entry);
    void SetEntry(JSThread *thread, int entry, JSTaggedValue &patten, JSTaggedValue &flags, JSTaggedValue &input,
                  JSTaggedValue &lastIndexInputValue, JSTaggedValue &lastIndexValue, JSTaggedValue &extendValue,
                  JSTaggedValue &resTableArray);
    void UpdateResultArray(JSThread *thread, int entry, JSTaggedValue resultArray, CacheType type);
    template <RBMode mode = RBMode::DEFAULT_RB>
    bool Match(JSThread *thread, int entry, JSTaggedValue &pattenStr, JSTaggedValue &flagsStr,
               JSTaggedValue &inputStr, JSTaggedValue &lastIndexInputValue, JSTaggedValue &extend,
               CacheType type);
    static JSTaggedValue GetGlobalTable(JSThread *thread);

    // ---- Header-slot accessors ------------------------------------------------------------------
    // Setters box the value in a JSTaggedValue and store it with Set<false>; getters read the slot
    // back with GetPrimitive.

    void SetHitCount(JSThread *thread, int hitCount)
    {
        Set<false>(thread, CACHE_HIT_COUNT_INDEX, JSTaggedValue(hitCount));
    }

    int GetHitCount()
    {
        const JSTaggedValue stored = GetPrimitive(CACHE_HIT_COUNT_INDEX);
        return stored.GetInt();
    }

    // NOTE(review): the parameter is named hitCount but the value is written to CACHE_COUNT_INDEX.
    void SetCacheCount(JSThread *thread, int hitCount)
    {
        Set<false>(thread, CACHE_COUNT_INDEX, JSTaggedValue(hitCount));
    }

    int GetCacheCount()
    {
        const JSTaggedValue stored = GetPrimitive(CACHE_COUNT_INDEX);
        return stored.GetInt();
    }

    // Writes the cache count and the hit count to std::cout, one per line.
    void Print()
    {
        std::cout << "cache count: " << GetCacheCount() << std::endl;
        std::cout << "cache hit count: " << GetHitCount() << std::endl;
    }

    void SetLargeStrCount(JSThread *thread, uint32_t newCount)
    {
        Set<false>(thread, LARGE_STRING_COUNT_INDEX, JSTaggedValue(newCount));
    }

    void SetConflictCount(JSThread *thread, uint32_t newCount)
    {
        Set<false>(thread, CONFLICT_COUNT_INDEX, JSTaggedValue(newCount));
    }

    void SetStrLenThreshold(JSThread *thread, uint32_t newThreshold)
    {
        Set<false>(thread, STRING_LENGTH_THRESHOLD_INDEX, JSTaggedValue(newThreshold));
    }

    // The three getters below read the slot via GetInt() and return it as uint32_t.
    uint32_t GetLargeStrCount()
    {
        const JSTaggedValue stored = GetPrimitive(LARGE_STRING_COUNT_INDEX);
        return stored.GetInt();
    }

    uint32_t GetConflictCount()
    {
        const JSTaggedValue stored = GetPrimitive(CONFLICT_COUNT_INDEX);
        return stored.GetInt();
    }

    uint32_t GetStrLenThreshold()
    {
        const JSTaggedValue stored = GetPrimitive(STRING_LENGTH_THRESHOLD_INDEX);
        return stored.GetInt();
    }

    void SetCacheLength(JSThread *thread, int length)
    {
        Set<false>(thread, CACHE_LENGTH_INDEX, JSTaggedValue(length));
    }

    int GetCacheLength()
    {
        const JSTaggedValue stored = GetPrimitive(CACHE_LENGTH_INDEX);
        return stored.GetInt();
    }

    void SetLastMatchGlobalTableIndex(JSThread *thread, int index)
    {
        Set<false>(thread, LAST_MATCH_GLOBAL_TABLE_INDEX, JSTaggedValue(index));
    }

    int GetLastMatchGlobalTableIndex()
    {
        const JSTaggedValue stored = GetPrimitive(LAST_MATCH_GLOBAL_TABLE_INDEX);
        return stored.GetInt();
    }

    void SetUseLastMatch(JSThread *thread, bool useLastMatchIndex)
    {
        Set<false>(thread, USE_LAST_MATCH_INDEX, JSTaggedValue(useLastMatchIndex));
    }

    bool GetUseLastMatch()
    {
        const JSTaggedValue stored = GetPrimitive(USE_LAST_MATCH_INDEX);
        return stored.IsTrue();
    }

    void SetNeedUpdateGlobal(JSThread *thread, bool needUpdateGlobal)
    {
        Set<false>(thread, NEED_UPDATE_GLOBAL_INDEX, JSTaggedValue(needUpdateGlobal));
    }

    bool GetNeedUpdateGlobal()
    {
        const JSTaggedValue stored = GetPrimitive(NEED_UPDATE_GLOBAL_INDEX);
        return stored.IsTrue();
    }

    // Scope object: sets the guard tag to 1 on construction and back to 0 on destruction.
    class CacheGuardScope {
    public:
        CacheGuardScope(JSHandle<RegExpExecResultCache> cache): cache_(cache)
        {
            cache_->SetGuard(1);
        }

        ~CacheGuardScope()
        {
            cache_->SetGuard(0);
        }

    private:
        JSHandle<RegExpExecResultCache> cache_;
    };

    // True while the guard tag (kept in the array's extra-length field) equals 1.
    bool CacheInGuard()
    {
        return GetExtraLength() == 1;
    }

    // Stores the guard tag in the extra-length field; only 0 and 1 are accepted by the assertion.
    void SetGuard(uint32_t tag)
    {
        ASSERT(tag == 0 || tag == 1);
        SetExtraLength(tag);
    }

    // ---- Defaults -------------------------------------------------------------------------------
    static constexpr int DEFAULT_LARGE_STRING_COUNT = 10;
    static constexpr int DEFAULT_CONFLICT_COUNT = 100;
    static constexpr int INITIAL_CACHE_NUMBER = 0x10;
    static constexpr int DEFAULT_CACHE_NUMBER = 0x1000;
    static constexpr int DEFAULT_LAST_MATCH_INDEX = -1;

    // ---- Header slots (used by the accessors above) ---------------------------------------------
    static constexpr int CACHE_COUNT_INDEX = 0;
    static constexpr int CACHE_HIT_COUNT_INDEX = 1;
    static constexpr int LARGE_STRING_COUNT_INDEX = 2;
    static constexpr int CONFLICT_COUNT_INDEX = 3;
    static constexpr int STRING_LENGTH_THRESHOLD_INDEX = 4;
    static constexpr int CACHE_LENGTH_INDEX = 5;
    static constexpr int LAST_MATCH_GLOBAL_TABLE_INDEX = 6;
    static constexpr int USE_LAST_MATCH_INDEX = 7;
    static constexpr int NEED_UPDATE_GLOBAL_INDEX = 8;
    static constexpr int CACHE_TABLE_HEADER_SIZE = 9;

    // ---- Per-entry positions --------------------------------------------------------------------
    static constexpr int PATTERN_INDEX = 0;
    static constexpr int FLAG_INDEX = 1;
    static constexpr int INPUT_STRING_INDEX = 2;
    static constexpr int LAST_INDEX_INPUT_INDEX = 3;
    static constexpr int LAST_INDEX_INDEX = 4;
    static constexpr int RESULT_REPLACE_INDEX = 5;
    static constexpr int RESULT_SPLIT_INDEX = 6;
    static constexpr int RESULT_MATCH_INDEX = 7;
    static constexpr int RESULT_EXEC_INDEX = 8;
    static constexpr int RESULT_INTERMEDIATE_REPLACE_INDEX = 9;
    static constexpr int RESULT_TEST_INDEX = 10;
    static constexpr int RESULT_SEARCH_INDEX = 11;
    static constexpr int EXTEND_INDEX = 12;
    static constexpr int CAPTURE_SIZE = 13;
    static constexpr int ENTRY_SIZE = 14;
};
/**
 * TaggedArray subclass holding the values of the most recent RegExp result.
 *
 * Slot layout, as used by the accessors in this class:
 *   [CAPTURE_START_INDEX, CAPTURE_START_INDEX + DOLLAR_NUMBER)  capture values (slots 0..8)
 *   TOTAL_CAPTURE_COUNTS_INDEX (9)                             total capture count
 *   INPUT_STRING_INDEX (10)                                    input string
 *   END_INDEX (11)                                             end index
 *   FIRST_CAPTURE_INDEX (12) onwards                           start/end pairs, two slots per capture
 */
class RegExpGlobalResult : public TaggedArray {
public:
    // Reinterprets a TaggedObject pointer as a global result table; no check is performed here.
    static RegExpGlobalResult *Cast(TaggedObject *object)
    {
        return reinterpret_cast<RegExpGlobalResult *>(object);
    }

    static JSTaggedValue CreateGlobalResultTable(JSThread *thread);

    // Stores `value` for the 1-based capture number `index` (index 1 maps to CAPTURE_START_INDEX).
    // NOTE(review): the assertion only bounds the slot by GLOBAL_TABLE_SIZE and has no lower bound,
    // so index < 1 or index in 10..12 is not rejected here - confirm that this is intended.
    void SetCapture(JSThread *thread, int index, JSTaggedValue value)
    {
        const int slot = CAPTURE_START_INDEX + index - 1;
        ASSERT(slot < GLOBAL_TABLE_SIZE);
        Set(thread, slot, value);
    }

    // Overwrites all DOLLAR_NUMBER capture slots with Hole.
    void ResetDollar(JSThread *thread)
    {
        for (uint32_t offset = 0; offset < DOLLAR_NUMBER; offset++) {
            Set(thread, CAPTURE_START_INDEX + offset, JSTaggedValue::Hole());
        }
    }

    template <int N>
    static JSTaggedValue GetCapture(JSThread *thread);

    void SetTotalCaptureCounts(JSThread *thread, JSTaggedValue counts)
    {
        Set(thread, TOTAL_CAPTURE_COUNTS_INDEX, counts);
    }

    JSTaggedValue GetTotalCaptureCounts()
    {
        return GetPrimitive(TOTAL_CAPTURE_COUNTS_INDEX);
    }

    void SetEndIndex(JSThread *thread, JSTaggedValue endIndex)
    {
        Set(thread, END_INDEX, endIndex);
    }

    JSTaggedValue GetEndIndex()
    {
        return GetPrimitive(END_INDEX);
    }

    void SetInputString(JSThread *thread, JSTaggedValue string)
    {
        Set(thread, INPUT_STRING_INDEX, string);
    }

    // Read with Get(thread, ...) rather than GetPrimitive, unlike the other getters in this class.
    JSTaggedValue GetInputString(JSThread *thread)
    {
        return Get(thread, INPUT_STRING_INDEX);
    }

    // Capture `index` keeps its start position in the even slot of its pair ...
    void SetStartOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value)
    {
        const uint32_t startSlot = FIRST_CAPTURE_INDEX + index * 2;
        Set(thread, startSlot, value);
    }

    // ... and its end position in the slot directly after it.
    void SetEndOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value)
    {
        const uint32_t endSlot = FIRST_CAPTURE_INDEX + index * 2 + 1;
        Set(thread, endSlot, value);
    }

    JSTaggedValue GetStartOfCaptureIndex(uint32_t index)
    {
        const uint32_t startSlot = FIRST_CAPTURE_INDEX + index * 2;
        return GetPrimitive(startSlot);
    }

    JSTaggedValue GetEndOfCaptureIndex(uint32_t index)
    {
        const uint32_t endSlot = FIRST_CAPTURE_INDEX + index * 2 + 1;
        return GetPrimitive(endSlot);
    }

    static JSHandle<RegExpGlobalResult> GrowCapturesCapacity(JSThread *thread,
                                                             JSHandle<RegExpGlobalResult> result,
                                                             uint32_t length);

    // First slot of the start/end pair area; has the same value as the private GLOBAL_TABLE_SIZE.
    static constexpr int FIRST_CAPTURE_INDEX = 12;

private:
    static constexpr int GLOBAL_TABLE_SIZE = 12;
    static constexpr int DOLLAR_NUMBER = 9;
    static constexpr int CAPTURE_START_INDEX = 0;
    static constexpr int TOTAL_CAPTURE_COUNTS_INDEX = 9;
    static constexpr int INPUT_STRING_INDEX = 10;
    static constexpr int END_INDEX = 11;
    static constexpr int INITIAL_CAPTURE_INDICES = 18;
};
}
#endif