* Copyright (c) 2025 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ECMASCRIPT_STRING_BASE_STRING_IMPL_H
#define ECMASCRIPT_STRING_BASE_STRING_IMPL_H
#include "ecmascript/string/base_string.h"
#include <vector>
#ifdef ENABLE_HISPEED_PLUGIN
#include "common_components/base/utf_helper.h"
#endif
#include "ecmascript/platform/string_hash.h"
#include "ecmascript/platform/string_hash_helper.h"
#include "ecmascript/string/external_string-inl.h"
#include "ecmascript/string/line_string-inl.h"
#include "ecmascript/string/sliced_string-inl.h"
#include "ecmascript/string/tree_string-inl.h"
#include "objects/utils/utf_utils.h"
#include "objects/utils/span.h"
#include "securec.h"
#if !ENABLE_V70_OPTIMIZATION
#undef ENABLE_LINXKIT
#endif
#ifdef ENABLE_LINXKIT
#include "ecmascript/platform/arm64/linxkit_string.h"
#endif
namespace panda::ecmascript {
// Free helpers declared here so the templates below can call them before their definitions.
// Used by BaseString::ToU16String for UTF-16 backed strings; takes a pointer to code units and a count.
std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen);
// Used by BaseString::ToU16String for UTF-8 backed strings; takes a pointer to bytes and a count.
std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen);
// Compares the first `count` elements of both spans; see the definition later in this file.
template <typename T1, typename T2>
int32_t CompareStringSpan(common::Span<T1> &lhsSp, common::Span<T2> &rhsSp, int32_t count);
// NOTE(review): definition not visible in this part of the file; presumably tests whether rhsSp
// occurs in lhsSp at `offset` - confirm against the definition.
template <typename T1, typename T2>
bool IsSubStringAtSpan(common::Span<T1> &lhsSp, common::Span<T2> &rhsSp, uint32_t offset);
// Computes the raw (un-mixed) hash of `size` code units starting at `data`, continuing from `hashSeed`.
// With ENABLE_LINXKIT the whole computation is delegated to LinxkitComputeHashForData. Otherwise
// inputs longer than StringHash::MIN_SIZE_FOR_UNROLLING go to the platform helper and shorter ones
// are folded here with hash = (hash << HASH_SHIFT) - hash + unit.
template <typename T>
uint32_t BaseString::ComputeHashForData(const T* data, size_t size,
                                        uint32_t hashSeed)
{
#ifdef ENABLE_LINXKIT
    return LinxkitComputeHashForData(data, size, hashSeed);
#else
    const bool needsPlatformPath = size > static_cast<size_t>(StringHash::MIN_SIZE_FOR_UNROLLING);
    if (needsPlatformPath) {
        return StringHashHelper::ComputeHashForDataPlatform(data, size, hashSeed);
    }
    uint32_t acc = hashSeed;
    for (uint32_t idx = 0; idx < size; idx++) {
        acc = (acc << static_cast<uint32_t>(StringHash::HASH_SHIFT)) - acc + data[idx];
    }
    return acc;
#endif
}
// Explicit instantiations of ComputeHashForData for 8-bit and 16-bit code units.
template
uint32_t BaseString::ComputeHashForData<uint8_t>(const uint8_t*, size_t, uint32_t);
template
uint32_t BaseString::ComputeHashForData<uint16_t>(const uint16_t*, size_t, uint32_t);
// Hash of the concatenation of two buffers without materialising it: the first buffer is hashed
// from seed 0, its result seeds the hash of the second buffer, and the final value is passed to
// MixHashcode with NOT_INTEGER.
template <typename T1, typename T2>
uint32_t BaseString::CalculateDataConcatHashCode(const T1* dataFirst, size_t sizeFirst,
                                                 const T2* dataSecond, size_t sizeSecond)
{
    const uint32_t headHash = ComputeHashForData(dataFirst, sizeFirst, 0);
    const uint32_t chainedHash = ComputeHashForData(dataSecond, sizeSecond, headHash);
    return MixHashcode(chainedHash, NOT_INTEGER);
}
// Explicit instantiations of CalculateDataConcatHashCode for every combination of
// 8-bit and 16-bit code units in the first and second buffer.
template
uint32_t BaseString::CalculateDataConcatHashCode<uint8_t, uint8_t>(const uint8_t* dataFirst, size_t sizeFirst,
const uint8_t* dataSecond, size_t sizeSecond);
template
uint32_t BaseString::CalculateDataConcatHashCode<uint16_t, uint16_t>(const uint16_t* dataFirst, size_t sizeFirst,
const uint16_t* dataSecond, size_t sizeSecond);
template
uint32_t BaseString::CalculateDataConcatHashCode<uint8_t, uint16_t>(const uint8_t* dataFirst, size_t sizeFirst,
const uint16_t* dataSecond, size_t sizeSecond);
template
uint32_t BaseString::CalculateDataConcatHashCode<uint16_t, uint8_t>(const uint16_t* dataFirst, size_t sizeFirst,
const uint8_t* dataSecond, size_t sizeSecond);
template <typename ReadBarrier>
uint32_t BaseString::ComputeHashcode(ReadBarrier &&readBarrier) const
{
#if defined(PANDA_32_BIT_MANAGED_POINTER)
return ComputeRawHashcode32bits(readBarrier);
#else
auto [hash, isInteger] = ComputeRawHashcode(readBarrier);
return MixHashcode(hash, isInteger);
#endif
}
// Raw hash of the string contents, seeded with 0; an empty string hashes to 0.
// The contents are obtained through GetUtf8DataFlat / GetUtf16DataFlat, which may fill the local
// scratch vector when the string has to be flattened.
template <typename ReadBarrier>
uint32_t BaseString::ComputeRawHashcode32bits(ReadBarrier &&readBarrier) const
{
    const uint32_t unitCount = GetLength();
    if (unitCount == 0) {
        return 0;
    }
    if (!IsUtf8()) {
        std::vector<uint16_t> scratch;
        const uint16_t *units = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        return ComputeHashForData(units, unitCount, 0);
    }
    std::vector<uint8_t> scratch;
    const uint8_t *bytes = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    return ComputeHashForData(bytes, unitCount, 0);
}
// Returns {hash, isInteger} for this string.
//  - empty string: {0, false}
//  - UTF-8 string shorter than MAX_ELEMENT_INDEX_LEN that HashIntegerString accepts: {value it stored, true}
//  - anything else: {ComputeHashForData over the contents with seed 0, false}
// UTF-16 backed strings never take the integer path here.
template <typename ReadBarrier>
std::pair<uint32_t, bool> BaseString::ComputeRawHashcode(ReadBarrier &&readBarrier) const
{
    uint32_t hashValue = 0;
    const uint32_t unitCount = GetLength();
    if (unitCount == 0) {
        return {hashValue, false};
    }
    if (!IsUtf8()) {
        std::vector<uint16_t> scratch;
        const uint16_t *units = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        hashValue = ComputeHashForData<uint16_t>(units, unitCount, 0);
        return {hashValue, false};
    }
    std::vector<uint8_t> scratch;
    const uint8_t *bytes = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    const bool shortEnough = unitCount < MAX_ELEMENT_INDEX_LEN;
    if (shortEnough && this->HashIntegerString(bytes, unitCount, &hashValue, 0)) {
        return {hashValue, true};
    }
    hashValue = ComputeHashForData<uint8_t>(bytes, unitCount, 0);
    return {hashValue, false};
}
// True when `c` is one of the ASCII characters '0'..'9'.
template <typename T>
inline static bool IsDecimalDigitChar(const T c)
{
    const bool belowZero = c < '0';
    const bool aboveNine = c > '9';
    return !(belowZero || aboveNine);
}
// Appends one decimal digit to `*num` (num = num * 10 + digit).
// Returns false and leaves `*num` untouched when `c` is not '0'..'9'.
// NOTE(review): `c` is a uint8_t, so a wider code unit passed by a caller is narrowed before the
// digit test is applied.
inline bool ComputeIntegerHash(uint32_t *num, uint8_t c)
{
    if (IsDecimalDigitChar(c)) {
        const int digit = c - '0';
        *num = (*num) * 10 + digit;
        return true;
    }
    return false;
}
// Tries to read data[0..size) as a decimal number and, on success, stores
// MixHashcode(number, IS_INTEGER) in `*hash`.
//
// @param data     code units to parse (8-bit or 16-bit)
// @param size     number of code units in `data`
// @param hash     out: receives the mixed integer hash; only written when true is returned
// @param hashSeed 0 to parse a fresh number (a leading '0' is then only accepted for the single
//                 character "0"); non-zero to continue a number whose leading digits have the
//                 value `hashSeed` (leading '0' in `data` is allowed in that case)
// @return true when every code unit is a decimal digit and the resulting number does not exceed
//         MAX_INTEGER_HASH_NUMBER
//
// Differences from the previous implementation:
//  - each code unit is tested at its full width, so a 16-bit unit such as U+0131 is no longer
//    narrowed to 8 bits and mistaken for the digit '1';
//  - size == 0 returns false instead of reading data[0];
//  - the number is accumulated in 64 bits and rejected as soon as it exceeds
//    MAX_INTEGER_HASH_NUMBER, so long digit runs (or a large seed) cannot wrap around and pass
//    the range check.
template <typename T>
bool BaseString::HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
{
    constexpr uint64_t DECIMAL_BASE = 10;
    if (size == 0 || !IsDecimalDigitChar(data[0])) {
        return false;
    }
    if (hashSeed == 0 && data[0] == '0') {
        // A fresh number may start with '0' only when it is exactly "0".
        if (size != 1) {
            return false;
        }
        *hash = MixHashcode(0, IS_INTEGER);
        return true;
    }
    const auto limit = static_cast<uint64_t>(MAX_INTEGER_HASH_NUMBER);
    uint64_t num = static_cast<uint64_t>(hashSeed) * DECIMAL_BASE + static_cast<uint64_t>(data[0] - '0');
    for (size_t i = 1; i < size; ++i) {
        // Checking the limit before multiplying keeps `num` far below the 64-bit range.
        if (num > limit || !IsDecimalDigitChar(data[i])) {
            return false;
        }
        num = num * DECIMAL_BASE + static_cast<uint64_t>(data[i] - '0');
    }
    if (num > limit) {
        return false;
    }
    *hash = MixHashcode(static_cast<uint32_t>(num), IS_INTEGER);
    return true;
}
// Checks whether this string equals the concatenation str1 + str2 without building it.
// None of the three strings may be a tree string (debug-checked). The lengths must add up;
// then the first str1->GetLength() units of this string are compared with str1 and the
// remainder with str2.
template <typename ReadBarrier>
bool BaseString::EqualToSplicedString(ReadBarrier &&readBarrier, const BaseString *str1, const BaseString *str2)
{
    DCHECK_CC(!IsTreeString());
    DCHECK_CC(!str1->IsTreeString() && !str2->IsTreeString());
    const bool lengthsAddUp = (GetLength() == str1->GetLength() + str2->GetLength());
    if (!lengthsAddUp) {
        return false;
    }
    if (!IsUtf16()) {
        std::vector<uint8_t> scratch;
        const uint8_t *bytes = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        const bool headMatches = BaseString::StringIsEqualUint8Data(std::forward<ReadBarrier>(readBarrier), str1,
                                                                    bytes, str1->GetLength(), this->IsUtf8());
        if (!headMatches) {
            return false;
        }
        return BaseString::StringIsEqualUint8Data(std::forward<ReadBarrier>(readBarrier), str2,
                                                  bytes + str1->GetLength(), str2->GetLength(), this->IsUtf8());
    }
    std::vector<uint16_t> scratch;
    const uint16_t *units = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    const bool headMatches =
        BaseString::StringsAreEqualUtf16(std::forward<ReadBarrier>(readBarrier), str1, units, str1->GetLength());
    if (!headMatches) {
        return false;
    }
    return BaseString::StringsAreEqualUtf16(std::forward<ReadBarrier>(readBarrier), str2,
                                            units + str1->GetLength(), str2->GetLength());
}
// Converts this string to a std::u16string.
// `len` selects how many code units to convert; 0 means the full length of the string.
template <typename ReadBarrier>
std::u16string BaseString::ToU16String(ReadBarrier &&readBarrier, uint32_t len)
{
    const uint32_t unitCount = (len > 0) ? len : GetLength();
    if (!IsUtf16()) {
        std::vector<uint8_t> scratch;
        const uint8_t *bytes = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        return Utf8ToU16String(bytes, unitCount);
    }
    std::vector<uint16_t> scratch;
    const uint16_t *units = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    return Utf16ToU16String(units, unitCount);
}
template <typename ReadBarrier>
const uint8_t *BaseString::GetNonTreeUtf8Data(ReadBarrier &&readBarrier, const BaseString *src)
{
DCHECK_CC(src->IsUtf8());
DCHECK_CC(!src->IsTreeString());
if (src->IsLineString()) {
return LineString::ConstCast(src)->GetDataUtf8();
} else if (src->IsSlicedString()) {
const SlicedString *str = SlicedString::ConstCast(src);
return LineString::Cast(str->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf8() +
str->GetStartIndex();
} else {
DCHECK_CC(src->IsCachedExternalString());
return CachedExternalString::ConstCast(src)->GetDataUtf8();
}
}
template <typename ReadBarrier>
const uint16_t *BaseString::GetNonTreeUtf16Data(ReadBarrier &&readBarrier, const BaseString *src)
{
DCHECK_CC(src->IsUtf16());
DCHECK_CC(!src->IsTreeString());
if (src->IsLineString()) {
return LineString::ConstCast(src)->GetDataUtf16();
} else if (src->IsSlicedString()) {
const SlicedString *str = SlicedString::ConstCast(src);
return LineString::Cast(str->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf16() +
str->GetStartIndex();
} else {
DCHECK_CC(src->IsCachedExternalString());
return CachedExternalString::ConstCast(src)->GetDataUtf16();
}
}
// Compares the contents of two strings whose encodings may differ.
// Each side is flattened to its native width (8-bit unless IsUtf16()) and the two spans are
// handed to StringsAreEquals, which is instantiated for the matching width combination.
template <typename ReadBarrier>
bool BaseString::StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseString *left, BaseString *right)
{
    const int32_t leftCount = static_cast<int32_t>(left->GetLength());
    const int32_t rightCount = static_cast<int32_t>(right->GetLength());
    const bool leftIsWide = left->IsUtf16();
    const bool rightIsWide = right->IsUtf16();

    if (!leftIsWide && !rightIsWide) {
        std::vector<uint8_t> leftScratch;
        std::vector<uint8_t> rightScratch;
        const uint8_t *leftData = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), left, leftScratch);
        const uint8_t *rightData =
            BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), right, rightScratch);
        common::Span<const uint8_t> lhsSp(leftData, leftCount);
        common::Span<const uint8_t> rhsSp(rightData, rightCount);
        return BaseString::StringsAreEquals(lhsSp, rhsSp);
    }
    if (!leftIsWide) {
        // 8-bit left against 16-bit right.
        std::vector<uint8_t> leftScratch;
        std::vector<uint16_t> rightScratch;
        const uint8_t *leftData = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), left, leftScratch);
        const uint16_t *rightData =
            BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), right, rightScratch);
        common::Span<const uint8_t> lhsSp(leftData, leftCount);
        common::Span<const uint16_t> rhsSp(rightData, rightCount);
        return BaseString::StringsAreEquals(lhsSp, rhsSp);
    }
    if (!rightIsWide) {
        // 16-bit left against 8-bit right.
        std::vector<uint16_t> leftScratch;
        std::vector<uint8_t> rightScratch;
        const uint16_t *leftData =
            BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), left, leftScratch);
        const uint8_t *rightData =
            BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), right, rightScratch);
        common::Span<const uint16_t> lhsSp(leftData, leftCount);
        common::Span<const uint8_t> rhsSp(rightData, rightCount);
        return BaseString::StringsAreEquals(lhsSp, rhsSp);
    }
    std::vector<uint16_t> leftScratch;
    std::vector<uint16_t> rightScratch;
    const uint16_t *leftData = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), left, leftScratch);
    const uint16_t *rightData =
        BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), right, rightScratch);
    common::Span<const uint16_t> lhsSp(leftData, leftCount);
    common::Span<const uint16_t> rhsSp(rightData, rightCount);
    return StringsAreEquals(lhsSp, rhsSp);
}
// Content equality of two strings.
// Cheap exits first: same object, different lengths, both empty, or both hash codes available
// (per TryGetHashCode) and different. Only then are the contents compared.
template <typename ReadBarrier>
bool BaseString::StringsAreEqual(ReadBarrier &&readBarrier, BaseString *str1, BaseString *str2)
{
    DCHECK_CC(str1 != nullptr);
    DCHECK_CC(str2 != nullptr);
    if (str1 == str2) {
        return true;
    }
    const uint32_t commonLen = str1->GetLength();
    if (commonLen != str2->GetLength()) {
        return false;
    }
    if (commonLen == 0) {
        return true;
    }
    uint32_t firstHash;
    uint32_t secondHash;
    const bool bothHashesKnown = str1->TryGetHashCode(&firstHash) && str2->TryGetHashCode(&secondHash);
    if (bothHashesKnown && firstHash != secondHash) {
        return false;
    }
    return StringsAreEqualDiffUtfEncoding(std::forward<ReadBarrier>(readBarrier), str1, str2);
}
template <typename ReadBarrier>
bool BaseString::StringIsEqualUint8Data(ReadBarrier &&readBarrier, const BaseString *str1, const uint8_t *dataAddr,
uint32_t dataLen, bool canBeCompressToUtf8)
{
DCHECK_CC(str1 != nullptr);
uint32_t strLen = str1->GetLength();
if (str1->IsUtf8()) {
if (strLen != dataLen) {
return false;
}
std::vector<uint8_t> buf;
common::Span<const uint8_t> data1(BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier),
str1, buf), strLen);
common::Span<const uint8_t> data2(dataAddr, dataLen);
return BaseString::StringsAreEquals(data1, data2);
}
if (canBeCompressToUtf8 && strLen != dataLen) {
return false;
}
std::vector<uint16_t> buf;
const uint16_t *strAddr = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf);
return IsUtf8EqualsUtf16(dataAddr, dataLen, strAddr, strLen);
}
template <typename ReadBarrier>
bool BaseString::StringsAreEqualUtf16(ReadBarrier &&readBarrier, const BaseString *str1, const uint16_t *utf16Data,
uint32_t utf16Len)
{
uint32_t length = str1->GetLength();
if (length != utf16Len) {
return false;
}
if (str1->IsUtf8()) {
std::vector<uint8_t> buf;
const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), str1, buf);
return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
}
std::vector<uint16_t> buf;
common::Span<const uint16_t> data1(BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier),
str1, buf), length);
common::Span<const uint16_t> data2(utf16Data, utf16Len);
return BaseString::StringsAreEquals(data1, data2);
}
#include <vector>
#include "securec.h"
// Size needed to hold this string as UTF-8.
// 8-bit strings return GetLength() + 1; 16-bit strings return whatever
// common::UtfUtils::Utf16ToUtf8Size reports for the flattened contents with the given flags.
template <typename ReadBarrier>
size_t BaseString::GetUtf8Length(ReadBarrier &&readBarrier, bool modify, bool isGetBufferSize) const
{
    if (IsUtf16()) {
        std::vector<uint16_t> scratch;
        const uint16_t *units = GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        return common::UtfUtils::Utf16ToUtf8Size(units, GetLength(), modify, isGetBufferSize);
    }
    return GetLength() + 1;
}
// Returns a pointer to the contiguous 16-bit contents of `src` (which must be UTF-16).
// Only a non-flat tree string needs a copy: its characters are written into `buf` and the
// returned pointer is buf.data(), so `buf` must outlive every use of the result. All other
// shapes return a pointer into existing string storage and leave `buf` untouched.
// NOTE(review): the copy path only reserve()s capacity, so buf.size() stays 0 while the data
// lives in the reserved storage - callers must not rely on buf.size().
template <typename ReadBarrier, typename Vec,
          std::enable_if_t<common::objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint16_t>, int>>
const uint16_t *BaseString::GetUtf16DataFlat(ReadBarrier &&readBarrier, const BaseString *src, Vec &buf)
{
    DCHECK_CC(src->IsUtf16());
    if (src->IsLineString()) {
        return LineString::ConstCast(src)->GetDataUtf16();
    }
    if (src->IsTreeString()) {
        if (!src->IsFlat(std::forward<ReadBarrier>(readBarrier))) {
            const uint32_t unitCount = src->GetLength();
            buf.reserve(unitCount);
            WriteToFlat(std::forward<ReadBarrier>(readBarrier), src, buf.data(), unitCount);
            return buf.data();
        }
        // A flat tree keeps all of its characters in the left child.
        const BaseString *flatLeft = BaseString::Cast(
            TreeString::ConstCast(src)->GetLeftSubString<BaseObject *>(std::forward<ReadBarrier>(readBarrier)));
        return LineString::ConstCast(flatLeft)->GetDataUtf16();
    }
    if (src->IsSlicedString()) {
        const SlicedString *slice = SlicedString::ConstCast(src);
        const uint16_t *parentData =
            LineString::Cast(slice->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf16();
        return parentData + slice->GetStartIndex();
    }
    DCHECK_CC(src->IsCachedExternalString());
    return CachedExternalString::ConstCast(src)->GetDataUtf16();
}
// True when `type` lies in the inclusive range [STRING_FIRST, STRING_LAST].
constexpr bool BaseString::IsStringType(EcmaStringType type)
{
    const bool belowRange = type < EcmaStringType::STRING_FIRST;
    const bool aboveRange = type > EcmaStringType::STRING_LAST;
    return !belowRange && !aboveRange;
}
// Reads the concrete string kind from this object's class; debug builds check that the value is
// within the string type range.
inline EcmaStringType BaseString::GetStringType() const
{
    const EcmaStringType kind = GetBaseStringClass()->GetEcmaStringType();
    DCHECK_CC(IsStringType(kind) && ("Invalid ObjectType"));
    return kind;
}
template <bool VERIFY, typename ReadBarrier>
uint16_t BaseString::At(ReadBarrier &&readBarrier, int32_t index) const
{
auto length = static_cast<int32_t>(GetLength());
if constexpr (VERIFY) {
if ((index < 0) || (index >= length)) {
return 0;
}
}
switch (GetStringType()) {
case EcmaStringType::LINE_STRING:
return LineString::ConstCast(this)->Get<VERIFY>(index);
case EcmaStringType::SLICED_STRING:
return SlicedString::ConstCast(this)->Get<VERIFY>(std::forward<ReadBarrier>(readBarrier), index);
case EcmaStringType::TREE_STRING:
return TreeString::ConstCast(this)->Get<VERIFY>(std::forward<ReadBarrier>(readBarrier), index);
case EcmaStringType::CACHED_EXTERNAL_STRING:
return CachedExternalString::ConstCast(this)->Get<VERIFY>(index);
default:
UNREACHABLE_CC();
}
}
// Every string that is not a tree string counts as flat; tree strings answer for themselves.
template <typename ReadBarrier>
bool BaseString::IsFlat(ReadBarrier &&readBarrier) const
{
    if (this->IsTreeString()) {
        return TreeString::ConstCast(this)->IsFlat(std::forward<ReadBarrier>(readBarrier));
    }
    return true;
}
// Copies the characters of `src` into the contiguous buffer `buf`.
//
// @param src       string to flatten; may be a line, cached external, sliced or tree string
// @param buf       destination; each source unit is converted to `Char` on assignment (see CopyChars)
// @param maxLength capacity of `buf` in characters; only used in debug checks and to bound
//                  the recursive calls
//
// Line, cached external and sliced strings are copied directly. Tree strings are handled by a
// loop: one child is written by a recursive call (or a direct copy) and the loop then continues
// with the other child, so only one side of each tree node costs a stack frame.
template <typename Char, typename ReadBarrier>
void BaseString::WriteToFlat(ReadBarrier &&readBarrier, const BaseString *src, Char *buf, uint32_t maxLength)
{
// `length` is the number of characters that still have to be produced from the current `src`.
uint32_t length = src->GetLength();
if (length == 0) {
return;
}
while (true) {
DCHECK_CC(length <= maxLength && length > 0);
DCHECK_CC(length <= src->GetLength());
switch (src->GetStringType()) {
case EcmaStringType::LINE_STRING: {
if (src->IsUtf8()) {
CopyChars(buf, LineString::ConstCast(src)->GetDataUtf8(), length);
} else {
CopyChars(buf, LineString::ConstCast(src)->GetDataUtf16(), length);
}
return;
}
case EcmaStringType::CACHED_EXTERNAL_STRING: {
if (src->IsUtf8()) {
CopyChars(buf, CachedExternalString::ConstCast(src)->GetDataUtf8(), length);
} else {
CopyChars(buf, CachedExternalString::ConstCast(src)->GetDataUtf16(), length);
}
return;
}
case EcmaStringType::TREE_STRING: {
const TreeString *treeSrc = TreeString::ConstCast(src);
BaseString *left =
BaseString::Cast(treeSrc->GetLeftSubString<BaseObject *>(std::forward<ReadBarrier>(readBarrier)));
BaseString *right =
BaseString::Cast(treeSrc->GetRightSubString<BaseObject *>(std::forward<ReadBarrier>(readBarrier)));
uint32_t leftLength = left->GetLength();
uint32_t rightLength = right->GetLength();
if (rightLength >= leftLength) {
// Left child is not longer than the right one: recurse into the left child, then keep
// looping on the right child with the buffer advanced past the left part.
WriteToFlat(std::forward<ReadBarrier>(readBarrier), left, buf, maxLength);
if (left == right) {
// Both children are the same object: duplicate the part that was just written.
CopyChars(buf + leftLength, buf, leftLength);
return;
}
buf += leftLength;
maxLength -= leftLength;
src = right;
length -= leftLength;
} else {
// Left child is longer: write the right child first (behind where the left part will go),
// then keep looping on the left child.
// The right part is only written when the remaining length extends past the left child.
if (length > leftLength) {
if (rightLength == 1) {
// Single character: fetch it directly instead of recursing.
buf[leftLength] =
static_cast<Char>(right->At<false>(std::forward<ReadBarrier>(readBarrier), 0));
} else if ((right->IsLineString()) && right->IsUtf8()) {
CopyChars(buf + leftLength, LineString::Cast(right)->GetDataUtf8(), rightLength);
} else {
WriteToFlat(std::forward<ReadBarrier>(readBarrier), right, buf + leftLength,
maxLength - leftLength);
}
length -= rightLength;
}
maxLength = leftLength;
src = left;
}
continue;
}
case EcmaStringType::SLICED_STRING: {
// A slice reads from its parent's buffer starting at the slice's start index; the parent is
// accessed as a LineString.
BaseString *parent = BaseString::Cast(
SlicedString::ConstCast(src)->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)));
if (src->IsUtf8()) {
CopyChars(buf,
LineString::Cast(parent)->GetDataUtf8() + SlicedString::ConstCast(src)->GetStartIndex(),
length);
} else {
CopyChars(buf,
LineString::Cast(parent)->GetDataUtf16() + SlicedString::ConstCast(src)->GetStartIndex(),
length);
}
return;
}
default:
UNREACHABLE_CC();
}
}
}
// Copies `length` characters of `src`, starting at character offset `pos`, into `buf`.
//
// @param src    source string
// @param buf    destination; each source unit is converted to `Char` on assignment (see CopyChars)
// @param length number of characters to copy; 0 is a no-op
// @param pos    offset of the first character to copy, relative to the start of `src`
//
// NOTE(review): a tree string is handled by switching to its left child only (debug-checked to be
// a line string), so this presumably expects tree strings to be flat already - confirm with callers.
template <typename Char, typename ReadBarrier>
void BaseString::WriteToFlatWithPos(ReadBarrier &&readBarrier, BaseString *src, Char *buf, uint32_t length,
uint32_t pos)
{
// Taken once from the original `src`; only used by the debug check below.
[[maybe_unused]] uint32_t maxLength = src->GetLength();
if (length == 0) {
return;
}
while (true) {
DCHECK_CC(length + pos <= maxLength && length > 0);
DCHECK_CC(length <= src->GetLength());
// `pos` is unsigned, so this check always holds.
DCHECK_CC(pos >= 0);
switch (src->GetStringType()) {
case EcmaStringType::LINE_STRING: {
if (src->IsUtf8()) {
CopyChars(buf, LineString::Cast(src)->GetDataUtf8() + pos, length);
} else {
CopyChars(buf, LineString::Cast(src)->GetDataUtf16() + pos, length);
}
return;
}
case EcmaStringType::CACHED_EXTERNAL_STRING: {
if (src->IsUtf8()) {
CopyChars(buf, CachedExternalString::ConstCast(src)->GetDataUtf8() + pos, length);
} else {
CopyChars(buf, CachedExternalString::ConstCast(src)->GetDataUtf16() + pos, length);
}
return;
}
case EcmaStringType::TREE_STRING: {
// Continue with the left child; the next loop iteration copies from it.
TreeString *treeSrc = TreeString::Cast(src);
BaseString *left =
BaseString::Cast(treeSrc->GetLeftSubString<BaseObject *>(std::forward<ReadBarrier>(readBarrier)));
DCHECK_CC(left->IsLineString());
src = left;
continue;
}
case EcmaStringType::SLICED_STRING: {
// The slice's own start index is added to `pos` to address the parent's buffer.
BaseString *parent = BaseString::Cast(
SlicedString::Cast(src)->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)));
if (src->IsUtf8()) {
CopyChars(buf,
LineString::Cast(parent)->GetDataUtf8() + SlicedString::Cast(src)->GetStartIndex() + pos,
length);
} else {
CopyChars(buf,
LineString::Cast(parent)->GetDataUtf16() + SlicedString::Cast(src)->GetStartIndex() +
pos, length);
}
return;
}
default:
UNREACHABLE_CC();
}
}
}
// Writes this string as UTF-8 into `buf` (capacity `maxLength` bytes).
// The last byte of the buffer is set to '\0' before copying. Returns the value reported by
// CopyDataRegionUtf8 plus one; a zero-capacity buffer is not touched and yields 1.
template <typename ReadBarrier>
size_t BaseString::WriteUtf8(ReadBarrier &&readBarrier, uint8_t *buf, size_t maxLength, bool isWriteBuffer) const
{
    if (maxLength != 0) {
        buf[maxLength - 1] = '\0';
        const size_t copied = CopyDataRegionUtf8(std::forward<ReadBarrier>(readBarrier), buf, 0, GetLength(),
                                                 maxLength, true, isWriteBuffer);
        return copied + 1;
    }
    return 1;
}
// Writes up to `targetLength` code units of this string into `buf` (capacity `bufLength` units)
// via CopyDataToUtf16. A zero-capacity buffer is not touched and yields 0.
template <typename ReadBarrier>
size_t BaseString::WriteUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const
{
    const bool hasRoom = (bufLength != 0);
    if (!hasRoom) {
        return 0;
    }
    const size_t written = CopyDataToUtf16(std::forward<ReadBarrier>(readBarrier), buf, targetLength, bufLength);
    return written;
}
// Writes this string into `buf` (capacity `maxLength` bytes) using one byte per character.
// The last byte of the buffer is set to '\0' first. 8-bit strings are copied with memcpy_s,
// 16-bit strings go through common::UtfUtils::ConvertRegionUtf16ToLatin1; in both cases at most
// `maxLength` characters are processed. A zero-capacity buffer is not touched and yields 0.
template <typename ReadBarrier>
size_t BaseString::WriteOneByte(ReadBarrier &&readBarrier, uint8_t *buf, size_t maxLength) const
{
    if (maxLength == 0) {
        return 0;
    }
    buf[maxLength - 1] = '\0';
    uint32_t unitCount = GetLength();
    if (IsUtf16()) {
        std::vector<uint16_t> scratch;
        const uint16_t *units = GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        if (unitCount <= maxLength) {
            return common::UtfUtils::ConvertRegionUtf16ToLatin1(units, buf, unitCount, maxLength);
        }
        return common::UtfUtils::ConvertRegionUtf16ToLatin1(units, buf, maxLength, maxLength);
    }
    std::vector<uint8_t> scratch;
    const uint8_t *bytes = GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    if (unitCount > maxLength) {
        unitCount = maxLength;
    }
    const bool copied = (memcpy_s(buf, maxLength, bytes, unitCount) == EOK);
    if (!copied) {
        UNREACHABLE_CC();
    }
    return unitCount;
}
// Copies the whole string into `buf` as 16-bit code units.
// Nothing is written and 0 is returned when the string is longer than `maxLength` units.
// 16-bit strings are copied with memcpy_s; 8-bit strings are converted with
// common::utf_helper::ConvertRegionUtf8ToUtf16.
template <typename ReadBarrier>
uint32_t BaseString::CopyDataUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t maxLength) const
{
    const uint32_t unitCount = GetLength();
    if (unitCount > maxLength) {
        return 0;
    }
    if (!IsUtf16()) {
        std::vector<uint8_t> scratch;
        const uint8_t *bytes = GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        return common::utf_helper::ConvertRegionUtf8ToUtf16(bytes, buf, unitCount, maxLength);
    }
    std::vector<uint16_t> scratch;
    const uint16_t *units = GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    const bool copied =
        (memcpy_s(buf, maxLength * sizeof(uint16_t), units, unitCount * sizeof(uint16_t)) == EOK);
    if (!copied) {
        UNREACHABLE_CC();
    }
    return unitCount;
}
// Returns a span over the UTF-8 form of this string.
// 8-bit strings: the span covers the flattened contents (which may live in `buf`).
// 16-bit strings: the contents are converted into storage reserved in `buf`, and the span covers
// the converted bytes. In both cases `buf` must outlive the returned span.
template <typename ReadBarrier, typename Vec,
          std::enable_if_t<common::objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint8_t>, int>>
common::Span<const uint8_t> BaseString::ToUtf8Span(ReadBarrier &&readBarrier, Vec &buf, bool modify, bool cesu8)
{
    const uint32_t unitCount = GetLength();
    if (!UNLIKELY_CC(IsUtf16())) {
        const uint8_t *bytes = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
        return common::Span<const uint8_t>(bytes, unitCount);
    }
    using U16Vec = common::objects_traits::vector_with_same_alloc_t<Vec, uint16_t>;
    U16Vec scratch;
    const uint16_t *units = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    const size_t sizeReported = common::UtfUtils::Utf16ToUtf8Size(units, unitCount, modify, false, cesu8);
    DCHECK_CC(sizeReported > 0);
    // One less than the reported size is used as the output capacity.
    size_t byteCount = sizeReported - 1;
    buf.reserve(byteCount);
    byteCount = common::utf_helper::ConvertRegionUtf16ToUtf8(units, buf.data(), unitCount, byteCount, 0, modify,
                                                             false, cesu8);
    return common::Span<const uint8_t>(buf.data(), byteCount);
}
// Debugger flavour of ToUtf8Span: same flow, but 16-bit contents are converted with
// common::UtfUtils::DebuggerConvertRegionUtf16ToUtf8. `buf` must outlive the returned span.
template <typename ReadBarrier, typename Vec,
          std::enable_if_t<common::objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint8_t>, int>>
common::Span<const uint8_t> BaseString::DebuggerToUtf8Span(ReadBarrier &&readBarrier, Vec &buf, bool modify)
{
    const uint32_t unitCount = GetLength();
    if (!UNLIKELY_CC(IsUtf16())) {
        const uint8_t *bytes = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, buf);
        return common::Span<const uint8_t>(bytes, unitCount);
    }
    using U16Vec = common::objects_traits::vector_with_same_alloc_t<Vec, uint16_t>;
    U16Vec scratch;
    const uint16_t *units = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    // One less than the reported size is used as the output capacity.
    size_t byteCount = common::UtfUtils::Utf16ToUtf8Size(units, unitCount, modify) - 1;
    buf.reserve(byteCount);
    byteCount = common::UtfUtils::DebuggerConvertRegionUtf16ToUtf8(units, buf.data(), unitCount, byteCount, 0,
                                                                   modify);
    return common::Span<const uint8_t>(buf.data(), byteCount);
}
template <typename DstType, typename SrcType>
void BaseString::CopyChars(DstType *dst, SrcType *src, uint32_t count)
{
common::Span<SrcType> srcSp(src, count);
common::Span<DstType> dstSp(dst, count);
for (uint32_t i = 0; i < count; i++) {
dstSp[i] = srcSp[i];
}
}
// Returns a pointer to the contiguous 8-bit contents of `src` (which must be UTF-8).
// Only a non-flat tree string needs a copy: its characters are written into `buf` and the
// returned pointer is buf.data(), so `buf` must outlive every use of the result. All other
// shapes return a pointer into existing string storage and leave `buf` untouched.
// NOTE(review): the copy path only reserve()s capacity, so buf.size() stays 0 while the data
// lives in the reserved storage - callers must not rely on buf.size().
template <typename ReadBarrier, typename Vec,
          std::enable_if_t<common::objects_traits::is_std_vector_of_v<std::decay_t<Vec>, uint8_t>, int>>
const uint8_t *BaseString::GetUtf8DataFlat(ReadBarrier &&readBarrier, const BaseString *src, Vec &buf)
{
    DCHECK_CC(src->IsUtf8());
    if (src->IsLineString()) {
        return LineString::ConstCast(src)->GetDataUtf8();
    }
    if (src->IsTreeString()) {
        if (!src->IsFlat(std::forward<ReadBarrier>(readBarrier))) {
            const uint32_t unitCount = src->GetLength();
            buf.reserve(unitCount);
            WriteToFlat(std::forward<ReadBarrier>(readBarrier), src, buf.data(), unitCount);
            return buf.data();
        }
        // A flat tree keeps all of its characters in the left child.
        const BaseString *flatLeft = BaseString::Cast(
            TreeString::ConstCast(src)->GetLeftSubString<BaseObject *>(std::forward<ReadBarrier>(readBarrier)));
        return LineString::ConstCast(flatLeft)->GetDataUtf8();
    }
    if (src->IsSlicedString()) {
        const SlicedString *slice = SlicedString::ConstCast(src);
        const uint8_t *parentData =
            LineString::Cast(slice->GetParent<BaseObject *>(std::forward<ReadBarrier>(readBarrier)))->GetDataUtf8();
        return parentData + slice->GetStartIndex();
    }
    DCHECK_CC(src->IsCachedExternalString());
    return CachedExternalString::ConstCast(src)->GetDataUtf8();
}
// Copies the region [start, start + length) of this string into `buf` as UTF-8.
// Returns 0 without writing when the region does not fit inside the string. At most `maxLength`
// characters are taken from the region. 8-bit strings are copied byte for byte and the number of
// bytes copied is returned; 16-bit strings are converted with
// common::utf_helper::ConvertRegionUtf16ToUtf8 and its result is returned.
template <typename ReadBarrier>
size_t BaseString::CopyDataRegionUtf8(ReadBarrier &&readBarrier, uint8_t *buf, size_t start, size_t length,
                                      size_t maxLength, bool modify, bool isWriteBuffer) const
{
    const uint32_t ownLen = GetLength();
    if (start + length > ownLen) {
        return 0;
    }
    if (IsUtf16()) {
        std::vector<uint16_t> scratch;
        const uint16_t *units = GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        const size_t unitsToConvert = std::min(length, maxLength);
        return common::utf_helper::ConvertRegionUtf16ToUtf8(units, buf, unitsToConvert, maxLength, start, modify,
                                                            isWriteBuffer);
    }
    constexpr size_t TWO_TIMES = 2;
    const size_t lengthLimit = std::numeric_limits<size_t>::max() / TWO_TIMES - 1;
    if (length > lengthLimit) {
        UNREACHABLE_CC();
    }
    std::vector<uint8_t> scratch;
    const uint8_t *regionBegin = GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch) + start;
    const auto bytesToCopy = std::min(length, maxLength);
    std::copy(regionBegin, regionBegin + bytesToCopy, buf);
    return bytesToCopy;
}
// Copies the first min(length, bufLength) code units of this string into `buf` as UTF-16.
// 16-bit strings are copied with memcpy_s and the number of units copied is returned; 8-bit
// strings are converted with common::utf_helper::ConvertRegionUtf8ToUtf16 and its result is
// returned.
template <typename ReadBarrier>
size_t BaseString::CopyDataToUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t length, uint32_t bufLength) const
{
    // Never take more units than the destination can hold.
    const uint32_t unitsToCopy = (length > bufLength) ? bufLength : length;
    if (!IsUtf16()) {
        std::vector<uint8_t> scratch;
        const uint8_t *bytes = BaseString::GetUtf8DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
        return common::utf_helper::ConvertRegionUtf8ToUtf16(bytes, buf, unitsToCopy, bufLength);
    }
    std::vector<uint16_t> scratch;
    const uint16_t *units = BaseString::GetUtf16DataFlat(std::forward<ReadBarrier>(readBarrier), this, scratch);
    const bool copied =
        (memcpy_s(buf, bufLength * sizeof(uint16_t), units, unitsToCopy * sizeof(uint16_t)) == EOK);
    if (!copied) {
        UNREACHABLE_CC();
    }
    return unitsToCopy;
}
#ifdef PANDA_32_BIT_MANAGED_POINTER
// Plain rolling hash over `size` code units: hash = (hash << HASH_SHIFT) - hash + unit, starting
// from `hashSeed`. Compiled only when PANDA_32_BIT_MANAGED_POINTER is defined.
// NOTE(review): this has the same signature as the unconditional
// BaseString::ComputeHashForData template defined near the top of this file, so with
// PANDA_32_BIT_MANAGED_POINTER defined it looks like a redefinition; it also refers to an
// unqualified HASH_SHIFT where the other definition uses StringHash::HASH_SHIFT. Confirm whether
// this block is still needed.
template <typename T>
uint32_t BaseString::ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
{
uint32_t hash = hashSeed;
for (uint32_t i = 0; i < size; i++) {
hash = (hash << static_cast<uint32_t>(HASH_SHIFT)) - hash + data[i];
}
return hash;
}
#endif
// True for code units in the range 1..common::UtfUtils::UTF8_1B_MAX; the value 0 is
// deliberately rejected.
inline bool BaseString::IsASCIICharacter(uint16_t data)
{
    const bool isNul = (data == 0);
    return !isNul && data <= common::UtfUtils::UTF8_1B_MAX;
}
// Copies from `src` to `dst` with memcpy_s, passing `dstMax` and `count` through unchanged as
// the destination limit and the amount to copy. Any memcpy_s failure is treated as unreachable,
// so the function only ever returns true.
template <typename T>
bool BaseString::MemCopyChars(common::Span<T> &dst, size_t dstMax, common::Span<const T> &src, size_t count)
{
    DCHECK_CC(dstMax >= count);
    DCHECK_CC(dst.Size() >= src.Size());
    const bool copied = (memcpy_s(dst.data(), dstMax, src.data(), count) == EOK);
    if (!copied) {
        UNREACHABLE_CC();
    }
    return true;
}
// Searches `lhsSp` backwards for `rhsSp`, trying start positions pos, pos - 1, ..., 0.
// Returns the first start position (i.e. the largest one not above `pos`) at which all of
// `rhsSp` matches, or -1. `rhsSp` must not be empty (debug-checked); the caller is responsible
// for choosing `pos` so that pos + rhsSp.size() stays inside `lhsSp`.
template <typename T1, typename T2>
int32_t BaseString::LastIndexOf(common::Span<const T1> &lhsSp, common::Span<const T2> &rhsSp, int32_t pos)
{
    const int needleLen = static_cast<int>(rhsSp.size());
    DCHECK_CC(needleLen > 0);
    const auto head = rhsSp[0];
    int32_t candidate = pos;
    while (candidate >= 0) {
        if (lhsSp[candidate] == head) {
            // First unit matches; count how many further units agree.
            int matched = 1;
            for (; matched < needleLen; matched++) {
                if (rhsSp[matched] != lhsSp[candidate + matched]) {
                    break;
                }
            }
            if (matched == needleLen) {
                return candidate;
            }
        }
        candidate--;
    }
    return -1;
}
// Searches `lhsSp` forwards for `rhsSp`, trying start positions pos, pos + 1, ..., max.
// Returns the first start position at which all of `rhsSp` matches, or -1. Units are compared
// after conversion to int32_t. The caller is responsible for choosing `max` so that
// max + rhsSp.size() stays inside `lhsSp`.
template <typename T1, typename T2>
int32_t BaseString::IndexOf(common::Span<const T1> &lhsSp, common::Span<const T2> &rhsSp, int32_t pos, int32_t max)
{
    const auto head = static_cast<int32_t>(rhsSp[0]);
    for (int32_t start = pos; start <= max; start++) {
        if (static_cast<int32_t>(lhsSp[start]) != head) {
            continue;
        }
        // First unit matches at `start`; walk the remaining units of rhsSp.
        int cursor = start + 1;
        const int stop = cursor + static_cast<int>(rhsSp.size()) - 1;
        int needleIdx = 1;
        while (cursor < stop && static_cast<int32_t>(lhsSp[cursor]) == static_cast<int32_t>(rhsSp[needleIdx])) {
            cursor++;
            needleIdx++;
        }
        if (cursor == stop) {
            return start;
        }
    }
    return -1;
}
// True when every one of the `utf8Len` bytes satisfies IsASCIICharacter.
// When ENABLE_LATEST_OPTIMIZATION and ENABLE_HISPEED_PLUGIN are set and the plugin provides an
// implementation, the check is delegated to it. Otherwise the bytes are scanned four at a time,
// followed by the remaining tail.
inline bool BaseString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
{
#if ENABLE_LATEST_OPTIMIZATION && defined(ENABLE_HISPEED_PLUGIN)
    auto pluginCheck = common::utf_helper::HispeedLibSingleton::GetInstance().GetHispeedUtf8CanBeCompressed();
    if (pluginCheck != nullptr) {
        return pluginCheck(utf8Data, utf8Len);
    }
#endif
    uint32_t cursor = 0;
    while (cursor + 4 <= utf8Len) {
        const bool groupIsAscii = IsASCIICharacter(utf8Data[cursor]) &&
                                  IsASCIICharacter(utf8Data[cursor + 1]) &&
                                  IsASCIICharacter(utf8Data[cursor + 2]) &&
                                  IsASCIICharacter(utf8Data[cursor + 3]);
        if (!groupIsAscii) {
            return false;
        }
        cursor += 4;
    }
    while (cursor < utf8Len) {
        if (!IsASCIICharacter(utf8Data[cursor])) {
            return false;
        }
        ++cursor;
    }
    return true;
}
// True when every one of the `utf16Len` code units satisfies IsASCIICharacter.
// When ENABLE_LATEST_OPTIMIZATION and ENABLE_HISPEED_PLUGIN are set and the plugin provides an
// implementation, the check is delegated to it. Otherwise the units are scanned four at a time,
// followed by the remaining tail.
inline bool BaseString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
{
#if ENABLE_LATEST_OPTIMIZATION && defined(ENABLE_HISPEED_PLUGIN)
    auto pluginCheck = common::utf_helper::HispeedLibSingleton::GetInstance().GetHispeedUtf16CanBeCompressed();
    if (pluginCheck != nullptr) {
        return pluginCheck(utf16Data, utf16Len);
    }
#endif
    uint32_t cursor = 0;
    while (cursor + 4 <= utf16Len) {
        const bool groupIsAscii = IsASCIICharacter(utf16Data[cursor]) &&
                                  IsASCIICharacter(utf16Data[cursor + 1]) &&
                                  IsASCIICharacter(utf16Data[cursor + 2]) &&
                                  IsASCIICharacter(utf16Data[cursor + 3]);
        if (!groupIsAscii) {
            return false;
        }
        cursor += 4;
    }
    while (cursor < utf16Len) {
        if (!IsASCIICharacter(utf16Data[cursor])) {
            return false;
        }
        ++cursor;
    }
    return true;
}
// Compares the first `count` elements of two spans.
// Returns 0 when they are all equal, otherwise the difference (left - right, as int32_t) of the
// first pair that differs.
template <typename T1, typename T2>
int32_t CompareStringSpan(common::Span<T1> &lhsSp, common::Span<T2> &rhsSp, int32_t count)
{
    int32_t idx = 0;
    while (idx < count) {
        const auto lhsUnit = static_cast<int32_t>(lhsSp[idx]);
        const auto rhsUnit = static_cast<int32_t>(rhsSp[idx]);
        const int32_t delta = lhsUnit - rhsUnit;
        if (delta != 0) {
            return delta;
        }
        ++idx;
    }
    return 0;
}
#if defined(PANDA_32_BIT_MANAGED_POINTER)
// Hash of a UTF-8 buffer (PANDA_32_BIT_MANAGED_POINTER build).
// When `canBeCompress` is set the bytes are hashed directly; otherwise the buffer is first
// converted to UTF-16 and the resulting code units are hashed.
inline uint32_t BaseString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
{
    if (!canBeCompress) {
        auto utf16Len = common::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
        std::vector<uint16_t> widened(utf16Len);
        [[maybe_unused]] auto converted = common::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, widened.data(),
                                                                                       utf8Len, utf16Len);
        DCHECK_CC(converted == utf16Len);
        return ComputeHashForData(widened.data(), utf16Len, 0);
    }
    return ComputeHashForData(utf8Data, utf8Len, 0);
}
#else
inline uint32_t BaseString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
{
if (utf8Len == 0) {
return MixHashcode(0, NOT_INTEGER);
}
if (canBeCompress) {
uint32_t mixHash = 0;
if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
return mixHash;
}
uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
return MixHashcode(hash, NOT_INTEGER);
}
auto utf16Len = common::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
std::vector<uint16_t> tmpBuffer(utf16Len);
[[maybe_unused]] auto len = common::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(),
utf8Len, utf16Len);
DCHECK_CC(len == utf16Len);
uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
return MixHashcode(hash, NOT_INTEGER);
}
#endif
#if defined(PANDA_32_BIT_MANAGED_POINTER)
// Computes the hash of `length` UTF-16 code units (PANDA_32_BIT_MANAGED_POINTER variant):
// the value of ComputeHashForData with seed 0 is returned unchanged.
inline uint32_t BaseString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
{
    const uint32_t rawHash = ComputeHashForData(utf16Data, length, 0);
    return rawHash;
}
#else
// Computes the hash of `length` UTF-16 code units.
// - An empty string yields MixHashcode(0, NOT_INTEGER).
// - For length < MAX_ELEMENT_INDEX_LEN, HashIntegerString is tried first; if it reports success,
//   the value it produced is returned as is.
// - Otherwise the ComputeHashForData result (seed 0) is passed through MixHashcode with NOT_INTEGER.
inline uint32_t BaseString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
{
    if (length == 0) {
        return MixHashcode(0, NOT_INTEGER);
    }
    const bool shortEnough = length < MAX_ELEMENT_INDEX_LEN;
    uint32_t integerHash = 0;
    if (shortEnough && HashIntegerString(utf16Data, length, &integerHash, 0)) {
        return integerHash;
    }
    return MixHashcode(ComputeHashForData(utf16Data, length, 0), NOT_INTEGER);
}
#endif
// Returns utf8Len reduced by the number of trailing bytes that look like the start of a multi-byte
// UTF-8 sequence whose continuation bytes would lie beyond the end of the buffer.
// Inspects only the last three bytes:
//   - third-to-last byte  >= 0xF0 (4-byte lead)       -> drop 3 bytes
//   - second-to-last byte >= 0xE0 (3- or 4-byte lead) -> drop 2 bytes
//   - last byte           >= 0xC0 (any lead byte)     -> drop 1 byte
// The largest applicable trim wins; if none applies, utf8Len is returned unchanged.
static size_t FixUtf8Len(const uint8_t *utf8, size_t utf8Len)
{
    constexpr size_t ONE_BYTE_LENGTH = 1;
    constexpr size_t TWO_BYTES_LENGTH = 2;
    constexpr size_t THREE_BYTES_LENGTH = 3;
    constexpr uint8_t TWO_BYTE_LEAD_MIN = 0xC0;
    constexpr uint8_t THREE_BYTE_LEAD_MIN = 0xE0;
    constexpr uint8_t FOUR_BYTE_LEAD_MIN = 0xF0;
    // Test the deepest position first so that the largest trim takes precedence.
    if (utf8Len >= THREE_BYTES_LENGTH && utf8[utf8Len - THREE_BYTES_LENGTH] >= FOUR_BYTE_LEAD_MIN) {
        return utf8Len - THREE_BYTES_LENGTH;
    }
    if (utf8Len >= TWO_BYTES_LENGTH && utf8[utf8Len - TWO_BYTES_LENGTH] >= THREE_BYTE_LEAD_MIN) {
        return utf8Len - TWO_BYTES_LENGTH;
    }
    if (utf8Len >= ONE_BYTE_LENGTH && utf8[utf8Len - ONE_BYTE_LENGTH] >= TWO_BYTE_LEAD_MIN) {
        return utf8Len - ONE_BYTE_LENGTH;
    }
    return utf8Len;
}
// Checks whether the UTF-8 bytes [utf8Data, utf8Data + utf8Len) correspond exactly to the UTF-16
// code units [utf16Data, utf16Data + utf16Len), decoding on the fly without an intermediate buffer.
//
// The UTF-8 input is decoded sequence by sequence, dispatching on the high nibble of each lead byte:
//   0xF0        -> 4-byte sequence; code points >= 0x10000 must match a surrogate pair
//   0xE0        -> 3-byte sequence, must match one code unit
//   0xC0 / 0xD0 -> 2-byte sequence, must match one code unit
//   otherwise   -> the byte itself must equal the code unit
// Multi-byte decoding only runs inside the prefix reported by FixUtf8Len, so continuation bytes are
// never read past utf8Data + utf8Len; any bytes left after that prefix are compared one-to-one with
// the remaining code units. Returns true only if both inputs are consumed completely.
inline bool BaseString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
                                          uint32_t utf16Len)
{
    constexpr size_t LOW_3BITS = 0x7;
    constexpr size_t LOW_4BITS = 0xF;
    constexpr size_t LOW_5BITS = 0x1F;
    constexpr size_t LOW_6BITS = 0x3F;
    constexpr size_t LOW_10BITS = 0x3FF;
    constexpr size_t HIGH_SURROGATE_BASE = 0xD800;
    constexpr size_t LOW_SURROGATE_BASE = 0xDC00;
    constexpr size_t SURROGATE_PAIR_START = 0x10000;
    constexpr size_t SHIFT_18 = 18;
    constexpr size_t SHIFT_12 = 12;
    constexpr size_t SHIFT_10 = 10;
    constexpr size_t SHIFT_6 = 6;
    constexpr size_t TWO_BYTE_SEQ = 2;
    constexpr size_t THREE_BYTE_SEQ = 3;
    constexpr size_t FOUR_BYTE_SEQ = 4;
    constexpr size_t SURROGATE_PAIR_UNITS = 2;

    const uint8_t *const utf8End = utf8Data + utf8Len;
    const uint8_t *const utf8SafeEnd = utf8Data + FixUtf8Len(utf8Data, utf8Len);
    const uint16_t *const utf16End = utf16Data + utf16Len;

    while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
        const uint8_t lead = utf8Data[0];
        const auto leadNibble = lead & 0xF0;

        if (leadNibble == 0xF0) {
            // Four-byte sequence.
            uint32_t codePoint = ((lead & LOW_3BITS) << SHIFT_18) | ((utf8Data[1] & LOW_6BITS) << SHIFT_12) |
                                 ((utf8Data[2] & LOW_6BITS) << SHIFT_6) | (utf8Data[3] & LOW_6BITS);
            utf8Data += FOUR_BYTE_SEQ;
            if (codePoint < SURROGATE_PAIR_START) {
                // Decoded value fits in a single code unit; compare it directly.
                if (*utf16Data != static_cast<uint16_t>(codePoint)) {
                    return false;
                }
                ++utf16Data;
                continue;
            }
            // A surrogate pair needs two code units to still be available.
            if (utf16Data >= utf16End - 1) {
                return false;
            }
            codePoint -= SURROGATE_PAIR_START;
            const auto highUnit = static_cast<uint16_t>((codePoint >> SHIFT_10) | HIGH_SURROGATE_BASE);
            const auto lowUnit = static_cast<uint16_t>((codePoint & LOW_10BITS) | LOW_SURROGATE_BASE);
            if (utf16Data[0] != highUnit || utf16Data[1] != lowUnit) {
                return false;
            }
            utf16Data += SURROGATE_PAIR_UNITS;
            continue;
        }

        if (leadNibble == 0xE0) {
            // Three-byte sequence.
            const auto expected = static_cast<uint16_t>(((lead & LOW_4BITS) << SHIFT_12) |
                                                        ((utf8Data[1] & LOW_6BITS) << SHIFT_6) |
                                                        (utf8Data[2] & LOW_6BITS));
            if (*utf16Data != expected) {
                return false;
            }
            ++utf16Data;
            utf8Data += THREE_BYTE_SEQ;
            continue;
        }

        if (leadNibble == 0xC0 || leadNibble == 0xD0) {
            // Two-byte sequence.
            const auto expected =
                static_cast<uint16_t>(((lead & LOW_5BITS) << SHIFT_6) | (utf8Data[1] & LOW_6BITS));
            if (*utf16Data != expected) {
                return false;
            }
            ++utf16Data;
            utf8Data += TWO_BYTE_SEQ;
            continue;
        }

        // Any other byte is compared as-is; then keep consuming the following run of bytes below
        // 0x80 without going back through the dispatch above.
        for (;;) {
            if (*utf16Data != static_cast<uint16_t>(*utf8Data)) {
                return false;
            }
            ++utf16Data;
            ++utf8Data;
            if (utf8Data >= utf8SafeEnd || utf16Data >= utf16End || *utf8Data >= 0x80) {
                break;
            }
        }
    }

    // Bytes beyond the safe prefix are compared one-to-one with the code units that remain.
    for (; utf8Data < utf8End && utf16Data < utf16End; ++utf8Data, ++utf16Data) {
        if (*utf16Data != static_cast<uint16_t>(*utf8Data)) {
            return false;
        }
    }
    return utf8Data == utf8End && utf16Data == utf16End;
}
}
#endif