* Copyright (c) 2025 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ecmascript/base/string_helper.h"
#include "common_components/base/utf_helper.h"
#include "ecmascript/builtins/builtins_global.h"
#include "ecmascript/builtins/builtins_global_uri.h"
#include "ecmascript/ecma_string-inl.h"
namespace panda::ecmascript::builtins {
using StringHelper = base::StringHelper;
// Fills one three-character escape slot of sStr with the two hex digits of `byte`.
// On entry `len` is the index of the slot's first character (left untouched); the digits
// are stored at len + 1 and len + 2, and `len` is advanced by 3 so that it addresses the
// next slot. No bounds check is done here: sStr must already hold at least len + 3 characters.
void BuiltinsGlobal::AppendPercentEncodedByte(std::u16string &sStr, uint8_t byte, uint8_t &len)
{
    const auto highDigitPos = static_cast<uint8_t>(len + 1);
    const auto lowDigitPos = static_cast<uint8_t>(len + 2);
    // Digit for the upper four bits first, then the digit for the lower four bits.
    sStr[highDigitPos] = common::utf_helper::GetHexChar16((byte >> 4) & BIT_MASK);
    sStr[lowDigitPos] = common::utf_helper::GetHexChar16(byte & BIT_MASK);
    len = static_cast<uint8_t>(len + 3);
}
// Appends the percent-escaped multi-byte encoding of `data` to resStr.
// Values up to 0x7F produce one "%XX" escape, up to 0x7FF two, up to 0xFFFF three,
// and anything larger four. The escapes are first written into a fixed "%00%00%00%00"
// scratch string and only the part that was actually filled is appended.
// NOTE(review): BIT_MASK_TWO / BIT_MASK_THR / BIT_MASK_FOR / BIT_MASK_ONE look like the UTF-8
// lead-byte and continuation-byte markers — confirm against their declarations.
void BuiltinsGlobal::AppendU32Data(std::u16string &resStr, uint32_t data)
{
    std::u16string scratch(u"%00%00%00%00");
    uint8_t used = 0;

    // Pick the lead-byte marker and the bit offset of the payload carried by the lead byte.
    // A shift of 0 with a marker of 0 means "single byte, emitted as is".
    uint32_t leadMarker = 0;
    uint32_t leadShift = 0;
    if (data > 0xFFFF) {
        leadMarker = BIT_MASK_FOR;
        leadShift = 18;
    } else if (data > 0x7FF) {
        leadMarker = BIT_MASK_THR;
        leadShift = 12;
    } else if (data > 0x7F) {
        leadMarker = BIT_MASK_TWO;
        leadShift = 6;
    }
    AppendPercentEncodedByte(scratch, leadMarker + (data >> leadShift), used);

    // Every remaining group of six bits becomes one continuation byte, most significant first.
    uint32_t shift = leadShift;
    while (shift > 0) {
        shift -= 6;
        AppendPercentEncodedByte(scratch, BIT_MASK_ONE + ((data >> shift) & SIX_BIT_MASK), used);
    }

    resStr.append(scratch, 0, used);
}
// Bounds-safe read of one code unit from `sp`.
// Returns sp[index] converted to uint16_t when 0 <= index < length, and 0 otherwise.
template <typename T>
uint16_t BuiltinsGlobal::GetCodeUnit(Span<T> &sp, int32_t index, int32_t length)
{
    const bool inRange = (index >= 0) && (index < length);
    return inRange ? static_cast<uint16_t>(sp[index]) : static_cast<uint16_t>(0);
}
// Percent-encodes `str`.
// Code units accepted by IsInURISet are copied through unchanged. Every other code unit is
// turned into a code point (combining a lead/trail surrogate pair when present) and appended
// as percent escapes via AppendU32Data. A lone trail surrogate, a lead surrogate at the end of
// the string, or a lead surrogate not followed by a trail surrogate raises a URI error and
// returns JSTaggedValue::Exception(). On success the encoded text is returned as a new string.
JSTaggedValue BuiltinsGlobal::Encode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet)
{
    BUILTINS_API_TRACE(thread, Global, Encode);
    CString errorMsg;
    auto stringAcc = EcmaStringAccessor(str);
    uint32_t strLen = stringAcc.GetLength();
    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
    std::u16string resStr;
    resStr.reserve(strLen);

    // A tree string is flattened once up front; all later reads go through the flat copy.
    JSHandle<EcmaString> string;
    bool isTreeString = stringAcc.IsTreeString();
    if (isTreeString) {
        string = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), str));
        stringAcc = EcmaStringAccessor(string);
    }

    // Builds the error text from whichever handle is actually being scanned.
    // NOTE(review): the prefix reads "DecodeURI" although this is the encode path; it is kept
    // byte-for-byte here — confirm whether that wording is intended.
    const auto buildErrorMsg = [&]() {
        JSTaggedValue strVal = isTreeString ? string.GetTaggedValue() : str.GetTaggedValue();
        errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, strVal);
    };

    for (uint32_t k = 0; k != strLen; k++) {
        uint16_t cc = stringAcc.Get(thread, k);
        if (LIKELY(IsInURISet(cc))) {
            resStr.push_back(static_cast<const char16_t>(cc));
            continue;
        }

        // A trail surrogate with no preceding lead surrogate cannot be encoded.
        if (cc >= common::utf_helper::DECODE_TRAIL_LOW && cc <= common::utf_helper::DECODE_TRAIL_HIGH) {
            buildErrorMsg();
            THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
        }

        uint32_t vv = cc;
        const bool isLeadSurrogate =
            (cc >= common::utf_helper::DECODE_LEAD_LOW) && (cc <= common::utf_helper::DECODE_LEAD_HIGH);
        if (isLeadSurrogate) {
            // The lead surrogate must be followed by a trail surrogate inside the string.
            k++;
            if (k == strLen) {
                buildErrorMsg();
                THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
            }
            uint16_t kc = stringAcc.Get(thread, k);
            if (kc < common::utf_helper::DECODE_TRAIL_LOW || kc > common::utf_helper::DECODE_TRAIL_HIGH) {
                buildErrorMsg();
                THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
            }
            vv = common::utf_helper::UTF16Decode(cc, kc);
        }
        AppendU32Data(resStr, vv);
    }

    auto *uint16tData = reinterpret_cast<uint16_t *>(resStr.data());
    uint32_t resSize = resStr.size();
    return factory->NewFromUtf16Literal(uint16tData, resSize).GetTaggedValue();
}
// Entry point for percent-decoding `str`.
// A tree string is flattened first. The function then selects the 8-bit or 16-bit DoDecode
// instantiation that matches the string's storage. For a sliced string the data pointer is
// taken from the parent string and offset by the slice's start index.
JSTaggedValue BuiltinsGlobal::Decode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet)
{
    BUILTINS_API_TRACE(thread, Global, Decode);
    JSHandle<EcmaString> string = str;
    if (EcmaStringAccessor(str).IsTreeString()) {
        string = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), str));
    }
    auto stringAcc = EcmaStringAccessor(string);

    if (stringAcc.IsLineOrCachedExternalString()) {
        if (stringAcc.IsUtf16()) {
            return DoDecode<uint16_t>(thread, string, IsInURISet, stringAcc.GetDataUtf16());
        }
        return DoDecode<uint8_t>(thread, string, IsInURISet, stringAcc.GetDataUtf8());
    }

    // Anything that is neither tree nor line/cached-external is expected to be a slice.
    ASSERT(stringAcc.IsSlicedString());
    auto parent = SlicedEcmaString::Cast(string.GetTaggedValue())->GetParent(thread);
    auto parentStrAcc = EcmaStringAccessor(parent);
    auto startIndex = SlicedEcmaString::Cast(string.GetTaggedValue())->GetStartIndex();
    if (parentStrAcc.IsUtf8()) {
        return DoDecode<uint8_t>(thread, string, IsInURISet, parentStrAcc.GetDataUtf8() + startIndex);
    }
    return DoDecode<uint16_t>(thread, string, IsInURISet, parentStrAcc.GetDataUtf16() + startIndex);
}
// Decodes the code units at `data` (as many as the length of `str`) into a new string.
// Every code unit other than '%' is copied through; a '%' hands control to
// DecodePercentEncoding, which consumes the whole escape sequence and advances the cursor.
// Returns JSTaggedValue::Exception() as soon as a pending exception is observed.
template <typename T>
JSTaggedValue BuiltinsGlobal::DoDecode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet,
                                       const T *data)
{
    int32_t strLen = static_cast<int32_t>(EcmaStringAccessor(str).GetLength());
    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
    std::u16string resStr;
    resStr.reserve(strLen);

    // Scan a private copy of the input rather than the caller's pointer.
    // NOTE(review): presumably so no raw pointer into the managed string is held while
    // substrings/results are allocated — confirm.
    std::vector<T> tmpVec;
    tmpVec.resize(strLen);
    if (LIKELY(strLen != 0)) {
        const size_t byteCount = sizeof(T) * strLen;
        if (memcpy_s(tmpVec.data(), byteCount, data, byteCount) != EOK) {
            LOG_FULL(FATAL) << "memcpy_s failed";
            UNREACHABLE();
        }
    }
    Span<T> sp(tmpVec.data(), strLen);

    for (int32_t k = 0; k != strLen; k++) {
        uint16_t cc = GetCodeUnit<T>(sp, k, strLen);
        if (cc == '%') {
            // k is advanced past the escape sequence by the callee.
            DecodePercentEncoding<T>(thread, str, k, IsInURISet, strLen, resStr, sp);
            RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
            continue;
        }
        // An input consisting of a single NUL code unit is returned directly.
        if (cc == 0 && strLen == 1) {
            JSHandle<EcmaString> tmpEcmaString = factory->NewFromUtf16Literal(&cc, 1);
            return tmpEcmaString.GetTaggedValue();
        }
        resStr.push_back(static_cast<const char16_t>(cc));
    }

    auto *uint16tData = reinterpret_cast<uint16_t *>(resStr.data());
    uint32_t resSize = resStr.size();
    return factory->NewFromUtf16Literal(uint16tData, resSize).GetTaggedValue();
}
// Emits the result of a one-byte escape whose decoded value is `bb`.
// If IsInURISet rejects bb, the decoded character itself is appended to resStr. Otherwise the
// original text of the escape (the substring of `str` from `start` through `k`, inclusive) is
// appended, so the escape stays as written.
void BuiltinsGlobal::HandleSingleByteCharacter(JSThread *thread, uint8_t &bb,
                                               const JSHandle<EcmaString> &str,
                                               uint32_t &start, int32_t &k,
                                               std::u16string &resStr, judgURIFunc IsInURISet)
{
    if (!IsInURISet(bb)) {
        resStr.push_back(static_cast<const char16_t>(bb));
        return;
    }

    // Length of the escape text: positions start..k, both ends included.
    const uint32_t escapeLen = k - start + 1U;
    auto escapeText = EcmaStringAccessor::FastSubString(thread->GetEcmaVM(), str, start, escapeLen);
    resStr.append(StringHelper::StringToU16string(
        EcmaStringAccessor(escapeText).ToStdString(thread, StringConvertedUsage::LOGICOPERATION)));
}
// Decodes the escape sequence that starts with the '%' at position `k`.
// The first "%XX" is parsed into a byte. If its top bit (BIT_MASK_ONE) is clear it is handled
// as a single-byte character. Otherwise the number of leading set bits gives the sequence
// length n; the remaining n - 1 escapes are read by the continuation overload and the
// collected bytes are passed to UTF16EncodeCodePoint.
// `k` is left on the last consumed code unit. Malformed input raises a URI error and returns
// JSTaggedValue::Exception(); success returns JSTaggedValue::True().
template <typename T>
JSTaggedValue BuiltinsGlobal::DecodePercentEncoding(JSThread *thread, const JSHandle<EcmaString> &str, int32_t &k,
                                                    judgURIFunc IsInURISet, int32_t strLen, std::u16string &resStr,
                                                    Span<T> &sp)
{
    [[maybe_unused]] uint32_t start = static_cast<uint32_t>(k);
    CString errorMsg;
    const auto buildErrorMsg = [&]() {
        errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
    };

    // "%XX" needs two more code units after the '%'.
    if ((k + 2) >= strLen) {
        buildErrorMsg();
        THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
    }
    uint16_t frontChar = GetCodeUnit<T>(sp, k + 1, strLen);
    uint16_t behindChar = GetCodeUnit<T>(sp, k + 2, strLen);
    const bool bothHex = common::utf_helper::IsHexDigits(frontChar) && common::utf_helper::IsHexDigits(behindChar);
    if (!bothHex) {
        buildErrorMsg();
        THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
    }
    uint8_t bb = common::utf_helper::GetValueFromTwoHex(frontChar, behindChar);
    k += 2;

    if ((bb & BIT_MASK_ONE) == 0) {
        HandleSingleByteCharacter(thread, bb, str, start, k, resStr, IsInURISet);
        return JSTaggedValue::True();
    }

    // Count the leading set bits of bb, giving up once the count exceeds 4.
    int32_t n = 0;
    while ((n <= 4) && (((static_cast<uint32_t>(bb) << static_cast<uint32_t>(n)) & BIT_MASK_ONE) != 0)) {
        n++;
    }
    if ((n == 1) || (n > 4)) {
        buildErrorMsg();
        THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
    }

    std::vector<uint8_t> oct = {bb};
    // Each of the n - 1 remaining bytes takes three code units ("%XX").
    if (k + (3 * (n - 1)) >= strLen) {
        buildErrorMsg();
        THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
    }
    DecodePercentEncoding<T>(thread, n, k, str, bb, oct, sp, strLen);
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    UTF16EncodeCodePoint(thread, IsInURISet, oct, str, start, k, resStr);
    RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
    return JSTaggedValue::True();
}
// Reads the n - 1 escapes that follow the first byte of a multi-byte sequence.
// For each one: advance `k`, require a '%' followed by two hex digits, require that the
// decoded byte masked with BIT_MASK_TWO equals BIT_MASK_ONE, then append the byte to `oct`.
// `bb` receives the most recently decoded byte and `k` ends on the last consumed code unit.
// Any violation raises a URI error and returns JSTaggedValue::Exception(); otherwise
// JSTaggedValue::True() is returned.
template <typename T>
JSTaggedValue BuiltinsGlobal::DecodePercentEncoding(JSThread *thread, int32_t &n,
                                                    int32_t &k, const JSHandle<EcmaString> &str,
                                                    uint8_t &bb, std::vector<uint8_t> &oct, Span<T> &sp, int32_t strLen)
{
    CString errorMsg;
    const auto buildErrorMsg = [&]() {
        errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
    };

    for (int32_t j = 1; j < n; j++) {
        k++;
        uint16_t codeUnit = GetCodeUnit<T>(sp, k, strLen);
        if (codeUnit != '%') {
            buildErrorMsg();
            THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
        }

        uint16_t frontChart = GetCodeUnit<T>(sp, k + 1, strLen);
        uint16_t behindChart = GetCodeUnit<T>(sp, k + 2, strLen);
        const bool bothHex =
            common::utf_helper::IsHexDigits(frontChart) && common::utf_helper::IsHexDigits(behindChart);
        if (!bothHex) {
            buildErrorMsg();
            THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
        }

        bb = common::utf_helper::GetValueFromTwoHex(frontChart, behindChart);
        if ((bb & BIT_MASK_TWO) != BIT_MASK_ONE) {
            buildErrorMsg();
            THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
        }

        k += 2;
        oct.push_back(bb);
    }
    return JSTaggedValue::True();
}
// Converts the decoded byte sequence `oct` into UTF-16 and appends it to resStr.
//   - `oct` must pass common::utf_helper::IsValidUTF8, otherwise a URI error is raised and
//     JSTaggedValue::Exception() is returned.
//   - A code point below DECODE_SECOND_FACTOR is a single code unit: it is appended as is
//     unless IsInURISet accepts it, in which case the original escape text
//     (substring of `str` from `start` through `k`, inclusive) is appended instead.
//   - Any other code point is appended as a lead/trail surrogate pair.
// Returns JSTaggedValue::True() on success.
JSTaggedValue BuiltinsGlobal::UTF16EncodeCodePoint(JSThread *thread, judgURIFunc IsInURISet,
                                                   const std::vector<uint8_t> &oct, const JSHandle<EcmaString> &str,
                                                   uint32_t &start, int32_t &k, std::u16string &resStr)
{
    if (!common::utf_helper::IsValidUTF8(oct)) {
        CString errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
        THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
    }
    uint32_t vv = StringHelper::Utf8ToU32String(oct);
    if (vv < common::utf_helper::DECODE_SECOND_FACTOR) {
        if (!IsInURISet(vv)) {
            // Convert by value. Reading the uint32_t through a uint16_t pointer breaks the
            // type-aliasing rules and picks up the wrong half-word on big-endian targets.
            resStr.push_back(static_cast<char16_t>(vv));
        } else {
            auto substr = EcmaStringAccessor::FastSubString(
                thread->GetEcmaVM(), str, start, static_cast<uint32_t>(k) - start + 1U);
            resStr.append(StringHelper::StringToU16string(
                EcmaStringAccessor(substr).ToStdString(thread, StringConvertedUsage::LOGICOPERATION)));
        }
    } else {
        // Split the offset above DECODE_SECOND_FACTOR into its low bits (trail surrogate)
        // and the bits above them (lead surrogate), each masked with BIT16_MASK.
        const uint32_t offset = vv - common::utf_helper::DECODE_SECOND_FACTOR;
        uint16_t lv = ((offset & BIT16_MASK) + common::utf_helper::DECODE_TRAIL_LOW);
        uint16_t hv = (((offset >> 10U) & BIT16_MASK) + common::utf_helper::DECODE_LEAD_LOW);
        resStr.push_back(static_cast<const char16_t>(hv));
        resStr.push_back(static_cast<const char16_t>(lv));
    }
    return JSTaggedValue::True();
}
}