#include <array>
#include <string_view>
#include <string.h>
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
namespace url {
namespace {
// Returns true when |ch| is one of the three characters this file strips from
// URL input: carriage return, line feed, or horizontal tab.
inline bool IsRemovableURLWhitespace(int ch) {
  switch (ch) {
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
// Shared implementation behind RemoveUrlWhitespace().
//
// Returns |input| unchanged when it contains no tab/CR/LF, or when it is
// longer than five characters and starts with "data:". Otherwise every
// character that is not tab/CR/LF is appended to |buffer| and a view over
// |buffer| is returned.
//
// |potentially_dangling_markup| may be null. When it is non-null and the
// stripping pass runs, it is set to true if a '<' (0x3C) is among the kept
// characters; it is never reset to false here.
template <typename CHAR>
std::basic_string_view<CHAR> DoRemoveUrlWhitespace(
    std::basic_string_view<CHAR> input,
    CanonOutputT<CHAR>* buffer,
    bool* potentially_dangling_markup) {
  const size_t length = input.length();

  // Step 1: find out whether there is anything to strip at all.
  bool needs_stripping = false;
  if (sizeof(CHAR) == 1 && length >= kMinimumLengthForSIMD) {
    // Long single-byte input: three memchr passes, one per removable
    // character. NOTE(review): presumably chosen because memchr is vectorized
    // (see the constant's name) -- confirm against the header that defines it.
    const CHAR* bytes = input.data();
    if (UNSAFE_TODO(memchr(bytes, '\n', length)) != nullptr) {
      needs_stripping = true;
    } else if (UNSAFE_TODO(memchr(bytes, '\r', length)) != nullptr) {
      needs_stripping = true;
    } else if (UNSAFE_TODO(memchr(bytes, '\t', length)) != nullptr) {
      needs_stripping = true;
    }
  } else {
    // Short or wide input: plain scan that stops at the first hit.
    for (size_t pos = 0; pos < length && !needs_stripping; ++pos) {
      needs_stripping = IsRemovableURLWhitespace(input[pos]);
    }
  }
  if (!needs_stripping) {
    return input;
  }

  // Step 2: inputs that begin with "data:" are passed through untouched.
  const bool has_data_prefix = length > 5 && input[0] == 'd' &&
                               input[1] == 'a' && input[2] == 't' &&
                               input[3] == 'a' && input[4] == ':';
  if (has_data_prefix) {
    return input;
  }

  // Step 3: copy everything except the removable characters into |buffer|.
  for (size_t pos = 0; pos < length; ++pos) {
    const CHAR unit = input[pos];
    if (IsRemovableURLWhitespace(unit)) {
      continue;
    }
    if (potentially_dangling_markup && unit == 0x3C) {
      *potentially_dangling_markup = true;
    }
    buffer->push_back(unit);
  }
  return buffer->view();
}
// Lookup table indexed by a 7-bit code point (0x00-0x7F). Each entry is the
// character to emit for that code point inside a scheme, or 0 when the code
// point has no canonical scheme form. Uppercase ASCII letters map to their
// lowercase counterparts; digits, lowercase letters, '+', '-' and '.' map to
// themselves. Each source row below covers 16 consecutive code points.
const std::array<char, 0x80> kSchemeCanonical = {
// 0x00-0x0F: control characters, no entry.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// 0x10-0x1F: control characters, no entry.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// 0x20-0x2F: space and punctuation; only '+' (0x2B), '-' (0x2D), '.' (0x2E).
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '+', 0, '-', '.', 0,
// 0x30-0x3F: digits '0'-'9', then six punctuation slots with no entry.
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0 , 0 , 0 , 0 , 0 , 0 ,
// 0x40-0x4F: '@' (no entry), then 'A'-'O' folded to lowercase.
0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// 0x50-0x5F: 'P'-'Z' folded to lowercase, then five slots with no entry.
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0 , 0, 0 , 0,
// 0x60-0x6F: backtick (no entry), then 'a'-'o'.
0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// 0x70-0x7F: 'p'-'z', then five slots with no entry (ending at DEL).
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0 , 0 , 0 , 0 , 0 };
// Returns true when |c| is an ASCII letter (either case). DoScheme() applies
// this extra restriction to the first character of a scheme only.
inline bool IsSchemeFirstChar(unsigned char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  if (is_lower) {
    return true;
  }
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_upper;
}
// Canonicalizes a scheme into |output| and always terminates it with ':'.
//
// |input|      - the scheme text; absent or empty counts as "no scheme".
// |output|     - receives the canonical characters followed by ':'.
// |out_scheme| - set to the range written to |output|, excluding the ':'.
//
// Returns false when the scheme is absent/empty or when any character had no
// canonical form; returns true otherwise. Every input character produces some
// output (canonical, a literal '%', or whatever AppendUtf8EscapedChar emits);
// nothing is dropped.
template <typename CHAR, typename UCHAR>
bool DoScheme(std::optional<std::basic_string_view<CHAR>> input,
              CanonOutput* output,
              Component* out_scheme) {
  const bool has_scheme = input.has_value() && !input->empty();
  if (!has_scheme) {
    // Nothing to canonicalize: record a zero-length component and emit ':'.
    *out_scheme = Component(output->length(), 0);
    output->push_back(':');
    return false;
  }

  std::basic_string_view<CHAR> scheme = *input;
  out_scheme->begin = output->length();

  bool all_valid = true;
  for (size_t pos = 0; pos < scheme.length(); pos++) {
    const UCHAR unit = static_cast<UCHAR>(scheme[pos]);

    // Only ASCII can have a table entry, and the first position additionally
    // has to be a letter.
    const bool table_applies =
        unit < 0x80 &&
        (pos != 0 || IsSchemeFirstChar(static_cast<unsigned char>(unit)));
    const char canonical = table_applies ? kSchemeCanonical[unit] : 0;

    if (canonical) {
      output->push_back(canonical);
      continue;
    }

    // No canonical form: the scheme is invalid from here on.
    all_valid = false;
    if (unit == '%') {
      // A '%' is copied through literally rather than escaped.
      output->push_back('%');
    } else {
      // |pos| is passed by pointer, so the helper may advance it.
      AppendUtf8EscapedChar(scheme, &pos, output);
    }
  }

  out_scheme->len = output->length() - out_scheme->begin;
  output->push_back(':');
  return all_valid;
}
// Writes the "username[:password]@" portion of a URL to |output|.
//
// When both |username| and |password| are absent or empty, nothing is written
// and both output components are reset to default-constructed Components.
// Otherwise the username (possibly empty) is written, followed by ':' and the
// password only when the password is non-empty, followed by '@'.
//
// Both strings go through AppendStringOfType() with CHAR_USERINFO.
// NOTE(review): presumably that escapes characters not allowed in userinfo --
// confirm against its definition.
//
// Always returns true.
template <typename CHAR, typename UCHAR>
bool DoUserInfo(std::optional<std::basic_string_view<CHAR>> username,
                std::optional<std::basic_string_view<CHAR>> password,
                CanonOutput* output,
                Component* out_username,
                Component* out_password) {
  const bool has_username = username.has_value() && !username->empty();
  const bool has_password = password.has_value() && !password->empty();

  if (!has_username && !has_password) {
    *out_username = Component();
    *out_password = Component();
    return true;
  }

  // Username: the component is recorded even when it ends up empty.
  out_username->begin = output->length();
  if (has_username) {
    AppendStringOfType(*username, CHAR_USERINFO, output);
  }
  out_username->len = output->length() - out_username->begin;

  // Password: an empty-but-present password is treated like an absent one.
  if (!has_password) {
    *out_password = Component();
  } else {
    output->push_back(':');
    out_password->begin = output->length();
    AppendStringOfType(*password, CHAR_USERINFO, output);
    out_password->len = output->length() - out_password->begin;
  }

  output->push_back('@');
  return true;
}
// Formats |port| as base-10 text into |output|, a buffer of |output_len|
// chars, by delegating to _itoa_s().
inline void WritePortInt(char* output, int output_len, int port) {
  constexpr int kDecimalRadix = 10;
  _itoa_s(port, output, output_len, kDecimalRadix);
}
// Canonicalizes a port and writes ":<port>" to |output| when one is needed.
//
// |port_view|               - the port text, or nullopt when absent.
// |default_port_for_scheme| - a port equal to this value is omitted.
// |output|                  - receives ':' plus the port text, if any.
// |out_port|                - set to the range written after the ':', or to a
//                             default-constructed Component when omitted.
//
// Returns false only when ParsePort() reports PORT_INVALID; in that case the
// original text is still emitted via AppendInvalidNarrowString().
template <typename CHAR, typename UCHAR>
bool DoPort(std::optional<std::basic_string_view<CHAR>> port_view,
            int default_port_for_scheme,
            CanonOutput* output,
            Component* out_port) {
  if (!port_view.has_value()) {
    *out_port = Component();
    return true;
  }

  const int parsed_port = ParsePort(*port_view, Component(*port_view));

  // This check deliberately precedes the PORT_INVALID check, matching the
  // order in which the outcomes are evaluated.
  const bool omit_port = parsed_port == PORT_UNSPECIFIED ||
                         parsed_port == default_port_for_scheme;
  if (omit_port) {
    *out_port = Component();
    return true;
  }

  // Both remaining paths start with the separator.
  if (parsed_port == PORT_INVALID) {
    output->push_back(':');
    out_port->begin = output->length();
    AppendInvalidNarrowString(*port_view, output);
    out_port->len = output->length() - out_port->begin;
    return false;
  }

  // Six chars of scratch space for the decimal text written by WritePortInt().
  // NOTE(review): presumably five digits plus a terminator -- this relies on
  // ParsePort() never returning a value above 65535; confirm.
  const int buf_size = 6;
  std::array<char, buf_size> digits;
  WritePortInt(digits.data(), buf_size, parsed_port);

  output->push_back(':');
  out_port->begin = output->length();
  int idx = 0;
  while (idx < buf_size && digits[idx]) {
    output->push_back(digits[idx]);
    idx++;
  }
  out_port->len = output->length() - out_port->begin;
  return true;
}
// Lookup table indexed by a 7-bit code point (0x00-0x7F): true means
// DoCanonicalizeRef() passes that character to AppendEscapedChar() instead of
// copying it to the output. Each source row below covers 8 consecutive code
// points. The true entries are 0x00-0x1F, space, '"', '<', '>', backtick and
// 0x7F.
// NOTE(review): this looks like the ASCII part of the WHATWG URL "fragment
// percent-encode set" -- confirm against the spec before editing.
const std::array<bool, 0x80> kShouldEscapeCharInFragment = {
// 0x00-0x1F: control characters, all escaped.
true, true, true, true, true, true, true, true,
true, true, true, true, true, true, true, true,
true, true, true, true, true, true, true, true,
true, true, true, true, true, true, true, true,
// 0x20-0x27: space and '"' (0x22) escaped; the other six are not.
true, false, true, false, false, false, false, false,
// 0x28-0x2F: punctuation, none escaped.
false, false, false, false, false, false, false, false,
// 0x30-0x37: digits '0'-'7'.
false, false, false, false, false, false, false, false,
// 0x38-0x3F: '8', '9', ':', ';', then '<' (0x3C) and '>' (0x3E) escaped.
false, false, false, false, true, false, true, false,
// 0x40-0x5F: '@', uppercase letters and following punctuation, none escaped.
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
// 0x60-0x67: backtick (0x60) escaped, then 'a'-'g'.
true, false, false, false, false, false, false, false,
// 0x68-0x77: 'h'-'w'.
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
// 0x78-0x7F: 'x'-'z' and four punctuation slots not escaped; 0x7F escaped.
false, false, false, false, false, false, false, true
};
// Canonicalizes a URL fragment ("ref") into |output|.
//
// An absent |input| writes nothing and resets |out_ref|. A present |input|,
// even an empty one, writes '#' followed by the processed characters, and
// |out_ref| is set to the range written after the '#'.
//
// Characters below 0x80 are either copied or passed to AppendEscapedChar(),
// as selected by kShouldEscapeCharInFragment; everything else is handed to
// AppendUtf8EscapedChar().
template <typename CHAR, typename UCHAR>
void DoCanonicalizeRef(std::optional<std::basic_string_view<CHAR>> input,
                       CanonOutput* output,
                       Component* out_ref) {
  if (!input) {
    *out_ref = Component();
    return;
  }

  std::basic_string_view<CHAR> fragment = *input;
  output->push_back('#');
  out_ref->begin = output->length();

  size_t pos = 0;
  while (pos < fragment.length()) {
    const CHAR raw = fragment[pos];
    const UCHAR unit = static_cast<UCHAR>(raw);
    if (unit >= 0x80) {
      // |pos| is passed by pointer, so the helper may advance it.
      AppendUtf8EscapedChar(fragment, &pos, output);
    } else if (kShouldEscapeCharInFragment[unit]) {
      AppendEscapedChar(static_cast<unsigned char>(raw), output);
    } else {
      output->push_back(static_cast<char>(raw));
    }
    ++pos;
  }

  out_ref->len = output->length() - out_ref->begin;
}
}
std::string_view RemoveUrlWhitespace(std::string_view input,
CanonOutputT<char>* buffer,
bool* potentially_dangling_markup) {
return DoRemoveUrlWhitespace(input, buffer, potentially_dangling_markup);
}
std::u16string_view RemoveUrlWhitespace(std::u16string_view input,
CanonOutputT<char16_t>* buffer,
bool* potentially_dangling_markup) {
return DoRemoveUrlWhitespace(input, buffer, potentially_dangling_markup);
}
// Returns the kSchemeCanonical entry for |ch|, or 0 when |ch| is outside the
// table's 0x00-0x7F range.
char CanonicalSchemeChar(char16_t ch) {
  const bool in_table = ch < 0x80;
  if (in_table) {
    return kSchemeCanonical[ch];
  }
  return 0;
}
bool CanonicalizeScheme(std::optional<std::string_view> input,
CanonOutput* output,
Component* out_scheme) {
return DoScheme<char, unsigned char>(input, output, out_scheme);
}
bool CanonicalizeScheme(std::optional<std::u16string_view> input,
CanonOutput* output,
Component* out_scheme) {
return DoScheme<char16_t, char16_t>(input, output, out_scheme);
}
bool CanonicalizeUserInfo(std::optional<std::string_view> username,
std::optional<std::string_view> password,
CanonOutput* output,
Component* out_username,
Component* out_password) {
return DoUserInfo<char, unsigned char>(username, password, output,
out_username, out_password);
}
bool CanonicalizeUserInfo(std::optional<std::u16string_view> username,
std::optional<std::u16string_view> password,
CanonOutput* output,
Component* out_username,
Component* out_password) {
return DoUserInfo<char16_t, char16_t>(username, password, output,
out_username, out_password);
}
bool CanonicalizePort(std::optional<std::string_view> port_view,
int default_port_for_scheme,
CanonOutput* output,
Component* out_port) {
return DoPort<char, unsigned char>(port_view, default_port_for_scheme, output,
out_port);
}
bool CanonicalizePort(std::optional<std::u16string_view> port_view,
int default_port_for_scheme,
CanonOutput* output,
Component* out_port) {
return DoPort<char16_t, char16_t>(port_view, default_port_for_scheme, output,
out_port);
}
// 8-bit overload: forwards to DoCanonicalizeRef().
void CanonicalizeRef(std::optional<std::string_view> input,
                     CanonOutput* output,
                     Component* out_ref) {
  using Unit = unsigned char;
  DoCanonicalizeRef<char, Unit>(input, output, out_ref);
}
void CanonicalizeRef(std::optional<std::u16string_view> input,
CanonOutput* output,
Component* out_ref) {
DoCanonicalizeRef<char16_t, char16_t>(input, output, out_ref);
}
}