#include "extensions/browser/api/web_request/form_data_parser.h"
#include <stddef.h>
#include <memory>
#include <string_view>
#include <vector>
#include "base/check.h"
#include "base/containers/to_vector.h"
#include "base/memory/raw_ptr.h"
#include "base/no_destructor.h"
#include "base/notreached.h"
#include "base/strings/escape.h"
#include "base/strings/string_util.h"
#include "base/types/optional_util.h"
#include "base/values.h"
#include "extensions/buildflags/buildflags.h"
#include "net/http/http_request_headers.h"
#include "third_party/re2/src/re2/re2.h"
static_assert(BUILDFLAG(ENABLE_EXTENSIONS_CORE));
using re2::RE2;
namespace extensions {
namespace {
// Lower-case name of the Content-Disposition header, including the colon.
constexpr char kContentDisposition[] = "content-disposition:";
// Number of characters in |kContentDisposition|, not counting the terminating
// NUL.
constexpr size_t kContentDispositionLength = sizeof(kContentDisposition) - 1;
// Regular expression for one character of a url-encoded name or value: a
// literal from the listed set, a '-', or a %XX escape.
constexpr char kCharacterPattern[] =
    "(?:[a-zA-Z0-9$_.+!*'(),]|-|(?:%[a-fA-F0-9]{2}))";
// Line terminator used between multipart headers and around boundaries.
constexpr char kCRLF[] = "\r\n";
// Exact header line that marks a multipart part as binary.
constexpr char kContentTypeOctetString[] =
    "Content-Type: application/octet-stream\r\n";
// Holds the compiled RE2 patterns used by the form data parsers in this file.
// The regular expressions themselves are supplied by the constructor.
struct Patterns {
Patterns();
// Deleted, so a Patterns object can never be destroyed.
~Patterns() = delete;
// Optional spaces/tabs followed by CRLF.
const RE2 transfer_padding_pattern;
// "--" followed by optional spaces/tabs.
const RE2 closing_pattern;
// Either nothing, or CRLF followed by arbitrary characters.
const RE2 epilogue_pattern;
// A run of characters in which no '\r' is directly followed by '\n'.
const RE2 crlf_free_pattern;
// At least one character, matched non-greedily.
const RE2 preamble_pattern;
// One complete header line (possibly folded), including its final CRLF.
const RE2 header_pattern;
// |kContentDisposition|, matched case-insensitively.
const RE2 content_disposition_pattern;
// name="..." with the quoted text captured.
const RE2 name_pattern;
// filename="..." with the quoted text captured.
const RE2 value_pattern;
// A url-encoded "name=value" pair with both sides captured.
const RE2 url_encoded_pattern;
};
// Compiles every pattern once. The initializers are listed in member
// declaration order.
Patterns::Patterns()
// Optional linear whitespace, then CRLF.
: transfer_padding_pattern("[ \\t]*\\r\\n"),
// The "--" that follows the boundary in the closing delimiter, plus padding.
closing_pattern("--[ \\t]*"),
// Empty alternative first; otherwise CRLF and anything after it ((?s:.) also
// matches newlines).
epilogue_pattern("|\\r\\n(?s:.)*"),
// Any '\r' run must be followed by a character other than '\r' or '\n', so a
// match never contains CRLF.
crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"),
// Non-greedy, so a match consumes as little as possible.
preamble_pattern(".+?"),
// Field name: characters '!'..'9' and ';'..'~' (this range skips ':'), then
// ':', then the value, which may continue on lines starting with a tab or
// space, then the terminating CRLF.
header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"),
// Case-insensitive match of the header name held in |kContentDisposition|.
content_disposition_pattern(std::string("(?i:") + kContentDisposition +
")"),
// \b keeps this from matching the tail of "filename=".
name_pattern("\\bname=\"([^\"]*)\""),
value_pattern("\\bfilename=\"([^\"]*)\""),
// Two capture groups: the characters before '=' and the characters after it.
url_encoded_pattern(std::string("(") + kCharacterPattern + "*)=(" +
kCharacterPattern + "*)") {}
// Returns the shared Patterns instance. It is a function-local static held in
// a base::NoDestructor, so it is built on first use and never destroyed.
const Patterns* GetPatterns() {
  static base::NoDestructor<Patterns> shared_patterns;
  return shared_patterns.get();
}
// Strips |prefix| from the front of |*str| when it is present.
// Returns true and advances |*str| past the prefix on a match; otherwise
// leaves |*str| untouched and returns false.
bool ConsumePrefix(std::string_view* str, std::string_view prefix) {
  const bool has_prefix = str->starts_with(prefix);
  if (has_prefix) {
    str->remove_prefix(prefix.size());
  }
  return has_prefix;
}
}
// Parser for url-encoded form bodies: a sequence of "name=value" pairs
// separated by '&'. FormDataParser::CreateFromContentTypeHeader() selects it
// for "application/x-www-form-urlencoded" and when no header is given.
class FormDataParserUrlEncoded : public FormDataParser {
public:
FormDataParserUrlEncoded();
FormDataParserUrlEncoded(const FormDataParserUrlEncoded&) = delete;
FormDataParserUrlEncoded& operator=(const FormDataParserUrlEncoded&) = delete;
~FormDataParserUrlEncoded() override;
// True once a source has been set, fully consumed, and found well-formed.
bool AllDataReadOK() override;
// Reads the next pair into |result|; returns false when none could be read.
bool GetNextNameValue(Result* result) override;
// Accepts a source exactly once; later calls return false.
bool SetSource(std::string_view source) override;
private:
// The "name=value" pattern from the shared Patterns instance.
const RE2& pattern() const { return patterns_->url_encoded_pattern; }
// Number of capture groups handed to RE2 (name and value).
static const size_t args_size_ = 2u;
// Not yet parsed remainder of the source. A view: the bytes are not owned.
std::string_view source_;
// Whether SetSource() has been called.
bool source_set_;
// Set when a pair is followed by something other than '&' or the end.
bool source_malformed_;
// Raw (still escaped) captures of the most recent match.
std::string name_;
std::string value_;
// RE2 argument wrappers writing into |name_| and |value_|.
const RE2::Arg arg_name_;
const RE2::Arg arg_value_;
// Pointers to the two wrappers above, in the form RE2::ConsumeN() takes.
std::array<const RE2::Arg*, args_size_> args_;
// The pattern set returned by GetPatterns().
raw_ptr<const Patterns> patterns_;
};
// Parser for "multipart/form-data" bodies. The body may be supplied in
// several pieces through repeated SetSource() calls; |state_| tracks progress
// between calls.
class FormDataParserMultipart : public FormDataParser {
public:
// |boundary_separator| is the boundary without the leading "--".
explicit FormDataParserMultipart(const std::string& boundary_separator);
FormDataParserMultipart(const FormDataParserMultipart&) = delete;
FormDataParserMultipart& operator=(const FormDataParserMultipart&) = delete;
~FormDataParserMultipart() override;
// True only when the parser is in STATE_FINISHED.
bool AllDataReadOK() override;
// Reads the next part into |result|; returns false when none could be read.
bool GetNextNameValue(Result* result) override;
// Supplies the next piece of the body. Rejected while the previous source is
// not fully consumed or when |source| has a null data pointer.
bool SetSource(std::string_view source) override;
private:
enum State {
// Initial state, before the first boundary has been consumed.
STATE_INIT,
// Positioned at the headers of the next part.
STATE_READY,
// The closing boundary has been consumed.
STATE_FINISHED,
// A part whose value came from its headers ended the current source; the
// rest of that part is skipped on the next SetSource() call.
STATE_SUSPEND,
// Parsing failed.
STATE_ERROR
};
// True if |pattern| matches at the very start of |input|.
static bool StartsWithPattern(std::string_view input, const RE2& pattern);
// Tries to consume one header line from |source_|. Returns false when no
// header could be read. Writes the part name to |*name| and, when a filename
// is present, the filename to |*value| (setting |*value_assigned|).
// |*value_is_binary| is set for the octet-stream content type line.
bool TryReadHeader(std::string_view* name,
std::string_view* value,
bool* value_assigned,
bool* value_is_binary);
// Consumes the rest of the current part and the boundary after it. When
// |data| is non-null it receives the part body. Returns false on error.
bool FinishReadingPart(std::string_view* data);
// Accessors for the individual patterns of the shared Patterns instance.
const RE2& transfer_padding_pattern() const {
return patterns_->transfer_padding_pattern;
}
const RE2& closing_pattern() const {
return patterns_->closing_pattern;
}
const RE2& epilogue_pattern() const {
return patterns_->epilogue_pattern;
}
const RE2& crlf_free_pattern() const {
return patterns_->crlf_free_pattern;
}
const RE2& preamble_pattern() const {
return patterns_->preamble_pattern;
}
const RE2& header_pattern() const {
return patterns_->header_pattern;
}
const RE2& content_disposition_pattern() const {
return patterns_->content_disposition_pattern;
}
const RE2& name_pattern() const {
return patterns_->name_pattern;
}
const RE2& value_pattern() const {
return patterns_->value_pattern;
}
// "--" followed by the boundary passed to the constructor.
std::string dash_boundary_separator_;
State state_;
// Not yet parsed remainder of the current source. A view: bytes not owned.
std::string_view source_;
// The pattern set returned by GetPatterns().
raw_ptr<const Patterns> patterns_;
};
FormDataParser::Result::Result() = default;
FormDataParser::Result::~Result() = default;
void FormDataParser::Result::SetBinaryValue(std::string_view str) {
value_ = base::Value(base::ToVector(str));
}
void FormDataParser::Result::SetStringValue(std::string str) {
value_ = base::Value(std::move(str));
}
FormDataParser::~FormDataParser() = default;
// Builds a parser for a request by looking up its Content-Type header and
// delegating to CreateFromContentTypeHeader(). A missing header is forwarded
// as a null pointer.
std::unique_ptr<FormDataParser> FormDataParser::Create(
    const net::HttpRequestHeaders& request_headers) {
  const auto content_type =
      request_headers.GetHeader(net::HttpRequestHeaders::kContentType);
  return CreateFromContentTypeHeader(base::OptionalToPtr(content_type));
}
std::unique_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader(
const std::string* content_type_header) {
enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE};
ParserChoice choice = ERROR_CHOICE;
std::string boundary;
if (content_type_header == nullptr) {
choice = URL_ENCODED;
} else {
const std::string content_type(
content_type_header->substr(0, content_type_header->find(';')));
if (base::EqualsCaseInsensitiveASCII(content_type,
"application/x-www-form-urlencoded")) {
choice = URL_ENCODED;
} else if (base::EqualsCaseInsensitiveASCII(content_type,
"multipart/form-data")) {
static const char kBoundaryString[] = "boundary=";
size_t offset = content_type_header->find(kBoundaryString);
if (offset == std::string::npos) {
return nullptr;
}
offset += sizeof(kBoundaryString) - 1;
boundary = content_type_header->substr(
offset, content_type_header->find(';', offset));
if (!boundary.empty()) {
choice = MULTIPART;
}
}
}
switch (choice) {
case URL_ENCODED:
return std::make_unique<FormDataParserUrlEncoded>();
case MULTIPART:
return std::unique_ptr<FormDataParser>(
new FormDataParserMultipart(boundary));
case ERROR_CHOICE:
return nullptr;
}
NOTREACHED();
}
FormDataParser::FormDataParser() = default;
FormDataParserUrlEncoded::FormDataParserUrlEncoded()
: source_set_(false),
source_malformed_(false),
arg_name_(&name_),
arg_value_(&value_),
patterns_(GetPatterns()) {
args_[0] = &arg_name_;
args_[1] = &arg_value_;
}
FormDataParserUrlEncoded::~FormDataParserUrlEncoded() = default;
// Reports success only when a source was supplied, no malformed input was
// seen, and nothing of the source is left to parse.
bool FormDataParserUrlEncoded::AllDataReadOK() {
  if (!source_set_ || source_malformed_) {
    return false;
  }
  return source_.empty();
}
// Consumes the next "name=value" pair from the source and stores its
// unescaped form in |result|. The value is stored as a string when it is
// valid UTF-8 after unescaping and as binary data otherwise.
//
// Returns true if a pair was read and the input after it is well-formed.
// Returns false if there is no source, the source was already found
// malformed, no pair matched, or the pair is followed by something other than
// '&' or the end of the source.
bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
  if (!source_set_ || source_malformed_) {
    return false;
  }

  const bool matched =
      RE2::ConsumeN(&source_, pattern(), args_.data(), args_size_);
  if (matched) {
    const base::UnescapeRule::Type kUnescapeRules =
        base::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
    result->set_name(base::UnescapeBinaryURLComponent(name_, kUnescapeRules));

    std::string decoded_value =
        base::UnescapeBinaryURLComponent(value_, kUnescapeRules);
    if (!base::IsStringUTF8(decoded_value)) {
      result->SetBinaryValue(decoded_value);
    } else {
      result->SetStringValue(std::move(decoded_value));
    }
  }

  // The separator check runs whether or not a pair matched: a leading '&' is
  // skipped, anything else left at the front marks the source as malformed.
  if (!source_.empty()) {
    if (source_.front() == '&') {
      source_.remove_prefix(1);
    } else {
      source_malformed_ = true;
    }
  }

  return matched && !source_malformed_;
}
// Installs |source| as the data to parse. Only the first call is accepted;
// any later call returns false and changes nothing.
bool FormDataParserUrlEncoded::SetSource(std::string_view source) {
  const bool first_call = !source_set_;
  if (first_call) {
    source_malformed_ = false;
    source_set_ = true;
    source_ = source;
  }
  return first_call;
}
// Returns true if |pattern| matches at the beginning of |input|. Nothing is
// consumed and no submatches are extracted.
bool FormDataParserMultipart::StartsWithPattern(std::string_view input,
                                                const RE2& pattern) {
  const size_t search_begin = 0;
  const size_t search_end = input.size();
  return pattern.Match(input, search_begin, search_end, RE2::ANCHOR_START,
                       /*submatch=*/nullptr, /*nsubmatch=*/0);
}
// Stores the delimiter as it appears in the body ("--" + boundary) and starts
// in STATE_INIT, i.e. before the first boundary has been seen.
FormDataParserMultipart::FormDataParserMultipart(
const std::string& boundary_separator)
: dash_boundary_separator_("--" + boundary_separator),
state_(STATE_INIT),
patterns_(GetPatterns()) {}
FormDataParserMultipart::~FormDataParserMultipart() = default;
// Parsing counts as successful only once the closing boundary was consumed,
// which is the only place STATE_FINISHED is set.
bool FormDataParserMultipart::AllDataReadOK() {
return state_ == STATE_FINISHED;
}
// Consumes the body of the current part together with the boundary line that
// ends it.
//
// |data| may be null. When non-null it receives the part body, i.e. the bytes
// between the current position and the CRLF that precedes the boundary.
//
// On the closing boundary the parser moves to STATE_FINISHED. Any syntax
// error moves it to STATE_ERROR. Returns true unless the parser ended up in
// STATE_ERROR.
bool FormDataParserMultipart::FinishReadingPart(std::string_view* data) {
  const std::string_view part_begin = source_;

  // Skip one CRLF-terminated line at a time until a line starts with the
  // boundary.
  while (!source_.starts_with(dash_boundary_separator_)) {
    const bool line_consumed = RE2::Consume(&source_, crlf_free_pattern()) &&
                               ConsumePrefix(&source_, kCRLF);
    if (!line_consumed) {
      state_ = STATE_ERROR;
      return false;
    }
  }

  if (data != nullptr) {
    const size_t remaining = source_.size();
    if (part_begin.size() == remaining) {
      // Nothing was skipped, so there is no CRLF in front of the boundary.
      state_ = STATE_ERROR;
      return false;
    }
    // Everything consumed so far, minus the CRLF (2 characters) that belongs
    // to the boundary line rather than to the body.
    *data = part_begin.substr(0, part_begin.size() - remaining - 2);
  }

  CHECK(ConsumePrefix(&source_, dash_boundary_separator_));

  if (!StartsWithPattern(source_, closing_pattern())) {
    // Ordinary boundary: only padding and CRLF may follow it.
    if (!RE2::Consume(&source_, transfer_padding_pattern())) {
      state_ = STATE_ERROR;
    }
    return state_ != STATE_ERROR;
  }

  // Closing boundary: consume the "--" and then the epilogue.
  CHECK(RE2::Consume(&source_, closing_pattern()));
  state_ = RE2::Consume(&source_, epilogue_pattern()) ? STATE_FINISHED
                                                      : STATE_ERROR;
  return state_ != STATE_ERROR;
}
// Reads the next part: its headers, the empty line after them, and its body.
//
// |result| receives the unescaped part name. The value is the filename when
// the headers carried one, and the part body otherwise; a body is stored as
// binary when the part had the octet-stream content type line and as a string
// otherwise.
//
// Returns false when the parser is not in STATE_READY, when the source is
// exhausted, or on a syntax error (which also moves it to STATE_ERROR). If a
// part with a filename ends exactly where the source ends, the parser moves
// to STATE_SUSPEND and the rest of the part is expected in the next source.
bool FormDataParserMultipart::GetNextNameValue(Result* result) {
  if (state_ != STATE_READY || source_.empty()) {
    return false;
  }

  std::string_view name;
  std::string_view value;
  bool value_assigned = false;
  bool value_is_binary = false;

  // Read headers until none is left, accumulating what each one reports.
  for (;;) {
    bool header_assigns_value = false;
    bool header_marks_binary = false;
    if (!TryReadHeader(&name, &value, &header_assigns_value,
                       &header_marks_binary)) {
      break;
    }
    value_assigned = value_assigned || header_assigns_value;
    value_is_binary = value_is_binary || header_marks_binary;
  }

  if (state_ == STATE_ERROR || name.empty()) {
    state_ = STATE_ERROR;
    return false;
  }

  // The header section must be followed by an empty line.
  if (!ConsumePrefix(&source_, kCRLF)) {
    state_ = STATE_ERROR;
    return false;
  }

  bool part_ok = true;
  if (value_assigned && source_.empty()) {
    state_ = STATE_SUSPEND;
  } else {
    // The body is only captured when the headers did not supply the value.
    part_ok = FinishReadingPart(value_assigned ? nullptr : &value);
  }

  result->set_name(base::UnescapeBinaryURLComponent(name));
  if (value_is_binary && !value_assigned) {
    result->SetBinaryValue(value);
  } else {
    result->SetStringValue(std::string(value));
  }
  return part_ok;
}
bool FormDataParserMultipart::SetSource(std::string_view source) {
if (source.data() == nullptr || !source_.empty()) {
return false;
}
source_ = source;
switch (state_) {
case STATE_INIT:
while (!source_.starts_with(dash_boundary_separator_)) {
if (!RE2::Consume(&source_, preamble_pattern())) {
state_ = STATE_ERROR;
break;
}
}
if (state_ != STATE_ERROR) {
if (!ConsumePrefix(&source_, dash_boundary_separator_) ||
!RE2::Consume(&source_, transfer_padding_pattern())) {
state_ = STATE_ERROR;
} else {
state_ = STATE_READY;
}
}
break;
case STATE_READY:
break;
case STATE_SUSPEND:
state_ = FinishReadingPart(nullptr) ? STATE_READY : STATE_ERROR;
break;
default:
state_ = STATE_ERROR;
}
return state_ != STATE_ERROR;
}
bool FormDataParserMultipart::TryReadHeader(std::string_view* name,
std::string_view* value,
bool* value_assigned,
bool* value_is_binary) {
*value_assigned = false;
*value_is_binary = false;
if (ConsumePrefix(&source_, kContentTypeOctetString)) {
*value_is_binary = true;
return true;
}
const char* header_start = source_.data();
if (!RE2::Consume(&source_, header_pattern())) {
return false;
}
std::string_view header(header_start, source_.data() - header_start - 2);
if (!StartsWithPattern(header, content_disposition_pattern())) {
return true;
}
std::string_view groups[2];
if (!name_pattern().Match(header,
kContentDispositionLength, header.size(),
RE2::UNANCHORED, groups, 2)) {
state_ = STATE_ERROR;
return true;
}
*name = groups[1];
if (value_pattern().Match(header,
kContentDispositionLength, header.size(),
RE2::UNANCHORED, groups, 2)) {
*value = groups[1];
*value_assigned = true;
}
return true;
}
}