#include "net/http/http_content_disposition.h"
#include <string>
#include <string_view>
#include "base/base64.h"
#include "base/check_op.h"
#include "base/strings/escape.h"
#include "base/strings/string_tokenizer.h"
#include "base/strings/string_util.h"
#include "base/strings/sys_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "net/base/net_string_util.h"
#include "net/http/http_util.h"
namespace net {
namespace {
// Selects which payload decoder DecodeBQEncoding() applies to an encoded
// word. DecodeWord() picks B_ENCODING when the encoding field is 'b' or 'B'
// and otherwise leaves the default, Q_ENCODING.
enum RFC2047EncodingType {
// Payload is decoded with DecodeQEncoding().
Q_ENCODING,
// Payload is decoded with base::Base64Decode().
B_ENCODING
};
// Decodes a "Q"-encoded payload from |input| into raw bytes.
//
// Decoding rules, applied per character:
//   * '_' becomes a space.
//   * '=' must be followed by exactly two hex digits and becomes the byte
//     they spell.
//   * Any other character strictly between 0x20 and 0x7F, except '?', is
//     copied verbatim.
//   * Everything else makes the whole input invalid.
//
// Returns true and stores the decoded bytes in |output| on success. On
// failure returns false and leaves |output| untouched.
bool DecodeQEncoding(std::string_view input, std::string* output) {
  const size_t length = input.size();
  std::string result;
  result.reserve(length);

  size_t pos = 0;
  while (pos < length) {
    const char c = input[pos];

    if (c == '_') {
      result.push_back(' ');
      ++pos;
      continue;
    }

    if (c == '=') {
      // An escape needs the '=' plus two hex digits, i.e. three characters.
      const bool has_hex_pair =
          length - pos >= 3 &&
          base::IsHexDigit(static_cast<unsigned char>(input[pos + 1])) &&
          base::IsHexDigit(static_cast<unsigned char>(input[pos + 2]));
      if (!has_hex_pair)
        return false;
      const unsigned char byte = base::HexDigitToInt(input[pos + 1]) * 16 +
                                 base::HexDigitToInt(input[pos + 2]);
      result.push_back(static_cast<char>(byte));
      pos += 3;
      continue;
    }

    const bool is_literal = 0x20 < c && c < 0x7F && c != '?';
    if (!is_literal)
      return false;

    // '=' and '_' were consumed by the branches above and '?' was just
    // excluded, so none of them can reach this point.
    DCHECK_NE('=', c);
    DCHECK_NE('?', c);
    DCHECK_NE('_', c);
    result.push_back(c);
    ++pos;
  }

  output->swap(result);
  return true;
}
// Decodes the payload |part| of an encoded word and converts it to UTF-8.
//
// |enc_type| chooses the byte-level decoder: base::Base64Decode() for
// B_ENCODING, DecodeQEncoding() otherwise. The decoded bytes are then
// converted from |charset| to UTF-8 with ConvertToUtf8().
//
// Returns false if either the byte-level decoding or the charset conversion
// fails. An empty decoded payload succeeds without consulting |charset| and
// clears |output|.
bool DecodeBQEncoding(std::string_view part,
                      RFC2047EncodingType enc_type,
                      const std::string& charset,
                      std::string* output) {
  std::string raw_bytes;
  bool decode_ok = false;
  if (enc_type == B_ENCODING) {
    decode_ok = base::Base64Decode(part, &raw_bytes);
  } else {
    decode_ok = DecodeQEncoding(part, &raw_bytes);
  }
  if (!decode_ok)
    return false;

  if (!raw_bytes.empty())
    return ConvertToUtf8(raw_bytes, charset.c_str(), output);

  // Nothing to convert, so skip the charset conversion entirely.
  output->clear();
  return true;
}
// Decodes one word of a filename value into UTF-8 and stores it in |output|.
//
// Three strategies are tried, in this order:
//   1. Non-ASCII input is interpreted as UTF-8, then as |referrer_charset|,
//      then via base::SysNativeMBToWide().
//   2. ASCII input is split on '?' and parsed as the five-part form
//      "=?charset?E?payload?=", where E is one of b, B, q, Q.
//   3. ASCII input that does not start like (2), or whose encoding field is
//      not a single b/B/q/Q, is percent-unescaped and accepted only if the
//      result is valid UTF-8.
//
// |is_rfc2047| is set to true only when the word was accepted through (2).
// Bits describing what was seen are OR-ed into |*parse_result_flags|.
// |is_rfc2047|, |output| and |parse_result_flags| are all dereferenced
// unconditionally and therefore must be non-null.
//
// Returns false when the word cannot be decoded; |output| is then empty.
bool DecodeWord(std::string_view encoded_word,
const std::string& referrer_charset,
bool* is_rfc2047,
std::string* output,
int* parse_result_flags) {
*is_rfc2047 = false;
output->clear();
if (encoded_word.empty())
return true;
// Strategy 1: raw non-ASCII bytes. This branch always succeeds.
if (!base::IsStringASCII(encoded_word)) {
if (base::IsStringUTF8(encoded_word)) {
*output = std::string(encoded_word);
} else {
std::u16string utf16_output;
// Prefer the referrer's charset when one was supplied and the conversion
// works; otherwise fall back to the system's native multibyte encoding.
if (!referrer_charset.empty() &&
ConvertToUTF16(encoded_word, referrer_charset.c_str(),
&utf16_output)) {
*output = base::UTF16ToUTF8(utf16_output);
} else {
*output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word));
}
}
*parse_result_flags |= HttpContentDisposition::HAS_NON_ASCII_STRINGS;
return true;
}
// Strategy 2: "=?charset?E?payload?=". |*is_rfc2047| doubles as the loop
// condition: clearing it and breaking out of the switch abandons this
// strategy and falls through to strategy 3, whereas the early
// "return false" statements reject the word outright.
std::string decoded_word;
*is_rfc2047 = true;
// Index of the '?'-separated part expected next (0 through 4).
int part_index = 0;
std::string charset;
base::StringViewTokenizer t(encoded_word, "?");
RFC2047EncodingType enc_type = Q_ENCODING;
while (*is_rfc2047 && t.GetNext()) {
std::string_view part = t.token_piece();
switch (part_index) {
case 0:
// Leading "=".
if (part != "=") {
*is_rfc2047 = false;
break;
}
++part_index;
break;
case 1:
// Charset name; stored as-is, no validation is done here.
charset = std::string(part);
++part_index;
break;
case 2:
// Encoding selector: must be exactly one of b, B, q, Q.
if (part.size() > 1 ||
part.find_first_of("bBqQ") == std::string::npos) {
*is_rfc2047 = false;
break;
}
if (part[0] == 'b' || part[0] == 'B') {
enc_type = B_ENCODING;
}
++part_index;
break;
case 3:
// Encoded payload. A decoding failure this late rejects the whole word
// instead of falling back to percent-decoding.
*is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word);
if (!*is_rfc2047) {
return false;
}
++part_index;
break;
case 4:
// Trailing "="; anything else rejects the word.
if (part != "=") {
*is_rfc2047 = false;
return false;
}
++part_index;
break;
default:
// More than five parts.
*is_rfc2047 = false;
return false;
}
}
if (*is_rfc2047) {
// NOTE(review): this check does not require that all five parts were seen.
// Assuming the tokenizer yields only the text between '?' characters, a
// word such as "=" appears to reach here with |decoded_word| still empty
// and be accepted as an empty encoded word - confirm this is intended.
if (*(encoded_word.end() - 1) == '=') {
output->swap(decoded_word);
*parse_result_flags |=
HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS;
return true;
}
// The word does not end with '=', e.g. it ends with '?'.
*is_rfc2047 = false;
return false;
}
// Strategy 3: percent-escaped UTF-8.
decoded_word = base::UnescapeBinaryURLComponent(encoded_word,
base::UnescapeRule::NORMAL);
// Unescaping changed the text, so at least one %-escape was present.
if (decoded_word != encoded_word)
*parse_result_flags |= HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS;
if (base::IsStringUTF8(decoded_word)) {
output->swap(decoded_word);
return true;
}
return false;
}
// Decodes the value of a "filename" parameter into UTF-8.
//
// |input| is split on whitespace (space, tab, CR, LF) and each word is
// decoded independently with DecodeWord(). Whitespace handling:
//   * Every whitespace character that follows a word which was NOT decoded
//     as an RFC 2047 encoded word is emitted as a single ' '.
//   * Whitespace following an RFC 2047 encoded word is dropped, as is
//     leading whitespace (the "previous token" state starts out as true).
//
// |referrer_charset| is forwarded to DecodeWord() as a fallback charset.
// |parse_result_flags| may be null. When non-null, the flags collected while
// decoding are OR-ed into it, but only if the decoded value is non-empty.
//
// Returns false as soon as any word fails to decode; |output| and
// |parse_result_flags| are left untouched in that case.
bool DecodeFilenameValue(std::string_view input,
                         const std::string& referrer_charset,
                         std::string* output,
                         int* parse_result_flags) {
  // Flags are accumulated locally so that a failed or empty decode does not
  // leak partial state to the caller.
  int current_parse_result_flags = 0;
  std::string decoded_value;
  bool is_previous_token_rfc2047 = true;

  base::StringViewTokenizer t(input, " \t\n\r");
  t.set_options(base::StringViewTokenizer::RETURN_DELIMS);
  while (t.GetNext()) {
    if (t.token_is_delim()) {
      if (!is_previous_token_rfc2047)
        decoded_value.push_back(' ');
      continue;
    }

    std::string decoded;
    if (!DecodeWord(t.token_piece(), referrer_charset,
                    &is_previous_token_rfc2047, &decoded,
                    &current_parse_result_flags)) {
      return false;
    }
    decoded_value.append(decoded);
  }

  output->swap(decoded_value);
  if (parse_result_flags && !output->empty())
    *parse_result_flags |= current_parse_result_flags;
  return true;
}
// Splits an extended parameter value of the form
//     charset'language'value-chars
// into its charset and value components. The language field may be empty
// and is ignored.
//
// The input is accepted only if it contains exactly two single quotes and
// both the charset (text before the first quote) and the value (text after
// the second quote) are non-empty.
//
// Returns true and fills |charset| and |value_chars| on success. On failure
// returns false and leaves both outputs untouched.
bool ParseExtValueComponents(std::string_view input,
                             std::string* charset,
                             std::string* value_chars) {
  constexpr char kSeparator = '\'';

  const size_t first_quote = input.find(kSeparator);
  if (first_quote == std::string_view::npos)
    return false;

  const size_t second_quote = input.find(kSeparator, first_quote + 1);
  if (second_quote == std::string_view::npos)
    return false;

  // A third quote means the value is malformed.
  if (input.find(kSeparator, second_quote + 1) != std::string_view::npos)
    return false;

  const std::string_view charset_part = input.substr(0, first_quote);
  const std::string_view value_part = input.substr(second_quote + 1);
  if (charset_part.empty() || value_part.empty())
    return false;

  *charset = std::string(charset_part);
  *value_chars = std::string(value_part);
  return true;
}
// Decodes the raw value of a "filename*" parameter into UTF-8.
//
// The value is rejected (returns false) when it contains a double quote or
// when ParseExtValueComponents() cannot split it into charset and value.
// A value part containing non-ASCII characters is not treated as an error:
// |decoded| is cleared and true is returned. Otherwise the value is
// percent-unescaped and converted from the declared charset with
// ConvertToUtf8AndNormalize(), whose result is returned.
bool DecodeExtValue(std::string_view param_value, std::string* decoded) {
  std::string charset_name;
  std::string encoded_value;
  const bool contains_quote = param_value.find('"') != std::string_view::npos;
  if (contains_quote ||
      !ParseExtValueComponents(param_value, &charset_name, &encoded_value)) {
    return false;
  }

  if (base::IsStringASCII(encoded_value)) {
    const std::string raw_bytes = base::UnescapeBinaryURLComponent(
        encoded_value, base::UnescapeRule::NORMAL);
    return ConvertToUtf8AndNormalize(raw_bytes, charset_name.c_str(), decoded);
  }

  // Non-ASCII value characters: report success with an empty result.
  decoded->clear();
  return true;
}
}
// Constructs the object by parsing |header| immediately. |referrer_charset|
// is passed through to Parse(), which uses it when decoding the "filename"
// parameter.
HttpContentDisposition::HttpContentDisposition(
const std::string& header,
const std::string& referrer_charset) {
// All work happens in Parse(), which fills in |type_|, |filename_| and
// |parse_result_flags_|.
Parse(header, referrer_charset);
}
// Defaulted destructor, defined out of line in this file.
HttpContentDisposition::~HttpContentDisposition() = default;
// Reads the disposition type from the front of |header| and returns the
// remainder of the header that still has to be parsed as parameters.
//
// The candidate type is everything before the first ';', trimmed with
// HttpUtil::TrimLWS(). If it is empty or not a valid token, nothing is
// consumed: the whole |header| is returned and |type_| is left unchanged.
//
// Otherwise HAS_DISPOSITION_TYPE is recorded and |type_| is set: "inline"
// (case-insensitive) selects INLINE, and every other token selects
// ATTACHMENT. A token other than "inline" or "attachment" additionally
// records HAS_UNKNOWN_DISPOSITION_TYPE. The returned view starts just after
// the ';', or is empty when the header has no ';'.
std::string_view HttpContentDisposition::ConsumeDispositionType(
    std::string_view header) {
  DCHECK(type_ == INLINE);

  const size_t semicolon_pos = header.find(';');
  const std::string_view disposition =
      HttpUtil::TrimLWS(header.substr(0, semicolon_pos));
  if (disposition.empty() || !HttpUtil::IsToken(disposition))
    return header;

  parse_result_flags_ |= HAS_DISPOSITION_TYPE;
  DCHECK(disposition.find('=') == std::string_view::npos);

  if (base::EqualsCaseInsensitiveASCII(disposition, "inline")) {
    type_ = INLINE;
  } else {
    // Anything that is not "inline" is handled as an attachment; only the
    // literal "attachment" avoids the unknown-type flag.
    if (!base::EqualsCaseInsensitiveASCII(disposition, "attachment"))
      parse_result_flags_ |= HAS_UNKNOWN_DISPOSITION_TYPE;
    type_ = ATTACHMENT;
  }

  return semicolon_pos == std::string_view::npos
             ? std::string_view()
             : header.substr(semicolon_pos + 1);
}
void HttpContentDisposition::Parse(const std::string& header,
const std::string& referrer_charset) {
DCHECK(type_ == INLINE);
DCHECK(filename_.empty());
std::string_view params = ConsumeDispositionType(header);
std::string filename;
std::string ext_filename;
HttpUtil::NameValuePairsIterator iter(params, ';');
while (iter.GetNext()) {
if (filename.empty() &&
base::EqualsCaseInsensitiveASCII(iter.name(), "filename")) {
DecodeFilenameValue(iter.value(), referrer_charset, &filename,
&parse_result_flags_);
if (!filename.empty()) {
parse_result_flags_ |= HAS_FILENAME;
if (filename[0] == '\'')
parse_result_flags_ |= HAS_SINGLE_QUOTED_FILENAME;
}
} else if (ext_filename.empty() &&
base::EqualsCaseInsensitiveASCII(iter.name(), "filename*")) {
DecodeExtValue(iter.raw_value(), &ext_filename);
if (!ext_filename.empty())
parse_result_flags_ |= HAS_EXT_FILENAME;
}
}
if (!ext_filename.empty())
filename_ = ext_filename;
else
filename_ = filename;
if (!filename.empty() && filename[0] == '\'')
parse_result_flags_ |= HAS_SINGLE_QUOTED_FILENAME;
}
}