#include "base/json/json_parser.h"
#include <cmath>
#include <iterator>
#include <utility>
#include <vector>
#include "base/check_op.h"
#include "base/json/json_reader.h"
#include "base/metrics/histogram_functions.h"
#include "base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/strings/utf_string_conversions.h"
#include "base/third_party/icu/icu_utf.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
namespace base {
namespace internal {
namespace {
// Compile-time guard that the parser's error enum stays below 1000.
// NOTE(review): presumably values of 1000 and above are reserved for a
// different error space layered on top of these codes -- confirm against the
// header before changing the bound.
static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
"JSONParser error out of bounds");
// Maps a parser error code to its human-readable description. JSON_NO_ERROR
// yields an empty string. JSON_PARSE_ERROR_COUNT (and any value outside the
// enum) is not a real error and trips NOTREACHED().
std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
  // Points at the matching message constant once one has been selected.
  const char* description = nullptr;
  switch (error_code) {
    case JSONParser::JSON_NO_ERROR:
      return std::string();
    case JSONParser::JSON_SYNTAX_ERROR:
      description = JSONParser::kSyntaxError;
      break;
    case JSONParser::JSON_INVALID_ESCAPE:
      description = JSONParser::kInvalidEscape;
      break;
    case JSONParser::JSON_UNEXPECTED_TOKEN:
      description = JSONParser::kUnexpectedToken;
      break;
    case JSONParser::JSON_TRAILING_COMMA:
      description = JSONParser::kTrailingComma;
      break;
    case JSONParser::JSON_TOO_MUCH_NESTING:
      description = JSONParser::kTooMuchNesting;
      break;
    case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
      description = JSONParser::kUnexpectedDataAfterRoot;
      break;
    case JSONParser::JSON_UNSUPPORTED_ENCODING:
      description = JSONParser::kUnsupportedEncoding;
      break;
    case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
      description = JSONParser::kUnquotedDictionaryKey;
      break;
    case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
      description = JSONParser::kUnrepresentableNumber;
      break;
    case JSONParser::JSON_PARSE_ERROR_COUNT:
      break;
  }
  if (description != nullptr)
    return description;
  NOTREACHED();
  return std::string();
}
// First code point outside 7-bit ASCII. Code points below this value are
// handled as a single char by the string builder and the string fast path.
const int32_t kExtendedASCIIStart = 0x80;
// U+FFFD REPLACEMENT CHARACTER, substituted for undecodable input when the
// JSON_REPLACE_INVALID_CHARACTERS option is set.
constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
// Parses |input| as a hexadecimal integer into |*output|, but only when every
// character of |input| is a hex digit. Anything else is rejected up front,
// before the text is handed to HexStringToInt(). Returns true on success.
bool UnprefixedHexStringToInt(StringPiece input, int* output) {
  for (const char ch : input) {
    if (!IsHexDigit(ch))
      return false;
  }
  return HexStringToInt(input, output);
}
// Non-standard JSON syntax that this parser recognises. Each occurrence is
// recorded in the |kExtensionHistogramName| histogram, whether or not the
// corresponding option allows it.
// NOTE(review): as histogram buckets these values presumably must not be
// reordered or renumbered -- confirm before editing.
enum class ChromiumJsonExtension {
// A /* ... */ block comment.
kCComment,
// A // line comment.
kCppComment,
// A \xNN escape inside a string.
kXEscape,
// A \v escape inside a string.
kVerticalTabEscape,
// An unescaped control character (<= 0x1F) inside a string.
kControlCharacter,
// Alias for the last enumerator.
kMaxValue = kControlCharacter,
};
// Name of the enumeration histogram that receives a ChromiumJsonExtension
// sample each time one of those extensions is encountered.
const char kExtensionHistogramName[] =
"Security.JSONParser.ChromiumExtensionUsage";
}
// UTF-8 encoding of U+FFFD (the Unicode replacement character).
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
// Human-readable descriptions, one per JsonParseError value other than
// JSON_NO_ERROR. ErrorCodeToString() returns these, and GetErrorMessage()
// embeds them in the formatted message.
const char JSONParser::kSyntaxError[] = "Syntax error.";
const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
const char JSONParser::kUnexpectedDataAfterRoot[] =
"Unexpected data after root element.";
const char JSONParser::kUnsupportedEncoding[] =
"Unsupported encoding. JSON must be UTF-8.";
const char JSONParser::kUnquotedDictionaryKey[] =
"Dictionary keys must be quoted.";
const char JSONParser::kUnrepresentableNumber[] =
"Number cannot be represented.";
// Creates a parser with the given option bitmask (tested against the JSON_*
// flags throughout this file) and maximum nesting depth. All position and
// error state starts zeroed; Parse() re-initialises it for each input.
// CHECK-fails if |max_depth| exceeds kAbsoluteMaxDepth.
JSONParser::JSONParser(int options, size_t max_depth)
: options_(options),
max_depth_(max_depth),
index_(0),
stack_depth_(0),
line_number_(0),
index_last_line_(0),
error_code_(JSON_NO_ERROR),
error_line_(0),
error_column_(0) {
CHECK_LE(max_depth, kAbsoluteMaxDepth);
}
// Defaulted destructor, defined out of line.
JSONParser::~JSONParser() = default;
// Parses |input| as a single JSON document. Returns the root value, or
// nullopt on failure, in which case the error code, line and column have been
// recorded via ReportError().
absl::optional<Value> JSONParser::Parse(StringPiece input) {
  // Clear any error left over from a previous call.
  error_code_ = JSON_NO_ERROR;
  error_line_ = 0;
  error_column_ = 0;

  // Rewind to the start of the new input. Lines are counted from 1.
  // |index_last_line_| holds the index of the most recent line break, so it
  // starts at -1 (wrapped): an imaginary break just before index 0. That puts
  // the first byte in column (0 - -1) = 1.
  input_ = input;
  index_ = 0;
  line_number_ = 1;
  index_last_line_ = static_cast<size_t>(-1);

  // Step over a leading UTF-8 byte-order mark, if there is one.
  ConsumeIfMatch("\xEF\xBB\xBF");

  absl::optional<Value> root = ParseNextToken();
  // A successfully parsed root must be followed only by whitespace/comments.
  if (root && GetNextToken() != T_END_OF_INPUT) {
    ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
    root.reset();
  }
  return root;
}
// Returns the error code recorded by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been reported since the last reset.
JSONParser::JsonParseError JSONParser::error_code() const {
return error_code_;
}
// Builds the full message for the recorded error: its description, prefixed
// with the line and column when either is non-zero.
std::string JSONParser::GetErrorMessage() const {
  const std::string description = ErrorCodeToString(error_code_);
  return FormatErrorMessage(error_line_, error_column_, description);
}
// Returns the 1-based line number recorded for the last error (0 if none).
int JSONParser::error_line() const {
return error_line_;
}
// Returns the column recorded for the last error (0 if none). ReportError()
// clamps reported columns to a minimum of 1.
int JSONParser::error_column() const {
return error_column_;
}
// Creates an empty builder with no backing input position.
JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
// Creates a builder that initially refers to zero bytes starting at |pos|.
// Until Convert() is called, the builder only tracks a length over that
// buffer and owns no copy of the characters.
JSONParser::StringBuilder::StringBuilder(const char* pos)
: pos_(pos), length_(0) {}
// Defaulted destructor, defined out of line.
JSONParser::StringBuilder::~StringBuilder() = default;
// Defaulted move assignment. ConsumeStringRaw() uses it to hand its finished
// builder to the caller.
JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
StringBuilder&& other) = default;
// Appends the code point |point| to the string being built. |point| must be
// a valid code point (DCHECKed).
void JSONParser::StringBuilder::Append(base_icu::UChar32 point) {
  DCHECK(IsValidCodepoint(point));

  if (point >= kExtendedASCIIStart) {
    // Non-ASCII output needs an owned std::string to be encoded into.
    Convert();
    if (UNLIKELY(point == kUnicodeReplacementPoint)) {
      string_->append(kUnicodeReplacementString);
    } else {
      WriteUnicodeCharacter(point, &*string_);
    }
    return;
  }

  if (string_) {
    // Already converted to an owned string: append the ASCII char to it.
    string_->push_back(static_cast<char>(point));
    return;
  }

  // Still a view over the input: the appended char must be the next input
  // byte, so growing the view by one is enough.
  DCHECK_EQ(static_cast<char>(point), pos_[length_]);
  ++length_;
}
// Switches the builder to an owned std::string by copying the |length_| bytes
// at |pos_| seen so far. Does nothing if the copy was already made.
void JSONParser::StringBuilder::Convert() {
  if (!string_)
    string_.emplace(pos_, length_);
}
// Returns the built string. If the builder owns a std::string, that string is
// moved out, leaving the builder's copy in a moved-from state. Otherwise a
// new string is copied from the viewed input bytes.
std::string JSONParser::StringBuilder::DestructiveAsString() {
  if (!string_)
    return std::string(pos_, length_);
  return std::move(*string_);
}
// Returns a view of the next |count| bytes of the input without consuming
// them, or nullopt if fewer than |count| bytes remain.
absl::optional<StringPiece> JSONParser::PeekChars(size_t count) {
  // Compare |count| against the number of bytes remaining instead of forming
  // |index_ + count|. That sum can wrap around in size_t for a very large
  // |count| and make an out-of-range request look valid. The |index_| check
  // keeps the subtraction itself from wrapping.
  const size_t length = input_.length();
  if (index_ > length || count > length - index_)
    return absl::nullopt;
  // The view is built directly from data() plus an offset.
  return StringPiece(input_.data() + index_, count);
}
// Returns the next input byte without consuming it, or nullopt at the end of
// the input.
absl::optional<char> JSONParser::PeekChar() {
  const absl::optional<StringPiece> next = PeekChars(1);
  if (!next)
    return absl::nullopt;
  return (*next)[0];
}
// Returns a view of the next |count| input bytes and advances past them. If
// fewer than |count| bytes remain, returns nullopt and leaves the position
// unchanged.
absl::optional<StringPiece> JSONParser::ConsumeChars(size_t count) {
  absl::optional<StringPiece> taken = PeekChars(count);
  if (taken.has_value())
    index_ += count;
  return taken;
}
// Returns the next input byte and advances past it, or returns nullopt
// (without advancing) at the end of the input.
absl::optional<char> JSONParser::ConsumeChar() {
  const absl::optional<StringPiece> taken = ConsumeChars(1);
  if (!taken)
    return absl::nullopt;
  return (*taken)[0];
}
// Returns a pointer to the current read position inside the input buffer.
// CHECK-fails if the position has moved past the end of the input. A
// position equal to the input length (one past the last byte) is allowed.
const char* JSONParser::pos() {
CHECK_LE(index_, input_.length());
return input_.data() + index_;
}
// Skips whitespace and comments, then classifies the upcoming token from its
// first character alone. The character itself is not consumed. Returns
// T_END_OF_INPUT when nothing is left and T_INVALID_TOKEN for any character
// that cannot start a token.
JSONParser::Token JSONParser::GetNextToken() {
  EatWhitespaceAndComments();

  const absl::optional<char> next = PeekChar();
  if (!next.has_value())
    return T_END_OF_INPUT;

  const char ch = *next;
  // A number starts with a minus sign or a decimal digit.
  if (ch == '-' || (ch >= '0' && ch <= '9'))
    return T_NUMBER;

  switch (ch) {
    case '{':
      return T_OBJECT_BEGIN;
    case '}':
      return T_OBJECT_END;
    case '[':
      return T_ARRAY_BEGIN;
    case ']':
      return T_ARRAY_END;
    case '"':
      return T_STRING;
    // Literals are identified by their first letter only; ConsumeLiteral()
    // verifies the rest.
    case 't':
      return T_BOOL_TRUE;
    case 'f':
      return T_BOOL_FALSE;
    case 'n':
      return T_NULL;
    case ',':
      return T_LIST_SEPARATOR;
    case ':':
      return T_OBJECT_PAIR_SEPARATOR;
    default:
      return T_INVALID_TOKEN;
  }
}
void JSONParser::EatWhitespaceAndComments() {
while (absl::optional<char> c = PeekChar()) {
switch (*c) {
case '\r':
case '\n':
index_last_line_ = index_;
if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
++line_number_;
}
[[fallthrough]];
case ' ':
case '\t':
ConsumeChar();
break;
case '/':
if (!EatComment())
return;
break;
default:
return;
}
}
}
bool JSONParser::EatComment() {
absl::optional<StringPiece> comment_start = PeekChars(2);
if (!comment_start)
return false;
const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
if (comment_start == "//") {
UmaHistogramEnumeration(kExtensionHistogramName,
ChromiumJsonExtension::kCppComment);
if (!comments_allowed) {
ReportError(JSON_UNEXPECTED_TOKEN, 0);
return false;
}
ConsumeChars(2);
while (absl::optional<char> c = PeekChar()) {
if (c == '\n' || c == '\r')
return true;
ConsumeChar();
}
} else if (comment_start == "/*") {
UmaHistogramEnumeration(kExtensionHistogramName,
ChromiumJsonExtension::kCComment);
if (!comments_allowed) {
ReportError(JSON_UNEXPECTED_TOKEN, 0);
return false;
}
ConsumeChars(2);
char previous_char = '\0';
while (absl::optional<char> c = PeekChar()) {
if (previous_char == '*' && c == '/') {
ConsumeChar();
return true;
}
previous_char = *ConsumeChar();
}
}
return false;
}
// Classifies the upcoming token and parses the value that starts there.
absl::optional<Value> JSONParser::ParseNextToken() {
  const Token upcoming = GetNextToken();
  return ParseToken(upcoming);
}
// Dispatches to the consumer for the value kind that |token| starts. Any
// token that cannot begin a value reports JSON_UNEXPECTED_TOKEN and yields
// nullopt.
absl::optional<Value> JSONParser::ParseToken(Token token) {
  if (token == T_OBJECT_BEGIN)
    return ConsumeDictionary();
  if (token == T_ARRAY_BEGIN)
    return ConsumeList();
  if (token == T_STRING)
    return ConsumeString();
  if (token == T_NUMBER)
    return ConsumeNumber();
  if (token == T_BOOL_TRUE || token == T_BOOL_FALSE || token == T_NULL)
    return ConsumeLiteral();

  ReportError(JSON_UNEXPECTED_TOKEN, 0);
  return absl::nullopt;
}
// Parses a JSON object starting at '{' and returns it as a dictionary Value.
// Returns nullopt after reporting an error if the object is malformed, nests
// too deeply, or contains a value that fails to parse.
absl::optional<Value> JSONParser::ConsumeDictionary() {
if (ConsumeChar() != '{') {
ReportError(JSON_UNEXPECTED_TOKEN, 0);
return absl::nullopt;
}
// Tracks nesting depth for the lifetime of this call.
StackMarker depth_check(max_depth_, &stack_depth_);
if (depth_check.IsTooDeep()) {
ReportError(JSON_TOO_MUCH_NESTING, -1);
return absl::nullopt;
}
// Key/value pairs are collected in input order and turned into a Dict at the
// end.
std::vector<std::pair<std::string, Value>> values;
Token token = GetNextToken();
while (token != T_OBJECT_END) {
// Every member must start with a quoted key.
if (token != T_STRING) {
ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
return absl::nullopt;
}
// Consume the key.
StringBuilder key;
if (!ConsumeStringRaw(&key)) {
return absl::nullopt;
}
// The key must be followed by ':'.
token = GetNextToken();
if (token != T_OBJECT_PAIR_SEPARATOR) {
ReportError(JSON_SYNTAX_ERROR, 0);
return absl::nullopt;
}
// Consume the ':' and parse the value after it.
ConsumeChar();
absl::optional<Value> value = ParseNextToken();
if (!value) {
// The nested parse has already reported the error.
return absl::nullopt;
}
values.emplace_back(key.DestructiveAsString(), std::move(*value));
// After a member comes either ',' (more members) or the closing '}'.
token = GetNextToken();
if (token == T_LIST_SEPARATOR) {
ConsumeChar();
token = GetNextToken();
// A ',' immediately followed by '}' is a trailing comma.
if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
ReportError(JSON_TRAILING_COMMA, 0);
return absl::nullopt;
}
} else if (token != T_OBJECT_END) {
ReportError(JSON_SYNTAX_ERROR, 0);
return absl::nullopt;
}
}
// Consume the closing '}'.
ConsumeChar();
// NOTE(review): the reversal presumably makes the last occurrence of a
// duplicated key win when the Dict is built from the range -- confirm against
// the Value::Dict range constructor.
ranges::reverse(values);
return Value(Value::Dict(std::make_move_iterator(values.begin()),
std::make_move_iterator(values.end())));
}
// Parses a JSON array starting at '[' and returns it as a list Value.
// Returns nullopt after reporting an error if the array is malformed, nests
// too deeply, or contains an element that fails to parse.
absl::optional<Value> JSONParser::ConsumeList() {
if (ConsumeChar() != '[') {
ReportError(JSON_UNEXPECTED_TOKEN, 0);
return absl::nullopt;
}
// Tracks nesting depth for the lifetime of this call.
StackMarker depth_check(max_depth_, &stack_depth_);
if (depth_check.IsTooDeep()) {
ReportError(JSON_TOO_MUCH_NESTING, -1);
return absl::nullopt;
}
Value::List list;
Token token = GetNextToken();
while (token != T_ARRAY_END) {
// |token| has already been classified, so parse it directly.
absl::optional<Value> item = ParseToken(token);
if (!item) {
// The nested parse has already reported the error.
return absl::nullopt;
}
list.Append(std::move(*item));
// After an element comes either ',' (more elements) or the closing ']'.
token = GetNextToken();
if (token == T_LIST_SEPARATOR) {
ConsumeChar();
token = GetNextToken();
// A ',' immediately followed by ']' is a trailing comma.
if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
ReportError(JSON_TRAILING_COMMA, 0);
return absl::nullopt;
}
} else if (token != T_ARRAY_END) {
ReportError(JSON_SYNTAX_ERROR, 0);
return absl::nullopt;
}
}
// Consume the closing ']'.
ConsumeChar();
return Value(std::move(list));
}
// Parses a quoted string at the current position and wraps it in a Value.
// Returns nullopt if the string is malformed; the error has then already been
// reported by ConsumeStringRaw().
absl::optional<Value> JSONParser::ConsumeString() {
  StringBuilder builder;
  if (ConsumeStringRaw(&builder))
    return Value(builder.DestructiveAsString());
  return absl::nullopt;
}
// Parses a quoted JSON string at the current position, decoding escape
// sequences, and moves the result into |*out|. Returns true on success with
// the position just past the closing quote. On failure an error is reported,
// false is returned and |*out| is left untouched.
bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
if (ConsumeChar() != '"') {
ReportError(JSON_UNEXPECTED_TOKEN, 0);
return false;
}
// The builder starts as a view over the input just after the opening quote.
// It only copies into an owned string once Convert() is called (directly
// below, or by Append() for non-ASCII code points).
StringBuilder string(pos());
while (absl::optional<char> c = PeekChar()) {
base_icu::UChar32 next_char = 0;
if (static_cast<unsigned char>(*c) < kExtendedASCIIStart) {
// Fast path: a single ASCII byte.
next_char = *c;
} else if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
&next_char) ||
!IsValidCodepoint(next_char)) {
// The multi-byte sequence could not be decoded to a valid code point.
// This is an error unless the caller asked for invalid input to be replaced.
if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
ReportError(JSON_UNSUPPORTED_ENCODING, 0);
return false;
}
ConsumeChar();
string.Append(kUnicodeReplacementPoint);
continue;
}
// NOTE(review): on the successful multi-byte path above, |index_| is
// presumably left on the last byte of the decoded sequence, so that the
// ConsumeChar() calls below step past it -- confirm against
// ReadUnicodeCharacter.
if (next_char == '"') {
// Closing quote: consume it and hand the finished string to the caller.
ConsumeChar();
*out = std::move(string);
return true;
}
if (next_char != '\\') {
// Unescaped control characters (U+0000..U+001F) are recorded in the
// extension histogram and rejected unless JSON_ALLOW_CONTROL_CHARS is set.
if (next_char <= 0x1F) {
UmaHistogramEnumeration(kExtensionHistogramName,
ChromiumJsonExtension::kControlCharacter);
if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
ReportError(JSON_UNSUPPORTED_ENCODING, -1);
return false;
}
}
// Track line breaks that appear literally inside the string.
if ((next_char == '\r') || (next_char == '\n')) {
index_last_line_ = index_;
// Count "\r\n" only once. |index_ - 1| is in range because the opening
// quote has already been consumed.
if ((next_char == '\r') || (input_[index_ - 1] != '\r')) {
++line_number_;
}
}
ConsumeChar();
string.Append(next_char);
} else {
// An escape sequence makes the output differ from the raw input bytes, so
// the builder must switch to an owned copy.
string.Convert();
// Consume the backslash together with the character that selects the escape.
absl::optional<StringPiece> escape_sequence = ConsumeChars(2);
if (!escape_sequence) {
ReportError(JSON_INVALID_ESCAPE, -1);
return false;
}
switch ((*escape_sequence)[1]) {
case 'x': {
// \xNN: a two-hex-digit escape, accepted only with JSON_ALLOW_X_ESCAPES.
// Its use is recorded in the extension histogram either way.
UmaHistogramEnumeration(kExtensionHistogramName,
ChromiumJsonExtension::kXEscape);
if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
ReportError(JSON_INVALID_ESCAPE, -1);
return false;
}
escape_sequence = ConsumeChars(2);
if (!escape_sequence) {
ReportError(JSON_INVALID_ESCAPE, -3);
return false;
}
int hex_digit = 0;
if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit) ||
!IsValidCharacter(hex_digit)) {
ReportError(JSON_INVALID_ESCAPE, -3);
return false;
}
string.Append(hex_digit);
break;
}
case 'u': {
// \uXXXX: decoded by DecodeUTF16(), which may also consume a second \uXXXX
// unit for a surrogate pair.
base_icu::UChar32 code_point;
if (!DecodeUTF16(&code_point)) {
ReportError(JSON_INVALID_ESCAPE, -1);
return false;
}
string.Append(code_point);
break;
}
// Single-character escapes.
case '"':
string.Append('"');
break;
case '\\':
string.Append('\\');
break;
case '/':
string.Append('/');
break;
case 'b':
string.Append('\b');
break;
case 'f':
string.Append('\f');
break;
case 'n':
string.Append('\n');
break;
case 'r':
string.Append('\r');
break;
case 't':
string.Append('\t');
break;
case 'v':
// \v is accepted only with JSON_ALLOW_VERT_TAB. Its use is recorded in the
// extension histogram either way.
UmaHistogramEnumeration(kExtensionHistogramName,
ChromiumJsonExtension::kVerticalTabEscape);
if (!(options_ & JSON_ALLOW_VERT_TAB)) {
ReportError(JSON_INVALID_ESCAPE, -1);
return false;
}
string.Append('\v');
break;
default:
// Any other escape selector is invalid.
ReportError(JSON_INVALID_ESCAPE, -1);
return false;
}
}
}
// The input ended before a closing quote was found.
ReportError(JSON_SYNTAX_ERROR, -1);
return false;
}
// Decodes the hex digits of a \uXXXX escape into |*out_code_point|. The
// caller has already consumed the "\u", so the position is on the first hex
// digit. For a lead surrogate, the following "\uXXXX" trail unit is consumed
// as well and the pair is combined. Returns false on malformed input. An
// unpaired or misordered surrogate yields U+FFFD when
// JSON_REPLACE_INVALID_CHARACTERS is set, and false otherwise.
bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
absl::optional<StringPiece> escape_sequence = ConsumeChars(4);
if (!escape_sequence)
return false;
// The first code unit; it may be a lead (high) surrogate.
int code_unit16_high = 0;
if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
return false;
if (CBU16_IS_SURROGATE(code_unit16_high)) {
// A trail surrogate in first position is invalid.
if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
return false;
*out_code_point = kUnicodeReplacementPoint;
return true;
}
// A lead surrogate must be followed immediately by another \u escape.
if (!ConsumeIfMatch("\\u")) {
if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
return false;
*out_code_point = kUnicodeReplacementPoint;
return true;
}
escape_sequence = ConsumeChars(4);
if (!escape_sequence)
return false;
int code_unit16_low = 0;
if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
return false;
// The second unit must be a trail surrogate. Note that it has already been
// consumed at this point, even when it is replaced below.
if (!CBU16_IS_TRAIL(code_unit16_low)) {
if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
return false;
*out_code_point = kUnicodeReplacementPoint;
return true;
}
// Combine the surrogate pair into one supplementary code point.
base_icu::UChar32 code_point =
CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
*out_code_point = code_point;
} else {
// Not a surrogate: the code unit is the code point.
DCHECK(CBU16_IS_SINGLE(code_unit16_high));
*out_code_point = code_unit16_high;
}
return true;
}
// Parses a JSON number at the current position. Returns an integer Value when
// the text converts with StringToInt, otherwise a double Value when it
// converts to a finite double. Returns nullopt after reporting
// JSON_SYNTAX_ERROR for malformed text or an invalid following token, or
// JSON_UNREPRESENTABLE_NUMBER when neither conversion succeeds.
absl::optional<Value> JSONParser::ConsumeNumber() {
const char* num_start = pos();
const size_t start_index = index_;
size_t end_index = start_index;
// Optional leading minus sign.
if (PeekChar() == '-')
ConsumeChar();
// Integer part: leading zeros are not allowed here.
if (!ReadInt(false)) {
ReportError(JSON_SYNTAX_ERROR, 0);
return absl::nullopt;
}
end_index = index_;
// Optional fraction part.
if (PeekChar() == '.') {
ConsumeChar();
if (!ReadInt(true)) {
ReportError(JSON_SYNTAX_ERROR, 0);
return absl::nullopt;
}
end_index = index_;
}
// Optional exponent part, with an optional sign.
absl::optional<char> c = PeekChar();
if (c == 'e' || c == 'E') {
ConsumeChar();
if (PeekChar() == '-' || PeekChar() == '+') {
ConsumeChar();
}
if (!ReadInt(true)) {
ReportError(JSON_SYNTAX_ERROR, 0);
return absl::nullopt;
}
end_index = index_;
}
// Remember where the number ends. GetNextToken() below skips whitespace and
// comments in order to validate what follows, and the position is restored
// afterwards so that this function consumes only the number itself.
size_t exit_index = index_;
switch (GetNextToken()) {
case T_OBJECT_END:
case T_ARRAY_END:
case T_LIST_SEPARATOR:
case T_END_OF_INPUT:
break;
default:
ReportError(JSON_SYNTAX_ERROR, 0);
return absl::nullopt;
}
index_ = exit_index;
StringPiece num_string(num_start, end_index - start_index);
// Prefer an int representation, then fall back to a finite double.
int num_int;
if (StringToInt(num_string, &num_int))
return Value(num_int);
double num_double;
if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
return Value(num_double);
}
ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
return absl::nullopt;
}
bool JSONParser::ReadInt(bool allow_leading_zeros) {
size_t len = 0;
char first = 0;
while (absl::optional<char> c = PeekChar()) {
if (!IsAsciiDigit(c))
break;
if (len == 0)
first = *c;
++len;
ConsumeChar();
}
if (len == 0)
return false;
if (!allow_leading_zeros && len > 1 && first == '0')
return false;
return true;
}
// Consumes one of the literals "true", "false" or "null" and returns the
// matching Value. Anything else reports JSON_SYNTAX_ERROR and yields nullopt.
absl::optional<Value> JSONParser::ConsumeLiteral() {
  absl::optional<Value> literal;
  if (ConsumeIfMatch("true")) {
    literal = Value(true);
  } else if (ConsumeIfMatch("false")) {
    literal = Value(false);
  } else if (ConsumeIfMatch("null")) {
    literal = Value(Value::Type::NONE);
  } else {
    ReportError(JSON_SYNTAX_ERROR, 0);
  }
  return literal;
}
// If the upcoming input starts with |match|, consumes it and returns true.
// Otherwise leaves the position unchanged and returns false.
bool JSONParser::ConsumeIfMatch(StringPiece match) {
  const absl::optional<StringPiece> upcoming = PeekChars(match.size());
  if (!upcoming || *upcoming != match)
    return false;
  ConsumeChars(match.size());
  return true;
}
// Records |code| together with the current line and column. The column is
// the distance from the last line break, shifted by |column_adjust|, and is
// never reported below 1.
void JSONParser::ReportError(JsonParseError code, int column_adjust) {
  int column = static_cast<int>(index_ - index_last_line_) + column_adjust;
  // A negative adjustment can push the result below the first column.
  if (column < 1)
    column = 1;

  error_code_ = code;
  error_line_ = line_number_;
  error_column_ = column;
}
// Formats |description| with its location. When both |line| and |column| are
// zero, the description is returned unchanged.
std::string JSONParser::FormatErrorMessage(int line, int column,
                                           const std::string& description) {
  const bool has_location = (line != 0) || (column != 0);
  if (!has_location)
    return description;
  return StringPrintf("Line: %i, column: %i, %s",
                      line, column, description.c_str());
}
}
}