#include "base/nix/mime_util_xdg.h"
#include <algorithm>
#include <memory>
#include <utility>
#include "base/byte_count.h"
#include "base/check.h"
#include "base/containers/stack.h"
#include "base/environment.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/logging.h"
#include "base/nix/xdg_util.h"
#include "base/no_destructor.h"
#include "base/numerics/byte_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "build/build_config.h"
#if !BUILDFLAG(IS_CHROMEOS)
#include "base/synchronization/lock.h"
#endif
namespace base::nix {
namespace {
// Size cap passed to ReadFileToStringWithMaxSize() when loading a mime.cache
// file in ParseMimeTypes().
constexpr ByteCount kMaxMimeTypesFileSize = MiB(10);
// Maximum number of non-leaf suffix-tree nodes ParseMimeTypes() will visit
// before rejecting the file.
constexpr size_t kMaxNodes = 30000;
// Maximum byte length of an accumulated suffix string; nodes whose suffix is
// longer are skipped (not descended into).
constexpr size_t kMaxExtSize = 100;
// Minimum acceptable mime.cache size. Also used as the lower bound for the
// alias-list, suffix-tree and mime-type offsets read from the file.
constexpr size_t kHeaderSize = 40;
// Largest accepted CHARACTER value in a tree node. Also reused as the upper
// bound when validating the N_ROOTS and N_CHILDREN counts.
constexpr uint32_t kMaxUnicode = 0x10ffff;
// Weight used for a leaf entry whose weight byte lies past the end of the
// file buffer.
constexpr uint8_t kDefaultGlobWeight = 50;
// Records one mime.cache file that was loaded successfully, together with the
// modification time seen when it was loaded. GetFileMimeType() compares these
// against the current file state to decide whether to reload.
// Kept as a plain aggregate: LoadAllMimeCacheFiles() constructs it with
// emplace_back(path, last_modified).
struct FileInfo {
// Full path of the mime.cache file.
FilePath path;
// File::Info::last_modified value captured when the file was loaded.
Time last_modified;
};
// Walks every XDG data search location, looks for "mime/mime.cache" beneath
// it, and merges each cache that can be stat'ed and parsed into `map`.
// Every cache that was loaded successfully is appended to `files` along with
// the modification time observed just before parsing.
void LoadAllMimeCacheFiles(MimeTypeMap& map, std::vector<FileInfo>& files) {
  std::unique_ptr<Environment> environment{Environment::Create()};
  for (const auto& data_dir : GetXDGDataSearchLocations(environment.get())) {
    FilePath cache_path = data_dir.Append("mime/mime.cache");

    // Skip locations where the cache is missing or cannot be stat'ed.
    File::Info cache_info;
    if (!GetFileInfo(cache_path, &cache_info)) {
      continue;
    }
    // Skip caches that fail to parse; they are not tracked for changes.
    if (!ParseMimeTypes(cache_path, map)) {
      continue;
    }
    files.emplace_back(cache_path, cache_info.last_modified);
  }
}
// Reads the big-endian 32-bit unsigned integer stored at byte `offset` of
// `buf` and stores it in `*result`.
//
// Returns false (after logging an error that names `field_name`) when:
//   - `buf` is too short to hold a 32-bit value at `offset`,
//   - `offset` is not 4-byte aligned, or
//   - the decoded value is outside [`min_result`, `max_result`].
// Note that `*result` is written before the range check, so on a range
// failure it holds the rejected value.
bool ReadInt(const std::string& buf,
             uint32_t offset,
             const std::string& field_name,
             uint32_t min_result,
             size_t max_result,
             uint32_t* result) {
  constexpr size_t kIntSize = 4;
  // Check the buffer length first: `buf.size() - kIntSize` is unsigned and
  // would wrap to a huge value for buffers shorter than 4 bytes, letting any
  // offset through.
  if (buf.size() < kIntSize || offset > buf.size() - kIntSize ||
      (offset & 0x3)) {
    LOG(ERROR) << "Invalid offset=" << offset << " for " << field_name
               << ", string size=" << buf.size();
    return false;
  }
  auto bytes = base::as_byte_span(buf);
  *result = base::U32FromBigEndian(bytes.subspan(offset).first<4u>());
  if (*result < min_result || *result > max_result) {
    LOG(ERROR) << "Invalid " << field_name << "=" << *result
               << " not between min_result=" << min_result
               << " and max_result=" << max_result;
    return false;
  }
  return true;
}
}
// Parses the mime.cache file at `file_path` and merges its
// extension -> (mime type, weight) entries into `out_mime_types`.
//
// Only suffixes that begin with '.' are recorded; the key stored in the map
// is the suffix without that leading dot. An entry already present in the map
// is replaced only by one with a strictly greater weight.
//
// Returns false if the file cannot be read or fails any validation step.
// Entries merged before the failure was detected remain in `out_mime_types`.
//
// Layout as consumed here (all integers are 32-bit, read via ReadInt()):
//   offset 4:   ALIAS_LIST_OFFSET
//   offset 16:  REVERSE_SUFFIX_TREE_OFFSET
//   tree header at REVERSE_SUFFIX_TREE_OFFSET: N_ROOTS, FIRST_ROOT_OFFSET
//   each child record is three words (12 bytes):
//     interior: CHARACTER (non-zero), N_CHILDREN, FIRST_CHILD_OFFSET
//     leaf:     0, mime type offset, word whose last byte is the weight
// NOTE(review): field names follow the freedesktop shared-mime-info
// mime.cache format; confirm against the spec when changing offsets.
bool ParseMimeTypes(const FilePath& file_path, MimeTypeMap& out_mime_types) {
// Load the file into memory, passing kMaxMimeTypesFileSize as the size cap.
std::string buf;
if (!ReadFileToStringWithMaxSize(file_path, &buf,
kMaxMimeTypesFileSize.InBytes())) {
LOG(ERROR) << "Failed reading in mime.cache file: " << file_path;
return false;
}
// Anything shorter than the header cannot be valid. This also guarantees the
// fixed header offsets (4 and 16) read below are inside the buffer.
if (buf.size() < kHeaderSize) {
LOG(ERROR) << "Invalid mime.cache file size=" << buf.size();
return false;
}
// ALIAS_LIST_OFFSET must lie in [kHeaderSize, buf.size()].
uint32_t alias_list_offset = 0;
if (!ReadInt(buf, 4, "ALIAS_LIST_OFFSET", kHeaderSize, buf.size(),
&alias_list_offset)) {
return false;
}
// Require a NUL byte immediately before the alias list. Mime type offsets are
// later restricted to at most alias_list_offset - 1, so every mime type
// string read below is guaranteed to hit a terminator inside the buffer.
if (buf[alias_list_offset - 1] != 0) {
LOG(ERROR) << "Invalid mime.cache file does not contain null prior to "
"ALIAS_LIST_OFFSET="
<< alias_list_offset;
return false;
}
uint32_t tree_offset = 0;
if (!ReadInt(buf, 16, "REVERSE_SUFFIX_TREE_OFFSET", kHeaderSize, buf.size(),
&tree_offset)) {
return false;
}
// A pending tree node whose children still have to be scanned.
struct Node {
// Suffix text accumulated from this node down to the root. Each level's
// character is placed in front of its parent's text.
std::string ext;
// Number of child records stored at `first_child_offset`.
uint32_t n_children;
// Byte offset in `buf` of the first child record.
uint32_t first_child_offset;
};
// The tree header is treated as a root pseudo-node with an empty suffix.
Node root;
if (!ReadInt(buf, tree_offset, "N_ROOTS", 0, kMaxUnicode, &root.n_children)) {
return false;
}
if (!ReadInt(buf, tree_offset + 4, "FIRST_ROOT_OFFSET", tree_offset,
buf.size(), &root.first_child_offset)) {
return false;
}
// Iterative depth-first traversal using an explicit stack instead of
// recursion.
stack<Node> stack;
stack.push(std::move(root));
// Counts interior (non-leaf) nodes only; bounded by kMaxNodes.
uint32_t num_nodes = 0;
while (!stack.empty()) {
Node n = std::move(stack.top());
stack.pop();
// `p` is the read cursor; it advances by 12 bytes per child record.
uint32_t p = n.first_child_offset;
for (uint32_t i = 0; i < n.n_children; i++) {
uint32_t c = 0;
if (!ReadInt(buf, p, "CHARACTER", 0, kMaxUnicode, &c)) {
return false;
}
p += 4;
// A zero CHARACTER marks a leaf: the suffix accumulated in `n.ext` maps to a
// mime type.
if (c == 0) {
uint32_t mime_type_offset = 0;
if (!ReadInt(buf, p, "mime type offset", kHeaderSize,
alias_list_offset - 1, &mime_type_offset)) {
return false;
}
p += 4;
// Only the last byte of the third word is used as the weight; the other
// three bytes are ignored. If that byte is past the end of the buffer the
// default weight is used instead of failing.
uint8_t weight = kDefaultGlobWeight;
if ((p + 3) < buf.size()) {
weight = static_cast<uint8_t>(buf[p + 3]);
}
p += 4;
// Record only suffixes of the form ".ext", keyed without the leading dot.
if (n.ext.size() > 0 && n.ext[0] == '.') {
std::string_view ext = std::string_view(n.ext).substr(1u);
auto it = out_mime_types.find(ext);
// Insert new extensions; overwrite existing ones only on strictly higher
// weight, so on a tie the entry seen first wins.
if (it == out_mime_types.end() || weight > it->second.weight) {
// The mime type is the NUL-terminated string starting at mime_type_offset.
auto mime_type = std::string_view(buf).substr(mime_type_offset);
mime_type = mime_type.substr(0u, mime_type.find('\0'));
out_mime_types[std::string(ext)] = {std::string(mime_type), weight};
}
}
continue;
}
// Interior node: build the child's suffix by writing this node's character
// first (presumably UTF-8 encoded -- confirm WriteUnicodeCharacter) and then
// appending the parent's suffix.
Node node;
WriteUnicodeCharacter(static_cast<int>(c), &node.ext);
node.ext += n.ext;
if (!ReadInt(buf, p, "N_CHILDREN", 0, kMaxUnicode, &node.n_children)) {
return false;
}
p += 4;
if (!ReadInt(buf, p, "FIRST_CHILD_OFFSET", tree_offset, buf.size(),
&node.first_child_offset)) {
return false;
}
p += 4;
// Cap the number of interior nodes visited so a malformed file cannot make
// the traversal run unbounded.
if (++num_nodes > kMaxNodes) {
LOG(ERROR) << "Exceeded maxium number of nodes=" << kMaxNodes;
return false;
}
// Overlong suffixes are dropped together with their subtree (the node is not
// pushed), but parsing of the remaining siblings continues.
if (node.ext.size() > kMaxExtSize) {
LOG(WARNING) << "Ignoring large extension exceeds size=" << kMaxExtSize
<< " ext=" << node.ext;
continue;
}
stack.push(std::move(node));
}
}
return true;
}
// Returns the MIME type registered for the extension of `filepath` in the
// XDG mime.cache files, or an empty string when the path has no extension or
// the extension is unknown.
//
// The extension map is built on first use from every mime.cache found by
// LoadAllMimeCacheFiles(). On non-ChromeOS builds, at most once every 5
// seconds each loaded cache file is re-stat'ed; if any of them is missing or
// has a different modification time, the whole map is rebuilt. ChromeOS
// builds skip that staleness check.
std::string GetFileMimeType(const FilePath& filepath) {
  std::string ext = filepath.Extension();
  if (ext.empty()) {
    return std::string();
  }

  // Cache files that were loaded, with their mtimes, for change detection.
  static NoDestructor<std::vector<FileInfo>> xdg_mime_files;
  // Lazily populated on the first call; function-local static initialization
  // runs the loader exactly once.
  static NoDestructor<MimeTypeMap> mime_type_map([] {
    MimeTypeMap map;
    LoadAllMimeCacheFiles(map, *xdg_mime_files);
    return map;
  }());

#if !BUILDFLAG(IS_CHROMEOS)
  static Time last_check;
  static NoDestructor<Lock> lock;
  // The lock is held until the function returns, not just around the reload.
  // The lookup below must not run while another thread is inside clear() /
  // LoadAllMimeCacheFiles() mutating the same map.
  AutoLock scoped_lock(*lock);
  const Time now = Time::Now();
  if (last_check + Seconds(5) < now) {
    const bool cache_changed =
        std::ranges::any_of(*xdg_mime_files, [](const FileInfo& file_info) {
          File::Info info;
          return !GetFileInfo(file_info.path, &info) ||
                 info.last_modified != file_info.last_modified;
        });
    if (cache_changed) {
      mime_type_map->clear();
      xdg_mime_files->clear();
      LoadAllMimeCacheFiles(*mime_type_map, *xdg_mime_files);
    }
    last_check = now;
  }
#endif

  // `ext` includes the leading '.', while map keys are stored without it.
  auto it = mime_type_map->find(ext.substr(1));
  return it != mime_type_map->end() ? it->second.mime_type : std::string();
}
}