// +-+-+ +-+-+
#include "ExportTrie.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/LEB128.h"
#include <optional>
using namespace llvm;
using namespace lld;
using namespace lld::macho;
namespace {
struct Edge {
Edge(StringRef s, TrieNode *node) : substring(s), child(node) {}
StringRef substring;
struct TrieNode *child;
};
struct ExportInfo {
uint64_t address;
uint64_t ordinal = 0;
uint8_t flags = 0;
ExportInfo(const Symbol &sym, uint64_t imageBase)
: address(sym.getVA() - imageBase) {
using namespace llvm::MachO;
if (sym.isWeakDef())
flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
if (sym.isTlv())
flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
if (auto *defined = dyn_cast<Defined>(&sym)) {
if (defined->isAbsolute())
flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE;
} else if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
flags |= EXPORT_SYMBOL_FLAGS_REEXPORT;
if (!dysym->isDynamicLookup())
ordinal = dysym->getFile()->ordinal;
}
}
};
}
// One node of the export trie that is being built.
struct macho::TrieNode {
  // Size in bytes of the terminal payload (flags plus address, or flags plus
  // ordinal and a trailing NUL byte). Requires `info` to be set.
  uint32_t getTerminalSize() const;
  // Assigns this node the offset `nextOffset` and advances `nextOffset` past
  // the node. Returns true if the node's offset changed.
  bool updateOffset(size_t &nextOffset);
  // Serializes this node at `buf + offset`.
  void writeTo(uint8_t *buf) const;

  // Outgoing edges to child nodes.
  std::vector<Edge> edges;
  // Present only when an exported symbol's name ends at this node.
  std::optional<ExportInfo> info;
  // Byte offset of this node from the start of the serialized trie.
  size_t offset = 0;
};
uint32_t TrieNode::getTerminalSize() const {
uint32_t size = getULEB128Size(info->flags);
if (info->flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
size += getULEB128Size(info->ordinal) + 1;
else
size += getULEB128Size(info->address);
return size;
}
bool TrieNode::updateOffset(size_t &nextOffset) {
size_t nodeSize;
if (info) {
uint32_t terminalSize = getTerminalSize();
nodeSize = terminalSize + getULEB128Size(terminalSize);
} else {
nodeSize = 1;
}
++nodeSize;
for (const Edge &edge : edges) {
nodeSize += edge.substring.size() + 1
+ getULEB128Size(edge.child->offset);
}
bool result = (offset != nextOffset);
offset = nextOffset;
nextOffset += nodeSize;
return result;
}
void TrieNode::writeTo(uint8_t *buf) const {
buf += offset;
if (info) {
uint32_t terminalSize = getTerminalSize();
buf += encodeULEB128(terminalSize, buf);
buf += encodeULEB128(info->flags, buf);
if (info->flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
buf += encodeULEB128(info->ordinal, buf);
*buf++ = 0;
} else {
buf += encodeULEB128(info->address, buf);
}
} else {
*buf++ = 0;
}
assert(edges.size() < 256);
*buf++ = edges.size();
for (const Edge &edge : edges) {
memcpy(buf, edge.substring.data(), edge.substring.size());
buf += edge.substring.size();
*buf++ = '\0';
buf += encodeULEB128(edge.child->offset, buf);
}
}
// Frees every node that makeNode() allocated.
TrieBuilder::~TrieBuilder() {
  for (TrieNode *owned : nodes) {
    delete owned;
  }
}
// Allocates a fresh node, records it in `nodes` (which the destructor frees),
// and returns it.
TrieNode *TrieBuilder::makeNode() {
  TrieNode *created = new TrieNode();
  nodes.emplace_back(created);
  return created;
}
// Returns the byte of `sym`'s name at index `pos` as a value in [0, 255], or
// -1 if `pos` is at or past the end of the name.
//
// The byte is converted through `unsigned char` on purpose: where plain `char`
// is signed, a name byte of 0xFF would otherwise be returned as -1 and be
// mistaken for the end-of-name sentinel by the caller.
static int charAt(const Symbol *sym, size_t pos) {
  StringRef str = sym->getName();
  if (pos >= str.size())
    return -1;
  return static_cast<unsigned char>(str[pos]);
}
// Builds the subtrie for the symbols in `vec` beneath `node`, partitioning
// them in place three ways around a pivot character at index `pos`.
//
// `lastPos` is the index in the symbol names at which `node` was created; the
// characters in [lastPos, pos) are shared by every symbol in `vec` and have
// not yet been emitted as an edge label.
//
// The "less than" and "greater than" partitions are handled recursively; the
// "equal" partition is handled by looping with `pos` advanced by one.
void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec,
                               TrieNode *node, size_t lastPos, size_t pos) {
  while (!vec.empty()) {
    const Symbol *pivotSymbol = vec[vec.size() / 2];
    const int pivot = charAt(pivotSymbol, pos);

    // Three-way partition: [0, lessEnd) < pivot, [lessEnd, greaterBegin) ==
    // pivot, [greaterBegin, size) > pivot.
    size_t lessEnd = 0;
    size_t greaterBegin = vec.size();
    size_t cursor = 0;
    while (cursor < greaterBegin) {
      const int c = charAt(vec[cursor], pos);
      if (c < pivot) {
        std::swap(vec[lessEnd], vec[cursor]);
        ++lessEnd;
        ++cursor;
      } else if (c > pivot) {
        --greaterBegin;
        std::swap(vec[greaterBegin], vec[cursor]);
      } else {
        ++cursor;
      }
    }

    // charAt() yields -1 once `pos` is past the end of the pivot's name.
    const bool isTerminal = pivot == -1;
    const bool prefixesDiverge = lessEnd != 0 || greaterBegin != vec.size();

    // Emit the pending shared prefix as an edge when a symbol ends here or
    // the symbols stop agreeing at this position.
    if (lastPos != pos && (isTerminal || prefixesDiverge)) {
      TrieNode *newNode = makeNode();
      node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos),
                               newNode);
      node = newNode;
      lastPos = pos;
    }

    sortAndBuild(vec.slice(0, lessEnd), node, lastPos, pos);
    sortAndBuild(vec.slice(greaterBegin), node, lastPos, pos);

    if (isTerminal) {
      // Exactly one symbol is expected to end at this node.
      assert(greaterBegin - lessEnd == 1);
      node->info = ExportInfo(*pivotSymbol, imageBase);
      return;
    }

    // Continue with the symbols that matched the pivot, one character on.
    vec = vec.slice(lessEnd, greaterBegin - lessEnd);
    ++pos;
  }
}
size_t TrieBuilder::build() {
if (exported.empty())
return 0;
TrieNode *root = makeNode();
sortAndBuild(exported, root, 0, 0);
size_t offset;
bool more;
do {
offset = 0;
more = false;
for (TrieNode *node : nodes)
more |= node->updateOffset(offset);
} while (more);
return offset;
}
void TrieBuilder::writeTo(uint8_t *buf) const {
for (TrieNode *node : nodes)
node->writeTo(buf);
}
namespace {

// Walks a serialized export trie spanning [start, end) and invokes `callback`
// for each terminal node encountered.
class TrieParser {
public:
  TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
      : start(buf), end(start + size), callback(callback) {}

  // Parses from the root node with an empty accumulated name.
  void parse() { parse(start, ""); }

  // Parses the node at `buf`; `cumulativeString` is the name accumulated on
  // the path from the root to this node.
  void parse(const uint8_t *buf, const Twine &cumulativeString);

  // First byte of the trie. Declared before `end` because `end` is
  // initialized from it.
  const uint8_t *start;
  // One past the last byte of the trie.
  const uint8_t *end;
  const TrieEntryCallback &callback;
};

} // namespace
void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
if (buf >= end)
fatal("Node offset points outside export section");
unsigned ulebSize;
uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
buf += ulebSize;
uint64_t flags = 0;
size_t offset;
if (terminalSize != 0) {
flags = decodeULEB128(buf, &ulebSize);
callback(cumulativeString, flags);
}
buf += terminalSize;
uint8_t numEdges = *buf++;
for (uint8_t i = 0; i < numEdges; ++i) {
const char *cbuf = reinterpret_cast<const char *>(buf);
StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf));
buf += substring.size() + 1;
offset = decodeULEB128(buf, &ulebSize);
buf += ulebSize;
parse(start + offset, cumulativeString + substring);
}
}
void macho::parseTrie(const uint8_t *buf, size_t size,
const TrieEntryCallback &callback) {
if (size == 0)
return;
TrieParser(buf, size, callback).parse();
}