"""
Example of use:
asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log
PLUGINS
This script provides a way for external plug-ins to hook into the behaviour of
various parts of this script (see `--plugins`). This is useful for situations
where it is necessary to handle site-specific quirks (e.g. binaries with debug
symbols only accessible via a remote service) without having to modify the
script itself.
"""
import argparse
import bisect
import errno
import getopt
import logging
import os
import re
import shutil
import subprocess
import sys
# Per-binary symbolizer chains, keyed by binary path (filled in lazily by
# SymbolizationLoop.symbolize_address).
symbolizers = {}
# When True, the external symbolizer tools are asked to demangle names.
demangle = False
# Optional prefix prepended to the "addr2line" tool name.
binutils_prefix = None
# Optional list of regex patterns; each match (and everything before it) is
# cut from reported file names by fix_filename().
fix_filename_patterns = None
# File-like object the log to symbolize is read from.
logfile = sys.stdin
# When False, a failed llvm-symbolizer lookup raises instead of falling back
# to the system symbolizer.
allow_system_symbolizer = True
# When True, llvm-symbolizer is skipped and only the system symbolizer is used.
force_system_symbolizer = False
def fix_filename(file_name):
    """Normalize a "file:line" string reported by a symbolizer.

    Every user supplied pattern in the module-level `fix_filename_patterns`
    (treated as a regular expression) is removed together with everything
    that precedes it. Afterwards ASan runtime sources are collapsed to
    "_asan_rtl_" and "crtstuff.c:0" locations are replaced by "???:0".

    Args:
      file_name: location string to clean up.
    Returns:
      the cleaned up location string.
    """
    for path_to_cut in fix_filename_patterns or ():
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    builtin_rewrites = (
        (".*asan_[a-z_]*.(cc|cpp):[0-9]*", "_asan_rtl_"),
        (".*crtstuff.c:0", "???:0"),
    )
    for pattern, replacement in builtin_rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
def is_valid_arch(s):
    """Return True iff `s` is one of the architecture names this script knows."""
    known_arches = (
        "i386",
        "x86_64",
        "x86_64h",
        "arm",
        "armv6",
        "armv7",
        "armv7s",
        "armv7k",
        "arm64",
        "powerpc64",
        "powerpc64le",
        "s390x",
        "s390",
        "riscv64",
        "loongarch64",
    )
    return s in known_arches
def guess_arch(addr):
    """Guess the architecture from the textual length of an address.

    Address strings longer than 10 characters are taken to be 64-bit
    ("x86_64"); anything shorter or equal is reported as "i386".
    """
    return "x86_64" if len(addr) > 10 else "i386"
class Symbolizer(object):
    """Common interface implemented by every symbolizer in this script."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Translate one (binary, offset) pair into source locations.

        The base implementation knows nothing and always answers None;
        subclasses override it.

        Args:
          addr: virtual address of the instruction.
          binary: path of the executable/shared object holding the
            instruction.
          offset: offset of the instruction inside `binary`.
        Returns:
          a list with one string per (inlined) frame describing the code
          location (function name, file name, line and column numbers), or
          None when nothing is known.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to a long-running llvm-symbolizer child process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Start the llvm-symbolizer process.

        Args:
          symbolizer_path: executable to launch.
          default_arch: value passed as --default-arch.
          system: platform name; "Darwin" enables the --dsym-hint options.
          dsym_hints: iterable of .dSYM hint paths (only used on Darwin).
            Defaults to no hints.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A `None` default avoids sharing one mutable list between instances.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Spawn llvm-symbolizer; returns the Popen object or None on OSError."""
        cmd = [
            self.symbolizer_path,
            ("--demangle" if demangle else "--no-demangle"),
            "--functions=linkage",
            "--inlines",
            "--default-arch=%s" % self.default_arch,
        ]
        if self.system == "Darwin":
            for hint in self.dsym_hints:
                cmd.append("--dsym-hint=%s" % hint)
        logging.debug(" ".join(cmd))
        try:
            result = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                bufsize=0,
                universal_newlines=True,
            )
        except OSError:
            # The tool could not be launched; symbolize() will answer None.
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns a list of "<addr> in <function> <file>" strings (one per
        inlined frame), or None when the process is unavailable, the
        conversation fails, or only unknown ("??") frames were reported.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            logging.debug(symbolizer_input)
            self.pipe.stdin.write("%s\n" % symbolizer_input)
            # The answer is a sequence of (function, file) line pairs that is
            # terminated by an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                # Keep only frames that carry some information.
                if not function_name.startswith("??") or not file_name.startswith("??"):
                    result.append("%s in %s %s" % (addr, function_name, file_name))
        except Exception:
            # Best effort: report "no result" so that another symbolizer can
            # be tried, but leave a trace of what went wrong.
            logging.debug(
                "exception while communicating with llvm-symbolizer", exc_info=True
            )
            result = []
        if not result:
            result = None
        return result
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer using the configured llvm-symbolizer binary.

    The executable is taken from the LLVM_SYMBOLIZER_PATH environment
    variable, then from ASAN_SYMBOLIZER_PATH, and finally defaults to
    "llvm-symbolizer".

    Args:
      system: platform name forwarded to LLVMSymbolizer.
      default_arch: default architecture forwarded to LLVMSymbolizer.
      dsym_hints: iterable of .dSYM hints forwarded to LLVMSymbolizer;
        defaults to no hints.
    Returns:
      a new LLVMSymbolizer instance.
    """
    # A `None` default avoids sharing one mutable list between calls.
    if dsym_hints is None:
        dsym_hints = []
    symbolizer_path = (
        os.getenv("LLVM_SYMBOLIZER_PATH")
        or os.getenv("ASAN_SYMBOLIZER_PATH")
        or "llvm-symbolizer"
    )
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one addr2line process bound to a single binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Bogus address written after every query; its echo marks the end of
        # the answer.
        self.output_terminator = -1

    def open_addr2line(self):
        """Launch addr2line (honouring `binutils_prefix`) for `self.binary`."""
        tool = (binutils_prefix or "") + "addr2line"
        logging.debug("addr2line binary is %s" % shutil.which(tool))
        cmd = [tool, "-fi"]
        if demangle:
            cmd.append("--demangle")
        cmd.extend(["-e", self.binary])
        logging.debug(" ".join(cmd))
        return subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            bufsize=0,
            universal_newlines=True,
        )

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for any binary other than the one this instance was
        created for; otherwise a list of "<addr> in <function> <file>"
        strings. Communication failures add a ("??", "??:0") frame.
        """
        if binary != self.binary:
            return None
        frames = []
        try:
            to_child = self.pipe.stdin
            from_child = self.pipe.stdout
            to_child.write("%s\n" % offset)
            to_child.write("%s\n" % self.output_terminator)
            first = True
            while True:
                function_name = from_child.readline().rstrip()
                logging.debug("read function_name='%s' from addr2line", function_name)
                # The terminator may be echoed back without a file line, so
                # stop before trying to read one.
                if function_name == "-1":
                    logging.debug("got function '-1' -> no more input")
                    break
                file_name = from_child.readline().rstrip()
                logging.debug("read file_name='%s' from addr2line", file_name)
                if first:
                    first = False
                elif function_name == "??":
                    assert file_name == "??:0", file_name
                    logging.debug("got function '??' -> no more input")
                    break
                elif not function_name:
                    assert not file_name, file_name
                    logging.debug("got empty function name -> no more input")
                    break
                if not (function_name or file_name):
                    logging.debug(
                        "got empty function and file name -> unknown function"
                    )
                    function_name, file_name = "??", "??:0"
                frames.append((function_name, file_name))
        except IOError as e:
            if e.errno == errno.EPIPE:
                logging.debug(
                    f"addr2line exited early (broken pipe) returncode={self.pipe.poll()}"
                )
            else:
                logging.debug(
                    "unexpected I/O exception communicating with addr2line", exc_info=e
                )
            frames.append(("??", "??:0"))
        except Exception as e:
            logging.debug(
                "got unknown exception communicating with addr2line", exc_info=e
            )
            frames.append(("??", "??:0"))
        symbolized = []
        for function, file in frames:
            symbolized.append("%s in %s %s" % (addr, function, fix_filename(file)))
        return symbolized
class UnbufferedLineConverter(object):
    """
    Line-in/line-out adapter around a child process. The child is attached
    to a pty so that it believes it talks to a terminal and does not buffer
    its output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a pty and exec `args` in the child.

        Args:
          args: command line (list) to execute; args[0] is the program.
          close_stderr: when true the child's stderr is redirected to
            /dev/null.
        """
        # Imported here rather than at module level.
        import pty
        import termios

        pid, fd = pty.fork()
        if pid == 0:
            # Child side: optionally silence stderr, then become the command.
            if close_stderr:
                null_fd = os.open("/dev/null", 0)
                os.dup2(null_fd, 2)
            os.execvp(args[0], args)
        else:
            # Parent side: switch echo off on the pty.
            settings = termios.tcgetattr(fd)
            settings[3] &= ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, settings)
            # Line-buffered reader/writer pair over the pty.
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send `line` to the child and return its next output line."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the child's next output line without trailing whitespace."""
        raw = self.r.readline()
        return raw.rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that drives the `atos` tool for one (binary, arch) pair."""

    # Shape of a fully symbolized atos answer:
    #   <function> (in <object>) (<file>:<line>)
    # Written as a raw string: the previous plain literal relied on invalid
    # escape sequences ("\(", "\d"), which newer Python versions warn about.
    _ATOS_LINE_RE = re.compile(r"^(.*) \(in (.*)\) \((.*:\d*)\)$")

    def __init__(self, addr, binary, arch):
        """
        Args:
          addr: unused; accepted for interface compatibility.
          binary: path of the binary atos is started for.
          arch: architecture passed to atos via -arch.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = arch
        self.open_atos()

    def open_atos(self):
        """Start atos behind a pty so that its answers arrive unbuffered."""
        logging.debug("atos -o %s -arch %s", self.binary, self.arch)
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for any binary other than the one this instance was
        created for, otherwise a single-element list describing the frame.
        """
        if self.binary != binary:
            return None
        if not os.path.exists(binary):
            # Do not query atos about a missing binary; answer with the raw
            # module/offset information instead.
            return ["{} ({}:{}+{})".format(addr, binary, self.arch, offset)]
        atos_line = self.atos.convert("0x%x" % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        match = self._ATOS_LINE_RE.match(atos_line)
        logging.debug("atos_line: %s", atos_line)
        if match:
            function_name = match.group(1)
            file_name = fix_filename(match.group(3))
            return ["%s in %s %s" % (addr, function_name, file_name)]
        else:
            # Not in the expected shape: pass the answer through verbatim.
            return ["%s in %s" % (addr, atos_line)]
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first answer."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Falsy entries of the chain (e.g. None) are skipped. Returns the
        first truthy result, or None if no symbolizer produced one.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            answer = candidate.symbolize(addr, binary, offset)
            if answer:
                return answer
        return None

    def append_symbolizer(self, symbolizer):
        """Add `symbolizer` at the end of the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary`, or None if unavailable.

    The symbol file name is `binary` followed by the value of the
    BREAKPAD_SUFFIX environment variable. None is returned when the variable
    is unset/empty or the resulting file does not exist.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
def SystemSymbolizerFactory(system, addr, binary, arch):
    """Create the platform's native symbolizer for `binary`.

    Returns a DarwinSymbolizer on "Darwin", an Addr2LineSymbolizer on
    "Linux", "FreeBSD", "NetBSD" and "SunOS", and None for any other system.
    """
    if system == "Darwin":
        return DarwinSymbolizer(addr, binary, arch)
    if system in ("Linux", "FreeBSD", "NetBSD", "SunOS"):
        return Addr2LineSymbolizer(binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer backed by a Breakpad text symbol file."""

    # Record types that carry nothing this symbolizer uses. Without this
    # list, INFO / INLINE / INLINE_ORIGIN records would be mistaken for line
    # records and abort parsing with a ValueError.
    _IGNORED_RECORDS = frozenset(["CFI", "STACK", "INFO", "INLINE", "INLINE_ORIGIN"])

    def __init__(self, filename):
        """Load and index the symbol file `filename`.

        The first line must be the MODULE record; its third, fourth and
        remaining fields become `arch`, `debug_id` and `binary`.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # `file()` does not exist on Python 3; `open` in a `with` block also
        # makes sure the handle is closed.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE <os> <arch> <debug id> <name...>
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = " ".join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the FILE, PUBLIC, FUNC and line records found in `lines`."""
        cur_function_addr = ""
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            record = fragments[0]
            if record == "FILE":
                # FILE <number> <name...>; numbers are expected in order.
                assert int(fragments[1]) == len(self.files)
                self.files.append(" ".join(fragments[2:]))
            elif record == "PUBLIC":
                self.symbols[int(fragments[1], 16)] = " ".join(fragments[3:])
            elif record in self._IGNORED_RECORDS:
                pass
            elif record == "FUNC":
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name recorded earlier for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = " ".join(fragments[4:])
            else:
                # Line record: <address> <size> <line> <file number>,
                # attributed to the most recent FUNC record.
                addr = int(record, 16)
                self.address_list.append(addr)
                self.addresses[addr] = (
                    cur_function_addr,
                    int(fragments[1], 16),
                    int(fragments[2]),
                    int(fragments[3]),
                )
        # get_sym_file_line() bisects this list, so it must be sorted.
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line_no) covering `addr`, or None."""
        key = None
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts below `addr`.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            else:
                key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None unless `binary` equals the module name from the symbol
        file and `offset` (hex string) falls inside a known line record.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ["%s in %s %s:%d" % (addr, function_name, file_name, line_no)]
            # Logged instead of printed: stdout carries the symbolized log.
            logging.debug("%s", result)
            return result
        else:
            return None
class SymbolizationLoop(object):
    """Reads a sanitizer log line by line and symbolizes its stack frames."""

    # Shape of a stack frame line, e.g.
    #   #0 0x7f6e35cf2e45 in foo (/blah/foo.so+0x11fe45)
    # Groups: frame prefix, frame number, address, binary, offset.
    # Written as a raw string: the previous plain literal relied on invalid
    # escape sequences ("\(", "\+"), which newer Python versions warn about.
    _STACK_TRACE_LINE_RE = re.compile(
        r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
    )

    def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
        """
        Args:
          plugin_proxy: optional AsanSymbolizerPlugInProxy consulted to
            filter binary paths. May be None (no plug-ins).
          dsym_hint_producer: optional callable mapping a binary path to an
            iterable of .dSYM hints (only consulted on Darwin).
        Raises:
          Exception: on an unsupported (non-Windows) system.
        """
        self.plugin_proxy = plugin_proxy
        if sys.platform == "win32":
            # Lines are passed through unchanged on Windows.
            self.process_line = self.process_line_echo
        else:
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ["Linux", "Darwin", "FreeBSD", "NetBSD", "SunOS"]:
                raise Exception("Unknown system")
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set([])
            self.frame_no = 0
            self.process_line = self.process_line_posix
            # `plugin_proxy` defaults to None; without a proxy there can be
            # no module map plug-in.
            self.using_module_map = (
                plugin_proxy is not None
                and plugin_proxy.has_plugin(ModuleMapPlugIn.get_name())
            )

    def symbolize_address(self, addr, binary, offset, arch):
        """Symbolize one frame, falling back to the system symbolizer.

        Unless `force_system_symbolizer` is set, the chain
        Breakpad -> llvm-symbolizer is tried first. One llvm-symbolizer is
        shared between binaries; on Darwin with a hint producer a new one is
        started whenever a binary contributes .dSYM hints not seen before.

        Returns:
          non-empty list of symbolized frame strings.
        Raises:
          Exception: if nothing was found and `allow_system_symbolizer` is
            false.
        """
        result = None
        if not force_system_symbolizer:
            if binary not in self.llvm_symbolizers:
                use_new_symbolizer = True
                if self.system == "Darwin" and self.dsym_hint_producer:
                    dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                    use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
                    self.dsym_hints |= dsym_hints_for_binary
                if self.last_llvm_symbolizer and not use_new_symbolizer:
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
                else:
                    self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                        self.system, arch, self.dsym_hints
                    )
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            if binary not in symbolizers:
                symbolizers[binary] = ChainSymbolizer(
                    [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]]
                )
            result = symbolizers[binary].symbolize(addr, binary, offset)
        else:
            symbolizers[binary] = ChainSymbolizer([])
        if result is None:
            if not allow_system_symbolizer:
                raise Exception("Failed to launch or use llvm-symbolizer.")
            # The system symbolizer is created only once the others failed.
            # NOTE(review): a new one is appended on every fallback for this
            # binary; kept as-is to preserve behaviour.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary, arch)
            )
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # At this point some symbolizer must have produced a result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines, inc_frame_counter=True):
        """Format symbolized frames as numbered output lines.

        With no frames the current input line is returned unchanged (and the
        frame counter advanced if `inc_frame_counter`). Otherwise every
        frame gets the next frame number.
        """
        if not symbolized_lines:
            if inc_frame_counter:
                self.frame_no += 1
            return [self.current_line]
        else:
            assert inc_frame_counter
            result = []
            for symbolized_frame in symbolized_lines:
                result.append(
                    " #%s %s" % (str(self.frame_no), symbolized_frame.rstrip())
                )
                self.frame_no += 1
            return result

    def process_logfile(self):
        """Symbolize every line of the module-level `logfile` to stdout."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print("\n".join(processed))

    def process_line_echo(self, line):
        """Return `line` unchanged (minus trailing whitespace)."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize `line` if it is a stack frame, else pass it through.

        Returns:
          list of output lines replacing `line`.
        """
        self.current_line = line.rstrip()
        match = self._STACK_TRACE_LINE_RE.match(line)
        if not match:
            logging.debug('Line "{}" does not match regex'.format(line))
            # Not a frame line: echo it and leave the frame counter alone.
            return self.get_symbolized_lines(None, inc_frame_counter=False)
        logging.debug(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if not self.using_module_map and not os.path.isabs(binary):
            # Without a module map only absolute binary paths are symbolized.
            return self.get_symbolized_lines(None)
        # The binary may carry an architecture suffix: "<path>:<arch>".
        arch = ""
        colon_pos = binary.rfind(":")
        if colon_pos != -1:
            maybe_arch = binary[colon_pos + 1 :]
            if is_valid_arch(maybe_arch):
                arch = maybe_arch
                binary = binary[0:colon_pos]
        if arch == "":
            arch = guess_arch(addr)
        if frameno_str == "0":
            # A frame #0 starts a new stack trace: restart the numbering.
            self.frame_no = 0
        original_binary = binary
        if self.plugin_proxy is not None:
            # Let plug-ins rewrite the path or veto symbolization (None).
            binary = self.plugin_proxy.filter_binary_path(binary)
        if binary is None:
            logging.debug('Skipping symbolication of binary "%s"', original_binary)
            return self.get_symbolized_lines(None)
        symbolized_line = self.symbolize_address(addr, binary, offset, arch)
        if not symbolized_line:
            if original_binary != binary:
                # Retry with the path as it appeared in the log.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset, arch
                )
        return self.get_symbolized_lines(symbolized_line)
class AsanSymbolizerPlugInProxy(object):
    """
    Owner of all loaded plug-ins:
    - Controls their lifetime (use it in a `with` statement).
    - Is the single entry point the script uses to call into plug-ins.
    """

    def __init__(self):
        self._plugins = []
        self._plugin_names = set()

    def _load_plugin_from_file_impl_py_gt_2(self, file_path, globals_space):
        # Execute the plug-in source with `globals_space` as its globals.
        with open(file_path, "r") as plugin_source:
            exec(plugin_source.read(), globals_space, None)

    def load_plugin_from_file(self, file_path):
        """
        Execute the Python file at `file_path`. The file sees a copy of this
        module's globals plus a `register_plugin(plugin)` function that adds
        `plugin` to this proxy.
        """
        logging.info('Loading plugins from "{}"'.format(file_path))

        def register_plugin(plugin):
            logging.info("Registering plugin %s", plugin.get_name())
            self.add_plugin(plugin)

        globals_space = dict(globals())
        globals_space["register_plugin"] = register_plugin
        if sys.version_info.major >= 3:
            self._load_plugin_from_file_impl_py_gt_2(file_path, globals_space)
        else:
            execfile(file_path, globals_space, None)

    def add_plugin(self, plugin):
        """Start managing `plugin` and hand it a reference to this proxy."""
        assert isinstance(plugin, AsanSymbolizerPlugIn)
        self._plugins.append(plugin)
        self._plugin_names.add(plugin.get_name())
        plugin._receive_proxy(self)

    def remove_plugin(self, plugin):
        """Stop managing `plugin` and call its `destroy()` hook."""
        assert isinstance(plugin, AsanSymbolizerPlugIn)
        self._plugins.remove(plugin)
        self._plugin_names.remove(plugin.get_name())
        logging.debug("Removing plugin %s", plugin.get_name())
        plugin.destroy()

    def has_plugin(self, name):
        """
        Tell whether a plug-in called `name` is currently managed by this
        proxy.
        """
        return name in self._plugin_names

    def register_cmdline_args(self, parser):
        """Let every plug-in add its options to `parser`."""
        for plugin in tuple(self._plugins):
            plugin.register_cmdline_args(parser)

    def process_cmdline_args(self, pargs):
        """
        Pass the parsed arguments to every plug-in; plug-ins answering
        `False` are removed.
        """
        # Iterate over a snapshot: remove_plugin() mutates self._plugins.
        for plugin in tuple(self._plugins):
            keep = plugin.process_cmdline_args(pargs)
            assert isinstance(keep, bool)
            if keep:
                continue
            self.remove_plugin(plugin)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Give every remaining plug-in the chance to clean up; exceptions
        # from the `with` body are not suppressed.
        for plugin in self._plugins:
            plugin.destroy()
        return False

    def _filter_single_value(self, function_name, input_value):
        """
        Thread `input_value` through the hook `function_name` of every
        plug-in in order; stops and returns `None` as soon as one hook
        returns `None`.
        """
        value = input_value
        for plugin in self._plugins:
            hook = getattr(plugin, function_name)
            value = hook(value)
            if value is None:
                return None
        return value

    def filter_binary_path(self, binary_path):
        """
        Ask the plug-ins for the path that should be used to symbolicate
        `binary_path`.
        A result of `None` means the binary should not be symbolicated.
        """
        return self._filter_single_value("filter_binary_path", binary_path)

    def filter_module_desc(self, module_desc):
        """
        Ask the plug-ins for the module description that should be used for
        symbolication.
        A result of `None` means the module should not be symbolicated.
        """
        assert isinstance(module_desc, ModuleDesc)
        return self._filter_single_value("filter_module_desc", module_desc)
class AsanSymbolizerPlugIn(object):
    """
    Base class of all plug-ins; these are the hooks through which the
    `asan_symbolize.py` code communicates with a plug-in.
    """

    @classmethod
    def get_name(cls):
        """
        Name of the plug-in (the class name).
        """
        return cls.__name__

    def _receive_proxy(self, proxy):
        # Remember the proxy managing this plug-in.
        assert isinstance(proxy, AsanSymbolizerPlugInProxy)
        self.proxy = proxy

    def register_cmdline_args(self, parser):
        """
        Hook to add command line options that are later consumed by
        `process_cmdline_args()`.
        `parser` - the `argparse.ArgumentParser` instance in use.
        """
        pass

    def process_cmdline_args(self, pargs):
        """
        Hook receiving the parsed command line. `pargs` must not be
        modified by implementations.
        `pargs` - `argparse.Namespace` instance holding the parsed
                  command line arguments.
        Return `True` to keep the plug-in active and `False` to have it
        removed.
        """
        return True

    def destroy(self):
        """
        Hook invoked right before the plug-in is discarded; release any
        resources held by the plug-in here.
        """
        pass

    def filter_binary_path(self, binary_path):
        """
        Map a binary path to the path that should be used for
        symbolication. Return `None` to skip symbolication of the binary.
        """
        return binary_path

    def filter_module_desc(self, module_desc):
        """
        Map a ModuleDesc object (`module_desc`) to the ModuleDesc that
        should be used for symbolication. Return `None` to skip
        symbolication of the module.
        """
        return module_desc
class ModuleDesc(object):
    """Description of one loaded module (one entry of a module map)."""

    def __init__(self, name, arch, start_addr, end_addr, module_path, uuid):
        self.name = name
        self.arch = arch
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.module_path = module_path
        # Starts out identical to `module_path`.
        self.module_path_for_symbolization = module_path
        self.uuid = uuid
        assert self.is_valid()

    def __str__(self):
        assert self.is_valid()
        if self.module_path == self.module_path_for_symbolization:
            shown_path = self.module_path
        else:
            shown_path = "{} ({})".format(
                self.module_path_for_symbolization, self.module_path
            )
        return "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}".format(
            name=self.name,
            arch=self.arch,
            start_addr=self.start_addr,
            end_addr=self.end_addr,
            module_path=shown_path,
            uuid=self.uuid,
        )

    def is_valid(self):
        """Check field types, the address range and that paths are absolute."""
        return (
            isinstance(self.name, str)
            and isinstance(self.arch, str)
            and isinstance(self.start_addr, int)
            and self.start_addr >= 0
            and isinstance(self.end_addr, int)
            and self.end_addr > self.start_addr
            and isinstance(self.module_path, str)
            and os.path.isabs(self.module_path)
            and isinstance(self.module_path_for_symbolization, str)
            and os.path.isabs(self.module_path_for_symbolization)
            and isinstance(self.uuid, str)
        )
class GetUUIDFromBinaryException(Exception):
    """Raised when the UUID of a binary cannot be determined."""

    def __init__(self, msg):
        Exception.__init__(self, msg)
# Memoizes get_uuid_from_binary() results, keyed by (path_to_binary, arch).
_get_uuid_from_binary_cache = dict()
def get_uuid_from_binary(path_to_binary, arch=None):
    """Return the LC_UUID of a binary as reported by `otool -l`.

    Successful lookups are cached per (path_to_binary, arch).

    Args:
      path_to_binary: path of the binary to inspect.
      arch: optional architecture passed to otool via -arch.
    Returns:
      the UUID string.
    Raises:
      GetUUIDFromBinaryException: if the binary does not exist, the otool
        output is malformed/truncated, or it contains no LC_UUID command.
    """
    cache_key = (path_to_binary, arch)
    cached_value = _get_uuid_from_binary_cache.get(cache_key)
    if cached_value:
        return cached_value
    if not os.path.exists(path_to_binary):
        raise GetUUIDFromBinaryException(
            'Binary "{}" does not exist'.format(path_to_binary)
        )
    cmd = ["/usr/bin/otool", "-l"]
    if arch:
        cmd.extend(["-arch", arch])
    cmd.append(path_to_binary)
    output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    if isinstance(output, str):
        output_str = output
    else:
        assert isinstance(output, bytes)
        output_str = output.decode()
    assert isinstance(output_str, str)
    lines = output_str.split("\n")
    uuid = None
    for index, line in enumerate(lines):
        stripped_line = line.strip()
        if not stripped_line.startswith("cmd LC_UUID"):
            continue
        # The uuid is expected two lines below the "cmd LC_UUID" line.
        # Truncated output must surface as the exception type callers
        # handle, not as an IndexError.
        if index + 2 >= len(lines):
            raise GetUUIDFromBinaryException(
                'Malformed output: "{}"'.format(stripped_line)
            )
        uuid_line = lines[index + 2].strip()
        if not uuid_line.startswith("uuid"):
            raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line))
        split_uuid_line = uuid_line.split()
        if len(split_uuid_line) < 2:
            raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line))
        uuid = split_uuid_line[1]
        break
    if uuid is None:
        logging.error("Failed to retrieve UUID from binary {}".format(path_to_binary))
        logging.error("otool output was:\n{}".format(output_str))
        raise GetUUIDFromBinaryException(
            'Failed to retrieve UUID from binary "{}"'.format(path_to_binary)
        )
    else:
        _get_uuid_from_binary_cache[cache_key] = uuid
    return uuid
class ModuleMap(object):
    """Collection of ModuleDesc objects indexed by module name."""

    def __init__(self):
        self._module_name_to_description_map = dict()

    def add_module(self, desc):
        """Register `desc`; its name must not be registered already."""
        assert isinstance(desc, ModuleDesc)
        assert desc.name not in self._module_name_to_description_map
        self._module_name_to_description_map[desc.name] = desc

    def find_module_by_name(self, name):
        """Return the description registered under `name`, or None."""
        return self._module_name_to_description_map.get(name, None)

    def __str__(self):
        # Header line followed by one line per module, ordered by start
        # address.
        by_start_addr = sorted(
            self._module_name_to_description_map.values(), key=lambda v: v.start_addr
        )
        header = "{} modules:\n".format(self.num_modules)
        return header + "".join(str(desc) + "\n" for desc in by_start_addr)

    @property
    def num_modules(self):
        return len(self._module_name_to_description_map)

    @property
    def modules(self):
        return set(self._module_name_to_description_map.values())

    def get_module_path_for_symbolication(self, module_name, proxy, validate_uuid):
        """Return the path to use for symbolicating `module_name`, or None.

        None is returned when the module is unknown, when a plug-in (via
        `proxy.filter_module_desc`) rejects it, or when `validate_uuid` is
        set and the UUID of the binary cannot be read or does not match.
        """
        module_desc = self.find_module_by_name(module_name)
        if module_desc is None:
            return None
        module_desc = proxy.filter_module_desc(module_desc)
        if module_desc is None:
            return None
        path = module_desc.module_path_for_symbolization
        if not validate_uuid:
            logging.warning("Skipping validation of UUID of {}".format(path))
            return module_desc.module_path_for_symbolization
        logging.debug("Validating UUID of {}".format(path))
        try:
            uuid = get_uuid_from_binary(
                module_desc.module_path_for_symbolization, arch=module_desc.arch
            )
            if uuid != module_desc.uuid:
                logging.warning(
                    "Detected UUID mismatch {} != {}".format(uuid, module_desc.uuid)
                )
                return None
        except GetUUIDFromBinaryException as e:
            logging.error("Failed to get binary from UUID: %s", str(e))
            return None
        return module_desc.module_path_for_symbolization

    @staticmethod
    def parse_from_file(module_map_path):
        """Parse the first module map found in the file `module_map_path`.

        The map starts after a line beginning with "Process module map:"
        and ends at a line beginning with "End of module map". Each line in
        between has the form
          0x<start>-0x<end> <path> (<arch>) <UUID>

        Returns:
          the parsed ModuleMap, or None if the file contains no map.
        Raises:
          Exception: if the file does not exist or a line inside the map
            cannot be parsed.
        """
        if not os.path.exists(module_map_path):
            raise Exception('module map "{}" does not exist'.format(module_map_path))

        def hex_regex(name):
            return r"0x(?P<" + name + r">[0-9a-f]+)"

        with open(module_map_path, "r") as f:
            mm = None
            module_path_regex = r"(?P<path>.+)"
            arch_regex = r"\((?P<arch>.+)\)"
            uuid_regex = r"<(?P<uuid>[0-9A-Z-]+)>"
            line_regex = r"^{}-{}\s+{}\s+{}\s+{}".format(
                hex_regex("start_addr"),
                hex_regex("end_addr"),
                module_path_regex,
                arch_regex,
                uuid_regex,
            )
            matcher = re.compile(line_regex)
            line_num = 0
            line = "dummy"
            # readline() yields "" at end of file, which ends the loop.
            while line != "":
                line = f.readline()
                line_num += 1
                if mm is None:
                    # Still searching for the start marker.
                    if line.startswith("Process module map:"):
                        mm = ModuleMap()
                    continue
                if line.startswith("End of module map"):
                    break
                m_obj = matcher.match(line)
                if not m_obj:
                    raise Exception(
                        'Failed to parse line {} "{}"'.format(line_num, line)
                    )
                fields = m_obj.groupdict()
                path = fields["path"]
                mm.add_module(
                    ModuleDesc(
                        name=os.path.basename(path),
                        arch=fields["arch"],
                        start_addr=int(fields["start_addr"], base=16),
                        end_addr=int(fields["end_addr"], base=16),
                        module_path=path,
                        uuid=fields["uuid"],
                    )
                )
            if mm is not None:
                logging.debug(
                    'Loaded Module map from "{}":\n{}'.format(f.name, str(mm))
                )
            return mm
class SysRootFilterPlugIn(AsanSymbolizerPlugIn):
    """
    Minimal plug-in that prepends the sys root given with `-s` to every
    binary path used for symbolication.
    """

    def __init__(self):
        self.sysroot_path = ""

    def register_cmdline_args(self, parser):
        parser.add_argument(
            "-s",
            dest="sys_root",
            metavar="SYSROOT",
            help="set path to sysroot for sanitized binaries",
        )

    def process_cmdline_args(self, pargs):
        # Stay active only when a sys root was actually given.
        sys_root = pargs.sys_root
        if sys_root is None:
            return False
        self.sysroot_path = sys_root
        return True

    def filter_binary_path(self, path):
        return "".join((self.sysroot_path, path))
class ModuleMapPlugIn(AsanSymbolizerPlugIn):
    """
    Plug-in that resolves binary paths through a module map file given
    with `--module-map`.
    """

    def __init__(self):
        self._module_map = None
        self._uuid_validation = True

    def register_cmdline_args(self, parser):
        parser.add_argument(
            "--module-map",
            # The first fragment ends with a space so that the concatenated
            # help text does not read "module mapoutput".
            help="Path to text file containing module map "
            "output. See print_module_map ASan option.",
        )
        parser.add_argument(
            "--skip-uuid-validation",
            default=False,
            action="store_true",
            help="Skips validating UUID of modules using otool.",
        )

    def process_cmdline_args(self, pargs):
        """
        Load the module map named by `pargs.module_map`.
        Returns `False` (plug-in unused) if no module map was given.
        Raises `Exception` if the file contains no module map.
        """
        if not pargs.module_map:
            return False
        # Read the path from `pargs`, not from a module-level `args`, which
        # only exists when this file is run as a script.
        self._module_map = ModuleMap.parse_from_file(pargs.module_map)
        if self._module_map is None:
            msg = "Failed to find module map"
            logging.error(msg)
            raise Exception(msg)
        self._uuid_validation = not pargs.skip_uuid_validation
        return True

    def filter_binary_path(self, binary_path):
        """
        Look the binary up in the module map by name (the basename for
        absolute paths, otherwise the path as given) and return the path to
        use for symbolication, or `None` to skip it.
        """
        if os.path.isabs(binary_path):
            module_name = os.path.basename(binary_path)
        else:
            module_name = binary_path
        return self._module_map.get_module_path_for_symbolication(
            module_name, self.proxy, self._uuid_validation
        )
def add_logging_args(parser):
    """Add the `--log-dest` and `--log-level` options to `parser`."""
    level_names = ["debug", "info", "warning", "error", "critical"]
    dest_help = "Destination path for script logging (default stderr)."
    level_help = "Log level for script (default: %(default)s)."
    parser.add_argument("--log-dest", default=None, help=dest_help)
    parser.add_argument(
        "--log-level", choices=level_names, default="info", help=level_help
    )
def setup_logging():
    """Configure `logging` from the process command line.

    Only the logging options are interpreted here; they are parsed from
    `sys.argv`.

    Returns:
      the list of command line arguments that were not consumed.
    """
    early_parser = argparse.ArgumentParser(add_help=False)
    add_logging_args(early_parser)
    pargs, unparsed_args = early_parser.parse_known_args()
    log_level = getattr(logging, pargs.log_level.upper())
    # Debug output additionally shows where each message comes from.
    log_format = (
        "%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s"
        if log_level == logging.DEBUG
        else "%(levelname)s: %(message)s"
    )
    log_dest = pargs.log_dest
    basic_config = dict(level=log_level, format=log_format)
    if log_dest:
        basic_config["filename"] = log_dest
    logging.basicConfig(**basic_config)
    shown_dest = "stderr" if log_dest is None else log_dest
    logging.debug(
        'Logging level set to "{}" and directing output to "{}"'.format(
            pargs.log_level, shown_dest
        )
    )
    return unparsed_args
def add_load_plugin_args(parser):
    """Add the `-p/--plugins` option (one or more plug-in files) to `parser`."""
    parser.add_argument(
        "-p",
        "--plugins",
        help="Load plug-in",
        nargs="+",
        default=[],
    )
def setup_plugins(plugin_proxy, args):
    """Load the requested plug-in files and register the built-in plug-ins.

    The `-p/--plugins` option is parsed from `sys.argv`; the `args`
    parameter is accepted but not used.

    Args:
      plugin_proxy: AsanSymbolizerPlugInProxy that receives the plug-ins.
      args: unused.
    Returns:
      the list of command line arguments that were not consumed.
    """
    plugin_parser = argparse.ArgumentParser(add_help=False)
    add_load_plugin_args(plugin_parser)
    pargs, unparsed_args = plugin_parser.parse_known_args()
    for plugin_path in pargs.plugins:
        plugin_proxy.load_plugin_from_file(plugin_path)
    # Built-in plug-ins are added after the user supplied ones.
    for builtin_plugin in (ModuleMapPlugIn, SysRootFilterPlugIn):
        plugin_proxy.add_plugin(builtin_plugin())
    return unparsed_args
if __name__ == "__main__":
    # Logging and plug-ins are set up first so that plug-ins can contribute
    # their own options to the main parser below.
    remaining_args = setup_logging()
    with AsanSymbolizerPlugInProxy() as plugin_proxy:
        remaining_args = setup_plugins(plugin_proxy, remaining_args)
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="ASan symbolization script",
            epilog=__doc__,
        )
        parser.add_argument(
            "path_to_cut",
            nargs="*",
            help="pattern to be cut from the result file path ",
        )
        parser.add_argument(
            "-d", "--demangle", action="store_true", help="demangle function names"
        )
        parser.add_argument(
            "-c", metavar="CROSS_COMPILE", help="set prefix for binutils"
        )
        parser.add_argument(
            "-l",
            "--logfile",
            default=sys.stdin,
            type=argparse.FileType("r"),
            help="set log file name to parse, default is stdin",
        )
        parser.add_argument(
            "--force-system-symbolizer",
            action="store_true",
            help="don't use llvm-symbolizer",
        )
        # Registered again so that these options show up in --help and are
        # accepted if still present in `remaining_args`.
        add_logging_args(parser)
        add_load_plugin_args(parser)
        plugin_proxy.register_cmdline_args(parser)
        args = parser.parse_args(remaining_args)
        plugin_proxy.process_cmdline_args(args)
        # Copy the parsed options into the module-level settings.
        if args.path_to_cut:
            fix_filename_patterns = args.path_to_cut
        if args.demangle:
            demangle = True
        if args.c:
            binutils_prefix = args.c
        logfile = args.logfile or sys.stdin
        if args.force_system_symbolizer:
            force_system_symbolizer = True
        # Forcing the system symbolizer requires it to be allowed.
        assert allow_system_symbolizer or not force_system_symbolizer
        loop = SymbolizationLoop(plugin_proxy)
        loop.process_logfile()