"""Dumps the names, addresses, and disassembly of static initializers.
Usage example:
tools/linux/dump-static-initializers.py out/Release/chrome
For an explanation of static initializers, see: //docs/static_initializers.md.
"""
import argparse
import json
import os
import pathlib
import subprocess
import sys
# Common path prefix of the LLVM tools used below; a tool name such as 'nm',
# 'objdump', 'readobj' or 'symbolizer' is appended to form the executable path.
# The directory is located two levels above the directory holding this script.
_TOOLCHAIN_PREFIX = str(
    pathlib.Path(__file__).parents[2].joinpath('third_party', 'llvm-build',
                                               'Release+Asserts', 'bin',
                                               'llvm-'))

# Above this many initializers, main() skips disassembly entirely.
_MAX_DISASSEMBLY_SYMBOLS = 10
def _ParseNm(binary, addresses):
  """Looks up symbol sizes for the given addresses via `llvm-nm --print-size`.

  Args:
    binary: Path of the binary handed to nm.
    addresses: Iterable of integer symbol addresses to look up.

  Returns:
    A dict mapping each requested address that appears in nm's output to the
    integer parsed from that line's second column. Requested addresses that
    nm does not report on a four-column line are absent from the dict.
  """
  nm_cmd = [_TOOLCHAIN_PREFIX + 'nm', '--print-size', binary]
  nm_output = subprocess.check_output(nm_cmd, encoding='utf8')
  wanted = set(addresses)
  size_by_address = {}
  for nm_line in nm_output.splitlines():
    fields = nm_line.split()
    # Only four-column lines are considered: column one is parsed as the
    # address and column two as the size (both hexadecimal).
    if len(fields) == 4:
      symbol_address = int(fields[0], 16)
      if symbol_address in wanted:
        size_by_address[symbol_address] = int(fields[1], 16)
  return size_by_address
def _Disassemble(binary, start, end):
  """Runs llvm-objdump over an address range and returns annotated text.

  Args:
    binary: Path of the binary handed to objdump.
    start: Integer passed to objdump as --start-address.
    end: Integer passed to objdump as --stop-address.

  Returns:
    A single string: a one-line header, an optional verbosity warning, then
    either only the output lines beginning with ';' (treated as source lines)
    or, when there are none, the complete objdump output.
  """
  objdump_cmd = [
      _TOOLCHAIN_PREFIX + 'objdump',
      binary,
      '--disassemble',
      '--source',
      '--demangle',
      '--start-address=0x%x' % start,
      '--stop-address=0x%x' % end,
  ]
  objdump_output = subprocess.check_output(objdump_cmd, encoding='utf8')
  every_line = objdump_output.splitlines(keepends=True)
  from_source = [text for text in every_line if text.startswith(';')]

  if from_source:
    header = 'Showing source lines that appear in the symbol (via objdump).\n'
    shown = from_source
  else:
    header = (
        'Symbol missing source lines. Showing raw disassembly (via objdump).\n'
    )
    shown = every_line

  pieces = [header]
  # Warn the reader when the listing is longer than ten lines.
  if len(shown) > 10:
    pieces.append('This might be verbose due to inlined functions.\n')
  pieces.extend(shown)
  return ''.join(pieces)
def _DumpInitArray(binary):
  """Decodes the pointers stored in the binary's .init_array section.

  Runs `llvm-readobj --hex-dump=.init_array` and parses its text output. The
  header lines are used to detect byte order ("Format:" containing "big"),
  a 32-bit ARM target ("Arch: arm") and 64-bit pointers
  ("AddressSize: 64bit"); every line starting with "0x" is treated as a row
  of the hex dump.

  Args:
    binary: Path of the binary handed to readobj.

  Returns:
    A list of (slot_address, value) integer tuples, one per pointer-sized
    slot, where slot_address is the address of the slot inside .init_array
    and value is the integer stored there. On ARM the lowest bit of value is
    cleared.
  """
  cmd = [_TOOLCHAIN_PREFIX + 'readobj', '--hex-dump=.init_array', binary]
  output = subprocess.check_output(cmd, encoding='utf8')
  is_64_bit = False
  is_arm = False
  byte_order = 'little'
  ret = []
  for line in output.splitlines():
    if line.startswith('Format:') and 'big' in line:
      byte_order = 'big'
      continue
    if line == 'Arch: arm':
      is_arm = True
      continue
    if line == 'AddressSize: 64bit':
      is_64_bit = True
      continue
    if not line.startswith('0x'):
      continue
    # The address column is padded to a minimum width only, so it becomes
    # wider for addresses that do not fit in 32 bits. Split on the first
    # space instead of slicing at a fixed offset.
    address_text, _, remainder = line.partition(' ')
    init_array_address = int(address_text, 16)
    # The final 16 characters are dropped as the printable-text column;
    # what remains are up to four 4-byte hex words.
    parts = remainder[:-16].split()
    assert len(parts) <= 4, 'Too many parts: ' + line
    if is_64_bit:
      # Join adjacent 4-byte words into 8-byte pointers.
      parts = [parts[i] + parts[i + 1] for i in range(0, len(parts), 2)]
    arrays = (bytearray.fromhex(p) for p in parts)
    for a in arrays:
      address = int.from_bytes(a, byteorder=byte_order, signed=False)
      if is_arm:
        # Clear bit 0 (presumably the Thumb-mode marker on function pointers).
        address = address & ~1
      ret.append((init_array_address, address))
      init_array_address += 8 if is_64_bit else 4
  return ret
def _DumpRelativeRelocations(binary):
  """Collects relative relocations reported by `llvm-readobj --relocations`.

  Args:
    binary: Path of the binary handed to readobj.

  Returns:
    A dict built from every output line containing 'RELATIVE': the key is the
    line's first whitespace-separated field and the value its last field,
    both parsed as hexadecimal integers.
  """
  readobj_cmd = [_TOOLCHAIN_PREFIX + 'readobj', '--relocations', binary]
  readobj_output = subprocess.check_output(readobj_cmd, encoding='utf8')
  target_by_offset = {}
  for reloc_line in readobj_output.splitlines():
    if 'RELATIVE' not in reloc_line:
      continue
    fields = reloc_line.split()
    offset_text, target_text = fields[0], fields[-1]
    target_by_offset[int(offset_text, 16)] = int(target_text, 16)
  return target_by_offset
def _ResolveRelativeAddresses(binary, address_tuples):
  """Replaces zero-valued entries with their relocation targets.

  Args:
    binary: Path of the binary; only used if a relocation lookup is needed.
    address_tuples: Iterable of (slot_address, value) integer pairs.

  Returns:
    A list with one address per input pair: the pair's value when it is
    non-zero, otherwise the relocation entry recorded for slot_address.

  Raises:
    Exception: If a zero-valued slot has no matching relocation.
  """
  # Loaded on first use so that binaries without zero slots skip the lookup.
  relocation_targets = None
  resolved = []
  for slot_address, target in address_tuples:
    if target != 0:
      resolved.append(target)
      continue
    if relocation_targets is None:
      relocation_targets = _DumpRelativeRelocations(binary)
    target = relocation_targets.get(slot_address)
    if target is None:
      raise Exception('Failed to resolve relocation for address: ' +
                      hex(slot_address))
    resolved.append(target)
  return resolved
def _SymbolizeAddresses(binary, addresses):
  """Maps addresses to source locations and function names.

  Invokes llvm-symbolizer with JSON output and reads, for every returned
  entry, its 'Address' and its list of 'Symbol' frames.

  Args:
    binary: Path of the binary passed to the symbolizer via -e.
    addresses: Sequence of integer addresses to symbolize.

  Returns:
    A dict of address -> (location, function_name). function_name comes from
    the last frame in the entry. location is the 'FileName' of the last frame
    whose 'Line' is non-zero (falling back to the last frame), with ':<line>'
    appended when the line is non-zero. Empty input yields an empty dict
    without running the symbolizer.
  """
  if not addresses:
    return {}

  symbolizer_cmd = [
      _TOOLCHAIN_PREFIX + 'symbolizer', '-e', binary, '--functions',
      '--output-style=JSON'
  ]
  symbolizer_cmd.extend(hex(a) for a in addresses)
  raw_json = subprocess.check_output(symbolizer_cmd, encoding='utf8')

  result = {}
  for record in json.loads(raw_json):
    frames = record['Symbol']
    last_frame = frames[-1]
    # Walk the frames from last to first and keep the first one that carries
    # a line number; default to the last frame when none does.
    located_frame = last_frame
    for frame in reversed(frames):
      if frame['Line'] != 0:
        located_frame = frame
        break
    address = int(record['Address'], 16)
    location = located_frame['FileName']
    line_number = located_frame['Line']
    if line_number:
      location += f':{line_number}'
    result[address] = (location, last_frame['FunctionName'])
  return result
def main():
  """Finds the static initializers of a binary and prints them.

  Reads the binary path (and optional --json flag) from the command line,
  collects the .init_array entries, resolves and symbolizes them, and prints
  one record per entry. Disassembly is included only when the number of
  entries does not exceed _MAX_DISASSEMBLY_SYMBOLS; otherwise a notice is
  written to stderr and the disassembly field is left empty.

  Output is either a JSON object {'entries': [...]} (with --json) or a
  human-readable listing followed by a summary line.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--json',
                      action='store_true',
                      help='Output in JSON format')
  parser.add_argument('binary', help='The non-stripped binary to analyze.')
  args = parser.parse_args()

  address_tuples = _DumpInitArray(args.binary)
  addresses = _ResolveRelativeAddresses(args.binary, address_tuples)
  symbolized_by_address = _SymbolizeAddresses(args.binary, addresses)

  skip_disassembly = len(addresses) > _MAX_DISASSEMBLY_SYMBOLS
  if skip_disassembly:
    sys.stderr.write('Not collecting disassembly due to the large number of '
                     'results.\n')
  else:
    # Symbol sizes are only needed to bound the disassembly range.
    size_by_address = _ParseNm(args.binary, addresses)

  entries = []
  for address in addresses:
    filename, symbol_name = symbolized_by_address[address]
    if skip_disassembly:
      disassembly = ''
    else:
      size = size_by_address.get(address, 0)
      if size == 0:
        disassembly = ('Not showing disassembly because of unknown symbol size '
                       '(assembly symbols sometimes omit size).\n')
      else:
        disassembly = _Disassemble(args.binary, address, address + size)
    entries.append({
        'address': address,
        'disassembly': disassembly,
        'filename': filename,
        'symbol_name': symbol_name,
    })

  if args.json:
    print(json.dumps({'entries': entries}))
    return

  for e in entries:
    print(f'# 0x{e["address"]:x} {e["filename"]} {e["symbol_name"]}')
    print(e['disassembly'])
  print(f'Found {len(entries)} files containing static initializers.')
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()