"""Runs nm on the specified .a and .o files, plus some analysis.

CollectAliasesByAddress():
  Runs nm on the elf to collect all symbol names. This reveals the symbol
  names of identical-code-folded functions.

CollectAliasesByAddressAsync():
  Runs CollectAliasesByAddress in a subprocess and returns a promise.

RunNmOnIntermediates():
  BulkForkAndCall() target: Runs nm on a .a file or a list of .o files, parses
  the output, extracts symbol information, and (if available) extracts string
  offset information.

CreateUniqueSymbols():
  Creates Symbol objects from nm output.
"""
import argparse
import collections
import logging
import os
import subprocess
import demangle
import models
import parallel
import path_util
import readelf
import sys
def _IsRelevantNmName(name):
return name and not name.startswith('$')
def _IsRelevantObjectFileName(name):
if name in ('__tcf_0', 'startup'):
return False
if name.startswith('._') and name[2:].isdigit():
return False
if name.startswith('.L') and name.find('.', 2) != -1:
return False
dot_idx = name.find('.')
if dot_idx == -1:
return True
name = name[:dot_idx]
return name not in ('CSWTCH', 'lock', '__compound_literal', 'table')
def CollectAliasesByAddress(elf_path):
  """Runs nm on |elf_path| and returns a dict of address->[names].

  Only symbols whose nm type letter is 't', 'T' or 'W', whose name passes
  _IsRelevantNmName() and whose address is non-zero are considered. Symbols
  named 'OUTLINED_FUNCTION_*' are not listed individually; each address that
  has any gets a single '** outlined function' entry (suffixed with ' * N'
  when N > 1 of them share the address).

  Returns:
    A dict mapping address (int) to a sorted list of names, with names that
    start with '**' placed last. An address is included only when it has more
    than one name after demangling, or more than one outlined function.
  """
  nm_cmd = [path_util.GetNmPath(), '--no-sort', '--defined-only', elf_path]
  proc = subprocess.Popen(nm_cmd,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.DEVNULL,
                          encoding='utf-8')
  nm_output, _ = proc.communicate()
  assert proc.returncode == 0

  names_by_address = collections.defaultdict(set)
  outlined_counts = collections.Counter()
  for line in nm_output.splitlines():
    # Each line is parsed as "<hex address> <type letter> <name>".
    sep = line.find(' ')
    section = line[sep + 1]
    mangled_name = line[sep + 3:]
    if not (section in 'tTW' and _IsRelevantNmName(mangled_name)):
      continue

    address = int(line[:sep], 16)
    if address == 0:
      continue

    if mangled_name.startswith('OUTLINED_FUNCTION_'):
      outlined_counts[address] += 1
      continue
    names_by_address[address].add(mangled_name)

  # Collapse all outlined functions at an address into one synthetic name.
  for address, count in outlined_counts.items():
    label = '** outlined function'
    if count > 1:
      label += ' * %d' % count
    names_by_address[address].add(label)

  demangle.DemangleSetsInDictsInPlace(names_by_address)

  aliases = {}
  for address, names in names_by_address.items():
    if len(names) > 1 or outlined_counts.get(address, 0) > 1:
      # The (bool, str) key sorts '**'-prefixed names after all others.
      aliases[address] = sorted(names, key=lambda n: (n.startswith('**'), n))
  return aliases
def CreateUniqueSymbols(elf_path, section_ranges):
  """Creates symbols from nm --print-size output.

  Creates only one symbol for each address (does not create symbol aliases).

  Args:
    elf_path: Path of the ELF file that nm is run on.
    section_ranges: Dict of section name -> (start address, size). Only
        entries whose name is in models.NATIVE_SECTIONS are used.

  Returns:
    A list of models.Symbol objects in ascending address order, each with
    section_name assigned from the section range containing its address.
    Symbols that fall outside every used section range are omitted.

  Raises:
    subprocess.CalledProcessError: If nm exits with a non-zero status.
    IndexError: If |section_ranges| contains no native sections.
  """
  # Keep only native sections, ordered by their (start address, size) tuple.
  section_ranges = [
      x for x in section_ranges.items() if x[0] in models.NATIVE_SECTIONS
  ]
  section_ranges.sort(key=lambda x: x[1])
  # Symbols are accepted only within [start of first section, end of last).
  min_address = section_ranges[0][1][0]
  max_address = sum(section_ranges[-1][1])  # start + size of last section.

  args = [
      path_util.GetNmPath(), '--no-sort', '--defined-only', '--print-size',
      elf_path
  ]
  stdout = subprocess.check_output(args,
                                   stderr=subprocess.DEVNULL,
                                   encoding='utf-8')
  lines = stdout.splitlines()
  logging.debug('Parsing %d lines of output', len(lines))
  symbols_by_address = {}
  for line in lines:
    # Lines are parsed as "<address> <size> <type> <name>", or as
    # "<address> <type> <name>" when there are only three tokens.
    tokens = line.split(' ', 3)
    num_tokens = len(tokens)
    if num_tokens < 3:
      continue
    address_str = tokens[0]
    if num_tokens == 3:
      # No size column: treat the size as unknown (0); it may be filled in
      # by the second pass below.
      size_str = '0'
      section = tokens[1]
      mangled_name = tokens[2]
    else:
      size_str = tokens[1]
      section = tokens[2]
      mangled_name = tokens[3]
    # Only these nm type letters are kept.
    if section not in 'BbDdTtRrWw' or not _IsRelevantNmName(mangled_name):
      continue
    address = int(address_str, 16)
    if not min_address <= address < max_address:
      continue
    # If a symbol with a non-zero size is already recorded at this address,
    # keep it; otherwise this line replaces any earlier entry.
    existing_alias = symbols_by_address.get(address)
    if existing_alias and existing_alias.size > 0:
      continue
    size = int(size_str, 16)
    # Normalize some names: '.str.*' becomes the shared string-literal name,
    # and long-thunk prefixes are replaced by a '.LongThunk' suffix.
    if mangled_name.startswith('.str.'):
      mangled_name = models.STRING_LITERAL_NAME
    elif mangled_name.startswith('__ARMV7PILongThunk_'):
      mangled_name = mangled_name[len('__ARMV7PILongThunk_'):] + '.LongThunk'
    elif mangled_name.startswith('__ThumbV7PILongThunk_'):
      mangled_name = mangled_name[len('__ThumbV7PILongThunk_'):] + '.LongThunk'
    # Provisional section derived from the type letter. Other letters
    # (D/d/W/w) stay None here; the second pass assigns the section by address.
    section_name = None
    if section in 'Tt':
      section_name = models.SECTION_TEXT
    elif section in 'Rr':
      section_name = models.SECTION_RODATA
    elif section in 'Bb':
      section_name = models.SECTION_BSS
    symbols_by_address[address] = models.Symbol(section_name,
                                                size,
                                                address=address,
                                                full_name=mangled_name)
  logging.debug('Sorting %d NM symbols', len(symbols_by_address))
  sorted_symbols = sorted(symbols_by_address.values(), key=lambda s: s.address)
  logging.debug('Assigning section_name and filling in missing sizes')
  section_range_iter = iter(section_ranges)
  section_end = -1  # Forces the first symbol to load the first section range.
  raw_symbols = []
  # Most recent kept symbol that had no size, or None. Size-less labels that
  # follow it in the text section are folded into it.
  active_assembly_sym = None
  for i, sym in enumerate(sorted_symbols):
    # Advance to the first section range that ends after this symbol.
    while sym.address >= section_end:
      section_range = next(section_range_iter)
      section_name, (section_start, section_size) = section_range
      section_end = section_start + section_size
    # The symbol lies in a gap before the current section: drop it.
    if sym.address < section_start:
      continue
    if sym.section_name and sym.section_name != section_name:
      logging.warning('Re-assigning section for %r to %s', sym, section_name)
    sym.section_name = section_name
    # Upper bound for this symbol: the next symbol's address or the end of the
    # section, whichever comes first.
    if i + 1 < len(sorted_symbols):
      next_addr = min(section_end, sorted_symbols[i + 1].address)
    else:
      next_addr = section_end
    # A size-less text symbol whose name starts with '.' or is all upper-case
    # is treated as a label inside the preceding size-less symbol: its span is
    # added to that symbol and the label itself is not emitted.
    if (active_assembly_sym and sym.size == 0
        and sym.section_name == models.SECTION_TEXT):
      if sym.full_name.startswith('.') or sym.full_name.isupper():
        active_assembly_sym.size += next_addr - sym.address
        logging.debug('Discarding assembly label: %s', sym.full_name)
        continue
    active_assembly_sym = sym if sym.size == 0 else None
    # Fill in a missing size, and clamp a size that runs past next_addr.
    # NOTE(review): end_address is presumably address + size -- confirm
    # against models.Symbol.
    if active_assembly_sym or sym.end_address > next_addr:
      sym.size = next_addr - sym.address
    raw_symbols.append(sym)
  return raw_symbols
def _CollectAliasesByAddressAsyncHelper(elf_path):
  """Returns CollectAliasesByAddress(elf_path) encoded as a dict of lists.

  Keys (addresses) are converted with str() by parallel.EncodeDictOfLists();
  CollectAliasesByAddressAsync() decodes them back with int().
  """
  return parallel.EncodeDictOfLists(CollectAliasesByAddress(elf_path),
                                    key_transform=str)
def CollectAliasesByAddressAsync(elf_path):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result.

  The helper's encoded output is decoded with parallel.DecodeDictOfLists(),
  converting keys back to int.
  """
  return parallel.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, ),
      decode_func=lambda encoded: parallel.DecodeDictOfLists(
          encoded, key_transform=int))
def _ParseOneObjectFileNmOutput(lines):
symbol_names = set()
string_addresses = []
for line in lines:
if not line:
break
space_idx = line.find(' ')
section = line[space_idx + 1]
mangled_name = line[space_idx + 3:]
if _IsRelevantNmName(mangled_name):
if section == 'r' and (
mangled_name.startswith('.L.str') or
mangled_name.startswith('.L__') and mangled_name.find('.', 3) != -1):
string_addresses.append(line[:space_idx].lstrip('0') or '0')
elif _IsRelevantObjectFileName(mangled_name):
symbol_names.add(mangled_name)
return symbol_names, string_addresses
def RunNmOnIntermediates(target, output_directory):
  """Runs nm on a .a file or a list of .o files and parses its output.

  Args:
    target: Either a single path to a .a (as a string), or a list of .o paths.
    output_directory: Directory used as the working directory for nm.

  Returns:
    A tuple of (encoded_symbol_names_by_path, encoded_string_addresses_by_path,
    num_no_symbols):
      encoded_symbol_names_by_path: parallel.EncodeDictOfLists() encoding of
          path -> symbol names. For archives, paths have the form
          "<archive>(<member>)".
      encoded_string_addresses_by_path: Same encoding of path -> string
          addresses; only paths that have at least one address are present.
      num_no_symbols: Number of lines nm wrote to stderr.
  """
  is_archive = isinstance(target, str)
  args = [path_util.GetNmPath(), '--no-sort', '--defined-only']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  proc = subprocess.Popen(
      args,
      cwd=output_directory,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      encoding='utf-8')
  stdout, stderr = proc.communicate()
  assert proc.returncode == 0, 'NM failed: ' + ' '.join(args)

  # Each stderr line is counted as one input for which nm found no symbols.
  num_no_symbols = len(stderr.splitlines())
  lines = stdout.splitlines()
  if not lines:
    # Nothing on stdout. Still report the stderr count rather than a
    # hard-coded 0, so the value is consistent with the normal return below.
    return (parallel.EMPTY_ENCODED_DICT, parallel.EMPTY_ENCODED_DICT,
            num_no_symbols)

  # When output starts with a blank line, each file's symbols are preceded by
  # a blank line and a header line; otherwise the output is for target[0].
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)  # Skip the leading blank line.
    path = next(lines)[:-1]  # Header line minus its last character.
  else:
    assert not is_archive
    path = target[0]

  symbol_names_by_path = {}
  string_addresses_by_path = {}
  while path:
    if is_archive:
      path = '%s(%s)' % (target, path)

    # Consumes lines up to and including the blank line that ends this file.
    mangled_symbol_names, string_addresses = _ParseOneObjectFileNmOutput(lines)
    symbol_names_by_path[path] = mangled_symbol_names
    if string_addresses:
      string_addresses_by_path[path] = string_addresses
    # The ':' default becomes '' once stripped, which ends the loop when the
    # output is exhausted.
    path = next(lines, ':')[:-1]

  return (parallel.EncodeDictOfLists(symbol_names_by_path),
          parallel.EncodeDictOfLists(string_addresses_by_path), num_no_symbols)
def main():
  """Command-line entry point: prints the unique symbols of an ELF file.

  Parses --output-directory (required) and a positional elf_path (resolved
  with os.path.realpath), enables DEBUG logging, prints every symbol returned
  by CreateUniqueSymbols(), then logs how many were printed.
  """
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--output-directory', required=True)
  arg_parser.add_argument('elf_path', type=os.path.realpath)
  options = arg_parser.parse_args()

  logging.basicConfig(level=logging.DEBUG,
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  symbols = CreateUniqueSymbols(options.elf_path,
                                readelf.SectionInfoFromElf(options.elf_path))
  for symbol in symbols:
    print(symbol)
  logging.warning('Printed %d symbols', len(symbols))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()