"""
Parse information about a PE file to summarize the on-disk and
in-memory sizes of the sections, in decimal MB instead of in hex. This
script will also automatically display diffs between two files if they
have the same name. This script relies on having Visual Studio (VS 2015 or
later) installed, with dumpbin.exe on the path, and is used
to help investigate binary size regressions and improvements.
Section information printed by dumpbin looks like this:
SECTION HEADER #2
.rdata name
5CCD56 virtual size
1CEF000 virtual address (11CEF000 to 122BBD55)
5CCE00 size of raw data
1CEE000 file pointer to raw data (01CEE000 to 022BADFF)
0 file pointer to relocation table
0 file pointer to line numbers
0 number of relocations
0 number of line numbers
40000040 flags
Initialized Data
Read Only
The reports generated by this script look like this:
> python tools\win\pe_summarize.py out\release\chrome.dll
Size of out\release\chrome.dll is 41.190912 MB
name: mem size , disk size
.text: 33.199959 MB
.rdata: 6.170416 MB
.data: 0.713864 MB, 0.270336 MB
.tls: 0.000025 MB
CPADinfo: 0.000036 MB
.rodata: 0.003216 MB
.crthunk: 0.000064 MB
.gfids: 0.001052 MB
_RDATA: 0.000288 MB
.rsrc: 0.130808 MB
.reloc: 1.410172 MB
Note that the .data section has separate in-memory and on-disk sizes due to
zero-initialized data. Other sections have smaller discrepancies - the disk size
is only printed if it differs from the memory size by more than 512 bytes.
Note that many of the sections - such as .text, .rdata, and .rsrc - are shared
between processes. Some sections - such as .reloc - are discarded after a
process is loaded. Other sections, such as .data, produce private pages and are
therefore objectively 'worse' than the others.
"""
from __future__ import print_function
import os
import subprocess
import sys
def _FindSection(section_list, section_name):
    """Return the index of the first entry named |section_name|, or -1.

    Args:
        section_list: sequence of entries whose first element is the
            section name (main() passes (name, memory_size) tuples).
        section_name: the section name to look for.

    Returns:
        The index of the first entry whose first element equals
        |section_name|, or -1 if there is no such entry.
    """
    for index, entry in enumerate(section_list):
        if entry[0] == section_name:
            return index
    return -1
def _ParseSectionSizes(dumpbin_lines):
    """Yield (name, memory_size, disk_size) for each section header found.

    Args:
        dumpbin_lines: iterable of text lines from "dumpbin /headers".

    Yields:
        A (section name, virtual size, size of raw data) tuple per section,
        with both sizes converted from hex text to integers.
    """
    header_lines = None
    for line in dumpbin_lines:
        if line.startswith('SECTION HEADER #'):
            # Start collecting the lines that follow the header marker.
            header_lines = []
        elif header_lines is not None:
            header_lines.append(line.strip())
            # The first four lines hold the name, the virtual size, the
            # virtual address (unused) and the size of raw data.
            if len(header_lines) == 4:
                name, memory_size, _, disk_size = header_lines
                # Sanity-check that dumpbin's layout is the one expected.
                assert name.count('name') == 1
                assert memory_size.count('virtual size') == 1
                assert disk_size.count('size of raw data') == 1
                yield (name.split()[0],
                       int(memory_size.split()[0], 16),
                       int(disk_size.split()[0], 16))
                header_lines = None


def _PrintMemoryDelta(last_pe_path, pe_path, last_results, results):
    """Print the per-section and total memory size change between two files.

    Args:
        last_pe_path: path of the previously summarized file.
        pe_path: path of the file that was just summarized.
        last_results: list of (name, memory_size) for |last_pe_path|.
        results: list of (name, memory_size) for |pe_path|.
    """
    print('Memory size change from %s to %s' % (last_pe_path, pe_path))
    total_delta = 0
    # Sections present in the new file: changed, or newly added.
    for section_name, memory_size in results:
        last_i = _FindSection(last_results, section_name)
        delta = memory_size
        if last_i >= 0:
            delta -= last_results[last_i][1]
        total_delta += delta
        if delta:
            print('%12s: %7d bytes change' % (section_name, delta))
    # Sections that exist only in the previous file count as removed.
    for section_name, last_memory_size in last_results:
        if _FindSection(results, section_name) < 0:
            delta = -last_memory_size
            total_delta += delta
            print('%12s: %7d bytes change' % (section_name, delta))
    print('Total change: %7d bytes' % total_delta)


def main():
    """Print section sizes for each PE file named on the command line.

    For every existing file in sys.argv[1:], runs dumpbin and prints each
    section's in-memory size in decimal MB, plus its on-disk size when the
    two differ by at least 512 bytes. When a file has the same base name
    (case-insensitively) as the previously summarized file, the per-section
    memory size differences are printed as well.

    Processing stops at the first file for which dumpbin cannot be
    launched.

    Returns:
        0 after printing usage when no file names were given; otherwise
        None.
    """
    if len(sys.argv) < 2:
        print(r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0])
        print(r'Sample: %s chrome.dll' % sys.argv[0])
        print(r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0])
        return 0

    # Details of the previously summarized file. None means "no previous
    # file", so an argument with an empty base name cannot match it.
    last_pe_filepart = None
    last_pe_path = None
    last_results = []

    for pe_path in sys.argv[1:]:
        if not os.path.exists(pe_path):
            print('%s does not exist!' % pe_path)
            continue

        print('Size of %s is %1.6f MB' %
              (pe_path, os.path.getsize(pe_path) / 1e6))
        print('%10s: %9s , %9s' % ('name', 'mem size', 'disk size'))

        # Pass the arguments as a list so that subprocess does the quoting.
        command = ['dumpbin.exe', '/nopdb', '/headers', pe_path]
        try:
            output = subprocess.check_output(command)
        except OSError as error:
            # WindowsError only exists on Windows (where it is OSError or a
            # subclass of it), so catch OSError and probe for winerror.
            if getattr(error, 'winerror', None) == 2:
                print(
                    r'Cannot find dumpbin. Run "C:\Program Files\Microsoft '
                    r'Visual Studio\2022\Professional\VC\Auxiliary\Build'
                    r'\vcvarsall.bat amd64" or similar to add dumpbin to the '
                    r'path.')
            else:
                print(error)
            break

        results = []
        # Replace undecodable bytes (e.g. in an echoed file path) instead
        # of failing; the section headers themselves are plain ASCII.
        dumpbin_lines = output.decode('utf-8', 'replace').splitlines()
        for name, memory_size, disk_size in _ParseSectionSizes(dumpbin_lines):
            if abs(memory_size - disk_size) < 512:
                print('%10s: %9.6f MB' % (name, memory_size / 1e6))
            else:
                print('%10s: %9.6f MB, %9.6f MB' %
                      (name, memory_size / 1e6, disk_size / 1e6))
            results.append((name, memory_size))
        print()

        pe_filepart = os.path.split(pe_path)[1]
        if (last_pe_filepart is not None and
                pe_filepart.lower() == last_pe_filepart.lower()):
            _PrintMemoryDelta(last_pe_path, pe_path, last_results, results)

        last_pe_filepart = pe_filepart
        last_pe_path = pe_path
        last_results = results
# Script entry point: run main() and hand its return value to sys.exit() as
# the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)