import argparse
import re
import subprocess
import tempfile
from enum import Enum
# pyelftools is a third-party dependency: when it is missing, print an install
# hint and stop instead of showing a bare import traceback.
try:
    from elftools.elf.elffile import ELFFile
except ModuleNotFoundError as e:
    print(f"Error:{e}.\nPlease execute the following command to install dependencies:")
    print("pip install construct pyelftools")
    # The `exit` builtin is injected by the `site` module and is not available
    # under `python -S` or in frozen applications; raising SystemExit has the
    # same effect and always works.
    raise SystemExit(1)
# Name of the ELF section whose contents are regenerated by this script and
# written back with `objcopy --update-section`.
TAG_SECTION = "note_type"
# Name of the ELF section that holds the NUL-terminated format strings.
FORMAT_SECTION = "note_format"
# Size in bytes of one entry of the TAG_SECTION section, both as read (the
# address of a format string) and as written (the packed type word).
NOTE_TYPE_SIZE = 8
class NoteTag(Enum):
    """Type code of a single format argument.

    Each code fits in two bits, which is how the codes are packed into the
    type word built by ``NoteFormatType.note_format_type``.
    """

    UINT32 = 0b00  # 32-bit integer argument
    UINT64 = 0b01  # 64-bit integer argument
    DOUBLE = 0b10  # floating-point argument
    STRING = 0b11  # string argument
class NoteFormatType:
    """Translate printf-style format strings into packed argument type words.

    Layout of the packed word (it is serialized as 8 bytes by the caller):
    argument ``i`` occupies the two bits starting at bit ``2 * i`` and holds
    a ``NoteTag`` value; the number of arguments is stored from bit 58
    upwards, i.e. in the highest 6 bits of a 64-bit word.
    """

    TYPE_BITS = 2  # bits used by one NoteTag code
    COUNT_SHIFT = 58  # bit position where the argument count starts
    MAX_ARGS = COUNT_SHIFT // TYPE_BITS  # 29 arguments fit below the count

    # Compiled once for all instances; matches a single printf conversion
    # specification (flags, width, precision, length modifier, specifier).
    _PATTERN = re.compile(
        r"%(?P<flags>[-+ #0]*)?(?P<width>\d+|\*)?(?:\.(?P<precision>\d+|\*))?"
        r"(?P<length>[hljztL]|ll|hh)?(?P<specifier>[diuxXopfFeEgGaAcspn%])"
    )

    def __init__(self, bitwidth: int):
        """Create a translator for a target of the given word size.

        Args:
            bitwidth: Target word size in bits. The value 64 selects 64-bit
                ``l``/``z``/``t`` integers and pointers; any other value is
                treated as a 32-bit target.
        """
        self.bitwidth = bitwidth
        self.pattern = self._PATTERN

    def get_integer_type(self, length_modifier: str) -> "NoteTag":
        """Return the tag of an integer conversion with *length_modifier*."""
        if length_modifier in ("ll", "j"):
            return NoteTag.UINT64
        if length_modifier in ("l", "z", "t"):
            # These widths follow the target word size.
            return NoteTag.UINT64 if self.bitwidth == 64 else NoteTag.UINT32
        return NoteTag.UINT32

    def get_pointer_type(self) -> "NoteTag":
        """Return the tag of a pointer argument on the target architecture."""
        return NoteTag.UINT64 if self.bitwidth == 64 else NoteTag.UINT32

    def note_format_type(self, fmt: str) -> "tuple[int, list[NoteTag]]":
        """Parse *fmt* and return its packed type word and argument tags.

        Args:
            fmt: printf-style format string.

        Returns:
            A ``(notetypes, typelist)`` tuple: ``notetypes`` is the packed
            integer described in the class docstring and ``typelist`` is the
            list of ``NoteTag`` members, one per consumed argument, in order.

        Raises:
            ValueError: If the format string consumes more than 29 arguments.
        """
        typelist = []
        for match in self.pattern.finditer(fmt):
            specifier = match.group("specifier")
            if specifier == "%":
                # A literal percent sign consumes no argument.
                continue

            # A "*" width or precision each consumes an extra int argument
            # that precedes the converted value.
            if match.group("width") == "*":
                typelist.append(NoteTag.UINT32)
            if match.group("precision") == "*":
                typelist.append(NoteTag.UINT32)

            length_modifier = match.group("length") or ""
            if specifier in "cdiuxXon":
                typelist.append(self.get_integer_type(length_modifier))
            elif specifier == "p":
                typelist.append(self.get_pointer_type())
            elif specifier in "fFeEgGaA":
                typelist.append(NoteTag.DOUBLE)
            elif specifier == "s":
                typelist.append(NoteTag.STRING)

        if len(typelist) > self.MAX_ARGS:
            raise ValueError(f"format string {fmt} has too many arguments")

        notetypes = len(typelist) << self.COUNT_SHIFT
        for i, notetype in enumerate(typelist):
            notetypes |= notetype.value << (i * self.TYPE_BITS)
        return notetypes, typelist
class NoteFormat:
    """Reader for the note format and note type sections of an ELF file.

    The instance keeps the ELF file open so section data can be read on
    demand. Call ``close()`` when done, or use the instance as a context
    manager.
    """

    def __init__(self, elf):
        """Open *elf* and locate the note sections.

        Args:
            elf: Path of the ELF file to read.

        Raises:
            ValueError: If the format section or the type section is missing.
        """
        self._file = open(elf, "rb")
        self.note_format_section = None
        self.note_type_section = None
        try:
            self.elf = ELFFile(self._file)
            try:
                self.note_format_section = self.get_section_by_name(FORMAT_SECTION)
                self.note_type_section = self.get_section_by_name(TAG_SECTION)
            except ValueError as e:
                raise ValueError(f"Failed to get note section: {e}") from e

            ident = self.elf.header["e_ident"]
            # Pointer size in bytes, derived from the ELF class.
            self.pointer_size = 4 if ident["EI_CLASS"] == "ELFCLASS32" else 8
            self.byteorder = "little" if ident["EI_DATA"] == "ELFDATA2LSB" else "big"
        except BaseException:
            # Do not leak the file handle when construction fails.
            self._file.close()
            raise
        self.note_type_list = []

    def close(self):
        """Close the underlying ELF file; safe to call more than once."""
        self._file.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def get_section_by_name(self, name):
        """Return the first ELF section called *name*.

        Raises:
            ValueError: If the ELF file has no such section.
        """
        for section in self.elf.iter_sections():
            if section.name == name:
                return section
        raise ValueError(f"Section {name} not found")

    def escape_non_printable(self, s: str) -> str:
        """Return *s* with characters outside printable ASCII escaped.

        Characters with code points 32..126 are kept; every other character
        is replaced by its ``repr`` without the surrounding quotes.
        """
        return "".join(
            char if 32 <= ord(char) <= 126 else repr(char)[1:-1] for char in s
        )

    def get_note_format_by_addr(self, addr: int) -> str:
        """Return the escaped NUL-terminated string stored at *addr*.

        Args:
            addr: Address inside the format section.

        Raises:
            ValueError: If the format section is missing or *addr* lies
                outside of it.
        """
        if self.note_format_section is None:
            raise ValueError("note_format section is not found")

        section_start_addr = self.note_format_section.header["sh_addr"]
        if addr < section_start_addr:
            raise ValueError(f"Address {addr} is out of bounds")

        offset = addr - section_start_addr
        section_data = self.note_format_section.data()
        if offset >= len(section_data):
            raise ValueError(f"Offset {offset} is out of bounds")

        # The string runs up to the next NUL byte, or to the end of the
        # section when no terminator is present.
        end_offset = section_data.find(b"\x00", offset)
        if end_offset < 0:
            end_offset = len(section_data)

        string = section_data[offset:end_offset].decode("utf-8", errors="replace")
        return self.escape_non_printable(string)

    def get_note_type(self) -> list:
        """Return one ``(type_addr, format_addr, format_string)`` per entry.

        ``type_addr`` is the address of the entry inside the type section,
        ``format_addr`` the address read from the entry and ``format_string``
        the escaped format string found at that address. An empty list is
        returned when the type section is missing.
        """
        if self.note_type_section is None:
            return []

        section_data = self.note_type_section.data()
        section_start_addr = self.note_type_section.header["sh_addr"]
        typelist = []
        for i in range(0, len(section_data), NOTE_TYPE_SIZE):
            format_addr = int.from_bytes(
                section_data[i : i + NOTE_TYPE_SIZE],
                byteorder=self.byteorder,
                signed=False,
            )
            format_string = self.get_note_format_by_addr(format_addr)
            typelist.append((section_start_addr + i, format_addr, format_string))
        return typelist

    def generate_note_type_section(self, output, debug):
        """Write the packed type word of every note entry to *output*.

        Args:
            output: Binary file object; receives ``NOTE_TYPE_SIZE`` bytes per
                entry, in the byte order of the ELF file.
            debug: When true, print one summary line per entry.
        """
        typelist = self.get_note_type()
        # NoteFormatType expects the word size in bits, while pointer_size is
        # in bytes.
        note_format_type = NoteFormatType(self.pointer_size * 8)
        for i, (_, _, format_string) in enumerate(typelist):
            typeflags, types = note_format_type.note_format_type(format_string)
            output.write(typeflags.to_bytes(NOTE_TYPE_SIZE, byteorder=self.byteorder))
            if debug:
                print(
                    f'index: {i}, format: "{format_string}", type: 0x{typeflags:08x}, types: {[item.name for item in types]}'
                )
def _main() -> int:
    """Regenerate the note type section of an ELF file and patch it in.

    The new section contents are written to a temporary file, which is then
    handed to ``objcopy --update-section``. The temporary file is removed
    afterwards.

    Returns:
        The exit status of the objcopy invocation.
    """
    import os  # local import: only the script entry point needs it

    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--elf", help="ELF file", required=True)
    parser.add_argument("-o", "--output", type=str, help="Output file")
    parser.add_argument("-c", "--objcopy", type=str, help="Objcopy path", required=True)
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    args = parser.parse_args()

    typefile_name = None
    try:
        note = NoteFormat(args.elf)
        try:
            # delete=False: the file must outlive the `with` block so that
            # objcopy can read it once it has been flushed and closed.
            with tempfile.NamedTemporaryFile(delete=False) as typefile:
                typefile_name = typefile.name
                note.generate_note_type_section(typefile, args.verbose)
        finally:
            # Release the ELF file before objcopy possibly rewrites it.
            note.elf.stream.close()

        # Build an argument list instead of a shell string so that paths
        # containing spaces or shell metacharacters are passed through intact.
        cmd = [
            args.objcopy,
            "--update-section",
            f"{TAG_SECTION}={typefile_name}",
            args.elf,
        ]
        if args.output:
            # Without an output file objcopy updates the ELF in place.
            cmd.append(args.output)
        return subprocess.run(cmd).returncode
    finally:
        if typefile_name is not None:
            os.unlink(typefile_name)


if __name__ == "__main__":
    raise SystemExit(_main())