iBoot/tools/macho.py

947 lines
34 KiB
Python
Raw Normal View History

2023-07-08 13:03:17 -07:00
#!/usr/bin/python
#
# Copyright (C) 2013 Apple Inc. All rights reserved.
#
# This document is the property of Apple Inc.
# It is considered confidential and proprietary.
#
# This document may not be reproduced or transmitted in any form,
# in whole or in part, without the express written permission of
# Apple Inc.
#
#
# A module for manipulating statically linked mach-o files.
#
# Load information from a mach-o file:
#
# f = macho.File(pathname)
#
# Search for non-debug global text symbols starting with 'a' :
#
# m = { "name": "^_a.*", "section": "__TEXT,.*", "n_type": (N_STAB | N_EXT, N_EXT, N_STAB)}
# l = f.symtab.matching(m)
#
# Read a structure pointed to by a symbol
#
# d = l[0].unpack("IIQ", "a b c")
#
# Write changes back to the file
#
# f.update()
#
# Specific things this module does not support:
# - moving anything
# - adding, resizing or removing sections or segments
#
#
import io
import struct
import re
# section types/attributes
#
# The 'flags' field of a section is split by the two masks below: the low
# byte selects one section type (the S_* values), the upper 24 bits are
# attribute bits (the S_ATTR_* values).
SECTION_TYPE = 0x000000ff # 256 section types
SECTION_ATTRIBUTES = 0xffffff00 # 24 section attributes
S_REGULAR = 0x00 # regular section
S_ZEROFILL = 0x01 # zero fill on demand section
S_CSTRING_LITERALS = 0x02 # section with only literal C strings
S_4BYTE_LITERALS = 0x03 # section with only 4 byte literals
S_8BYTE_LITERALS = 0x04 # section with only 8 byte literals
S_LITERAL_POINTERS = 0x05 # section with only pointers to literals
S_NON_LAZY_SYMBOL_POINTERS = 0x06 # section with only non-lazy symbol pointers
S_LAZY_SYMBOL_POINTERS = 0x07 # section with only lazy symbol pointers
S_SYMBOL_STUBS = 0x08 # section with only symbol stubs, byte size of stub in the reserved2 field
S_MOD_INIT_FUNC_POINTERS = 0x09 # section with only function pointers for initialization
S_MOD_TERM_FUNC_POINTERS = 0x0a # section with only function pointers for termination
S_COALESCED = 0x0b # section contains symbols that are to be coalesced
S_GB_ZEROFILL = 0x0c # zero fill on demand section (that can be larger than 4 gigabytes)
S_INTERPOSING = 0x0d # section with only pairs of function pointers for interposing
S_16BYTE_LITERALS = 0x0e # section with only 16 byte literals
S_DTRACE_DOF = 0x0f # section contains DTrace Object Format
S_THREAD_LOCAL_REGULAR = 0x11 # template of initial values for TLVs
S_THREAD_LOCAL_ZEROFILL = 0x12 # template of initial values for TLVs
S_THREAD_LOCAL_VARIABLES = 0x13 # TLV descriptors
S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14 # pointers to TLV descriptors
S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15 # functions to call to initialize TLV values
SECTION_ATTRIBUTES_USR = 0xff000000 # user-settable attributes
S_ATTR_PURE_INSTRUCTIONS = 0x80000000 # section contains only true machine instructions
S_ATTR_NO_TOC = 0x40000000 # section contains coalesced symbols that are not to be in a ranlib table of contents
S_ATTR_STRIP_STATIC_SYMS = 0x20000000 # ok to strip static symbols in this section in files with the MH_DYLDLINK flag
S_ATTR_NO_DEAD_STRIP = 0x10000000 # no dead stripping
S_ATTR_LIVE_SUPPORT = 0x08000000 # blocks are live if they reference live blocks
S_ATTR_SELF_MODIFYING_CODE = 0x04000000 # used with i386 code stubs written on by dyld
S_ATTR_DEBUG = 0x02000000 # a debug section
SECTION_ATTRIBUTES_SYS = 0x00ffff00 # system-settable attributes
S_ATTR_SOME_INSTRUCTIONS = 0x00000400 # section contains some machine instructions
S_ATTR_EXT_RELOC = 0x00000200 # section has external relocation entries
S_ATTR_LOC_RELOC = 0x00000100 # section has local relocation entries
# well-known segment and section names
SEG_PAGEZERO = "__PAGEZERO" # the pagezero segment which has no protections and catches NULL references
SEG_TEXT = "__TEXT" # the traditional UNIX text segment
SECT_TEXT = "__text" # the real text part of the text section, no headers, and no padding
SEG_DATA = "__DATA" # the traditional UNIX data segment
SECT_DATA = "__data" # the real initialized data section, no padding, no bss overlap
SECT_BSS = "__bss" # the real uninitialized data section, no padding
SECT_COMMON = "__common" # the section common symbols are allocated in by the link editor
# not-a-section value for n_sect
NO_SECT = 0x00 # section numbers start at 1
# nlist n_type values
#
# N_STAB, N_PEXT, N_TYPE and N_EXT are masks over n_type; the values that
# follow N_EXT are compared against (n_type & N_TYPE).
N_STAB = 0xe0 # mask for debugger symbols
N_TYPE = 0x0e # mask for symbol type
N_PEXT = 0x10 # symbol is private-extern
N_EXT = 0x01 # symbol is external
N_UNDF = 0x00 # symbol is undefined
N_ABS = 0x02 # absolute (n_sect is NO_SECT)
#N_TEXT = 0x04 # text symbol
N_DATA = 0x06 # data symbol
#N_BSS = 0x08 # bss symbol
N_INDR = 0x0a # symbol is indirect (n_value is string table index)
N_SECT = 0x0e # symbol is in n_sect
#N_COMM = 0x12 # common symbol
#N_FN = 0x1e # filename
# nlist n_desc values
N_WEAK_REF = 0x40 # weak reference (value may be zero)
N_WEAK_DEF = 0x80 # weak definition
N_ARM_THUMB_DEF = 0x0008 # Thumb function
# selected STAB symbol types
# fields listed here are n_name, n_sect, n_desc, n_value
N_GSYM = 0x20 # global symbol: name,NO_SECT,type,0
N_FUN = 0x24 # procedure: name,n_sect,linenumber,address
N_STSYM = 0x26 # static symbol: name,n_sect,type,address
N_LCSYM = 0x28 # .lcomm symbol: name,n_sect,type,address
N_BNSYM = 0x2e # begin nsect sym: 0,n_sect,0,address
N_OPT = 0x3c # emitted with gcc2_compiled and in gcc source
N_RSYM = 0x40 # register sym: name,NO_SECT,type,register
N_SLINE = 0x44 # src line: 0,n_sect,linenumber,address
N_ENSYM = 0x4e # end nsect sym: 0,n_sect,0,address
N_SSYM = 0x60 # structure elt: name,NO_SECT,type,struct_offset
N_SO = 0x64 # source file name: name,n_sect,0,address
N_OSO = 0x66 # object file name: name,0,0,st_mtime
N_LSYM = 0x80 # local sym: name,NO_SECT,type,offset
N_BINCL = 0x82 # include file beginning: name,NO_SECT,0,sum
N_SOL = 0x84 # #included file name: name,n_sect,0,address
N_PARAMS = 0x86 # compiler parameters: name,NO_SECT,0,0
N_VERSION = 0x88 # compiler version: name,NO_SECT,0,0
N_OLEVEL = 0x8A # compiler -O level: name,NO_SECT,0,0
N_PSYM = 0xa0 # parameter: name,NO_SECT,type,offset
N_EINCL = 0xa2 # include file end: name,NO_SECT,0,0
N_ENTRY = 0xa4 # alternate entry: name,n_sect,linenumber,address
N_LBRAC = 0xc0 # left bracket: 0,NO_SECT,nesting level,address
N_EXCL = 0xc2 # deleted include file: name,NO_SECT,0,sum
N_RBRAC = 0xe0 # right bracket: 0,NO_SECT,nesting level,address
N_BCOMM = 0xe2 # begin common: name,NO_SECT,0,0
N_ECOMM = 0xe4 # end common: name,n_sect,0,0
N_ECOML = 0xe8 # end common (local name): 0,n_sect,0,address
N_LENG = 0xfe # second stab entry with length information
def dict_from_struct(with_bytes, using_struct, to_names):
    """Unpack a packed structure into a dictionary keyed by field name.

    with_bytes   -- buffer (bytes or bytearray) holding the packed structure
    using_struct -- struct.Struct describing the layout
    to_names     -- space-separated field names, in unpack order; unpacked
                    values beyond the last name are dropped

    Raises struct.error if the buffer does not match the structure size and
    IndexError if there are more names than unpacked values.
    """
    # bytes() yields the raw buffer contents on both Python 2 and Python 3;
    # str() of a bytearray is its repr on Python 3 and cannot be unpacked
    values = using_struct.unpack(bytes(with_bytes))
    keys = to_names.split(" ")
    if len(keys) > len(values):
        raise IndexError("more field names than unpacked values")
    return dict(zip(keys, values))
class File(object):
    """toplevel object representing a mach-o file

    Reads the mach header from an already-open file object and then walks
    the load commands that follow it.  Each load command handler registers
    what it finds (segments, sections, symbol/string tables, UUID) back on
    this object.

    Mach header fields can be fetched by indexing, e.g. f['ncmds']; unknown
    keys yield None.
    """

    # struct mach_header
    # {
    #     uint32_t        magic;
    #     cpu_type_t      cputype;
    #     cpu_subtype_t   cpusubtype;
    #     uint32_t        filetype;
    #     uint32_t        ncmds;
    #     uint32_t        sizeofcmds;
    #     uint32_t        flags;
    # };
    # struct mach_header_64
    # {
    #     uint32_t        magic;
    #     cpu_type_t      cputype;
    #     cpu_subtype_t   cpusubtype;
    #     uint32_t        filetype;
    #     uint32_t        ncmds;
    #     uint32_t        sizeofcmds;
    #     uint32_t        flags;
    #     uint32_t        reserved;
    # };
    mach_header_magic32 = 0xfeedface
    mach_header_magic64 = 0xfeedfacf
    mach_header_f = "IiiIIII"
    mach_header_n = "magic cputype cpusubtype filetype ncmds sizeofcmds flags"

    def __init__(self, file, debug=False):
        """Parse the mach-o image held by 'file'.

        file  -- open binary file object; seek/tell/read are used for
                 parsing, write/flush when changes are pushed back
        debug -- when not False, debug() messages are printed

        Raises RuntimeError if the header magic is not a known mach-o magic.
        """
        self._symtab = None
        self._stringtab = None
        self._segments = {}
        self._sections = {}
        self._section_index = 1
        self._uuid = None
        self._debug = debug

        # stash the file and read the file header from the current position
        self._file = file
        self._properties = self.unpack(self.mach_header_f, self.mach_header_n)

        # skip the reserved field in a 64-bit header
        if self['magic'] == self.mach_header_magic64:
            self._file.seek(4, io.SEEK_CUR)
        elif self['magic'] != self.mach_header_magic32:
            raise RuntimeError("bad magic number {:#010x} in mach-o header".format(self['magic']))

        # read every load command; ncmds is a count, so iterate it in full
        for _ in range(self['ncmds']):
            cmd = LoadCommand.from_file(self)
            # handlers may leave the file pointer anywhere, so reset it to
            # the start of the next load command
            self._file.seek(cmd['__offset'] + cmd['cmdsize'])

    def debug(self, str):
        """Print a message if debug output was requested."""
        if self._debug is not False:
            print(str)

    # access to the UUID
    @property
    def uuid(self):
        return self._uuid

    @uuid.setter
    def uuid(self, value):
        self._uuid = value

    # access to the symbol table
    @property
    def symtab(self):
        return self._symtab

    @symtab.setter
    def symtab(self, value):
        self._symtab = value

    # access to the string table
    @property
    def stringtab(self):
        return self._stringtab

    @stringtab.setter
    def stringtab(self, value):
        self._stringtab = value

    # access to the segment list
    @property
    def segments(self):
        return self._segments

    def add_segment(self, segment):
        """Register a segment under its name; segments call this themselves."""
        self.debug("segment " + segment['segname'])
        self._segments[segment['segname']] = segment

    # access to the section list; each section is stored twice, once under
    # its 1-based index and once under its "segname,sectname" name
    @property
    def sections(self):
        return self._sections

    def sections_inorder(self):
        '''iterate sections in file order'''
        # indices 1 .. _section_index - 1 are in use
        for i in range(1, self._section_index):
            yield self._sections[i]

    def add_section(self, section):
        """Register a section by index and by name; sections call this themselves."""
        sectionname = "{},{}".format(section['segname'], section['sectname'])
        self.debug("section {}:{} @ {}".format(self._section_index, sectionname, section['offset']))
        self._sections[self._section_index] = section
        self._section_index += 1
        self._sections[sectionname] = section

    @property
    def flavor(self):
        """Object file flavor: 32 for the 32-bit magic, otherwise 64."""
        if self['magic'] == self.mach_header_magic32:
            return 32
        return 64

    def __getitem__(self, index):
        """Fetch a mach header property by name, or None if unknown."""
        return self._properties.get(index)

    def unpack(self, format, names, offset=-1, rewind=False):
        """Read a structure from the file and return it as a dictionary.

        format -- struct format string
        names  -- space-separated field names for the unpacked values
        offset -- file offset to read from, or -1 for the current position
        rewind -- when True, restore the file pointer to where the
                  structure was read from

        The result also carries '__offset' (where the structure was read
        from) and '__dirty' (False).
        """
        if offset != -1:
            self._file.seek(offset)
        pre_offset = self._file.tell()

        s = struct.Struct(format)
        result = dict_from_struct(self.readbytes(s.size), s, names)
        result['__offset'] = pre_offset
        result['__dirty'] = False

        if rewind is True:
            self._file.seek(pre_offset)
        return result

    def repack(self, format, names, props):
        """Write a structure back to the file at props['__offset']."""
        values = [props[key] for key in names.split()]
        self.writebytes(struct.Struct(format).pack(*values), props['__offset'])

    def readbytes(self, size, offset=-1, rewind=False):
        """Read 'size' bytes and return them as a bytearray.

        offset -- file offset to read from, or -1 for the current position
        rewind -- when True, restore the file pointer to where it was
                  before this call
        """
        o_offset = self._file.tell()
        if offset != -1:
            self._file.seek(offset)
        data = self._file.read(size)
        if rewind is True:
            self._file.seek(o_offset)
        return bytearray(data)

    def writebytes(self, bytes, offset=-1):
        """Write bytes at 'offset' (or the current position when -1).

        Raises RuntimeError if the file reports a short write.
        """
        if offset != -1:
            self._file.seek(offset)
        count = len(bytes)
        wrote = self._file.write(bytes)
        # Python 2 'file' objects return None from write(); only treat a
        # reported count as evidence of a short write
        if wrote is not None and wrote != count:
            raise RuntimeError("tried to write {} bytes, only wrote {} bytes".format(count, wrote))

    def update(self):
        """Push any dirty sections out to the file.

        Returns True if anything was written.
        """
        updated = False
        for i in range(1, self._section_index):
            if self._sections[i].update():
                updated = True
        if updated:
            self._file.flush()
        return updated

    def segment_for_name(self, name):
        """Locate a segment by name, or None."""
        return self._segments.get(name)

    def section_for_address(self, addr):
        """Locate the section whose address range contains addr, or None."""
        # values() rather than itervalues() so this runs on Python 2 and 3
        for sect in self._sections.values():
            saddr = sect['addr']
            if saddr <= addr < saddr + sect['size']:
                return sect
        return None

    def section_for_name(self, name):
        """Locate a section by "segname,sectname" name (or index), or None."""
        return self._sections.get(name)

    def symbols_in_section(self, name):
        """Locate symbols in a section; empty list if there is no symbol table."""
        if self._symtab is None:
            return []
        return self._symtab.matching({'section': name})

    def symbols_for_address(self, addr):
        """Locate N_ABS symbols whose value is addr; empty list if no symbol table."""
        if self._symtab is None:
            return []
        return self._symtab.matching({'n_value': addr, 'n_type': N_ABS})
class LoadCommand(object):
    """base class for load commands

    Subclasses claim the commands they understand through a
    for_load_command(cmd, parent) classmethod; anything unclaimed is
    represented by a plain LoadCommand so that it can be skipped.
    """

    # struct load_command
    # {
    #     uint32_t cmd;
    #     uint32_t cmdsize;
    # };
    lc_names = {
        0x00000001: "LC_SEGMENT",
        0x00000002: "LC_SYMTAB",
        0x00000019: "LC_SEGMENT_64",
        0x0000001b: "LC_UUID",
        0x00000026: "LC_FUNCTION_STARTS",
        0x00000029: "LC_DATA_IN_CODE",
        0x0000002A: "LC_SOURCE_VERSION",
    }
    load_command_f = "II"
    load_command_n = "cmd cmdsize"

    @classmethod
    def from_file(cls, parent):
        """Build the handler object for the load command at the file pointer."""
        # peek at the generic header, then rewind so that whichever class
        # handles the command can read the whole structure itself
        header = parent.unpack(cls.load_command_f, cls.load_command_n, rewind=True)
        command = header['cmd']

        # the first subclass that wants the command gets it
        for handler in cls.__subclasses__():
            claimed = handler.for_load_command(command, parent)
            if claimed is not None:
                return claimed

        # nobody was interested; hand back a dummy so the caller can skip it
        return LoadCommand(parent)

    def __init__(self, parent):
        self._properties = parent.unpack(self.load_command_f, self.load_command_n)

    def __getitem__(self, index):
        """Fetch a property by name; unknown names yield None."""
        return self._properties.get(index)
class Segment(LoadCommand):
    """handles segment commands

    Reads an LC_SEGMENT or LC_SEGMENT_64 command, registers the segment with
    the parent file and then reads the section headers that follow it.
    """

    # struct segment_command
    # {
    #     uint32_t   cmd;
    #     uint32_t   cmdsize;
    #     char       segname[16];
    #     uint32_t   vmaddr;
    #     uint32_t   vmsize;
    #     uint32_t   fileoff;
    #     uint32_t   filesize;
    #     vm_prot_t  maxprot;
    #     vm_prot_t  initprot;
    #     uint32_t   nsects;
    #     uint32_t   flags;
    # };
    # struct segment_command_64
    # {
    #     uint32_t   cmd;
    #     uint32_t   cmdsize;
    #     char       segname[16];
    #     uint64_t   vmaddr;
    #     uint64_t   vmsize;
    #     uint64_t   fileoff;
    #     uint64_t   filesize;
    #     vm_prot_t  maxprot;
    #     vm_prot_t  initprot;
    #     uint32_t   nsects;
    #     uint32_t   flags;
    # };
    segment_command32_f = "II16sIIIIiiII"
    segment_command64_f = "II16sQQQQiiII"
    segment_command_n = "cmd cmdsize segname vmaddr vmsize fileoff filesize maxprot initprot nsects flags"

    @classmethod
    def for_load_command(cls, cmd, parent):
        """Return a Segment if cmd is a segment command, otherwise None."""
        name = cls.lc_names.get(cmd)
        if name == "LC_SEGMENT":
            return cls(cmd, parent, 32)
        if name == "LC_SEGMENT_64":
            return cls(cmd, parent, 64)
        return None

    def __init__(self, cmd, parent, flavor):
        """Read the segment command at the parent's file pointer.

        cmd    -- load command number (not used beyond dispatch)
        parent -- the File being parsed
        flavor -- 32 selects the 32-bit layout, anything else the 64-bit one
        """
        if flavor == 32:
            layout = self.segment_command32_f
        else:
            layout = self.segment_command64_f
        self._properties = parent.unpack(layout, self.segment_command_n)

        # strip NULs from the segment name; struct hands back bytes on
        # Python 3, so convert those to text to keep names comparable with
        # ordinary string literals
        segname = self._properties['segname'].rstrip(b"\x00")
        if not isinstance(segname, str):
            segname = segname.decode("latin-1")
        self._properties['segname'] = segname

        # advertise to the parent
        parent.add_segment(self)

        # the section headers follow immediately; each registers itself
        for _ in range(self['nsects']):
            Section(parent, flavor)
class Section(object):
    """base class for text/data sections

    Header fields are fetched by indexing with their name.  Three further
    properties are computed on first use and then cached:

      'name'  -- canonical "segname,sectname" name
      'bytes' -- section contents as a bytearray (None for S_ZEROFILL)
      'end'   -- address of the last byte in the section

    Indexing with a slice reads from / writes to the 'bytes' property.
    """

    # struct section
    # {
    #     char      sectname[16];
    #     char      segname[16];
    #     uint32_t  addr;
    #     uint32_t  size;
    #     uint32_t  offset;
    #     uint32_t  align;
    #     uint32_t  reloff;
    #     uint32_t  nreloc;
    #     uint32_t  flags;
    #     uint32_t  reserved1;  /* offset or index */
    #     uint32_t  reserved2;  /* count or sizeof */
    # };
    # struct section_64
    # {
    #     char      sectname[16];
    #     char      segname[16];
    #     uint64_t  addr;
    #     uint64_t  size;
    #     uint32_t  offset;
    #     uint32_t  align;
    #     uint32_t  reloff;
    #     uint32_t  nreloc;
    #     uint32_t  flags;
    #     uint32_t  reserved1;  /* offset or index */
    #     uint32_t  reserved2;  /* count or sizeof */
    #     uint32_t  reserved3;
    # };
    section32_f = "16s16sIIIIIIIII"
    section64_f = "16s16sQQIIIIIIII"
    section_n = "sectname segname addr size offset align reloff nreloc flags index count"

    def __init__(self, parent, flavor):
        """Read a section header at the parent's file pointer and register it.

        parent -- the File being parsed
        flavor -- 32 selects the 32-bit layout, anything else the 64-bit one
        """
        layout = self.section32_f if flavor == 32 else self.section64_f
        self._properties = parent.unpack(layout, self.section_n)

        # strip NULs from the segment and section names
        for key in ('segname', 'sectname'):
            self._properties[key] = self._clean_name(self._properties[key])

        # give ourselves a canonical name
        self._properties['name'] = "{},{}".format(self['segname'], self['sectname'])

        # register so that we can be found by either name or index
        self._parent = parent
        parent.add_section(self)

    @staticmethod
    def _clean_name(raw):
        """Strip NUL padding from a fixed-width name and return it as text."""
        name = raw.rstrip(b"\x00")
        # struct hands back bytes on Python 3; on Python 2 this is already str
        if not isinstance(name, str):
            name = name.decode("latin-1")
        return name

    def __getitem__(self, index):
        """Fetch a property by name, or a slice of the section contents.

        Unknown property names yield None.
        """
        if isinstance(index, slice):
            return self['bytes'][index]
        if index in self._properties:
            return self._properties[index]

        if index == 'bytes':
            # can't mess with bytes in a zerofill section
            if (self['flags'] & SECTION_TYPE) == S_ZEROFILL:
                return None
            value = self._parent.readbytes(self['size'], offset=self['offset'])
        elif index == 'end':
            value = self['addr'] + self['size'] - 1
        else:
            return None

        # cache lazily computed properties
        self._properties[index] = value
        return value

    def __setitem__(self, index, value):
        """Replace the section contents, or a slice of them, and mark dirty.

        Only 'bytes' and slices are writable, and neither may change the
        size of the section.  Raises RuntimeError otherwise.
        """
        if isinstance(index, slice):
            # fetching 'bytes' loads the contents from the file if needed
            data = self['bytes']
            if data is None:
                raise RuntimeError("can't write to a zero-fill section")
            if len(value) != len(data[index]):
                raise RuntimeError("wrong size for section data")
            data[index] = value
        elif index == 'bytes':
            if len(value) != self['size']:
                raise RuntimeError("wrong size for section data")
            self._properties['bytes'] = value
        else:
            raise RuntimeError("section attribute '{}' is not writable".format(index))
        self._properties['__dirty'] = True

    def update(self):
        """Write our bytes back to the file if they have been modified.

        Returns True if anything was written.
        """
        if not self['__dirty']:
            return False
        self._parent.writebytes(self['bytes'], self['offset'])
        self._properties['__dirty'] = False
        return True
class Symtab(LoadCommand):
    """symbol table

    Handles LC_SYMTAB: loads the string table and every nlist entry, then
    publishes both on the parent file.
    """

    # struct symtab_command
    # {
    #     uint_32 cmd;
    #     uint_32 cmdsize;
    #     uint_32 symoff;
    #     uint_32 nsyms;
    #     uint_32 stroff;
    #     uint_32 strsize;
    # };
    symtab_f = "IIIIII"
    symtab_n = "cmd cmdsize symoff nsyms stroff strsize"

    @classmethod
    def for_load_command(cls, cmd, parent):
        """Return a Symtab if cmd is LC_SYMTAB, otherwise None."""
        if cls.lc_names.get(cmd) == "LC_SYMTAB":
            return cls(parent)
        return None

    def __init__(self, parent):
        """Read the symtab command at the parent's file pointer."""
        self._nlist = []

        # read the symtab header
        self._properties = parent.unpack(self.symtab_f, self.symtab_n)

        # read the string table first; symbol names are resolved against it
        # as each nlist entry is constructed
        parent.stringtab = StringTable(parent, self['stroff'], self['strsize'])

        # read all nsyms entries: the first one seeks to the start of the
        # name list, the rest are read sequentially from the file pointer
        for i in range(self['nsyms']):
            offset = self['symoff'] if i == 0 else -1
            self._nlist.append(Nlist(parent, offset))

        parent.symtab = self

    def matching(self, matching_dict):
        """Return the list of symbols that match every entry in matching_dict.

        See Nlist.matches for the supported criteria.
        """
        return [sym for sym in self._nlist if sym.matches(matching_dict)]
class StringTable(object):
    """a string table

    Indexes the NUL-terminated strings of a raw string table by the offset
    at which each one starts.
    """

    def __init__(self, parent, offset, size):
        """Read and index 'size' bytes of string table at file 'offset'.

        parent -- object providing readbytes(size, offset=...) and debug(msg)
        """
        # per-instance index; a class-level dict would be shared between
        # every string table ever loaded
        self._strings = {}

        # read the raw string table from the file
        raw = parent.readbytes(size, offset=offset)

        # parse out NUL-separated strings and index them by start offset
        start = 0
        limit = len(raw)
        while start < limit:
            end = raw.find(b"\x00", start)
            if end == -1:
                # trailing bytes without a terminator are not indexed
                break
            if end > start:
                text = "".join(chr(c) for c in raw[start:end])
                self._strings[start] = text
                parent.debug("string {}:'{}'".format(start, text))
            start = end + 1

    def __getitem__(self, index):
        """Return the string starting at offset 'index'.

        Offset 0 with no string yields "", any other unknown offset None.
        """
        if index in self._strings:
            return self._strings[index]
        if index == 0:
            return ""
        return None
class UUIDCommand(LoadCommand):
    """handles LC_UUID commands"""

    # struct uuid_command
    # {
    #     uint32_t  cmd;
    #     uint32_t  cmdsize;
    #     uint8_t   uuid[16];
    # };
    uuid_command_f = "II16s"
    uuid_command_n = "cmd cmdsize uuid"

    @classmethod
    def for_load_command(cls, cmd, parent):
        """Return a UUIDCommand if cmd is LC_UUID, otherwise None."""
        if cls.lc_names.get(cmd) != "LC_UUID":
            return None
        return cls(cmd, parent)

    def __init__(self, cmd, parent):
        """Read the uuid command at the parent's file pointer."""
        fields = parent.unpack(self.uuid_command_f, self.uuid_command_n)
        self._properties = fields
        parent.debug('uuid: {}'.format(fields))
        # hand the raw 16-byte UUID to the parent
        parent.uuid = fields['uuid']
class Nlist(object):
    """a symbol table entry

    The raw nlist fields are fetched by indexing with their name.  Three
    further properties are looked up lazily and then cached:

      'name'     -- symbol name from the parent's string table
      'sectname' -- canonical name of section number n_sect
      'section'  -- the section object the symbol refers to
    """

    # struct nlist
    # {
    #     union {
    #         int32_t  n_strx;   /* string table offset */
    #     } n_un;
    #     uint8_t   n_type;      /* symbol type */
    #     uint8_t   n_sect;      /* section index */
    #     int16_t   n_desc;      /* debug description */
    #     uint32_t  n_value;     /* address/value */
    # };
    # struct nlist_64
    # {
    #     union {
    #         uint32_t n_strx;
    #     } n_un;
    #     uint8_t   n_type;
    #     uint8_t   n_sect;
    #     uint16_t  n_desc;
    #     uint64_t  n_value;
    # };
    nlist32_f = "IBBHI"
    nlist64_f = "IBBHQ"
    nlist_n = "n_strx n_type n_sect n_desc n_value"

    def __init__(self, parent, offset=-1):
        """Read one nlist entry.

        parent -- the File being parsed
        offset -- file offset of the entry, or -1 for the current position

        Raises RuntimeError if the parent's flavor is neither 32 nor 64.
        """
        if parent.flavor == 32:
            layout = self.nlist32_f
        elif parent.flavor == 64:
            layout = self.nlist64_f
        else:
            raise RuntimeError("bad nlist flavor")
        self._properties = parent.unpack(layout, self.nlist_n, offset=offset)

        # save a reference to the parent, we need it for lazy evaluation
        self._parent = parent

        # resolve the name now so that it shows up in the debug output
        self['name']
        parent.debug("symbol {}".format(self._properties))

    def matches(self, matching_dict):
        """Test the symbol against a dictionary of criteria.

        Each key names a symbol property; its value is one of:

          int                       -- property must equal the value
          (mask, require, prohibit) -- after masking, all 'require' bits
                                       must be set and no 'prohibit' bits
          str                       -- regular expression that must match
                                       at the start of the property

        A symbol without a value for a key never matches.  Raises
        RuntimeError for any other kind of criterion.
        """
        for key, c in matching_dict.items():
            # fetch the symbol's value for this key; if we don't
            # have a value we cannot match
            v = self[key]
            if v is None:
                return False

            # simple integer match?
            if isinstance(c, int):
                if c != v:
                    return False
                continue

            # integer masking match?
            if isinstance(c, tuple) and (len(c) == 3):
                mask, require, prohibit = c
                v &= mask
                if (v & prohibit) != 0:
                    return False
                if (v & require) != require:
                    return False
                continue

            # string match?
            if isinstance(c, str):
                # 'section' yields a section object, not text; match the
                # pattern against its canonical "segname,sectname" name
                if key == 'section':
                    v = v['name']
                if re.match(c, v) is None:
                    return False
                continue

            # don't actually know how to handle this...
            raise RuntimeError("don't know how to match '{}' using '{}'".format(key, v))
        return True

    # Allow properties to be fetched by indexing us
    #
    # As some properties aren't known at parse time (e.g. section names)
    # we look them up lazily.
    #
    def __getitem__(self, index):
        if index in self._properties:
            return self._properties[index]

        v = None
        if index == 'name':
            v = self._parent.stringtab[self['n_strx']]
        elif index == 'sectname':
            sect = self._parent.sections.get(self['n_sect'])
            if sect is None:
                # not cached: the section may not have been registered yet
                return None
            v = sect['name']
        elif index == 'section':
            stype = self._properties['n_type'] & N_TYPE
            if stype == N_ABS or stype == N_DATA:
                v = self._parent.section_for_address(self['n_value'])
            elif stype == N_SECT:
                # an unknown section number yields None rather than KeyError
                v = self._parent.sections.get(self['n_sect'])
        self._properties[index] = v
        return v

    def _section_and_offset(self):
        """Return (section, offset of this symbol's value within it)."""
        sect = self['section']
        if sect is None:
            raise RuntimeError("symbol '{}' is not in any section".format(self['name']))
        return sect, self['n_value'] - sect['addr']

    def unpack(self, format, names):
        """Read a structure from whatever the symbol points to.

        format -- struct format string, or 'C' (non-standard) to read a
                  NUL-terminated C string as a single value
        names  -- space-separated field names for the unpacked values

        Raises RuntimeError if the symbol is not in any section.
        """
        sect, offset = self._section_and_offset()
        data = sect['bytes']
        if format == 'C':
            # b'\0' so that the search works on a bytearray on Python 2 and 3
            chunk = data[offset:data.index(b'\0', offset)]
            s = struct.Struct('%ds' % len(chunk))
        else:
            # slice the requested range and decode
            s = struct.Struct(format)
            chunk = data[offset:(offset + s.size)]
        return dict_from_struct(chunk, s, names)

    # write to whatever the symbol points to
    #
    # XXX needs to be updated to take a dict as argument
    def pack(self, format, values):
        """Pack 'values' with 'format' over whatever the symbol points to.

        Raises RuntimeError if the symbol is not in any section.
        """
        sect, offset = self._section_and_offset()

        # encode the values
        self._parent.debug("packing {}".format(values))
        s = struct.Struct(format)
        chunk = s.pack(*values)

        # and update the section; assigning 'bytes' marks it dirty
        data = sect['bytes']
        data[offset:(offset + s.size)] = chunk
        sect['bytes'] = data

    def deref(self, format):
        """Dereference the symbol and return the single value it points to."""
        return self.unpack(format, 'value')['value']

    def assign(self, format, value):
        """Assign a single value to the thing the symbol references."""
        self.pack(format, [value])
def to_binary(macho, output, verbose=False):
    '''emit a compact in-memory binary representation of the mach-o file

    macho   -- parsed File whose sections are to be emitted
    output  -- seekable binary file object to write to, starting at offset 0
    verbose -- when True, print a line for each section written
    '''
    output.seek(0)
    data_offset = 0
    for section in macho.sections_inorder():
        # We assume that we simply want to pack all of the
        # text/data sections in the input file into the output, and
        # that we should stop once we hit a zero-filled section because
        # we've run out of data.
        #
        # The section type is the low byte of the flags, not a flag bit,
        # so it has to be masked out and compared.
        section_type = section['flags'] & SECTION_TYPE
        if (section_type in (S_ZEROFILL, S_GB_ZEROFILL) or
                section['sectname'] in ('__common', '__bss')):
            break

        # Make a note of where we are in the output file when
        # we start processing sections in the __DATA segment
        in_data = section['segname'] == '__DATA'
        if data_offset == 0 and in_data:
            data_offset = output.tell()

        # pad sections to maintain alignment; __DATA sections are aligned
        # relative to the start of __DATA rather than the start of the file
        alignment = 1 << section['align']
        skew = data_offset if in_data else 0
        padding = (alignment - ((output.tell() - skew) % alignment)) % alignment
        output.write(bytearray(padding))

        # write the section contents
        if section['size'] > 0:
            if verbose is True:
                print('{}/{} 0x{:x}/0x{:x} @ 0x{:x}'.format(section['segname'],
                                                           section['sectname'],
                                                           section['addr'],
                                                           section['size'],
                                                           output.tell()))
            output.write(section['bytes'])
#
# Be a useful utility.
#
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='mach-o utility')
    parser.add_argument('-m', '--macho', required=True,
                        help='path to the mach-o file')
    parser.add_argument('-o', '--output',
                        help='path to the output file')
    parser.add_argument('-c', '--convert', dest='format', choices=['binary', 'TBD'],
                        help='convert to the specified format')
    parser.add_argument('-v', '--verbose', default=False, action='store_true',
                        help='enable verbose output')
    parser.add_argument('-d', '--debug', default=False, action='store_true',
                        help='enable debug output')
    args = parser.parse_args()

    source = None
    try:
        try:
            source = io.open(args.macho, 'rb')
            f = File(source, args.debug)
        except Exception:
            # report which file was the problem, then let the error propagate
            print("macho: failed opening " + args.macho)
            raise

        if args.format == 'binary' or args.format == 'objcopy':
            if args.output is None:
                raise RuntimeError("missing -o argument to specify output name")
            # the context manager closes the output even if conversion fails
            with io.open(args.output, 'wb') as o:
                to_binary(f, o, args.verbose)
    finally:
        # section contents are read lazily, so the input has to stay open
        # until conversion is finished
        if source is not None:
            source.close()