iBoot/lib/macho/macho_header.h

598 lines
26 KiB
C

/*
* Copyright (C) 2007-2011 Apple Inc. All rights reserved.
* Copyright (C) 1998-2006 Apple Computer, Inc. All rights reserved.
*
* This document is the property of Apple Inc.
* It is considered confidential and proprietary.
*
* This document may not be reproduced or transmitted in any form,
* in whole or in part, without the express written permission of
* Apple Inc.
*/
#ifndef __MACHO_HEADER_H
#define __MACHO_HEADER_H
// Taken from osfmk/mach/vm_prot.h
typedef int vm_prot_t;
#define VM_PROT_NONE ((vm_prot_t) 0x00)
#define VM_PROT_READ ((vm_prot_t) 0x01) /* read permission */
#define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */
#define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */
// Taken from osfmk/mach/arm/vm_types.h
typedef int integer_t;
// Taken from osfmk/mach/machine.h
typedef integer_t cpu_type_t;
typedef integer_t cpu_subtype_t;
#ifndef __LP64__
// Taken from mach-o/loader.h
/*
* The 32-bit mach header appears at the very beginning of the object file for
* 32-bit architectures.
*/
typedef struct mach_header {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier */
cpu_subtype_t cpusubtype; /* machine specifier */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
}mach_header_t;
/* Constant for the magic field of the mach_header (32-bit architectures) */
#define MH_MAGIC 0xfeedface /* the mach magic number */
#else
/*
* The 64-bit mach header appears at the very beginning of object files for
* 64-bit architectures.
*/
typedef struct mach_header_64 {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier */
cpu_subtype_t cpusubtype; /* machine specifier */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
uint32_t reserved; /* reserved */
} mach_header_t;
/* Constant for the magic field of the mach_header_64 (64-bit architectures) */
#define MH_MAGIC 0xfeedfacf /* the 64-bit mach magic number */
#endif
/* Constants for the flags field of the mach_header */
#define MH_PIE 0x200000 /* When this bit is set, the OS will
load the main executable at a
random address. Only used in
MH_EXECUTE filetypes. */
/*
* The load commands directly follow the mach_header. The total size of all
* of the commands is given by the sizeofcmds field in the mach_header. All
* load commands must have as their first two fields cmd and cmdsize. The cmd
* field is filled in with a constant for that command type. Each command type
* has a structure specifically for it. The cmdsize field is the size in bytes
* of the particular load command structure plus anything that follows it that
* is a part of the load command (i.e. section structures, strings, etc.). To
* advance to the next load command the cmdsize can be added to the offset or
* pointer of the current load command. The cmdsize for 32-bit architectures
* MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple
* of 8 bytes (these are forever the maximum alignment of any load commands).
* The padded bytes must be zero. All tables in the object file must also
* follow these rules so the file can be memory mapped. Otherwise the pointers
* to these tables will not work well or at all on some machines. With all
* padding zeroed like objects will compare byte for byte.
*/
struct load_command {
uint32_t cmd; /* type of load command */
uint32_t cmdsize; /* total size of command in bytes */
};
/* Constants for the cmd field of all load commands, the type */
#define LC_SEGMENT 0x1 /* segment of this file to be mapped */
#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */
#define LC_THREAD 0x4 /* thread */
#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */
#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */
#define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be mapped */
#ifndef __LP64__
#define LC_SEGMENT_ARM LC_SEGMENT
#else
#define LC_SEGMENT_ARM LC_SEGMENT_64
#endif
#ifndef __LP64__
/*
* The segment load command indicates that a part of this file is to be
* mapped into the task's address space. The size of this segment in memory,
* vmsize, maybe equal to or larger than the amount to map from this file,
* filesize. The file is mapped starting at fileoff to the beginning of
* the segment in memory, vmaddr. The rest of the memory of the segment,
* if any, is allocated zero fill on demand. The segment's maximum virtual
* memory protection and initial virtual memory protection are specified
* by the maxprot and initprot fields. If the segment has sections then the
* section structures directly follow the segment command and their size is
* reflected in cmdsize.
*/
typedef struct segment_command { /* for 32-bit architectures */
uint32_t cmd; /* LC_SEGMENT */
uint32_t cmdsize; /* includes sizeof section structs */
char segname[16]; /* segment name */
uint32_t vmaddr; /* memory address of this segment */
uint32_t vmsize; /* memory size of this segment */
uint32_t fileoff; /* file offset of this segment */
uint32_t filesize; /* amount to map from the file */
vm_prot_t maxprot; /* maximum VM protection */
vm_prot_t initprot; /* initial VM protection */
uint32_t nsects; /* number of sections in segment */
uint32_t flags; /* flags */
} segment_command_t;
#else
/*
* The 64-bit segment load command indicates that a part of this file is to be
* mapped into a 64-bit task's address space. If the 64-bit segment has
* sections then section_64 structures directly follow the 64-bit segment
* command and their size is reflected in cmdsize.
*/
typedef struct segment_command_64 { /* for 64-bit architectures */
uint32_t cmd; /* LC_SEGMENT_64 */
uint32_t cmdsize; /* includes sizeof section_64 structs */
char segname[16]; /* segment name */
uint64_t vmaddr; /* memory address of this segment */
uint64_t vmsize; /* memory size of this segment */
uint64_t fileoff; /* file offset of this segment */
uint64_t filesize; /* amount to map from the file */
vm_prot_t maxprot; /* maximum VM protection */
vm_prot_t initprot; /* initial VM protection */
uint32_t nsects; /* number of sections in segment */
uint32_t flags; /* flags */
} segment_command_t;
#endif
#ifndef __LP64__
typedef struct section { /* for 32-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint32_t addr; /* memory address of this section */
uint32_t size; /* size in bytes of this section */
uint32_t offset; /* file offset of this section */
uint32_t align; /* section alignment (power of 2) */
uint32_t reloff; /* file offset of relocation entries */
uint32_t nreloc; /* number of relocation entries */
uint32_t flags; /* flags (section type and attributes)*/
uint32_t reserved1; /* reserved (for offset or index) */
uint32_t reserved2; /* reserved (for count or sizeof) */
} section_t;
#else
typedef struct section_64 { /* for 64-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint64_t addr; /* memory address of this section */
uint64_t size; /* size in bytes of this section */
uint32_t offset; /* file offset of this section */
uint32_t align; /* section alignment (power of 2) */
uint32_t reloff; /* file offset of relocation entries */
uint32_t nreloc; /* number of relocation entries */
uint32_t flags; /* flags (section type and attributes)*/
uint32_t reserved1; /* reserved (for offset or index) */
uint32_t reserved2; /* reserved (for count or sizeof) */
uint32_t reserved3; /* reserved */
} section_t;
#endif
/*
* The flags field of a section structure is separated into two parts a section
* type and section attributes. The section types are mutually exclusive (it
* can only have one type) but the section attributes are not (it may have more
* than one attribute).
*/
#define SECTION_TYPE 0x000000ff /* 256 section types */
#define SECTION_ATTRIBUTES 0xffffff00 /* 24 section attributes */
/* Constants for the type of a section */
#define S_REGULAR 0x0 /* regular section */
#define S_ZEROFILL 0x1 /* zero fill on demand section */
#define S_CSTRING_LITERALS 0x2 /* section with only literal C strings*/
#define S_4BYTE_LITERALS 0x3 /* section with only 4 byte literals */
#define S_8BYTE_LITERALS 0x4 /* section with only 8 byte literals */
#define S_LITERAL_POINTERS 0x5 /* section with only pointers to */
/* literals */
/*
* For the two types of symbol pointers sections and the symbol stubs section
* they have indirect symbol table entries. For each of the entries in the
* section the indirect symbol table entries, in corresponding order in the
* indirect symbol table, start at the index stored in the reserved1 field
* of the section structure. Since the indirect symbol table entries
* correspond to the entries in the section the number of indirect symbol table
* entries is inferred from the size of the section divided by the size of the
* entries in the section. For symbol pointers sections the size of the entries
* in the section is 4 bytes and for symbol stubs sections the byte size of the
* stubs is stored in the reserved2 field of the section structure.
*/
#define S_NON_LAZY_SYMBOL_POINTERS 0x6 /* section with only non-lazy
symbol pointers */
#define S_LAZY_SYMBOL_POINTERS 0x7 /* section with only lazy symbol
pointers */
#define S_SYMBOL_STUBS 0x8 /* section with only symbol
stubs, byte size of stub in
the reserved2 field */
#define S_MOD_INIT_FUNC_POINTERS 0x9 /* section with only function
pointers for initialization*/
#define S_MOD_TERM_FUNC_POINTERS 0xa /* section with only function
pointers for termination */
#define S_COALESCED 0xb /* section contains symbols that
are to be coalesced */
#define S_GB_ZEROFILL 0xc /* zero fill on demand section
(that can be larger than 4
gigabytes) */
#define S_INTERPOSING 0xd /* section with only pairs of
function pointers for
interposing */
#define S_16BYTE_LITERALS 0xe /* section with only 16 byte
literals */
#define S_DTRACE_DOF 0xf /* section contains
DTrace Object Format */
#define S_LAZY_DYLIB_SYMBOL_POINTERS 0x10 /* section with only lazy
symbol pointers to lazy
loaded dylibs */
/*
* The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
* "stab" style symbol table information as described in the header files
* <nlist.h> and <stab.h>.
*/
struct symtab_command {
uint32_t cmd; /* LC_SYMTAB */
uint32_t cmdsize; /* sizeof(struct symtab_command) */
uint32_t symoff; /* symbol table offset */
uint32_t nsyms; /* number of symbol table entries */
uint32_t stroff; /* string table offset */
uint32_t strsize; /* string table size in bytes */
};
/*
* This is the second set of the symbolic information which is used to support
* the data structures for the dynamically link editor.
*
* The original set of symbolic information in the symtab_command which contains
* the symbol and string tables must also be present when this load command is
* present. When this load command is present the symbol table is organized
* into three groups of symbols:
* local symbols (static and debugging symbols) - grouped by module
* defined external symbols - grouped by module (sorted by name if not lib)
* undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
* and in order the were seen by the static
* linker if MH_BINDATLOAD is set)
* In this load command there are offsets and counts to each of the three groups
* of symbols.
*
* This load command contains a the offsets and sizes of the following new
* symbolic information tables:
* table of contents
* module table
* reference symbol table
* indirect symbol table
* The first three tables above (the table of contents, module table and
* reference symbol table) are only present if the file is a dynamically linked
* shared library. For executable and object modules, which are files
* containing only one module, the information that would be in these three
* tables is determined as follows:
* table of contents - the defined external symbols are sorted by name
* module table - the file contains only one module so everything in the
* file is part of the module.
* reference symbol table - is the defined and undefined external symbols
*
* For dynamically linked shared library files this load command also contains
* offsets and sizes to the pool of relocation entries for all sections
* separated into two groups:
* external relocation entries
* local relocation entries
* For executable and object modules the relocation entries continue to hang
* off the section structures.
*/
struct dysymtab_command {
uint32_t cmd; /* LC_DYSYMTAB */
uint32_t cmdsize; /* sizeof(struct dysymtab_command) */
/*
* The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
* are grouped into the following three groups:
* local symbols (further grouped by the module they are from)
* defined external symbols (further grouped by the module they are from)
* undefined symbols
*
* The local symbols are used only for debugging. The dynamic binding
* process may have to use them to indicate to the debugger the local
* symbols for a module that is being bound.
*
* The last two groups are used by the dynamic binding process to do the
* binding (indirectly through the module table and the reference symbol
* table when this is a dynamically linked shared library file).
*/
uint32_t ilocalsym; /* index to local symbols */
uint32_t nlocalsym; /* number of local symbols */
uint32_t iextdefsym;/* index to externally defined symbols */
uint32_t nextdefsym;/* number of externally defined symbols */
uint32_t iundefsym; /* index to undefined symbols */
uint32_t nundefsym; /* number of undefined symbols */
/*
* For the for the dynamic binding process to find which module a symbol
* is defined in the table of contents is used (analogous to the ranlib
* structure in an archive) which maps defined external symbols to modules
* they are defined in. This exists only in a dynamically linked shared
* library file. For executable and object modules the defined external
* symbols are sorted by name and is use as the table of contents.
*/
uint32_t tocoff; /* file offset to table of contents */
uint32_t ntoc; /* number of entries in table of contents */
/*
* To support dynamic binding of "modules" (whole object files) the symbol
* table must reflect the modules that the file was created from. This is
* done by having a module table that has indexes and counts into the merged
* tables for each module. The module structure that these two entries
* refer to is described below. This exists only in a dynamically linked
* shared library file. For executable and object modules the file only
* contains one module so everything in the file belongs to the module.
*/
uint32_t modtaboff; /* file offset to module table */
uint32_t nmodtab; /* number of module table entries */
/*
* To support dynamic module binding the module structure for each module
* indicates the external references (defined and undefined) each module
* makes. For each module there is an offset and a count into the
* reference symbol table for the symbols that the module references.
* This exists only in a dynamically linked shared library file. For
* executable and object modules the defined external symbols and the
* undefined external symbols indicates the external references.
*/
uint32_t extrefsymoff; /* offset to referenced symbol table */
uint32_t nextrefsyms; /* number of referenced symbol table entries */
/*
* The sections that contain "symbol pointers" and "routine stubs" have
* indexes and (implied counts based on the size of the section and fixed
* size of the entry) into the "indirect symbol" table for each pointer
* and stub. For every section of these two types the index into the
* indirect symbol table is stored in the section header in the field
* reserved1. An indirect symbol table entry is simply a 32bit index into
* the symbol table to the symbol that the pointer or stub is referring to.
* The indirect symbol table is ordered to match the entries in the section.
*/
uint32_t indirectsymoff; /* file offset to the indirect symbol table */
uint32_t nindirectsyms; /* number of indirect symbol table entries */
/*
* To support relocating an individual module in a library file quickly the
* external relocation entries for each module in the library need to be
* accessed efficiently. Since the relocation entries can't be accessed
* through the section headers for a library file they are separated into
* groups of local and external entries further grouped by module. In this
* case the presents of this load command who's extreloff, nextrel,
* locreloff and nlocrel fields are non-zero indicates that the relocation
* entries of non-merged sections are not referenced through the section
* structures (and the reloff and nreloc fields in the section headers are
* set to zero).
*
* Since the relocation entries are not accessed through the section headers
* this requires the r_address field to be something other than a section
* offset to identify the item to be relocated. In this case r_address is
* set to the offset from the vmaddr of the first LC_SEGMENT command.
* For MH_SPLIT_SEGS images r_address is set to the the offset from the
* vmaddr of the first read-write LC_SEGMENT command.
*
* The relocation entries are grouped by module and the module table
* entries have indexes and counts into them for the group of external
* relocation entries for that the module.
*
* For sections that are merged across modules there must not be any
* remaining external relocation entries for them (for merged sections
* remaining relocation entries must be local).
*/
uint32_t extreloff; /* offset to external relocation entries */
uint32_t nextrel; /* number of external relocation entries */
/*
* All the local relocation entries are grouped together (they are not
* grouped by their module since they are only used if the object is moved
* from it staticly link edited address).
*/
uint32_t locreloff; /* offset to local relocation entries */
uint32_t nlocrel; /* number of local relocation entries */
};
/*
* An indirect symbol table entry is simply a 32bit index into the symbol table
* to the symbol that the pointer or stub is refering to. Unless it is for a
* non-lazy symbol pointer section for a defined symbol which strip(1) as
* removed. In which case it has the value INDIRECT_SYMBOL_LOCAL. If the
* symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that.
*/
#define INDIRECT_SYMBOL_LOCAL 0x80000000
#define INDIRECT_SYMBOL_ABS 0x40000000
/*
* Thread commands contain machine-specific data structures suitable for
* use in the thread state primitives. The machine specific data structures
* follow the struct thread_command as follows.
* Each flavor of machine specific data structure is preceded by an unsigned
* long constant for the flavor of that data structure, an uint32_t
* that is the count of longs of the size of the state data structure and then
* the state data structure follows. This triple may be repeated for many
* flavors. The constants for the flavors, counts and state data structure
* definitions are expected to be in the header file <machine/thread_status.h>.
* These machine specific data structures sizes must be multiples of
* 4 bytes The cmdsize reflects the total size of the thread_command
* and all of the sizes of the constants for the flavors, counts and state
* data structures.
*
* For executable objects that are unix processes there will be one
* thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor.
* This is the same as a LC_THREAD, except that a stack is automatically
* created (based on the shell's limit for the stack size). Command arguments
* and environment variables are copied onto that stack.
*/
struct thread_command {
uint32_t cmd; /* LC_THREAD or LC_UNIXTHREAD */
uint32_t cmdsize; /* total size of this command */
/* uint32_t flavor flavor of thread state */
/* uint32_t count count of longs in thread state */
/* struct XXX_thread_state state thread state for this flavor */
/* ... */
};
#ifdef __arm__
// Taken from osfmk/mach/arm/_types.h
typedef struct arm_thread_state
{
uint32_t r[13]; /* General purpose register r0-r12 */
uint32_t sp; /* Stack pointer r13 */
uint32_t lr; /* Link regisster r14 */
uint32_t pc; /* Program counter r15 */
uint32_t cpsr; /* Current program status register */
} arm_thread_state_t;
#else
#ifdef __arm64__
// Based from osfmk/mach/arm/_structs.h
typedef struct arm_thread_state64
{
uint64_t x[29]; /* General purpose registers x0-x28 */
uint64_t fp; /* Frame pointer x29 */
uint64_t lr; /* Link register x30 */
uint64_t sp; /* Stack pointer x31 */
uint64_t pc; /* Program counter */
uint32_t cpsr; /* Current program status register */
} arm_thread_state_t;
#endif
#endif
/*
* Format of a symbol table entry of a Mach-O file for 32-bit architectures.
* Modified from the BSD format. The modifications from the original format
* were changing n_other (an unused field) to n_sect and the addition of the
* N_SECT type. These modifications are required to support symbols in a larger
* number of sections not just the three sections (text, data and bss) in a BSD
* file.
*/
struct nlist {
union {
#ifndef __LP64__
char *n_name; /* for use when in-core */
#endif
int32_t n_strx; /* index into the string table */
} n_un;
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
int16_t n_desc; /* see <mach-o/stab.h> */
uint32_t n_value; /* value of this symbol (or stab offset) */
};
/*
* This is the symbol table entry structure for 64-bit architectures.
*/
struct nlist_64 {
union {
uint32_t n_strx; /* index into the string table */
} n_un;
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
uint16_t n_desc; /* see <mach-o/stab.h> */
uint64_t n_value; /* value of this symbol (or stab offset) */
};
/*
* Symbols with a index into the string table of zero (n_un.n_strx == 0) are
* defined to have a null, "", name. Therefore all string indexes to non null
* names must not have a zero string index. This is bit historical information
* that has never been well documented.
*/
/*
* The n_type field really contains four fields:
* unsigned char N_STAB:3,
* N_PEXT:1,
* N_TYPE:3,
* N_EXT:1;
* which are used via the following masks.
*/
#define N_STAB 0xe0 /* if any of these bits set, a symbolic debugging entry */
#define N_PEXT 0x10 /* private external symbol bit */
#define N_TYPE 0x0e /* mask for the type bits */
#define N_EXT 0x01 /* external symbol bit, set for external symbols */
/*
* Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD
* format. The modifications from the original format were changing the value
* of the r_symbolnum field for "local" (r_extern == 0) relocation entries.
* This modification is required to support symbols in an arbitrary number of
* sections not just the three sections (text, data and bss) in a 4.3BSD file.
* Also the last 4 bits have had the r_type tag added to them.
*/
struct relocation_info {
int32_t r_address; /* offset in the section to what is being
relocated */
uint32_t r_symbolnum:24, /* symbol index if r_extern == 1 or section
ordinal if r_extern == 0 */
r_pcrel:1, /* was relocated pc relative already */
r_length:2, /* 0=byte, 1=word, 2=long, 3=quad */
r_extern:1, /* does not include value of sym referenced */
r_type:4; /* if not 0, machine specific relocation type */
};
enum reloc_type_arm
{
ARM_RELOC_VANILLA, /* generic relocation as discribed above */
ARM_RELOC_PAIR, /* the second relocation entry of a pair */
ARM_RELOC_SECTDIFF, /* a PAIR follows with subtract symbol value */
ARM_RELOC_LOCAL_SECTDIFF, /* like ARM_RELOC_SECTDIFF, but the symbol
referenced was local. */
ARM_RELOC_PB_LA_PTR,/* prebound lazy pointer */
ARM_RELOC_BR24, /* 24 bit branch displacement (to a word address) */
ARM_THUMB_RELOC_BR22, /* 22 bit branch displacement (to a half-word
address) */
ARM_THUMB_32BIT_BRANCH, /* obsolete - a thumb 32-bit branch instruction
possibly needing page-spanning branch workaround */
/*
* For these two r_type relocations they always have a pair following them
* and the r_length bits are used differently. The encoding of the
* r_length is as follows:
* low bit of r_length:
* 0 - :lower16: for movw instructions
* 1 - :upper16: for movt instructions
* high bit of r_length:
* 0 - arm instructions
* 1 - thumb instructions
* the other half of the relocated expression is in the following pair
* relocation entry in the the low 16 bits of r_address field.
*/
ARM_RELOC_HALF,
ARM_RELOC_HALF_SECTDIFF
};
#endif /* __MACHO_HEADER_H */