import macholib
from macholib import MachO as macho
from collections import namedtuple
import uuid
import sys

#
# Defines segment/section descriptions that can be used by external users
# like kext management to keep track of memory layout. This avoids the need
# to keep a full Mach-O instance around.
#

MachOSegment = namedtuple(
    'MachOSegment',
    'name vmaddr vmsize fileoff filesize sections'
)

MachOSection = namedtuple(
    'MachOSection',
    'sectname addr size fileoff'
)


def _c_string(raw):
    """
        Decodes a fixed-width, NUL-padded bytes field into a str.

        A field that uses its full width carries no NUL terminator; in that
        case the whole field is the name (a plain ``raw[:raw.find(...)]``
        would silently drop the last character because find() returns -1).
    """
    end = raw.find(b'\x00')
    if end < 0:
        end = len(raw)
    return raw[:end].decode()


#
# The Mach-O library loads data for each section in a Mach-O.
# This breaks our macros in few ways:
#   - It is slow and no one is really using it.
#   - File offsets in fileset KC points outside of the file window reported
#     by OSkext API.
#
# Until macholib gets some option to avoid reading section data we have to
# patch it here.
#
# !!! Note. This works only with lib version 1.15.2 (checked below) !!!

if macholib.__version__ == "1.15.2":
    from macholib.mach_o import (
        LC_ID_DYLIB,
        LC_REGISTRY,
        LC_SEGMENT,
        LC_SEGMENT_64,
        S_ZEROFILL,
        load_command,
        section,
        section_64,
    )
    from macholib.ptypes import sizeof
    from macholib.util import fileview

    # !!! This is the actual patch for macholib 1.15.2 !!!
    #
    #   1. MemMachOHeader subclasses macho.MachOHeader
    #   2. Overloaded load() method is copy/paste of the original load() with
    #      small patch added that disables section contents loading.
    #   3. The new MemMachOHeader is injected back into library and used
    #      in place of macho.MachOHeader.
    #
    # This code should not ever exist in the first place. So the plan is to
    # remove it when macholib gets improved or abandoned by our own
    # implementation.
    class MemMachOHeader(macho.MachOHeader):
        """ Mach-O header parser that does not try to load section data. """

        def load(self, fh):
            """
                Parses the Mach-O header and all load commands from fh.

                Populates self.header, self.commands, self.filetype,
                self.total_size and self.low_offset. Section headers are
                parsed but section contents are never read.

                Raises ValueError on an unknown load command, a duplicate
                LC_ID_DYLIB, a segment command whose size does not match its
                section count, or when the sum of command sizes disagrees
                with the header.
            """
            fh = fileview(fh, self.offset, self.size)
            fh.seek(0)

            self.sizediff = 0
            kw = {"_endian_": self.endian}
            header = self.mach_header.from_fileobj(fh, **kw)
            self.header = header
            # If header.magic != self.MH_MAGIC:
            #    raise ValueError("header has magic %08x, expecting %08x" % (
            #        header.magic, self.MH_MAGIC))

            cmd = self.commands = []

            self.filetype = self.get_filetype_shortname(header.filetype)

            read_bytes = 0
            low_offset = sys.maxsize
            for i in range(header.ncmds):
                # read the load command
                cmd_load = load_command.from_fileobj(fh, **kw)

                # read the specific command
                klass = LC_REGISTRY.get(cmd_load.cmd, None)
                if klass is None:
                    raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
                cmd_cmd = klass.from_fileobj(fh, **kw)

                if cmd_load.cmd == LC_ID_DYLIB:
                    # remember where this command was
                    if self.id_cmd is not None:
                        raise ValueError("This dylib already has an id")
                    self.id_cmd = i

                if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                    # for segment commands, read the list of segments
                    segs = []
                    # assert that the size makes sense
                    if cmd_load.cmd == LC_SEGMENT:
                        section_cls = section
                    else:  # LC_SEGMENT_64
                        section_cls = section_64

                    expected_size = (
                        sizeof(klass)
                        + sizeof(load_command)
                        + (sizeof(section_cls) * cmd_cmd.nsects)
                    )
                    if cmd_load.cmdsize != expected_size:
                        raise ValueError("Segment size mismatch")
                    # this is a zero block or something
                    # so the beginning is wherever the fileoff of this command is
                    if cmd_cmd.nsects == 0:
                        if cmd_cmd.filesize != 0:
                            low_offset = min(low_offset, cmd_cmd.fileoff)
                    else:
                        # this one has multiple segments
                        for _j in range(cmd_cmd.nsects):
                            # read the segment
                            seg = section_cls.from_fileobj(fh, **kw)
                            # If the segment has a size and is not zero filled
                            # then its beginning is the offset of this segment
                            not_zerofill = (seg.flags & S_ZEROFILL) != S_ZEROFILL
                            if seg.offset > 0 and seg.size > 0 and not_zerofill:
                                low_offset = min(low_offset, seg.offset)

                            # Do NOT read section data. It is not required and
                            # does not work well with fileset KC offsets.
                            # The upstream implementation does this here:
                            #
                            #   if not_zerofill:
                            #       c = fh.tell()
                            #       fh.seek(seg.offset)
                            #       sd = fh.read(seg.size)
                            #       seg.add_section_data(sd)
                            #       fh.seek(c)
                            segs.append(seg)
                    # data is a list of segments
                    cmd_data = segs
                else:
                    # data is a raw str
                    data_size = cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                    cmd_data = fh.read(data_size)
                cmd.append((cmd_load, cmd_cmd, cmd_data))
                read_bytes += cmd_load.cmdsize

            # make sure the header made sense
            if read_bytes != header.sizeofcmds:
                raise ValueError(
                    "Read %d bytes, header reports %d bytes"
                    % (read_bytes, header.sizeofcmds)
                )
            self.total_size = sizeof(self.mach_header) + read_bytes
            self.low_offset = low_offset

    # Patch the library to use our own header class instead.
    macho.MachOHeader = MemMachOHeader


class MemMachO(macho.MachO):
    """ Mach-O implementation that accepts I/O stream instead of file. """

    def __init__(self, file):
        """
            Creates Mach-O parser on top of provided I/O.

            file must be a seekable binary stream; it is rewound to the
            start and parsed immediately. The base class __init__ is
            deliberately not called; the attributes it would set are
            initialized here instead.
        """

        # Figure out file size from the I/O.
        file.seek(0, 2)
        size = file.tell()
        file.seek(0, 0)

        # supports the ObjectGraph protocol
        # (str.format needs a {} placeholder; '%d' would be left verbatim)
        self.graphident = 'mem:{:d}//'.format(size)
        self.filename = 'mem:{:d}//'.format(size)
        self.loader_path = "<no-path>"

        # initialized by load
        self.fat = None
        self.headers = []

        self.load(file)

    @staticmethod
    def make_seg(seg, sects):
        """
            Constructs MachOSegment from input.

            seg is a segment command and sects the list of its section
            headers. Returns a MachOSegment whose sections field is a list
            of MachOSection tuples.
        """

        # Wrap all sections in MachOSection tuple. The section's own name
        # is sectname; segname is the name of the owning segment.
        segsec = [
            MachOSection(
                sectname=_c_string(s.sectname),
                addr=s.addr,
                fileoff=s.offset,
                size=s.size
            )
            for s in sects
        ]

        # Return MachOSegment
        return MachOSegment(
            name=_c_string(seg.segname),
            vmaddr=seg.vmaddr,
            vmsize=seg.vmsize,
            fileoff=seg.fileoff,
            filesize=seg.filesize,
            sections=segsec
        )

    @property
    def segments(self):
        """
            Constructs section/segment descriptors.

            Returns a list of MachOSegment tuples, one per segment load
            command across all headers. Values are cached in an instance
            attribute.
        """
        if hasattr(self, '_segments'):
            return self._segments

        # Wrap all segments/sections into a MachOSegment/MachOSection.
        self._segments = [
            self.make_seg(seg, sec)
            for h in self.headers
            for _, seg, sec in h.commands
            if isinstance(seg, SEGMENT_TYPES)
        ]

        return self._segments

    @property
    def uuid(self):
        """
            Returns UUID of the Mach-O.

            The value is the upper-case string form of the first uuid
            command found and is cached on the instance. Returns None when
            no header carries a uuid command.
        """
        if hasattr(self, '_uuid'):
            return self._uuid

        for h in self.headers:
            for cmd in h.commands:
                # cmds is [(load_command, segment, [sections..])]
                (_, segment, _) = cmd
                if isinstance(segment, macholib.mach_o.uuid_command):
                    self._uuid = str(uuid.UUID(bytes=segment.uuid)).upper()
                    return self._uuid

        return None


# some fixups in macholib that are required for kext support
macholib.mach_o.MH_KEXT_BUNDLE = 0xB
macholib.mach_o.MH_FILETYPE_NAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext bundle"
macholib.mach_o.MH_FILETYPE_SHORTNAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext"

# Load command payload types that describe a segment (64-bit and 32-bit).
SEGMENT_TYPES = (macholib.mach_o.segment_command_64, macholib.mach_o.segment_command)


def get_load_command_human_name(lc):
    """ Returns the name of load command lc as reported by lc.get_cmd_name(). """
    return lc.get_cmd_name()


class VisualMachoMap(object):
    """ Prints an ASCII-art map of the segments and sections of a Mach-O. """

    KB_1 = 1024
    KB_16 = 16 * 1024
    MB_1 = 1 * 1024 * 1024
    GB_1 = 1 * 1024 * 1024 * 1024

    def __init__(self, name, width=40):
        """
            name  - title printed above the map.
            width - initial width of the box in characters; printMachoMap
                    widens it when a block name does not fit.
        """
        self.name = name
        # Honor the caller's width (it used to be hard-coded to 40).
        self.width = width
        self.default_side_padding = 2

    def get_header_line(self):
        """ Returns the top/bottom border line of the box. """
        return '+' + '-' * (self.width - 2) + '+'

    def get_space_line(self):
        """ Returns an empty line of the box. """
        return '|' + ' ' * (self.width - 2) + '|'

    def get_dashed_line(self):
        """ Returns a dashed line; used as the background for segment names. """
        return '|' + '-' * (self.width - 2) + '|'

    def get_dotted_line(self):
        """ Returns a dotted line; used as the background for section names. """
        return '|' + '.' * (self.width - 2) + '|'

    def center_text_in_line(self, line, text):
        """
            Returns line with text overlaid roughly at its center.

            Raises ValueError when text does not fit between the two border
            characters of line.
        """
        even_length = bool(len(text) % 2 == 0)
        if len(text) > len(line) - 2:
            raise ValueError("text is larger than line of text")

        lbreak_pos = (len(line) // 2) - (len(text) // 2)
        if not even_length:
            lbreak_pos -= 1
        out = line[:lbreak_pos] + text
        return out + line[len(out):]

    def get_separator_lines(self):
        """ Returns the two lines drawn to mark a large (elided) block. """
        return ['/' + ' ' * (self.width - 2) + '/', '/' + ' ' * (self.width - 2) + '/']

    def printMachoMap(self, mobj):
        """
            Prints the memory map of mobj followed by its other load commands.

            mobj is a parsed Mach-O object exposing headers, each with a
            commands list of (load_command, command, data) tuples. Segments
            and sections are sorted by vmaddr. self.width is grown (and made
            even) when needed to fit the longest block name.
        """
        MapBlock = namedtuple('MapBlock', 'name vmaddr vmsize fileoff filesize extra_info is_segment')
        outstr = self.name + '\n'
        other_cmds = ''
        blocks = []
        for hdr in mobj.headers:
            for cmd in hdr.commands:
                # cmds is [(load_command, segment, [sections..])]
                (lc, segment, sections) = cmd
                lc_cmd_str = get_load_command_human_name(lc)
                lc_str_rep = "\n\t LC: {:s} size:{:d} nsects:{:d}".format(lc_cmd_str, lc.cmdsize, len(sections))
                if isinstance(segment, SEGMENT_TYPES):
                    segname = _c_string(segment.segname)
                    blocks.append(MapBlock(segname, segment.vmaddr, segment.vmsize, segment.fileoff, segment.filesize,
                                           ' LC:{} : {} init:{:#0X} max:{:#0X}'.format(lc_cmd_str, segname, segment.initprot, segment.maxprot),
                                           True))
                    for section in sections:
                        section_name = _c_string(section.sectname)
                        blocks.append(MapBlock(section_name, section.addr, section.size, section.offset,
                                               section.size, 'al:{} flags:{:#0X}'.format(section.align, section.flags), False))
                elif isinstance(segment, macholib.mach_o.uuid_command):
                    other_cmds += "\n\t uuid: {:s}".format(str(uuid.UUID(bytes=segment.uuid)).upper())
                elif isinstance(segment, macholib.mach_o.rpath_command):
                    other_cmds += "\n\t rpath: {:s}".format(segment.path)
                elif isinstance(segment, macholib.mach_o.dylib_command):
                    other_cmds += "\n\t dylib: {:s} ({:s})".format(str(sections[:sections.find(b'\x00')]), str(segment.current_version))
                else:
                    other_cmds += lc_str_rep

        # fixup the self.width param
        for _b in blocks:
            if self.default_side_padding + len(_b.name) + 2 > self.width:
                self.width = self.default_side_padding + len(_b.name) + 2
        if self.width % 2 != 0:
            self.width += 1

        sorted_blocks = sorted(blocks, key=lambda b: b.vmaddr)
        mstr = [self.get_header_line()]
        prev_block = MapBlock('', 0, 0, 0, 0, '', False)
        for b in sorted_blocks:
            # TODO add separator blocks if vmaddr is large from prev_block
            if b.is_segment:
                s = self.get_dashed_line()
            else:
                s = self.get_dotted_line()
            s = self.center_text_in_line(s, b.name)
            line = "{:s} {: <#020X} ({: <10d}) floff:{: <#08x}  {}".format(s, b.vmaddr, b.vmsize, b.fileoff, b.extra_info)
            # Leave a visual gap when this block starts far from the previous one.
            if (b.vmaddr - prev_block.vmaddr) > VisualMachoMap.KB_16:
                mstr.append(self.get_space_line())
                mstr.append(self.get_space_line())

            mstr.append(line)

            # Large blocks get a "torn" separator to hint at their size.
            if b.vmsize > VisualMachoMap.MB_1:
                mstr.append(self.get_space_line())
                mstr.extend(self.get_separator_lines())
                mstr.append(self.get_space_line())
            prev_block = b
        mstr.append(self.get_space_line())
        if prev_block.vmsize > VisualMachoMap.KB_16:
            mstr.append(self.get_space_line())
        mstr.append(self.get_header_line())
        print(outstr)
        print("\n".join(mstr))
        print("\n\n=============== Other Load Commands ===============")
        print(other_cmds)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: {} /path/to/macho_binary".format(sys.argv[0]))
        sys.exit(1)
    with open(sys.argv[-1], 'rb') as fp:
        mobject = MemMachO(fp)

        p = VisualMachoMap(sys.argv[-1])
        p.printMachoMap(mobject)
    sys.exit(0)