tools/lldbmacros/macho.py

import macholib
from macholib import MachO as macho
from collections import namedtuple
import uuid
import sys


#
# Defines segment/section descriptors that can be used by external users
# (like kext management) to keep track of the memory layout without the need
# to keep a full Mach-O instance around.
#

# Lightweight description of one Mach-O segment; ``sections`` holds the
# MachOSection descriptors that belong to the segment.
MachOSegment = namedtuple(
    'MachOSegment',
    ['name', 'vmaddr', 'vmsize', 'fileoff', 'filesize', 'sections'],
)

# Lightweight description of one Mach-O section.
MachOSection = namedtuple(
    'MachOSection',
    ['sectname', 'addr', 'size', 'fileoff'],
)


#
# The Mach-O library loads data for each section in a Mach-O.
# This breaks our macros in a few ways:
#   - It is slow and no one is really using it.
#   - File offsets in a fileset KC point outside of the file window reported
#     by the OSKext API.
#
# Until macholib gets some option to avoid reading section data we have to
# patch it here.
#
# !!! Note. This works only with macholib 1.15.2 (see the version check) !!!

# The patch below is applied only when the installed macholib reports exactly
# this version; with any other version macho.MachOHeader is left untouched.
if macholib.__version__ == "1.15.2":
    from macholib.mach_o import (
        LC_ID_DYLIB,
        LC_REGISTRY,
        LC_SEGMENT,
        LC_SEGMENT_64,
        S_ZEROFILL,
        load_command,
        section,
        section_64,
    )
    from macholib.ptypes import sizeof
    from macholib.util import fileview


    # !!! This is the actual patch for macholib 1.15.2 !!!
    #
    #   1. MemMachOHeader subclasses macho.MachOHeader
    #   2. Overloaded load() method is copy/paste of the original load() with
    #      small patch added that disables section contents loading.
    #   3. The new MemMachOHeader is injected back into library and used
    #      in place of macho.MachOHeader.
    #
    # This code should not ever exist in the first place. So the plan is to
    # remove it when macholib gets improved or abandoned by our own
    # implementation.
    class MemMachOHeader(macho.MachOHeader):
        """ Mach-O header parser that does not try to load section data.

            Only load() is overridden; everything else is inherited from
            macho.MachOHeader.
        """

        def load(self, fh):
            """ Parses the Mach-O header and its load commands from fh.

                Populates self.header, self.commands, self.filetype,
                self.sizediff, self.total_size and self.low_offset, and sets
                self.id_cmd when an LC_ID_DYLIB command is present.
                Section headers are parsed but section contents are never
                read.

                Raises ValueError on an unknown load command, a second
                LC_ID_DYLIB, a segment command whose size does not match its
                section count, or when the bytes consumed by the load
                commands differ from header.sizeofcmds.
            """
            # Restrict all reads to this header's window (self.offset/self.size).
            fh = fileview(fh, self.offset, self.size)
            fh.seek(0)

            self.sizediff = 0
            kw = {"_endian_": self.endian}
            header = self.mach_header.from_fileobj(fh, **kw)
            self.header = header
            # If header.magic != self.MH_MAGIC:
            #    raise ValueError("header has magic %08x, expecting %08x" % (
            #        header.magic, self.MH_MAGIC))

            # cmd aliases self.commands; each entry appended below is a
            # (load_command, command structure, command data) tuple.
            cmd = self.commands = []

            self.filetype = self.get_filetype_shortname(header.filetype)

            # read_bytes accumulates cmdsize of every command for the final
            # sanity check; low_offset tracks the smallest file offset that
            # holds segment/section contents.
            read_bytes = 0
            low_offset = sys.maxsize
            for i in range(header.ncmds):
                # read the load command
                cmd_load = load_command.from_fileobj(fh, **kw)

                # read the specific command
                klass = LC_REGISTRY.get(cmd_load.cmd, None)
                if klass is None:
                    raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
                cmd_cmd = klass.from_fileobj(fh, **kw)

                if cmd_load.cmd == LC_ID_DYLIB:
                    # remember where this command was
                    if self.id_cmd is not None:
                        raise ValueError("This dylib already has an id")
                    self.id_cmd = i

                if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                    # for segment commands, read the list of sections
                    # (the upstream code calls them "segments")
                    segs = []
                    # assert that the size makes sense
                    if cmd_load.cmd == LC_SEGMENT:
                        section_cls = section
                    else:  # LC_SEGMENT_64
                        section_cls = section_64

                    # cmdsize must cover the generic load command, the segment
                    # command itself and one section header per section.
                    expected_size = (
                        sizeof(klass)
                        + sizeof(load_command)
                        + (sizeof(section_cls) * cmd_cmd.nsects)
                    )
                    if cmd_load.cmdsize != expected_size:
                        raise ValueError("Segment size mismatch")
                    # this is a zero block or something
                    # so the beginning is wherever the fileoff of this command is
                    if cmd_cmd.nsects == 0:
                        if cmd_cmd.filesize != 0:
                            low_offset = min(low_offset, cmd_cmd.fileoff)
                    else:
                        # this one has multiple sections
                        for _j in range(cmd_cmd.nsects):
                            # read the section header
                            seg = section_cls.from_fileobj(fh, **kw)
                            # If the section has a size and is not zero filled
                            # then its beginning is the offset of this section
                            not_zerofill = (seg.flags & S_ZEROFILL) != S_ZEROFILL
                            if seg.offset > 0 and seg.size > 0 and not_zerofill:
                                low_offset = min(low_offset, seg.offset)

                            # Do NOT read section data. It is not required and
                            # does not work well with fileset KC offsets.
                            # The upstream code is kept below as an inert
                            # string literal so the difference from the
                            # original load() stays visible.
                            """
                            if not_zerofill:
                                c = fh.tell()
                                fh.seek(seg.offset)
                                sd = fh.read(seg.size)
                                seg.add_section_data(sd)
                                fh.seek(c)
                            """
                            segs.append(seg)
                    # data is a list of section headers
                    cmd_data = segs

                else:
                    # data is a raw str: whatever follows the fixed-size
                    # command structure within cmdsize
                    data_size = cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                    cmd_data = fh.read(data_size)
                cmd.append((cmd_load, cmd_cmd, cmd_data))
                read_bytes += cmd_load.cmdsize

            # make sure the header made sense
            if read_bytes != header.sizeofcmds:
                raise ValueError(
                    "Read %d bytes, header reports %d bytes"
                    % (read_bytes, header.sizeofcmds)
                )
            self.total_size = sizeof(self.mach_header) + read_bytes
            self.low_offset = low_offset


    # Patch the library to use our own header class instead.
    macho.MachOHeader = MemMachOHeader


class MemMachO(macho.MachO):
    """ Mach-O implementation that accepts I/O stream instead of file.

        The base class constructor is intentionally not called; the
        attributes it would set up are initialized here and parsing is
        started directly on the provided stream.
    """

    def __init__(self, file):
        """ Creates Mach-O parser on top of provided I/O.

            file is a seekable binary stream positioned anywhere; it is
            rewound to the start before parsing.
        """

        # Figure out the stream size: seek to the end (whence=2), remember
        # the position and rewind to the beginning (whence=0).
        file.seek(0, 2)
        size = file.tell()
        file.seek(0, 0)

        # supports the ObjectGraph protocol
        # (str.format() needs a '{}' placeholder; the previous '%d' was
        # never substituted and every instance got the same identifier)
        ident = 'mem:{:d}//'.format(size)
        self.graphident = ident
        self.filename = ident
        self.loader_path = "<no-path>"

        # initialized by load
        self.fat = None
        self.headers = []

        self.load(file)

    @staticmethod
    def _decode_name(raw):
        """ Converts a fixed-size, NUL padded name field to str.

            A name that fills the whole field has no NUL terminator at all,
            so the name is everything up to the first NUL or the whole
            field when there is none.
        """
        return raw.split(b'\x00', 1)[0].decode()

    @staticmethod
    def make_seg(seg, sects):
        """ Constructs MachOSegment from input.

            seg is a segment command and sects the list of its section
            headers. Returns a MachOSegment whose sections attribute is a
            list of MachOSection tuples.
        """

        # Wrap all sections in MachOSection tuple. The section's own name
        # lives in sectname (segname is the name of the owning segment).
        segsec = [
            MachOSection(
                sectname=MemMachO._decode_name(s.sectname),
                addr=s.addr,
                fileoff=s.offset,
                size=s.size
            )
            for s in sects
        ]

        # Return MachOSegment
        return MachOSegment(
            name=MemMachO._decode_name(seg.segname),
            vmaddr=seg.vmaddr,
            vmsize=seg.vmsize,
            fileoff=seg.fileoff,
            filesize=seg.filesize,
            sections=segsec
        )

    @property
    def segments(self):
        """ Constructs section/segment descriptors.

            Returns a list of MachOSegment tuples, one per segment load
            command across all headers. Values are cached in an instance
            attribute.
        """
        if not hasattr(self, '_segments'):
            # Wrap all segments/sections into a MachOSegment/MachOSection.
            self._segments = [
                self.make_seg(seg, sec)
                for h in self.headers
                for _, seg, sec in h.commands
                if isinstance(seg, SEGMENT_TYPES)
            ]

        return self._segments

    @property
    def uuid(self):
        """ Returns UUID of the Mach-O.

            The value is an upper-case UUID string taken from the UUID load
            command, or None when the image has no such command. When several
            headers carry a UUID the last one wins. The result is cached in
            an instance attribute.
        """
        if not hasattr(self, '_uuid'):
            # Default to None so that a missing UUID command does not
            # surface as an AttributeError on the private cache attribute.
            self._uuid = None
            for h in self.headers:
                # commands is [(load_command, command, data)]
                for _, command, _ in h.commands:
                    if isinstance(command, macholib.mach_o.uuid_command):
                        self._uuid = str(uuid.UUID(bytes=command.uuid)).upper()
        return self._uuid


# macholib fixups required for kext support: register the kext bundle file
# type together with its long and short display names.
macholib.mach_o.MH_KEXT_BUNDLE = 0xB

macholib.mach_o.MH_FILETYPE_NAMES.update(
    {macholib.mach_o.MH_KEXT_BUNDLE: "kext bundle"}
)
macholib.mach_o.MH_FILETYPE_SHORTNAMES.update(
    {macholib.mach_o.MH_KEXT_BUNDLE: "kext"}
)

# Command structures that describe a segment (64-bit and 32-bit flavors).
SEGMENT_TYPES = (
    macholib.mach_o.segment_command_64,
    macholib.mach_o.segment_command,
)

def get_load_command_human_name(lc):
    """ Returns the name that load command lc reports for itself. """
    cmd_name = lc.get_cmd_name()
    return cmd_name


class VisualMachoMap(object):
    """ Prints an ASCII-art map of the segments and sections of a Mach-O.

        name is printed as the title of the map and width is the initial
        width (in characters) of the drawn box. The box is widened
        automatically when a segment/section name does not fit.
    """
    KB_1 = 1024
    KB_16 = 16 * 1024
    MB_1 = 1 * 1024 * 1024
    GB_1 = 1 * 1024 * 1024 * 1024

    def __init__(self, name, width=40):
        self.name = name
        # Honor the caller's width (it used to be hard-coded to 40).
        self.width = width
        self.default_side_padding = 2

    @staticmethod
    def _c_string(raw):
        """ Converts NUL padded bytes to str.

            Takes everything up to the first NUL, or the whole buffer when
            there is no NUL (a name that fills its fixed-size field is not
            terminated). Undecodable bytes are replaced instead of raising
            because the result is only used for display.
        """
        return raw.split(b'\x00', 1)[0].decode('utf-8', 'replace')

    def get_header_line(self):
        """ Returns the top/bottom border of the box. """
        return '+' + '-' * (self.width - 2) + '+'

    def get_space_line(self):
        """ Returns an empty row of the box. """
        return '|' + ' ' * (self.width - 2) + '|'

    def get_dashed_line(self):
        """ Returns a dashed row (used for segments). """
        return '|' + '-' * (self.width - 2) + '|'

    def get_dotted_line(self):
        """ Returns a dotted row (used for sections). """
        return '|' + '.' * (self.width - 2) + '|'

    def center_text_in_line(self, line, text):
        """ Returns line with text written over its middle.

            Raises ValueError when text does not fit between the two border
            characters of line.
        """
        if len(text) > len(line) - 2:
            raise ValueError("text is larger than line of text")

        lbreak_pos = (len(line) // 2) - (len(text) // 2)
        if len(text) % 2 != 0:
            # Odd-length text is shifted one column to the left.
            lbreak_pos -= 1
        out = line[:lbreak_pos] + text
        return out + line[len(out):]

    def get_separator_lines(self):
        """ Returns two rows marking a cut in a large block. """
        separator = '/' + ' ' * (self.width - 2) + '/'
        return [separator, separator]

    def printMachoMap(self, mobj):
        """ Prints the memory map of mobj followed by its other load commands.

            mobj must provide headers, each with a commands list of
            (load_command, command, data) tuples. Segments and sections are
            drawn sorted by VM address; UUID, rpath, dylib and all remaining
            commands are listed after the map. May grow self.width so that
            the longest name fits.
        """
        MapBlock = namedtuple('MapBlock', 'name vmaddr vmsize fileoff filesize extra_info is_segment')
        outstr = self.name + '\n'
        other_cmds = ''
        blocks = []
        for hdr in mobj.headers:
            # commands is [(load_command, segment, [sections..])]; for
            # non-segment commands the third item is the raw trailing data.
            for lc, segment, sections in hdr.commands:
                lc_cmd_str = get_load_command_human_name(lc)
                if isinstance(segment, SEGMENT_TYPES):
                    segname = self._c_string(segment.segname)
                    blocks.append(MapBlock(segname, segment.vmaddr, segment.vmsize, segment.fileoff, segment.filesize,
                                            ' LC:{} : {} init:{:#0X} max:{:#0X}'.format(lc_cmd_str, segname, segment.initprot, segment.maxprot),
                                            True))
                    for section in sections:
                        section_name = self._c_string(section.sectname)
                        blocks.append(MapBlock(section_name, section.addr, section.size, section.offset,
                                                section.size, 'al:{} flags:{:#0X}'.format(section.align, section.flags), False))
                elif isinstance(segment, macholib.mach_o.uuid_command):
                    other_cmds += "\n\t uuid: {:s}".format(str(uuid.UUID(bytes=segment.uuid)).upper())
                elif isinstance(segment, macholib.mach_o.rpath_command):
                    # The path string is carried in the command data;
                    # segment.path cannot be formatted with '{:s}'.
                    # NOTE(review): relies on macholib exposing path as an
                    # integer offset -- confirm against the library.
                    other_cmds += "\n\t rpath: {:s}".format(self._c_string(sections))
                elif isinstance(segment, macholib.mach_o.dylib_command):
                    # Decode the name instead of printing the bytes repr.
                    other_cmds += "\n\t dylib: {:s} ({:s})".format(self._c_string(sections), str(segment.current_version))
                else:
                    other_cmds += "\n\t LC: {:s} size:{:d} nsects:{:d}".format(lc_cmd_str, lc.cmdsize, len(sections))

        # fixup the self.width param: make room for the longest name and
        # keep the width even.
        for _b in blocks:
            needed_width = self.default_side_padding + len(_b.name) + 2
            if needed_width > self.width:
                self.width = needed_width
        if self.width % 2 != 0:
            self.width += 1

        sorted_blocks = sorted(blocks, key=lambda b: b.vmaddr)
        mstr = [self.get_header_line()]
        prev_block = MapBlock('', 0, 0, 0, 0, '', False)
        for b in sorted_blocks:
            # TODO add separator blocks if vmaddr is large from prev_block
            if b.is_segment:
                s = self.get_dashed_line()
            else:
                s = self.get_dotted_line()
            s = self.center_text_in_line(s, b.name)
            line = "{:s} {: <#020X} ({: <10d}) floff:{: <#08x}  {}".format(s, b.vmaddr, b.vmsize, b.fileoff, b.extra_info)
            # Leave a visual gap when this block starts far from the
            # previous one.
            if (b.vmaddr - prev_block.vmaddr) > VisualMachoMap.KB_16:
                mstr.append(self.get_space_line())
                mstr.append(self.get_space_line())

            mstr.append(line)

            # Large blocks get a "cut" marker below them.
            if b.vmsize > VisualMachoMap.MB_1:
                mstr.append(self.get_space_line())
                mstr.extend(self.get_separator_lines())
                mstr.append(self.get_space_line())
            prev_block = b
        mstr.append(self.get_space_line())
        if prev_block.vmsize > VisualMachoMap.KB_16:
            mstr.append(self.get_space_line())
        mstr.append(self.get_header_line())
        print(outstr)
        print("\n".join(mstr))
        print("\n\n=============== Other Load Commands ===============")
        print(other_cmds)


# Command line entry point: print the map of the Mach-O given as the last
# argument.
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        print("Usage: {} /path/to/macho_binary".format(sys.argv[0]))
        sys.exit(1)
    macho_path = sys.argv[-1]
    with open(macho_path, 'rb') as fp:
        # Parse while the file is still open, then draw the map.
        mobject = MemMachO(fp)
        VisualMachoMap(macho_path).printMachoMap(mobject)
    sys.exit(0)