xref: /xnu-10002.61.3/tools/lldbmacros/macho.py (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1from __future__ import absolute_import, division, print_function
2
3from builtins import range
4from builtins import object
5
6import macholib
7from macholib import MachO as macho
8from collections import namedtuple
9import six
10import uuid
11import sys
12
13
#
# Defines segment/section descriptions that can be used by external users
# (like kext management) to keep track of the memory layout without having
# to keep a full Mach-O instance around.
#
19
# Lightweight description of one segment; `sections` holds the MachOSection
# tuples that belong to it.
MachOSegment = namedtuple(
    'MachOSegment',
    ('name', 'vmaddr', 'vmsize', 'fileoff', 'filesize', 'sections'),
)

# Lightweight description of one section inside a segment.
MachOSection = namedtuple(
    'MachOSection',
    ('sectname', 'addr', 'size', 'fileoff'),
)
29
30
31#
#
# The Mach-O library loads data for each section in a Mach-O.
# This breaks our macros in a few ways:
#   - It is slow and no one is really using it.
#   - File offsets in a fileset KC point outside of the file window reported
#     by the OSKext API.
#
# Until macholib gets some option to avoid reading section data we have to
# patch it here.
#
# !!! Note. This works only with macholib 1.15.2 !!!
42
43if macholib.__version__ == "1.15.2":
44    from macholib.mach_o import (
45        LC_ID_DYLIB,
46        LC_REGISTRY,
47        LC_SEGMENT,
48        LC_SEGMENT_64,
49        S_ZEROFILL,
50        load_command,
51        section,
52        section_64,
53    )
54    from macholib.ptypes import sizeof
55    from macholib.util import fileview
56
57
    # !!! This is the actual patch for macholib 1.15.2 !!!
    #
    #   1. MemMachOHeader subclasses macho.MachOHeader.
    #   2. The overridden load() method is a copy/paste of the original load()
    #      with a small patch added that disables loading of section contents.
    #   3. The new MemMachOHeader is injected back into the library and used
    #      in place of macho.MachOHeader.
    #
    # This code should never have existed in the first place. The plan is to
    # remove it once macholib gets improved, or once macholib is replaced by
    # our own implementation.
    class MemMachOHeader(macho.MachOHeader):
        """ Mach-O header parser that does not try to load section data. """

        def load(self, fh):
            """ Parses the Mach-O header and all of its load commands from fh.

                Populates self.header, self.commands, self.filetype,
                self.sizediff, self.total_size and self.low_offset, and sets
                self.id_cmd when an LC_ID_DYLIB command is encountered.
                Section headers are parsed, but section contents are never
                read.

                Raises ValueError for an unknown load command, a second
                LC_ID_DYLIB, a segment command whose cmdsize does not match
                its section count, or when the number of bytes consumed
                differs from header.sizeofcmds.
            """
            # Wrap fh in a fileview built from this header's offset/size and
            # start reading at its beginning.
            fh = fileview(fh, self.offset, self.size)
            fh.seek(0)

            self.sizediff = 0
            # Every structure below is decoded with this header's endianness.
            kw = {"_endian_": self.endian}
            header = self.mach_header.from_fileobj(fh, **kw)
            self.header = header
            # If header.magic != self.MH_MAGIC:
            #    raise ValueError("header has magic %08x, expecting %08x" % (
            #        header.magic, self.MH_MAGIC))

            # cmd aliases self.commands; entries are appended in file order.
            cmd = self.commands = []

            self.filetype = self.get_filetype_shortname(header.filetype)

            # read_bytes sums the cmdsize of every command; low_offset tracks
            # the smallest file offset of non-empty segment/section contents.
            read_bytes = 0
            low_offset = sys.maxsize
            for i in range(header.ncmds):
                # read the load command
                cmd_load = load_command.from_fileobj(fh, **kw)

                # read the specific command
                klass = LC_REGISTRY.get(cmd_load.cmd, None)
                if klass is None:
                    raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
                cmd_cmd = klass.from_fileobj(fh, **kw)

                if cmd_load.cmd == LC_ID_DYLIB:
                    # remember where this command was
                    # (self.id_cmd is read before it is assigned here, so it
                    # is presumably initialised by the base class -- verify.)
                    if self.id_cmd is not None:
                        raise ValueError("This dylib already has an id")
                    self.id_cmd = i

                if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                    # for segment commands, read the list of section headers
                    # (the upstream code calls them "segments")
                    segs = []
                    # assert that the size makes sense
                    if cmd_load.cmd == LC_SEGMENT:
                        section_cls = section
                    else:  # LC_SEGMENT_64
                        section_cls = section_64

                    # load_command + segment command + one header per section
                    expected_size = (
                        sizeof(klass)
                        + sizeof(load_command)
                        + (sizeof(section_cls) * cmd_cmd.nsects)
                    )
                    if cmd_load.cmdsize != expected_size:
                        raise ValueError("Segment size mismatch")
                    # this is a zero block or something
                    # so the beginning is wherever the fileoff of this command is
                    if cmd_cmd.nsects == 0:
                        if cmd_cmd.filesize != 0:
                            low_offset = min(low_offset, cmd_cmd.fileoff)
                    else:
                        # this one has multiple sections
                        for _j in range(cmd_cmd.nsects):
                            # read the section header
                            seg = section_cls.from_fileobj(fh, **kw)
                            # If the section has a size and is not zero filled
                            # then its beginning is the offset of this section
                            not_zerofill = (seg.flags & S_ZEROFILL) != S_ZEROFILL
                            if seg.offset > 0 and seg.size > 0 and not_zerofill:
                                low_offset = min(low_offset, seg.offset)

                            # Do NOT read section data. It is not required and
                            # does not work well with fileset KC offsets.
                            # The string literal below is the disabled
                            # upstream code, kept for reference; it is never
                            # executed.
                            """
                            if not_zerofill:
                                c = fh.tell()
                                fh.seek(seg.offset)
                                sd = fh.read(seg.size)
                                seg.add_section_data(sd)
                                fh.seek(c)
                            """
                            segs.append(seg)
                    # data is a list of section headers
                    cmd_data = segs

                else:
                    # data is a raw str: whatever follows the fixed-size
                    # command structure, up to cmdsize
                    data_size = cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                    cmd_data = fh.read(data_size)
                # Each entry is (load_command, specific command, data) where
                # data is a list of section headers or the raw trailing bytes.
                cmd.append((cmd_load, cmd_cmd, cmd_data))
                read_bytes += cmd_load.cmdsize

            # make sure the header made sense
            if read_bytes != header.sizeofcmds:
                raise ValueError(
                    "Read %d bytes, header reports %d bytes"
                    % (read_bytes, header.sizeofcmds)
                )
            self.total_size = sizeof(self.mach_header) + read_bytes
            self.low_offset = low_offset
167
168
169    # Patch the library to use our own header class instead.
170    macho.MachOHeader = MemMachOHeader
171
172
class MemMachO(macho.MachO):
    """ Mach-O implementation that accepts I/O stream instead of file.

        The base class constructor is intentionally not called because it
        takes a filename; the attributes it would set up are initialised
        here instead before load() is invoked on the stream.
    """

    def __init__(self, file):
        """ Creates Mach-O parser on top of provided I/O.

            file -- seekable binary stream positioned anywhere; it is
                    rewound to offset 0 before parsing.
        """

        # Figure out the stream size: seek to the end, note the position,
        # then rewind.
        file.seek(0, 2)
        size = file.tell()
        file.seek(0, 0)

        # supports the ObjectGraph protocol
        # (use a str.format placeholder so the size really is interpolated;
        # the previous '%d' was left in the string verbatim)
        ident = 'mem:{:d}//'.format(size)
        self.graphident = ident
        self.filename = ident
        self.loader_path = "<no-path>"

        # initialized by load
        self.fat = None
        self.headers = []

        self.load(file)

    @staticmethod
    def _c_name(raw):
        """ Converts a NUL padded fixed-size name field to a native string.

            Everything from the first NUL on is dropped. A field without
            any NUL (a name that fills the whole field) is kept intact
            rather than losing its last character.
        """
        return six.ensure_str(raw.partition(b'\x00')[0])

    @staticmethod
    def make_seg(seg, sects):
        """ Constructs MachOSegment from input.

            seg   -- segment command providing segname, vmaddr, vmsize,
                     fileoff and filesize.
            sects -- iterable of section headers providing sectname, addr,
                     offset and size.
        """

        # Wrap all sections in MachOSection tuple. The name comes from the
        # section's own sectname field (not the owning segment's segname).
        segsec = [
            MachOSection(
                sectname=MemMachO._c_name(s.sectname),
                addr=s.addr,
                fileoff=s.offset,
                size=s.size
            )
            for s in sects
        ]

        # Return MachOSegment
        return MachOSegment(
            name=MemMachO._c_name(seg.segname),
            vmaddr=seg.vmaddr,
            vmsize=seg.vmsize,
            fileoff=seg.fileoff,
            filesize=seg.filesize,
            sections=segsec
        )

    @property
    def segments(self):
        """ Constructs section/segment descriptors.

            Returns a list of MachOSegment tuples, one per segment load
            command across all headers. Values are cached in an instance
            attribute.
        """
        if hasattr(self, '_segments'):
            return self._segments

        # Wrap all segments/sections into a MachOSegment/MachOSection.
        self._segments = [
            self.make_seg(seg, sec)
            for h in self.headers
            for _, seg, sec in h.commands
            if isinstance(seg, SEGMENT_TYPES)
        ]

        return self._segments

    @property
    def uuid(self):
        """ Returns UUID of the Mach-O.

            The value is the upper-case string form of the UUID load
            command; when several are present (e.g. several headers) the
            last one wins. Returns None when there is no UUID load command.
            The result is cached in an instance attribute.
        """
        if hasattr(self, '_uuid'):
            return self._uuid

        # Default to None so a Mach-O without LC_UUID does not end up
        # raising AttributeError from the final return.
        self._uuid = None
        for h in self.headers:
            # commands is [(load_command, command, data)]
            for _, command, _ in h.commands:
                if isinstance(command, macholib.mach_o.uuid_command):
                    self._uuid = str(uuid.UUID(bytes=command.uuid)).upper()
        return self._uuid
252
253
# Some fixups in macholib that are required for kext support: define the
# kext bundle file type constant (0xB) and register its long and short
# display names in macholib's file type tables.
macholib.mach_o.MH_KEXT_BUNDLE = 0xB

macholib.mach_o.MH_FILETYPE_NAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext bundle"
macholib.mach_o.MH_FILETYPE_SHORTNAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext"

# Command classes that describe a segment; used with isinstance() to pick
# segment commands out of a header's command list.
SEGMENT_TYPES = (macholib.mach_o.segment_command_64, macholib.mach_o.segment_command)
261
if not six.PY3:
    def get_load_command_human_name(lc):
        """ Maps a load command to a short readable name.

            The name is derived from the class registered for lc.cmd with
            the "_command" part removed; "<unknown>" is returned when the
            command is not registered.
        """
        registry = macho.LC_REGISTRY
        if lc.cmd not in registry:
            return "<unknown>"
        class_name = registry[lc.cmd].__name__
        return class_name.replace("_command", "")
else:
    # Use newer macholib interface on Python 3.
    def get_load_command_human_name(lc):
        """ Returns the name macholib itself reports for the load command. """
        return lc.get_cmd_name()
276
277
class VisualMachoMap(object):
    """ Prints an ASCII box diagram of the segments and sections of a Mach-O.

        Blocks are listed in order of their VM address; load commands that
        are not segments are summarised in a trailing list.
    """
    KB_1 = 1024
    KB_16 = 16 * 1024
    MB_1 = 1 * 1024 * 1024
    GB_1 = 1 * 1024 * 1024 * 1024

    def __init__(self, name, width=40):
        """ name  -- title printed above the map.
            width -- initial width of the box in characters; printMachoMap
                     grows it when a block name does not fit.
        """
        self.name = name
        # Honour the caller supplied width (it used to be ignored in favour
        # of a hard-coded 40).
        self.width = width
        self.default_side_padding = 2

    @staticmethod
    def _trim_nul(raw):
        """ Returns raw up to, but not including, its first NUL byte.

            Data without any NUL is returned whole. Slicing at find() would
            silently drop the last character in that case because find()
            reports -1.
        """
        return raw.partition(b'\x00')[0]

    def get_header_line(self):
        """ Returns the top/bottom border of the box. """
        return '+' + '-' * (self.width - 2) + '+'

    def get_space_line(self):
        """ Returns an empty row of the box. """
        return '|' + ' ' * (self.width - 2) + '|'

    def get_dashed_line(self):
        """ Returns a row filled with dashes (used for segments). """
        return '|' + '-' * (self.width - 2) + '|'

    def get_dotted_line(self):
        """ Returns a row filled with dots (used for sections). """
        return '|' + '.' * (self.width - 2) + '|'

    def center_text_in_line(self, line, text):
        """ Returns line with text overlaid around its middle.

            Raises ValueError when text does not fit between the two border
            characters of line.
        """
        if len(text) > len(line) - 2:
            raise ValueError("text is larger than line of text")

        lbreak_pos = (len(line) // 2) - (len(text) // 2)
        # Odd length text starts one column earlier.
        if len(text) % 2 != 0:
            lbreak_pos -= 1
        out = line[:lbreak_pos] + text
        return out + line[len(out):]

    def get_separator_lines(self):
        """ Returns two rows that mark a large block as being cut short. """
        cut_line = '/' + ' ' * (self.width - 2) + '/'
        return [cut_line, cut_line]

    def printMachoMap(self, mobj):
        """ Prints the memory map of mobj followed by its other load commands.

            mobj -- parsed Mach-O exposing `headers`, each with a `commands`
                    list of (load_command, command, data) tuples.

            Side effect: self.width may be enlarged (and rounded up to an
            even number) so that every block name fits.
        """
        MapBlock = namedtuple('MapBlock', 'name vmaddr vmsize fileoff filesize extra_info is_segment')
        outstr = self.name + '\n'
        other_cmds = ''
        blocks = []
        for hdr in mobj.headers:
            # commands is [(load_command, command, data)] where data is a
            # list of section headers for segments and raw bytes otherwise.
            for lc, segment, sections in hdr.commands:
                lc_cmd_str = get_load_command_human_name(lc)
                if isinstance(segment, SEGMENT_TYPES):
                    segname = six.ensure_str(self._trim_nul(segment.segname))
                    blocks.append(MapBlock(segname, segment.vmaddr, segment.vmsize, segment.fileoff, segment.filesize,
                                            ' LC:{} : {} init:{:#0X} max:{:#0X}'.format(lc_cmd_str, segname, segment.initprot, segment.maxprot),
                                            True))
                    for sect in sections:
                        section_name = six.ensure_str(self._trim_nul(sect.sectname))
                        blocks.append(MapBlock(section_name, sect.addr, sect.size, sect.offset,
                                                sect.size, 'al:{} flags:{:#0X}'.format(sect.align, sect.flags), False))
                elif isinstance(segment, macholib.mach_o.uuid_command):
                    other_cmds += "\n\t uuid: {:s}".format(str(uuid.UUID(bytes=segment.uuid)).upper())
                elif isinstance(segment, macholib.mach_o.rpath_command):
                    # NOTE(review): in macholib the command's `path` field is
                    # an offset, not text, so it cannot be rendered with
                    # '{:s}'; the path itself is in the NUL terminated
                    # trailing data, as for dylib below -- confirm against
                    # macholib.
                    other_cmds += "\n\t rpath: {:s}".format(six.ensure_str(self._trim_nul(sections)))
                elif isinstance(segment, macholib.mach_o.dylib_command):
                    # Decode the name instead of str(bytes), which would
                    # print a b'...' literal on Python 3.
                    other_cmds += "\n\t dylib: {:s} ({:s})".format(
                        six.ensure_str(self._trim_nul(sections)), str(segment.current_version))
                else:
                    other_cmds += "\n\t LC: {:s} size:{:d} nsects:{:d}".format(lc_cmd_str, lc.cmdsize, len(sections))

        # fixup the self.width param so the longest name fits, then keep the
        # width even
        for _b in blocks:
            needed = self.default_side_padding + len(_b.name) + 2
            if needed > self.width:
                self.width = needed
        if self.width % 2 != 0:
            self.width += 1

        sorted_blocks = sorted(blocks, key=lambda b: b.vmaddr)
        mstr = [self.get_header_line()]
        prev_block = MapBlock('', 0, 0, 0, 0, '', False)
        for b in sorted_blocks:
            # TODO add separator blocks if vmaddr is large from prev_block
            if b.is_segment:
                s = self.get_dashed_line()
            else:
                s = self.get_dotted_line()
            s = self.center_text_in_line(s, b.name)
            line = "{:s} {: <#020X} ({: <10d}) floff:{: <#08x}  {}".format(s, b.vmaddr, b.vmsize, b.fileoff, b.extra_info)
            # Leave a visual gap when this block starts far from the
            # previous one.
            if (b.vmaddr - prev_block.vmaddr) > VisualMachoMap.KB_16:
                mstr.append(self.get_space_line())
                mstr.append(self.get_space_line())

            mstr.append(line)

            # Large blocks get a "cut" marker instead of being drawn to scale.
            if b.vmsize > VisualMachoMap.MB_1:
                mstr.append(self.get_space_line())
                mstr.extend(self.get_separator_lines())
                mstr.append(self.get_space_line())
            prev_block = b
        mstr.append(self.get_space_line())
        if prev_block.vmsize > VisualMachoMap.KB_16:
            mstr.append(self.get_space_line())
        mstr.append(self.get_header_line())
        print(outstr)
        print("\n".join(mstr))
        print("\n\n=============== Other Load Commands ===============")
        print(other_cmds)
388
389
if __name__ == '__main__':
    import sys

    # Stand-alone use: print the map of the Mach-O named by the last
    # command line argument.
    if len(sys.argv) >= 2:
        macho_path = sys.argv[-1]
        with open(macho_path, 'rb') as stream:
            parsed = MemMachO(stream)

            VisualMachoMap(macho_path).printMachoMap(parsed)
        sys.exit(0)

    print("Usage: {} /path/to/macho_binary".format(sys.argv[0]))
    sys.exit(1)
401