xref: /xnu-11215.81.4/tools/lldbmacros/macho.py (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1import macholib
2from macholib import MachO as macho
3from collections import namedtuple
4import uuid
5import sys
6
7
#
# Defines segment/section descriptions that can be used by external users
# (like kext management) to keep track of the memory layout, avoiding the
# need to keep a full Mach-O instance around.
#
13
# Lightweight description of one segment; 'sections' holds the MachOSection
# tuples that belong to it.
MachOSegment = namedtuple(
    'MachOSegment',
    ['name', 'vmaddr', 'vmsize', 'fileoff', 'filesize', 'sections'],
)

# Lightweight description of one section inside a segment.
MachOSection = namedtuple(
    'MachOSection',
    ['sectname', 'addr', 'size', 'fileoff'],
)
23
24
#
# The Mach-O library loads data for each section in a Mach-O.
# This breaks our macros in a few ways:
#   - It is slow and no one is really using it.
#   - File offsets in a fileset KC point outside of the file window reported
#     by the OSKext API.
#
# Until macholib gets some option to avoid reading section data we have to
# patch it here.
#
# !!! Note. This works only with macholib 1.15.2 !!!
36
37if macholib.__version__ == "1.15.2":
38    from macholib.mach_o import (
39        LC_ID_DYLIB,
40        LC_REGISTRY,
41        LC_SEGMENT,
42        LC_SEGMENT_64,
43        S_ZEROFILL,
44        load_command,
45        section,
46        section_64,
47    )
48    from macholib.ptypes import sizeof
49    from macholib.util import fileview
50
51
52    # !!! This is the actual patch for macholib 1.15.2 !!!
53    #
54    #   1. MemMachOHeader subclasses macho.MachOHeader
55    #   2. Overloaded load() method is copy/paste of the original load() with
56    #      small patch added that disables section contents loading.
57    #   3. The new MemMachOHeader is injected back into library and used
58    #      in place of macho.MachOHeader.
59    #
60    # This code should not ever exist in the first place. So the plan is to
61    # remove it when macholib gets improved or abandoned by our own
62    # implementation.
    class MemMachOHeader(macho.MachOHeader):
        """ Mach-O header parser that does not try to load section data.

            load() parses the mach header, every load command and every
            section header, but never reads section contents from the stream.
        """

        def load(self, fh):
            """ Parses the mach header and its load commands from fh.

                fh is a file-like object; all reads go through a fileview
                created from self.offset and self.size.

                Sets self.sizediff, self.header, self.commands, self.filetype,
                self.total_size and self.low_offset, and self.id_cmd when an
                LC_ID_DYLIB command is present. Each entry of self.commands is
                a (load_command, command struct, data) tuple where data is a
                list of section headers for segment commands and the raw
                trailing bytes for every other command.

                Raises ValueError for an unknown load command, a second
                LC_ID_DYLIB, a segment command whose cmdsize does not match
                its section count, or when the total of all cmdsize values
                differs from header.sizeofcmds.
            """
            fh = fileview(fh, self.offset, self.size)
            fh.seek(0)

            self.sizediff = 0
            kw = {"_endian_": self.endian}
            header = self.mach_header.from_fileobj(fh, **kw)
            self.header = header
            # If header.magic != self.MH_MAGIC:
            #    raise ValueError("header has magic %08x, expecting %08x" % (
            #        header.magic, self.MH_MAGIC))

            # cmd is an alias of self.commands; entries are appended below.
            cmd = self.commands = []

            self.filetype = self.get_filetype_shortname(header.filetype)

            # read_bytes accumulates cmdsize of every command for the final
            # consistency check; low_offset tracks the smallest file offset
            # seen among segments/sections that occupy file space.
            read_bytes = 0
            low_offset = sys.maxsize
            for i in range(header.ncmds):
                # read the load command
                cmd_load = load_command.from_fileobj(fh, **kw)

                # read the specific command
                klass = LC_REGISTRY.get(cmd_load.cmd, None)
                if klass is None:
                    raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
                cmd_cmd = klass.from_fileobj(fh, **kw)

                if cmd_load.cmd == LC_ID_DYLIB:
                    # remember where this command was
                    if self.id_cmd is not None:
                        raise ValueError("This dylib already has an id")
                    self.id_cmd = i

                if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                    # for segment commands, read the list of section headers
                    segs = []
                    # assert that the size makes sense
                    if cmd_load.cmd == LC_SEGMENT:
                        section_cls = section
                    else:  # LC_SEGMENT_64
                        section_cls = section_64

                    # cmdsize must cover the generic load command, the segment
                    # command itself and one section header per section.
                    expected_size = (
                        sizeof(klass)
                        + sizeof(load_command)
                        + (sizeof(section_cls) * cmd_cmd.nsects)
                    )
                    if cmd_load.cmdsize != expected_size:
                        raise ValueError("Segment size mismatch")
                    # this is a zero block or something
                    # so the beginning is wherever the fileoff of this command is
                    if cmd_cmd.nsects == 0:
                        if cmd_cmd.filesize != 0:
                            low_offset = min(low_offset, cmd_cmd.fileoff)
                    else:
                        # this segment has one or more sections
                        for _j in range(cmd_cmd.nsects):
                            # read the section header
                            seg = section_cls.from_fileobj(fh, **kw)
                            # If the section has a size and is not zero filled
                            # then its beginning is the offset of this section
                            not_zerofill = (seg.flags & S_ZEROFILL) != S_ZEROFILL
                            if seg.offset > 0 and seg.size > 0 and not_zerofill:
                                low_offset = min(low_offset, seg.offset)

                            # Do NOT read section data. It is not required and
                            # does not work well with fileset KC offsets.
                            # The string literal below is the upstream code
                            # that this patch disables; it is never executed.
                            """
                            if not_zerofill:
                                c = fh.tell()
                                fh.seek(seg.offset)
                                sd = fh.read(seg.size)
                                seg.add_section_data(sd)
                                fh.seek(c)
                            """
                            segs.append(seg)
                    # data is a list of section headers
                    cmd_data = segs

                else:
                    # data is the raw bytes that follow the fixed-size
                    # command structure
                    data_size = cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                    cmd_data = fh.read(data_size)
                cmd.append((cmd_load, cmd_cmd, cmd_data))
                read_bytes += cmd_load.cmdsize

            # make sure the header made sense
            if read_bytes != header.sizeofcmds:
                raise ValueError(
                    "Read %d bytes, header reports %d bytes"
                    % (read_bytes, header.sizeofcmds)
                )
            self.total_size = sizeof(self.mach_header) + read_bytes
            self.low_offset = low_offset
161
162
    # Patch the library to use our own header class instead. This rebinds the
    # module attribute, so code that looks up macho.MachOHeader after this
    # point gets MemMachOHeader.
    macho.MachOHeader = MemMachOHeader
165
166
class MemMachO(macho.MachO):
    """ Mach-O implementation that accepts I/O stream instead of file. """

    def __init__(self, file):
        """ Creates Mach-O parser on top of provided I/O.

            file must be a seekable binary stream; it is rewound to the
            start and then parsed with self.load().

            NOTE(review): the base class __init__ is not called here,
            presumably because it expects a filesystem path; the attributes
            assigned below stand in for it. Confirm against macholib.
        """

        # Figure out the stream size from the I/O (whence 2 == end of stream).
        file.seek(0, 2)
        size = file.tell()
        file.seek(0, 0)

        # supports the ObjectGraph protocol
        ident = 'mem:{:d}//'.format(size)
        self.graphident = ident
        self.filename = ident
        self.loader_path = "<no-path>"

        # initialized by load
        self.fat = None
        self.headers = []

        self.load(file)

    @staticmethod
    def _cstr(raw):
        """ Decodes a fixed-size, NUL-padded name field.

            Everything up to the first NUL byte is decoded. A field that
            uses its full width (no NUL at all) is decoded in its entirety.
        """
        return raw.split(b'\x00', 1)[0].decode()

    @staticmethod
    def make_seg(seg, sects):
        """ Constructs MachOSegment from input.

            seg is a segment command structure and sects is the list of
            section headers that belong to it.
        """

        # Wrap all sections in MachOSection tuple.
        segsec = [
            MachOSection(
                sectname=MemMachO._cstr(s.sectname),
                addr=s.addr,
                fileoff=s.offset,
                size=s.size
            )
            for s in sects
        ]

        # Return MachOSegment
        return MachOSegment(
            name=MemMachO._cstr(seg.segname),
            vmaddr=seg.vmaddr,
            vmsize=seg.vmsize,
            fileoff=seg.fileoff,
            filesize=seg.filesize,
            sections=segsec
        )

    @property
    def segments(self):
        """ Constructs section/segment descriptors.

            Returns a list of MachOSegment tuples, one per segment load
            command across all headers. Values are cached in an instance
            attribute.
        """
        if hasattr(self, '_segments'):
            return self._segments

        # Wrap all segments/sections into a MachOSegment/MachOSection.
        self._segments = [
            self.make_seg(seg, sec)
            for h in self.headers
            for _, seg, sec in h.commands
            if isinstance(seg, SEGMENT_TYPES)
        ]

        return self._segments

    @property
    def uuid(self):
        """ Returns UUID of the Mach-O.

            The value is an upper-case UUID string, or None when no header
            carries a UUID load command. If several UUID commands exist the
            last one wins. The result is cached in an instance attribute.
        """
        if hasattr(self, '_uuid'):
            return self._uuid

        # Default to None so that images without a UUID command do not
        # raise AttributeError from the property.
        self._uuid = None
        for h in self.headers:
            # commands is [(load_command, command struct, data)]
            for _, command, _ in h.commands:
                if isinstance(command, macholib.mach_o.uuid_command):
                    self._uuid = str(uuid.UUID(bytes=command.uuid)).upper()
        return self._uuid
246
247
# some fixups in macholib that are required for kext support:
# register the kext bundle file type (0xB) together with its long and short
# display names.
macholib.mach_o.MH_KEXT_BUNDLE = 0xB

macholib.mach_o.MH_FILETYPE_NAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext bundle"
macholib.mach_o.MH_FILETYPE_SHORTNAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext"

# Command structure classes that describe a segment; used with isinstance()
# to pick segment commands out of a header's command list.
SEGMENT_TYPES = (macholib.mach_o.segment_command_64, macholib.mach_o.segment_command)
255
def get_load_command_human_name(lc):
    """ Returns the readable name of load command lc. """
    readable_name = lc.get_cmd_name()
    return readable_name
258
259
class VisualMachoMap(object):
    """ Renders a text diagram of the segments/sections of a Mach-O. """

    KB_1 = 1024
    KB_16 = 16 * 1024
    MB_1 = 1 * 1024 * 1024
    GB_1 = 1 * 1024 * 1024 * 1024

    def __init__(self, name, width=40):
        """ name is printed as the title; width is the initial box width.

            The width may grow in printMachoMap() to fit long block names.
        """
        self.name = name
        self.width = width
        self.default_side_padding = 2

    @staticmethod
    def _cstr(raw):
        """ Decodes a NUL-terminated/NUL-padded byte string for display.

            Everything up to the first NUL is used; a field with no NUL at
            all is used in its entirety. Undecodable bytes are replaced so
            that printing never fails on odd input.
        """
        return raw.split(b'\x00', 1)[0].decode('utf-8', 'replace')

    def get_header_line(self):
        """ Returns the top/bottom border of the box. """
        return '+' + '-' * (self.width - 2) + '+'

    def get_space_line(self):
        """ Returns an empty box row. """
        return '|' + ' ' * (self.width - 2) + '|'

    def get_dashed_line(self):
        """ Returns a dashed box row (used for segments). """
        return '|' + '-' * (self.width - 2) + '|'

    def get_dotted_line(self):
        """ Returns a dotted box row (used for sections). """
        return '|' + '.' * (self.width - 2) + '|'

    def center_text_in_line(self, line, text):
        """ Returns line with text overwritten around its middle.

            Raises ValueError when text does not fit between the two border
            characters of line.
        """
        if len(text) > len(line) - 2:
            raise ValueError("text is larger than line of text")

        lbreak_pos = (len(line) // 2) - (len(text) // 2)
        # Odd-length text is shifted one column to the left.
        if len(text) % 2 != 0:
            lbreak_pos -= 1
        out = line[:lbreak_pos] + text
        return out + line[len(out):]

    def get_separator_lines(self):
        """ Returns two rows that mark an elided (large) region. """
        gap = '/' + ' ' * (self.width - 2) + '/'
        return [gap, gap]

    def printMachoMap(self, mobj):
        """ Prints the memory map of mobj followed by its other commands.

            mobj must provide 'headers', each with a 'commands' list of
            (load_command, command struct, data) tuples. Segments and
            sections are drawn sorted by VM address; UUID, rpath, dylib and
            all remaining commands are listed in a trailing text block.
            self.width is enlarged if needed to fit the longest name.
        """
        MapBlock = namedtuple('MapBlock', 'name vmaddr vmsize fileoff filesize extra_info is_segment')
        outstr = self.name + '\n'
        other_cmds = ''
        blocks = []
        for hdr in mobj.headers:
            # commands is [(load_command, command struct, data)]
            for lc, segment, sections in hdr.commands:
                lc_cmd_str = get_load_command_human_name(lc)
                if isinstance(segment, SEGMENT_TYPES):
                    segname = self._cstr(segment.segname)
                    blocks.append(MapBlock(segname, segment.vmaddr, segment.vmsize, segment.fileoff, segment.filesize,
                                           ' LC:{} : {} init:{:#0X} max:{:#0X}'.format(lc_cmd_str, segname, segment.initprot, segment.maxprot),
                                           True))
                    for sect in sections:
                        section_name = self._cstr(sect.sectname)
                        blocks.append(MapBlock(section_name, sect.addr, sect.size, sect.offset,
                                               sect.size, 'al:{} flags:{:#0X}'.format(sect.align, sect.flags), False))
                elif isinstance(segment, macholib.mach_o.uuid_command):
                    other_cmds += "\n\t uuid: {:s}".format(str(uuid.UUID(bytes=segment.uuid)).upper())
                elif isinstance(segment, macholib.mach_o.rpath_command):
                    # The path string lives in the data that trails the
                    # command structure (same layout the dylib branch relies
                    # on), not in segment.path.
                    other_cmds += "\n\t rpath: {:s}".format(self._cstr(sections))
                elif isinstance(segment, macholib.mach_o.dylib_command):
                    other_cmds += "\n\t dylib: {:s} ({:s})".format(self._cstr(sections), str(segment.current_version))
                else:
                    # For non-segment commands the data is raw bytes, so the
                    # 'nsects' figure here is the length of that payload.
                    other_cmds += "\n\t LC: {:s} size:{:d} nsects:{:d}".format(lc_cmd_str, lc.cmdsize, len(sections))

        # fixup the self.width param: grow to fit the longest name and keep
        # the width even.
        widest = max((self.default_side_padding + len(blk.name) + 2 for blk in blocks), default=0)
        if widest > self.width:
            self.width = widest
        if self.width % 2 != 0:
            self.width += 1

        mstr = [self.get_header_line()]
        prev_block = MapBlock('', 0, 0, 0, 0, '', False)
        for b in sorted(blocks, key=lambda blk: blk.vmaddr):
            # TODO add separator blocks if vmaddr is large from prev_block
            s = self.get_dashed_line() if b.is_segment else self.get_dotted_line()
            s = self.center_text_in_line(s, b.name)
            line = "{:s} {: <#020X} ({: <10d}) floff:{: <#08x}  {}".format(s, b.vmaddr, b.vmsize, b.fileoff, b.extra_info)
            # Leave a visual gap when this block starts far from the previous one.
            if (b.vmaddr - prev_block.vmaddr) > VisualMachoMap.KB_16:
                mstr.append(self.get_space_line())
                mstr.append(self.get_space_line())

            mstr.append(line)

            # Large blocks get an "elided" marker below them.
            if b.vmsize > VisualMachoMap.MB_1:
                mstr.append(self.get_space_line())
                mstr.extend(self.get_separator_lines())
                mstr.append(self.get_space_line())
            prev_block = b
        mstr.append(self.get_space_line())
        if prev_block.vmsize > VisualMachoMap.KB_16:
            mstr.append(self.get_space_line())
        mstr.append(self.get_header_line())
        print(outstr)
        print("\n".join(mstr))
        print("\n\n=============== Other Load Commands ===============")
        print(other_cmds)
370
371
if __name__ == '__main__':
    import sys

    # A path to the binary is required; the last argument is used.
    if len(sys.argv) < 2:
        usage = "Usage: {} /path/to/macho_binary".format(sys.argv[0])
        print(usage)
        sys.exit(1)

    macho_path = sys.argv[-1]
    with open(macho_path, 'rb') as stream:
        # Parse straight from the open stream and draw the map while the
        # stream is still open.
        parsed = MemMachO(stream)
        VisualMachoMap(macho_path).printMachoMap(parsed)
    sys.exit(0)
383