1from __future__ import absolute_import, division, print_function 2 3from builtins import range 4from builtins import object 5 6import macholib 7from macholib import MachO as macho 8from collections import namedtuple 9import six 10import uuid 11import sys 12 13 14# 15# Defines segment/section descriptions that can be used by external users 16# like kext management to keep track about memory layout. To avoid the need 17# to keep full Mach-O instance around. 18# 19 20MachOSegment = namedtuple( 21 'MachOSegment', 22 'name vmaddr vmsize fileoff filesize sections' 23) 24 25MachOSection = namedtuple( 26 'MachOSection', 27 'sectname addr size fileoff' 28) 29 30 31# 32# The Mach-O library loads data for each section in a Mach-O. 33# This breaks our macros in few ways: 34# - It is slow and no one is really using it. 35# - File offsets in fileset KC points outside of the file window reported 36# by OSkext API. 37# 38# Until macholib gets some optin to avoid reading section data we have to 39# patch it here. 40# 41# !!! Note. This works only with the latest lib 1.15.1 !!! 42 43if macholib.__version__ == "1.15.2": 44 from macholib.mach_o import ( 45 LC_ID_DYLIB, 46 LC_REGISTRY, 47 LC_SEGMENT, 48 LC_SEGMENT_64, 49 S_ZEROFILL, 50 load_command, 51 section, 52 section_64, 53 ) 54 from macholib.ptypes import sizeof 55 from macholib.util import fileview 56 57 58 # !!! This is the actual patch for macholib 1.15.2 !!! 59 # 60 # 1. MemMachOHeader subclasses macho.MachOHeader 61 # 2. Overloaded load() method is copy/paste of the original load() with 62 # small patch added that disables section contents loading. 63 # 3. The new MemMachOHeader is injected back into library and used 64 # in place of macho.MachOHeader. 65 # 66 # This code should not ever exist in the first place. So the plan is to 67 # remove it when macholib gets improved or abandoned by our own 68 # implementation. 69 class MemMachOHeader(macho.MachOHeader): 70 """ Mach-O header parser that does not try to load section data. """ 71 72 def load(self, fh): 73 fh = fileview(fh, self.offset, self.size) 74 fh.seek(0) 75 76 self.sizediff = 0 77 kw = {"_endian_": self.endian} 78 header = self.mach_header.from_fileobj(fh, **kw) 79 self.header = header 80 # If header.magic != self.MH_MAGIC: 81 # raise ValueError("header has magic %08x, expecting %08x" % ( 82 # header.magic, self.MH_MAGIC)) 83 84 cmd = self.commands = [] 85 86 self.filetype = self.get_filetype_shortname(header.filetype) 87 88 read_bytes = 0 89 low_offset = sys.maxsize 90 for i in range(header.ncmds): 91 # read the load command 92 cmd_load = load_command.from_fileobj(fh, **kw) 93 94 # read the specific command 95 klass = LC_REGISTRY.get(cmd_load.cmd, None) 96 if klass is None: 97 raise ValueError("Unknown load command: %d" % (cmd_load.cmd,)) 98 cmd_cmd = klass.from_fileobj(fh, **kw) 99 100 if cmd_load.cmd == LC_ID_DYLIB: 101 # remember where this command was 102 if self.id_cmd is not None: 103 raise ValueError("This dylib already has an id") 104 self.id_cmd = i 105 106 if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64): 107 # for segment commands, read the list of segments 108 segs = [] 109 # assert that the size makes sense 110 if cmd_load.cmd == LC_SEGMENT: 111 section_cls = section 112 else: # LC_SEGMENT_64 113 section_cls = section_64 114 115 expected_size = ( 116 sizeof(klass) 117 + sizeof(load_command) 118 + (sizeof(section_cls) * cmd_cmd.nsects) 119 ) 120 if cmd_load.cmdsize != expected_size: 121 raise ValueError("Segment size mismatch") 122 # this is a zero block or something 123 # so the beginning is wherever the fileoff of this command is 124 if cmd_cmd.nsects == 0: 125 if cmd_cmd.filesize != 0: 126 low_offset = min(low_offset, cmd_cmd.fileoff) 127 else: 128 # this one has multiple segments 129 for _j in range(cmd_cmd.nsects): 130 # read the segment 131 seg = section_cls.from_fileobj(fh, **kw) 132 # If the segment has a size and is not zero filled 133 # then its beginning is the offset of this segment 134 not_zerofill = (seg.flags & S_ZEROFILL) != S_ZEROFILL 135 if seg.offset > 0 and seg.size > 0 and not_zerofill: 136 low_offset = min(low_offset, seg.offset) 137 138 # Do NOT read section data. It is not required and 139 # does not work well with filset KC offsets. 140 """ 141 if not_zerofill: 142 c = fh.tell() 143 fh.seek(seg.offset) 144 sd = fh.read(seg.size) 145 seg.add_section_data(sd) 146 fh.seek(c) 147 """ 148 segs.append(seg) 149 # data is a list of segments 150 cmd_data = segs 151 152 else: 153 # data is a raw str 154 data_size = cmd_load.cmdsize - sizeof(klass) - sizeof(load_command) 155 cmd_data = fh.read(data_size) 156 cmd.append((cmd_load, cmd_cmd, cmd_data)) 157 read_bytes += cmd_load.cmdsize 158 159 # make sure the header made sense 160 if read_bytes != header.sizeofcmds: 161 raise ValueError( 162 "Read %d bytes, header reports %d bytes" 163 % (read_bytes, header.sizeofcmds) 164 ) 165 self.total_size = sizeof(self.mach_header) + read_bytes 166 self.low_offset = low_offset 167 168 169 # Patch the library to use our own header class instead. 170 macho.MachOHeader = MemMachOHeader 171 172 173class MemMachO(macho.MachO): 174 """ Mach-O implementation that accepts I/O stream instead of file. """ 175 176 def __init__(self, file): 177 """ Creates Mach-O parser on top of provided I/O. """ 178 179 # Figured out file size from the I/O. 180 file.seek(0, 2) 181 size = file.tell() 182 file.seek(0, 0) 183 184 # supports the ObjectGraph protocol 185 self.graphident = 'mem:%d//'.format(size) 186 self.filename = 'mem:%d//'.format(size) 187 self.loader_path = "<no-path>" 188 189 # initialized by load 190 self.fat = None 191 self.headers = [] 192 193 self.load(file) 194 195 @staticmethod 196 def make_seg(seg, sects): 197 """ Constructs MachOSegment from input. """ 198 199 # Wrap all sections in MachOSection tuple. 200 segsec = [ 201 MachOSection( 202 sectname = six.ensure_str(s.segname[:s.segname.find(b'\x00')]), 203 addr = s.addr, 204 fileoff = s.offset, 205 size = s.size 206 ) 207 for s in sects 208 ] 209 210 # Return MachOSegment 211 return MachOSegment( 212 name=six.ensure_str(seg.segname[:seg.segname.find(b'\x00')]), 213 vmaddr = seg.vmaddr, 214 vmsize = seg.vmsize, 215 fileoff = seg.fileoff, 216 filesize = seg.filesize, 217 sections = segsec 218 ) 219 220 @property 221 def segments(self): 222 """ Constructs section/segment descriptors. 223 224 Values are cached in an instance attribute. 225 """ 226 if hasattr(self, '_segments'): 227 return self._segments 228 229 # Wrap all segments/sections into a MachOSegment/MachOSection. 230 self._segments = [ 231 self.make_seg(seg, sec) 232 for h in self.headers 233 for _, seg, sec in h.commands 234 if isinstance(seg, SEGMENT_TYPES) 235 ] 236 237 return self._segments 238 239 @property 240 def uuid(self): 241 """ Returns UUID of the Mach-O. """ 242 if hasattr(self, '_uuid'): 243 return self._uuid 244 245 for h in self.headers: 246 for cmd in h.commands: 247 # cmds is [(load_command, segment, [sections..])] 248 (_, segment, _) = cmd 249 if isinstance(segment, macholib.mach_o.uuid_command): 250 self._uuid = str(uuid.UUID(bytes=segment.uuid)).upper() 251 return self._uuid 252 253 254# some fixups in macholib that are required for kext support 255macholib.mach_o.MH_KEXT_BUNDLE = 0xB 256 257macholib.mach_o.MH_FILETYPE_NAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext bundle" 258macholib.mach_o.MH_FILETYPE_SHORTNAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext" 259 260SEGMENT_TYPES = (macholib.mach_o.segment_command_64, macholib.mach_o.segment_command) 261 262if six.PY3: 263 # Use newer macholib interface on Python 3. 264 def get_load_command_human_name(lc): 265 return lc.get_cmd_name() 266else: 267 def get_load_command_human_name(lc): 268 """ return string name of LC_LOAD_DYLIB => "load_dylib" 269 "<unknown>" if not found 270 """ 271 retval = "<unknown>" 272 if lc.cmd in macho.LC_REGISTRY: 273 retval = macho.LC_REGISTRY[lc.cmd].__name__ 274 retval = retval.replace("_command", "") 275 return retval 276 277 278class VisualMachoMap(object): 279 KB_1 = 1024 280 KB_16 = 16 * 1024 281 MB_1 = 1 * 1024 * 1024 282 GB_1 = 1 * 1024 * 1024 * 1024 283 284 def __init__(self, name, width=40): 285 self.name = name 286 self.width = 40 287 self.default_side_padding = 2 288 289 def get_header_line(self): 290 return '+' + '-' * (self.width - 2) + '+' 291 292 def get_space_line(self): 293 return '|' + ' ' * (self.width - 2) + '|' 294 295 def get_dashed_line(self): 296 return '|' + '-' * (self.width - 2) + '|' 297 298 def get_dotted_line(self): 299 return '|' + '.' * (self.width - 2) + '|' 300 301 def center_text_in_line(self, line, text): 302 even_length = bool(len(text) % 2 == 0) 303 if len(text) > len(line) - 2: 304 raise ValueError("text is larger than line of text") 305 306 lbreak_pos = (len(line) // 2) - (len(text) // 2) 307 if not even_length: 308 lbreak_pos -= 1 309 out = line[:lbreak_pos] + text 310 return out + line[len(out):] 311 312 def get_separator_lines(self): 313 return ['/' + ' ' * (self.width - 2) + '/', '/' + ' ' * (self.width - 2) + '/'] 314 315 def printMachoMap(self, mobj): 316 MapBlock = namedtuple('MapBlock', 'name vmaddr vmsize fileoff filesize extra_info is_segment') 317 outstr = self.name + '\n' 318 other_cmds = '' 319 blocks = [] 320 for hdr in mobj.headers: 321 cmd_index = 0 322 for cmd in hdr.commands: 323 # cmds is [(load_command, segment, [sections..])] 324 (lc, segment, sections) = cmd 325 lc_cmd_str = get_load_command_human_name(lc) 326 lc_str_rep = "\n\t LC: {:s} size:{:d} nsects:{:d}".format(lc_cmd_str, lc.cmdsize, len(sections)) 327 # print lc_str_rep 328 if isinstance(segment, SEGMENT_TYPES): 329 segname = six.ensure_str(segment.segname[:segment.segname.find(b'\x00')]) 330 # print "\tsegment: {:s} vmaddr: {:x} vmsize:{:d} fileoff: {:x} filesize: {:d}".format( 331 # segname, segment.vmaddr, segment.vmsize, segment.fileoff, segment.filesize) 332 blocks.append(MapBlock(segname, segment.vmaddr, segment.vmsize, segment.fileoff, segment.filesize, 333 ' LC:{} : {} init:{:#0X} max:{:#0X}'.format(lc_cmd_str, segname, segment.initprot, segment.maxprot), 334 True)) 335 for section in sections: 336 section_name = six.ensure_str(section.sectname[:section.sectname.find(b'\x00')]) 337 blocks.append(MapBlock(section_name, section.addr, section.size, section.offset, 338 section.size, 'al:{} flags:{:#0X}'.format(section.align, section.flags), False)) 339 #print "\t\tsection:{:s} addr:{:x} off:{:x} size:{:d}".format(section_name, section.addr, section.offset, section.size) 340 elif isinstance(segment, macholib.mach_o.uuid_command): 341 other_cmds += "\n\t uuid: {:s}".format(str(uuid.UUID(bytes=segment.uuid)).upper()) 342 elif isinstance(segment, macholib.mach_o.rpath_command): 343 other_cmds += "\n\t rpath: {:s}".format(segment.path) 344 elif isinstance(segment, macholib.mach_o.dylib_command): 345 other_cmds += "\n\t dylib: {:s} ({:s})".format(str(sections[:sections.find(b'\x00')]), str(segment.current_version)) 346 else: 347 other_cmds += lc_str_rep 348 cmd_index += 1 349 350 # fixup the self.width param 351 for _b in blocks: 352 if self.default_side_padding + len(_b.name) + 2 > self.width: 353 self.width = self.default_side_padding + len(_b.name) + 2 354 if self.width % 2 != 0: 355 self.width += 1 356 357 sorted_blocks = sorted(blocks, key=lambda b: b.vmaddr) 358 mstr = [self.get_header_line()] 359 prev_block = MapBlock('', 0, 0, 0, 0, '', False) 360 for b in sorted_blocks: 361 # TODO add separator blocks if vmaddr is large from prev_block 362 if b.is_segment: 363 s = self.get_dashed_line() 364 else: 365 s = self.get_dotted_line() 366 s = self.center_text_in_line(s, b.name) 367 line = "{:s} {: <#020X} ({: <10d}) floff:{: <#08x} {}".format(s, b.vmaddr, b.vmsize, b.fileoff, b.extra_info) 368 if (b.vmaddr - prev_block.vmaddr) > VisualMachoMap.KB_16: 369 mstr.append(self.get_space_line()) 370 mstr.append(self.get_space_line()) 371 372 mstr.append(line) 373 374 if b.vmsize > VisualMachoMap.MB_1: 375 mstr.append(self.get_space_line()) 376 mstr.extend(self.get_separator_lines()) 377 mstr.append(self.get_space_line()) 378 #mstr.append(self.get_space_line()) 379 prev_block = b 380 mstr.append(self.get_space_line()) 381 if prev_block.vmsize > VisualMachoMap.KB_16: 382 mstr.append(self.get_space_line()) 383 mstr.append(self.get_header_line()) 384 print(outstr) 385 print("\n".join(mstr)) 386 print("\n\n=============== Other Load Commands ===============") 387 print(other_cmds) 388 389 390if __name__ == '__main__': 391 import sys 392 if len(sys.argv) < 2: 393 print("Usage: {} /path/to/macho_binary".format(sys.argv[0])) 394 sys.exit(1) 395 with open(sys.argv[-1], 'rb') as fp: 396 mobject = MemMachO(fp) 397 398 p = VisualMachoMap(sys.argv[-1]) 399 p.printMachoMap(mobject) 400 sys.exit(0) 401