import sys
import uuid
from collections import namedtuple

import macholib
from macholib import MachO as macho


#
# Defines segment/section descriptions that can be used by external users
# like kext management to keep track of the memory layout, avoiding the need
# to keep a full Mach-O instance around.
#

MachOSegment = namedtuple(
    'MachOSegment',
    'name vmaddr vmsize fileoff filesize sections'
)

MachOSection = namedtuple(
    'MachOSection',
    'sectname addr size fileoff'
)


#
# The Mach-O library loads data for each section in a Mach-O.
# This breaks our macros in a few ways:
#   - It is slow and no one is really using it.
#   - File offsets in a fileset KC point outside of the file window reported
#     by the OSKext API.
#
# Until macholib gets some option to avoid reading section data we have to
# patch it here.
#
# !!! Note. This works only with macholib 1.15.2 (the version checked below).
#     With any other version the patch is silently skipped. !!!

if macholib.__version__ == "1.15.2":
    from macholib.mach_o import (
        LC_ID_DYLIB,
        LC_REGISTRY,
        LC_SEGMENT,
        LC_SEGMENT_64,
        S_ZEROFILL,
        load_command,
        section,
        section_64,
    )
    from macholib.ptypes import sizeof
    from macholib.util import fileview

    # !!! This is the actual patch for macholib 1.15.2 !!!
    #
    #   1. MemMachOHeader subclasses macho.MachOHeader
    #   2. Overloaded load() method is copy/paste of the original load() with
    #      small patch added that disables section contents loading.
    #   3. The new MemMachOHeader is injected back into library and used
    #      in place of macho.MachOHeader.
    #
    # This code should not ever exist in the first place. So the plan is to
    # remove it when macholib gets improved or is replaced by our own
    # implementation.
    class MemMachOHeader(macho.MachOHeader):
        """ Mach-O header parser that does not try to load section data. """

        def load(self, fh):
            """ Parses the Mach-O header and its load commands from fh.

            Populates self.header, self.commands, self.filetype,
            self.total_size and self.low_offset. Section headers are parsed
            but section contents are never read.

            Raises ValueError on an unknown load command, a duplicate
            LC_ID_DYLIB, a segment command whose size does not match its
            section count, or when the summed command sizes disagree with
            header.sizeofcmds.
            """
            fh = fileview(fh, self.offset, self.size)
            fh.seek(0)

            self.sizediff = 0
            kw = {"_endian_": self.endian}
            header = self.mach_header.from_fileobj(fh, **kw)
            self.header = header
            # If header.magic != self.MH_MAGIC:
            #    raise ValueError("header has magic %08x, expecting %08x" % (
            #        header.magic, self.MH_MAGIC))

            cmd = self.commands = []

            self.filetype = self.get_filetype_shortname(header.filetype)

            read_bytes = 0
            low_offset = sys.maxsize
            for i in range(header.ncmds):
                # read the load command
                cmd_load = load_command.from_fileobj(fh, **kw)

                # read the specific command
                klass = LC_REGISTRY.get(cmd_load.cmd, None)
                if klass is None:
                    raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
                cmd_cmd = klass.from_fileobj(fh, **kw)

                if cmd_load.cmd == LC_ID_DYLIB:
                    # remember where this command was
                    if self.id_cmd is not None:
                        raise ValueError("This dylib already has an id")
                    self.id_cmd = i

                if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                    # for segment commands, read the list of segments
                    segs = []
                    # assert that the size makes sense
                    if cmd_load.cmd == LC_SEGMENT:
                        section_cls = section
                    else:  # LC_SEGMENT_64
                        section_cls = section_64

                    expected_size = (
                        sizeof(klass)
                        + sizeof(load_command)
                        + (sizeof(section_cls) * cmd_cmd.nsects)
                    )
                    if cmd_load.cmdsize != expected_size:
                        raise ValueError("Segment size mismatch")
                    # this is a zero block or something
                    # so the beginning is wherever the fileoff of this command is
                    if cmd_cmd.nsects == 0:
                        if cmd_cmd.filesize != 0:
                            low_offset = min(low_offset, cmd_cmd.fileoff)
                    else:
                        # this one has multiple segments
                        for _j in range(cmd_cmd.nsects):
                            # read the segment
                            seg = section_cls.from_fileobj(fh, **kw)
                            # If the segment has a size and is not zero filled
                            # then its beginning is the offset of this segment
                            not_zerofill = (seg.flags & S_ZEROFILL) != S_ZEROFILL
                            if seg.offset > 0 and seg.size > 0 and not_zerofill:
                                low_offset = min(low_offset, seg.offset)

                            # Do NOT read section data. It is not required and
                            # does not work well with fileset KC offsets.
                            # The code removed here, for non-zerofill sections,
                            # sought to seg.offset, read seg.size bytes, passed
                            # them to seg.add_section_data() and restored the
                            # stream position.
                            segs.append(seg)
                    # data is a list of segments
                    cmd_data = segs

                else:
                    # data is a raw str
                    data_size = cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                    cmd_data = fh.read(data_size)
                cmd.append((cmd_load, cmd_cmd, cmd_data))
                read_bytes += cmd_load.cmdsize

            # make sure the header made sense
            if read_bytes != header.sizeofcmds:
                raise ValueError(
                    "Read %d bytes, header reports %d bytes"
                    % (read_bytes, header.sizeofcmds)
                )
            self.total_size = sizeof(self.mach_header) + read_bytes
            self.low_offset = low_offset

    # Patch the library to use our own header class instead.
    macho.MachOHeader = MemMachOHeader


def _c_string(raw):
    """ Decodes a fixed-width, NUL-padded bytes field into str.

    Everything from the first NUL byte onwards is dropped. A field that uses
    its full width has no NUL at all; it is returned whole (slicing with the
    -1 returned by bytes.find() would lose the last character).
    """
    return raw.split(b'\x00', 1)[0].decode()


class MemMachO(macho.MachO):
    """ Mach-O implementation that accepts I/O stream instead of file. """

    def __init__(self, file):
        """ Creates Mach-O parser on top of provided I/O.

        file must be a seekable binary stream; it is rewound to offset 0
        and handed to load().
        """
        # NOTE(review): macho.MachO.__init__ is not invoked here; presumably
        # it expects a path rather than a stream - confirm against macholib.

        # Figure out file size from the I/O.
        file.seek(0, 2)
        size = file.tell()
        file.seek(0, 0)

        # supports the ObjectGraph protocol
        # (str.format needs a {} placeholder; the former '%d' was emitted
        # literally and the size never made it into the identifier.)
        self.graphident = 'mem:{:d}//'.format(size)
        self.filename = 'mem:{:d}//'.format(size)
        self.loader_path = "<no-path>"

        # initialized by load
        self.fat = None
        self.headers = []

        self.load(file)

    @staticmethod
    def make_seg(seg, sects):
        """ Constructs MachOSegment from input.

        seg is a segment command and sects the section headers that belong
        to it. Returns a MachOSegment whose sections attribute is a list of
        MachOSection tuples.
        """

        # Wrap all sections in MachOSection tuple.
        segsec = [
            MachOSection(
                # The section's own name, not the name of its segment.
                sectname=_c_string(s.sectname),
                addr=s.addr,
                fileoff=s.offset,
                size=s.size
            )
            for s in sects
        ]

        # Return MachOSegment
        return MachOSegment(
            name=_c_string(seg.segname),
            vmaddr=seg.vmaddr,
            vmsize=seg.vmsize,
            fileoff=seg.fileoff,
            filesize=seg.filesize,
            sections=segsec
        )

    @property
    def segments(self):
        """ Constructs section/segment descriptors.

            Values are cached in an instance attribute.
        """
        if hasattr(self, '_segments'):
            return self._segments

        # Wrap all segments/sections into a MachOSegment/MachOSection.
        self._segments = [
            self.make_seg(seg, sec)
            for h in self.headers
            for _, seg, sec in h.commands
            if isinstance(seg, SEGMENT_TYPES)
        ]

        return self._segments

    @property
    def uuid(self):
        """ Returns UUID of the Mach-O.

        The first uuid command found is returned as an upper-case string and
        cached. Returns None (uncached) when no header carries one.
        """
        if hasattr(self, '_uuid'):
            return self._uuid

        for h in self.headers:
            for cmd in h.commands:
                # cmds is [(load_command, segment, [sections..])]
                (_, segment, _) = cmd
                if isinstance(segment, macholib.mach_o.uuid_command):
                    self._uuid = str(uuid.UUID(bytes=segment.uuid)).upper()
                    return self._uuid

        return None


# some fixups in macholib that are required for kext support
macholib.mach_o.MH_KEXT_BUNDLE = 0xB

macholib.mach_o.MH_FILETYPE_NAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext bundle"
macholib.mach_o.MH_FILETYPE_SHORTNAMES[macholib.mach_o.MH_KEXT_BUNDLE] = "kext"

SEGMENT_TYPES = (macholib.mach_o.segment_command_64, macholib.mach_o.segment_command)


def get_load_command_human_name(lc):
    """ Returns the command name reported by the load command's get_cmd_name(). """
    return lc.get_cmd_name()


class VisualMachoMap(object):
    """ Prints a text drawing of the segments and sections of a Mach-O. """

    KB_1 = 1024
    KB_16 = 16 * 1024
    MB_1 = 1 * 1024 * 1024
    GB_1 = 1 * 1024 * 1024 * 1024

    def __init__(self, name, width=40):
        """ name is printed as the title of the map. width is the initial
            width of the drawing in characters; printMachoMap() widens it
            when a block name does not fit.
        """
        self.name = name
        # Honour the caller's width (it used to be ignored in favour of a
        # hard-coded 40, which is still the default).
        self.width = width
        self.default_side_padding = 2

    def get_header_line(self):
        """ Returns the top/bottom border: '+' corners joined by dashes. """
        return '+' + '-' * (self.width - 2) + '+'

    def get_space_line(self):
        """ Returns an empty row: '|' borders around spaces. """
        return '|' + ' ' * (self.width - 2) + '|'

    def get_dashed_line(self):
        """ Returns a row of dashes between '|' borders (used for segments). """
        return '|' + '-' * (self.width - 2) + '|'

    def get_dotted_line(self):
        """ Returns a row of dots between '|' borders (used for sections). """
        return '|' + '.' * (self.width - 2) + '|'

    def center_text_in_line(self, line, text):
        """ Overlays text onto the middle of line and returns the result.

        The result has the same length as line. Raises ValueError when text
        is longer than len(line) - 2, i.e. would overwrite the borders.
        """
        even_length = bool(len(text) % 2 == 0)
        if len(text) > len(line) - 2:
            raise ValueError("text is larger than line of text")

        lbreak_pos = (len(line) // 2) - (len(text) // 2)
        if not even_length:
            # Odd-length text starts one column further left.
            lbreak_pos -= 1
        out = line[:lbreak_pos] + text
        return out + line[len(out):]

    def get_separator_lines(self):
        """ Returns two '/'-bordered blank rows marking an elided region. """
        return ['/' + ' ' * (self.width - 2) + '/', '/' + ' ' * (self.width - 2) + '/']

    def printMachoMap(self, mobj):
        """ Prints the memory map of mobj followed by its other load commands.

        mobj is expected to provide headers, each with a commands list of
        (load_command, command, data) tuples. Segments and sections are drawn
        sorted by vmaddr; every other load command is listed after the map.
        self.width may grow as a side effect so the longest name fits.
        """
        MapBlock = namedtuple('MapBlock', 'name vmaddr vmsize fileoff filesize extra_info is_segment')
        outstr = self.name + '\n'
        other_cmds = ''
        blocks = []
        for hdr in mobj.headers:
            for cmd in hdr.commands:
                # cmds is [(load_command, segment, [sections..])]
                (lc, segment, sections) = cmd
                lc_cmd_str = get_load_command_human_name(lc)
                # For non-segment commands `sections` is the raw command
                # data, so "nsects" is then its length in bytes.
                lc_str_rep = "\n\t LC: {:s} size:{:d} nsects:{:d}".format(lc_cmd_str, lc.cmdsize, len(sections))
                if isinstance(segment, SEGMENT_TYPES):
                    segname = _c_string(segment.segname)
                    blocks.append(MapBlock(segname, segment.vmaddr, segment.vmsize, segment.fileoff, segment.filesize,
                                           ' LC:{} : {} init:{:#0X} max:{:#0X}'.format(lc_cmd_str, segname, segment.initprot, segment.maxprot),
                                           True))
                    for section in sections:
                        section_name = _c_string(section.sectname)
                        blocks.append(MapBlock(section_name, section.addr, section.size, section.offset,
                                               section.size, 'al:{} flags:{:#0X}'.format(section.align, section.flags), False))
                elif isinstance(segment, macholib.mach_o.uuid_command):
                    other_cmds += "\n\t uuid: {:s}".format(str(uuid.UUID(bytes=segment.uuid)).upper())
                elif isinstance(segment, macholib.mach_o.rpath_command):
                    # NOTE(review): if macholib exposes rpath_command.path as
                    # an integer offset rather than a string, '{:s}' raises
                    # ValueError here - confirm and read the path from
                    # `sections` (raw command data) if so.
                    other_cmds += "\n\t rpath: {:s}".format(segment.path)
                elif isinstance(segment, macholib.mach_o.dylib_command):
                    # NOTE(review): str() of a bytes slice prints with a b''
                    # prefix on Python 3 - confirm whether that is wanted.
                    other_cmds += "\n\t dylib: {:s} ({:s})".format(str(sections[:sections.find(b'\x00')]), str(segment.current_version))
                else:
                    other_cmds += lc_str_rep

        # fixup the self.width param
        for _b in blocks:
            if self.default_side_padding + len(_b.name) + 2 > self.width:
                self.width = self.default_side_padding + len(_b.name) + 2
        if self.width % 2 != 0:
            self.width += 1

        sorted_blocks = sorted(blocks, key=lambda b: b.vmaddr)
        mstr = [self.get_header_line()]
        prev_block = MapBlock('', 0, 0, 0, 0, '', False)
        for b in sorted_blocks:
            # TODO add separator blocks if vmaddr is large from prev_block
            if b.is_segment:
                s = self.get_dashed_line()
            else:
                s = self.get_dotted_line()
            s = self.center_text_in_line(s, b.name)
            line = "{:s} {: <#020X} ({: <10d}) floff:{: <#08x} {}".format(s, b.vmaddr, b.vmsize, b.fileoff, b.extra_info)
            # Leave a visual gap when this block starts more than 16KB
            # after the start of the previous one.
            if (b.vmaddr - prev_block.vmaddr) > VisualMachoMap.KB_16:
                mstr.append(self.get_space_line())
                mstr.append(self.get_space_line())

            mstr.append(line)

            # Blocks larger than 1MB get an "elided" marker below them.
            if b.vmsize > VisualMachoMap.MB_1:
                mstr.append(self.get_space_line())
                mstr.extend(self.get_separator_lines())
                mstr.append(self.get_space_line())
            prev_block = b
        mstr.append(self.get_space_line())
        if prev_block.vmsize > VisualMachoMap.KB_16:
            mstr.append(self.get_space_line())
        mstr.append(self.get_header_line())
        print(outstr)
        print("\n".join(mstr))
        print("\n\n=============== Other Load Commands ===============")
        print(other_cmds)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: {} /path/to/macho_binary".format(sys.argv[0]))
        sys.exit(1)
    with open(sys.argv[-1], 'rb') as fp:
        mobject = MemMachO(fp)

        p = VisualMachoMap(sys.argv[-1])
        p.printMachoMap(mobject)
    sys.exit(0)