"""Enforce VM API sanitization on MIG-generated headers.

For every file name given on the command line, the matching ``.h`` header is
scanned line by line. If any "safe" VM type from ``safe_type_list`` appears,
each occurrence is reported on stderr and the script exits with ``ERROR``.
"""

import sys

# This script's return values
ERROR = 1
SUCCESS = 0

error_help_message = (
    "Safe types were used in MIG in violation of VM API sanitization enforcement.\n"
    "\t- If you created a new MIG file that is not related to Virtual Memory, you will want to add it to the list of "
    "exceptions in this Python script.\n"
    "\t- If your work is related to Virtual Memory, you need to use unsafe types for userspace inputs. Make sure you "
    "define VM_KERNEL_SERVER in the MIG file, and that the safe type is associated to an unsafe type via "
    "VM_UNSAFE_TYPE/VM_TYPE_SAFE_UNSAFE.\n"
    "Learn more by reading doc/vm/sanitize.md."
)

# List of safe types that should not be used to represent userspace inputs.
# Note that it may be counter-intuitive that we want to prevent the use of
# safe types: they are safe after all, so wouldn't it be better to use them?
# The source of the confusion is that the safety of the type really describes
# the safety of the data. Since the data will be coming from userspace, it is
# considered unsafe/untrusted, and we want to make sure we represent it with
# unsafe types to force the programmer to sanitize it before it can be
# represented with the (directly usable) safe type.
# This is a list of all the safe types which have corresponding unsafe types
# in vm_types_unsafe.h.
safe_type_list = [
    "mach_vm_address_t",
    "mach_vm_offset_t",
    "mach_vm_size_t",
    "vm_address_t",
    "vm_offset_t",
    "vm_size_t",
    "vm_map_address_t",
    "vm_map_offset_t",
    "vm_map_size_t",
    "memory_object_offset_t",
    "memory_object_size_t",
    "vm_object_offset_t",
    "vm_object_size_t",
    "pointer_t",
    "vm32_address_t",
    "vm32_offset_t",
    "vm32_size_t",
    "vm_prot_t",
    "vm_inherit_t",
    "vm_behavior_t",
    "caddr_t",
    "user_addr_t",
    "size_t",
    "user_size_t",
    "struct mach_vm_range",
    "mach_vm_range_recipe_v1_t",
]

# Files that are considered outside the VM boundary and are thus not subject to enforcement.
file_ignorelist = [
    "arcade_register_server.c",
    "clock_server.c",
    "exc_server.c",
    "mach_eventlink_server.c",
    "mach_exc_server.c",
    "mach_notify_server.c",
    "mach_port_server.c",
    "mach_voucher_server.c",
    "memory_entry_server.c",
    "processor_server.c",
    "processor_set_server.c",
    "restartable_server.c",
    "task_server.c",
    "thread_act_server.c",
    "upl_server.c",
]


def print_error(*args, **kwargs):
    """Print ``args`` to stderr, prefixed with ``error:``.

    Extra keyword arguments are forwarded to ``print``.
    """
    print("error:", *args, file=sys.stderr, **kwargs)


def is_type_used_in_line(safe_type, line):
    """Return True if ``safe_type`` appears as a whole token sequence in ``line``.

    ``line`` is tokenized on whitespace after ``:``, ``;`` and ``,`` have been
    turned into spaces. ``safe_type`` is tokenized on whitespace as well, so a
    multi-word entry such as ``"struct mach_vm_range"`` matches when its words
    appear as consecutive tokens.
    """
    # This is used by an autogenerated struct in MIG that isn't an argument to a MIG call
    if "vm_address_t reserved; /* Reserved */" in line:
        return False

    # arguments to MIG functions are typically the first thing on the line in the generated header,
    # but we search for the type elsewhere to be on the safe side. We still need to be careful not
    # to trigger false positives by doing a naive search
    # e.g. size_t is in "__Request__host_page_size_t __attribute__((unused));"
    tokens = line.replace(':', ' ').replace(';', ' ').replace(',', ' ').split()
    wanted = safe_type.split()
    if not wanted:
        return False

    # Slide a window of len(wanted) tokens over the line. For single-word
    # types this is the same as a plain membership test.
    width = len(wanted)
    return any(
        tokens[start:start + width] == wanted
        for start in range(len(tokens) - width + 1)
    )


def are_safe_types_used_in_file(filepath):
    """Scan ``filepath`` and report every safe type found in it.

    One error line is printed to stderr per (line, safe type) match, using
    1-based line numbers.

    Returns True if at least one safe type was found, False otherwise.
    Exceptions raised by ``open`` (e.g. a missing file) propagate to the caller.
    """
    are_safe_types_used = False
    with open(filepath, "r") as file:
        for lineno, line in enumerate(file, start=1):
            for safe_type in safe_type_list:
                if is_type_used_in_line(safe_type, line):
                    print_error(
                        "Found safe type \"{}\" in {}:{}. Line is \"{}\"".format(
                            safe_type, filepath, lineno, line.strip()
                        )
                    )
                    are_safe_types_used = True
    return are_safe_types_used


def main():
    """Check the header of every file named in ``sys.argv[1:]``.

    Arguments listed in ``file_ignorelist`` are skipped. Every other argument
    must end in ``.c`` or ``.h``; the file actually scanned is the argument
    with its last character replaced by ``h``.

    Returns ``ERROR`` on bad usage, on an argument with an unexpected
    extension, or if any scanned header uses a safe type; ``SUCCESS`` otherwise.
    """
    if len(sys.argv) < 2:
        print_error("usage: python vm_api_enforcement.py filename [extra_filename...]")
        return ERROR

    are_safe_types_used = False
    for filename in sys.argv[1:]:
        if filename in file_ignorelist:
            continue
        if not filename.endswith((".c", ".h")):
            print_error("File should be a .c or .h file:", filename)
            return ERROR

        header = filename[:-1] + "h"
        # Accumulate rather than overwrite: a violation in any file must fail
        # the run, and every file is still scanned so all offenders are listed.
        if are_safe_types_used_in_file(header):
            are_safe_types_used = True

    if are_safe_types_used:
        print_error("{}: {}".format(sys.argv[0], error_help_message))
        return ERROR
    return SUCCESS


if __name__ == "__main__":
    sys.exit(main())