/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	ipc/ipc_entry.c
 *	Author:	Rich Draves
 *	Date:	1989
 *
 *	Primitive functions to manipulate translation entries.
 */

#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <mach/port.h>
#include <kern/assert.h>
#include <kern/sched_prim.h>
#include <kern/zalloc.h>
#include <ipc/port.h>
#include <ipc/ipc_entry.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_hash.h>
#include <ipc/ipc_table.h>
#include <ipc/ipc_port.h>
#include <sys/kdebug.h>

KALLOC_ARRAY_TYPE_DEFINE(ipc_entry_table, struct ipc_entry, KT_PRIV_ACCT);

/*
 *	Routine:	ipc_entry_table_count_max
 *	Purpose:
 *		returns the maximum number of entries an IPC space
 *		is allowed to contain (the maximum size to which it will grow)
 *	Conditions:
 *		none
 */
unsigned int
ipc_entry_table_count_max(void)
{
	return ipc_entry_table_size_to_count(CONFIG_IPC_TABLE_ENTRIES_SIZE_MAX);
}
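
/*
 * Illustrative sketch: a Mach port name packs a table index and a
 * generation count.  ipc_entry_lookup() below recovers the index with
 * MACH_PORT_INDEX() and only accepts the entry if its stored generation
 * matches the name and the entry is actually in use.  The helper below is
 * a stand-alone model of that acceptance test; the name
 * ipc_entry_example_name_matches is a placeholder, not part of the IPC
 * interface.
 */
__unused static bool
ipc_entry_example_name_matches(ipc_entry_t entry, mach_port_name_t name)
{
	/* the generation in the entry must match the generation in the name */
	if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name)) {
		return false;
	}
	/* a free (type-less) entry never satisfies a lookup */
	return IE_BITS_TYPE(entry->ie_bits) != MACH_PORT_TYPE_NONE;
}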

/*
 *	Routine:	ipc_entry_lookup
 *	Purpose:
 *		Searches for an entry, given its name.
 *	Conditions:
 *		The space must be read or write locked throughout.
 *		The space must be active.
 */
ipc_entry_t
ipc_entry_lookup(
	ipc_space_t             space,
	mach_port_name_t        name)
{
	mach_port_index_t index;
	ipc_entry_table_t table;
	ipc_entry_t entry;

	table = is_active_table(space);
	index = MACH_PORT_INDEX(name);

	if (__improbable(index == 0)) {
		return IE_NULL;
	}

	entry = ipc_entry_table_get(table, index);
	if (__improbable(!entry ||
	    IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name) ||
	    IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)) {
		return IE_NULL;
	}

	return entry;
}

/*
 *	Routine:	ipc_entries_hold
 *	Purpose:
 *		Verifies that there are at least 'entries_needed'
 *		free list members
 *	Conditions:
 *		The space is write-locked and active throughout.
 *		An object may be locked.  Will not allocate memory.
 *	Returns:
 *		KERN_SUCCESS		Free entries were found.
 *		KERN_NO_SPACE		No entry allocated.
 */
kern_return_t
ipc_entries_hold(
	ipc_space_t             space,
	uint32_t                entries_needed)
{
	mach_port_index_t next_free = 0;
	ipc_entry_table_t table;
	ipc_entry_t entry;
	uint32_t i;

	/*
	 * Assume that all new entries will need hashing.
	 * If the table is more than 87.5% full pretend we didn't have space.
	 */
	table = is_active_table(space);
	if (space->is_table_hashed + entries_needed >
	    ipc_entry_table_count(table) * 7 / 8) {
		return KERN_NO_SPACE;
	}

	entry = ipc_entry_table_base(table);

	for (i = 0; i < entries_needed; i++) {
		next_free = entry->ie_next;
		if (next_free == 0) {
			return KERN_NO_SPACE;
		}
		entry = ipc_entry_table_get(table, next_free);
		assert(entry && entry->ie_object == IO_NULL);
	}

#if CONFIG_PROC_RESOURCE_LIMITS
	ipc_space_check_limit_exceeded(space);
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
	return KERN_SUCCESS;
}

/*
 *	Routine:	ipc_entry_claim
 *	Purpose:
 *		Take formal ownership of a held entry.
 *	Conditions:
 *		The space is write-locked and active throughout.
 *		Objects must be: NULL, locked, or not initialized yet.
 *		Will not allocate memory.
 *
 *	Note: The returned entry must be marked as modified before
 *	releasing the space lock
 */
kern_return_t
ipc_entry_claim(
	ipc_space_t             space,
	ipc_object_t            object,
	mach_port_name_t        *namep,
	ipc_entry_t             *entryp)
{
	ipc_entry_t base, entry;
	ipc_entry_table_t table;
	mach_port_index_t first_free;
	mach_port_gen_t gen;
	mach_port_name_t new_name;

	table = is_active_table(space);
	base = ipc_entry_table_base(table);

	first_free = base->ie_next;
	assert(first_free != 0);

	entry = ipc_entry_table_get(table, first_free);
	assert(entry && ipc_entry_table_contains(table, entry->ie_next) &&
	    entry->ie_object == IO_NULL);

	base->ie_next = entry->ie_next;
	space->is_table_free--;

	if (object && waitq_valid(io_waitq(object))) {
		assert(waitq_held(io_waitq(object)));
	}

	/*
	 * Initialize the new entry: increment gencount and reset
	 * rollover point if it rolled over, and clear ie_request.
	 */
	gen = ipc_entry_new_gen(entry->ie_bits);
	if (__improbable(ipc_entry_gen_rolled(entry->ie_bits, gen))) {
		ipc_entry_bits_t roll = ipc_space_get_rollpoint(space);
		gen = ipc_entry_new_rollpoint(roll);
	}
	entry->ie_bits = gen;
	entry->ie_request = IE_REQ_NONE;
	entry->ie_object = object;

	/*
	 * The new name can't be MACH_PORT_NULL because index
	 * is non-zero.  It can't be MACH_PORT_DEAD because
	 * the table isn't allowed to grow big enough.
	 * (See comment in ipc/ipc_table.h.)
	 */
	new_name = MACH_PORT_MAKE(first_free, gen);
	assert(MACH_PORT_VALID(new_name));
	*namep = new_name;
	*entryp = entry;

	return KERN_SUCCESS;
}
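
/*
 * Reusing a table slot never reproduces an old name: ipc_entry_claim()
 * advances the entry's generation before building the name, so for the
 * same index two successive lifetimes yield
 *
 *	old name:  MACH_PORT_MAKE(index, gen)
 *	new name:  MACH_PORT_MAKE(index, gen')	with gen' != gen
 *
 * and a stale user-space name fails the generation comparison in
 * ipc_entry_lookup() instead of aliasing the new capability.
 */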

/*
 *	Routine:	ipc_entry_alloc
 *	Purpose:
 *		Allocate an entry out of the space.
 *	Conditions:
 *		The space is not locked before, but it is write-locked after
 *		if the call is successful.  May allocate memory.
 *	Returns:
 *		KERN_SUCCESS		An entry was allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NO_SPACE		No room for an entry in the space.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory for an entry.
 */
kern_return_t
ipc_entry_alloc(
	ipc_space_t             space,
	ipc_object_t            object,
	mach_port_name_t        *namep,
	ipc_entry_t             *entryp)
{
	kern_return_t kr;

	is_write_lock(space);

	for (;;) {
		if (!is_active(space)) {
			is_write_unlock(space);
			return KERN_INVALID_TASK;
		}

		kr = ipc_entries_hold(space, 1);
		if (kr == KERN_SUCCESS) {
			return ipc_entry_claim(space, object, namep, entryp);
		}

		kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
		if (kr != KERN_SUCCESS) {
			return kr; /* space is unlocked */
		}
	}
}
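
/*
 * Illustrative sketch: what a caller of ipc_entry_alloc() is expected to
 * do with the result.  On success the space comes back write-locked, the
 * caller fills in the capability bits, marks the entry modified (as the
 * ipc_entry_claim() contract above requires) and only then drops the lock.
 * The function name is a placeholder and the object-less allocation is a
 * simplification, not an existing kernel entry point.
 */
__unused static kern_return_t
ipc_entry_example_alloc_and_publish(
	ipc_space_t             space,
	mach_port_name_t        *namep)
{
	ipc_entry_t entry;
	kern_return_t kr;

	kr = ipc_entry_alloc(space, IO_NULL, namep, &entry);
	if (kr != KERN_SUCCESS) {
		return kr;      /* space is left unlocked on failure */
	}

	/* a real caller would set the entry's capability bits here */

	ipc_entry_modified(space, *namep, entry);
	is_write_unlock(space);
	return KERN_SUCCESS;
}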

/*
 *	Routine:	ipc_entry_alloc_name
 *	Purpose:
 *		Allocates/finds an entry with a specific name.
 *		If an existing entry is returned, its type will be nonzero.
 *	Conditions:
 *		The space is not locked before, but it is write-locked after
 *		if the call is successful.  May allocate memory.
 *	Returns:
 *		KERN_SUCCESS		Found existing entry with same name.
 *		KERN_SUCCESS		Allocated a new entry.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
 *		KERN_FAILURE		Couldn't allocate requested name.
 *		KERN_INVALID_VALUE	Supplied port name is invalid.
 */
kern_return_t
ipc_entry_alloc_name(
	ipc_space_t             space,
	mach_port_name_t        name,
	ipc_entry_t             *entryp)
{
	const mach_port_index_t index = MACH_PORT_INDEX(name);
	mach_port_gen_t gen = MACH_PORT_GEN(name);

	/*
	 * Callers today never pass MACH_PORT_NULL
	 */
	assert(MACH_PORT_VALID(name));

	if (index > ipc_entry_table_count_max()) {
		return KERN_NO_SPACE;
	}

	if (name != ipc_entry_name_mask(name)) {
		/* must have valid generation bits */
		return KERN_INVALID_VALUE;
	}

	if (index == 0) {
		return KERN_FAILURE;
	}

	is_write_lock(space);

	for (;;) {
		ipc_entry_table_t table;
		ipc_entry_t entry;

		if (!is_active(space)) {
			is_write_unlock(space);
			return KERN_INVALID_TASK;
		}

		table = is_active_table(space);

		/*
		 * If we are under the table cutoff,
		 * there are usually four cases:
		 *	1) The entry is reserved (index 0)
		 *	   dealt with on entry.
		 *	2) The entry is free
		 *	3) The entry is inuse, for the same name
		 *	4) The entry is inuse, for a different name
		 *
		 * For a task with a "fast" IPC space, we disallow
		 * cases 1) and 4), because ports cannot be renamed.
		 */
		entry = ipc_entry_table_get(table, index);
		if (!entry) {
			/*
			 * We grow the table so that the name
			 * index fits in the array space.
			 * Because the space will be unlocked,
			 * we must restart.
			 */
			kern_return_t kr;

			kr = ipc_entry_grow_table(space, index + 1);
			if (kr != KERN_SUCCESS) {
				/* space is unlocked */
				return kr;
			}
			continue;
		}

		if (!IE_BITS_TYPE(entry->ie_bits)) {
			mach_port_index_t prev_index;
			ipc_entry_t prev_entry;

			/*
			 * case #2 -- the entry is free
			 * Rip the entry out of the free list.
			 */
			prev_index = 0;
			prev_entry = ipc_entry_table_base(table);

			while (prev_entry->ie_next != index) {
				prev_index = prev_entry->ie_next;
				prev_entry = ipc_entry_table_get(table, prev_index);
			}

			prev_entry->ie_next = entry->ie_next;
			space->is_table_free--;

			/*
			 * prev_index can be 0 here if the desired index
			 * happens to be at the top of the freelist.
			 *
			 * Mark the previous entry modified -
			 * reconstructing the name.
			 *
			 * Do not do so for the first entry, which is
			 * reserved and ipc_entry_grow_table() will handle
			 * its ie_next separately after the rescan loop.
			 */
			if (prev_index > 0) {
				ipc_entry_modified(space,
				    MACH_PORT_MAKE(prev_index,
				    IE_BITS_GEN(prev_entry->ie_bits)),
				    prev_entry);
			}

			entry->ie_bits = gen;
			entry->ie_request = IE_REQ_NONE;
			*entryp = entry;

			assert(entry->ie_object == IO_NULL);
			return KERN_SUCCESS;
		} else if (IE_BITS_GEN(entry->ie_bits) == gen) {
			/* case #3 -- the entry is inuse, for the same name */
			*entryp = entry;
			return KERN_SUCCESS;
		} else {
			/* case #4 -- the entry is inuse, for a different name. */
			/* Collisions are not allowed */
			is_write_unlock(space);
			return KERN_FAILURE;
		}
	}
}

/*
 *	Routine:	ipc_entry_dealloc
 *	Purpose:
 *		Deallocates an entry from a space.
 *	Conditions:
 *		The space must be write-locked throughout.
 *		The space must be active.
 */
void
ipc_entry_dealloc(
	ipc_space_t             space,
	ipc_object_t            object,
	mach_port_name_t        name,
	ipc_entry_t             entry)
{
	ipc_entry_table_t table;
	mach_port_index_t index;
	ipc_entry_t base;

	assert(entry->ie_object == object);
	assert(entry->ie_request == IE_REQ_NONE);
	if (object) {
		io_lock_held(object);
	}

#if 1
	if (entry->ie_request != IE_REQ_NONE) {
		panic("ipc_entry_dealloc()");
	}
#endif

	index = MACH_PORT_INDEX(name);
	table = is_active_table(space);
	base = ipc_entry_table_base(table);

	assert(index > 0 && entry == ipc_entry_table_get(table, index));
	assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));

	entry->ie_bits &= (IE_BITS_GEN_MASK | IE_BITS_ROLL_MASK);
	entry->ie_next = base->ie_next;
	entry->ie_object = IO_NULL;
	base->ie_next = index;
	space->is_table_free++;

	ipc_entry_modified(space, name, entry);
}

/*
 *	Routine:	ipc_entry_modified
 *	Purpose:
 *		Note that an entry was modified in a space.
 *	Conditions:
 *		Assumes exclusive write access to the space,
 *		either through a write lock or being the cleaner
 *		on an inactive space.
 */
void
ipc_entry_modified(
	ipc_space_t             space,
	mach_port_name_t        name,
	__assert_only ipc_entry_t entry)
{
	ipc_entry_table_t table;
	mach_port_index_t index;

	index = MACH_PORT_INDEX(name);
	table = is_active_table(space);

	assert(entry == ipc_entry_table_get(table, index));
	assert(space->is_low_mod <= ipc_entry_table_count(table));
	assert(space->is_high_mod < ipc_entry_table_count(table));

	if (index < space->is_low_mod) {
		space->is_low_mod = index;
	}
	if (index > space->is_high_mod) {
		space->is_high_mod = index;
	}

	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_PORT_ENTRY_MODIFY) | DBG_FUNC_NONE,
		space->is_task ? task_pid(space->is_task) : 0,
		name,
		entry->ie_bits,
		0,
		0);
}

#define IPC_ENTRY_GROW_STATS 1
#if IPC_ENTRY_GROW_STATS
static uint64_t ipc_entry_grow_count = 0;
static uint64_t ipc_entry_grow_rescan = 0;
static uint64_t ipc_entry_grow_rescan_max = 0;
static uint64_t ipc_entry_grow_rescan_entries = 0;
static uint64_t ipc_entry_grow_rescan_entries_max = 0;
static uint64_t ipc_entry_grow_freelist_entries = 0;
static uint64_t ipc_entry_grow_freelist_entries_max = 0;
#endif

static inline void
ipc_space_start_growing(ipc_space_t is)
{
	assert(!is_growing(is));
	is->is_grower = current_thread();
}

static void
ipc_space_done_growing_and_unlock(ipc_space_t space)
{
	assert(space->is_grower == current_thread());

	space->is_grower = THREAD_NULL;
	is_write_unlock(space);
	wakeup_all_with_inheritor((event_t)space, THREAD_AWAKENED);
}
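
/*
 * How the modification watermarks tie into growing:
 *
 * ipc_entry_modified() above maintains is_low_mod/is_high_mod as the lowest
 * and highest indexes touched since they were last reset.
 * ipc_entry_grow_table() below resets them just before dropping the space
 * lock, copies the old table, and on relocking uses the window they describe
 * to find entries that changed while the copy was in flight; only that
 * window is re-copied on the next rescan pass.
 */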

/*
 *	Routine:	ipc_entry_grow_table
 *	Purpose:
 *		Grows the table in a space.
 *	Conditions:
 *		The space must be write-locked and active before.
 *		If successful, the space is also returned locked.
 *		On failure, the space is returned unlocked.
 *		Allocates memory.
 *	Returns:
 *		KERN_SUCCESS		Grew the table.
 *		KERN_SUCCESS		Somebody else grew the table.
 *		KERN_SUCCESS		The space died.
 *		KERN_NO_SPACE		Table has maximum size already.
 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate a new table.
 */
kern_return_t
ipc_entry_grow_table(
	ipc_space_t             space,
	ipc_table_elems_t       target_count)
{
	ipc_entry_num_t osize, nsize;
	ipc_entry_num_t ocount, ncount;
	ipc_entry_table_t otable, ntable;
	ipc_entry_t obase, nbase;
	mach_port_index_t free_index;
	mach_port_index_t low_mod, hi_mod;
	ipc_table_index_t sanity;
#if IPC_ENTRY_GROW_STATS
	uint64_t rescan_count = 0;
#endif

	if (is_growing(space)) {
		/*
		 * Somebody else is growing the table.
		 * We just wait for them to finish.
		 */
		is_write_sleep(space);
		return KERN_SUCCESS;
	}

	otable = is_active_table(space);
	obase = ipc_entry_table_base(otable);
	osize = ipc_entry_table_size(otable);
	ocount = ipc_entry_table_size_to_count(osize);

	if (target_count == ITS_SIZE_NONE) {
		nsize = ipc_entry_table_next_size(IPC_ENTRY_TABLE_MIN, osize,
		    IPC_ENTRY_TABLE_PERIOD);
	} else if (target_count <= ocount) {
		return KERN_SUCCESS;
	} else if (target_count > ipc_entry_table_count_max()) {
		goto no_space;
	} else {
		uint32_t tsize = ipc_entry_table_count_to_size(target_count);

		nsize = ipc_entry_table_next_size(IPC_ENTRY_TABLE_MIN, tsize,
		    IPC_ENTRY_TABLE_PERIOD);
	}
	if (nsize > CONFIG_IPC_TABLE_ENTRIES_SIZE_MAX) {
		nsize = CONFIG_IPC_TABLE_ENTRIES_SIZE_MAX;
	}
	if (osize == nsize) {
		goto no_space;
	}

	/*
	 * We'll attempt to grow the table.
	 *
	 * Because we will be copying without the space lock, reset
	 * the lowest_mod index to just beyond the end of the current
	 * table.  Modification of entries (other than hashes) will
	 * bump this downward, and we only have to reprocess entries
	 * above that mark.  Eventually, we'll get done.
	 */
	ipc_space_start_growing(space);
	space->is_low_mod = ocount;
	space->is_high_mod = 0;
#if IPC_ENTRY_GROW_STATS
	ipc_entry_grow_count++;
#endif
	is_write_unlock(space);

	ntable = ipc_entry_table_alloc_by_size(nsize, Z_WAITOK | Z_ZERO);
	if (ntable == NULL) {
		is_write_lock(space);
		ipc_space_done_growing_and_unlock(space);
		return KERN_RESOURCE_SHORTAGE;
	}

	nbase = ipc_entry_table_base(ntable);
	nsize = ipc_entry_table_size(ntable);
	ncount = ipc_entry_table_count(ntable);
	ipc_space_rand_freelist(space, nbase, ocount, ncount);

	low_mod = 1;
	hi_mod = ocount - 1;
rescan:
	/*
	 * Within the range of the table that changed, determine what we
	 * have to take action on.  For each entry, take a snapshot of the
	 * corresponding entry in the old table (so it won't change
	 * during this iteration).  The snapshot may not be self-consistent
	 * (if we caught it in the middle of being changed), so be very
	 * cautious with the values.
	 */
	assert(low_mod > 0);
	for (mach_port_index_t i = MAX(1, low_mod); i <= hi_mod; i++) {
		ipc_entry_t entry = &nbase[i];
		ipc_object_t osnap_object = obase[i].ie_object;
		ipc_entry_bits_t osnap_bits = obase[i].ie_bits;
		ipc_entry_bits_t osnap_request = obase[i].ie_request;

		/*
		 * We need to make sure the osnap_* fields are never reloaded.
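		 * Without a barrier the compiler could re-read obase[i] below,
		 * turning what is meant to be a single snapshot into several
		 * loads that might observe different intermediate states.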
		 */
		os_compiler_barrier();

		if (entry->ie_object != osnap_object ||
		    IE_BITS_TYPE(entry->ie_bits) != IE_BITS_TYPE(osnap_bits)) {
			if (entry->ie_object != IO_NULL &&
			    IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND) {
				ipc_hash_table_delete(ntable, entry->ie_object, i, entry);
			}

			entry->ie_object = osnap_object;
			entry->ie_bits = osnap_bits;
			entry->ie_request = osnap_request; /* or ie_next */

			if (osnap_object != IO_NULL &&
			    IE_BITS_TYPE(osnap_bits) == MACH_PORT_TYPE_SEND) {
				ipc_hash_table_insert(ntable, osnap_object, i, entry);
			}
		} else {
			entry->ie_bits = osnap_bits;
			entry->ie_request = osnap_request; /* or ie_next */
		}
	}
	nbase[0].ie_next = obase[0].ie_next; /* always rebase the freelist */

	/*
	 * find the end of the freelist (should be short).  But be careful,
	 * the list items can change so only follow through truly free entries
	 * (no problem stopping short in those cases, because we'll rescan).
	 */
	free_index = 0;
	for (sanity = 0; sanity < ocount; sanity++) {
		if (nbase[free_index].ie_object != IPC_OBJECT_NULL) {
			break;
		}
		mach_port_index_t i = nbase[free_index].ie_next;
		if (i == 0 || i >= ocount) {
			break;
		}
		free_index = i;
	}
#if IPC_ENTRY_GROW_STATS
	ipc_entry_grow_freelist_entries += sanity;
	if (sanity > ipc_entry_grow_freelist_entries_max) {
		ipc_entry_grow_freelist_entries_max = sanity;
	}
#endif

	is_write_lock(space);

	/*
	 * We need to do a wakeup on the space,
	 * to rouse waiting threads.  We defer
	 * this until the space is unlocked,
	 * because we don't want them to spin.
	 */

	if (!is_active(space)) {
		/*
		 * The space died while it was unlocked.
		 */
		ipc_space_done_growing_and_unlock(space);
		ipc_entry_table_free(&ntable);
		is_write_lock(space);
		return KERN_SUCCESS;
	}

	/* If the space changed while unlocked, go back and process the changes */
	if (space->is_low_mod < ocount) {
		assert(space->is_high_mod > 0);
		low_mod = space->is_low_mod;
		space->is_low_mod = ocount;
		hi_mod = space->is_high_mod;
		space->is_high_mod = 0;
		is_write_unlock(space);

		if (hi_mod >= ocount) {
			panic("corrupt hi_mod: %d, obase: %p, ocount: %d\n",
			    hi_mod, obase, ocount);
		}

#if IPC_ENTRY_GROW_STATS
		rescan_count++;
		if (rescan_count > ipc_entry_grow_rescan_max) {
			ipc_entry_grow_rescan_max = rescan_count;
		}
		ipc_entry_grow_rescan++;
		ipc_entry_grow_rescan_entries += hi_mod - low_mod + 1;
		if (hi_mod - low_mod + 1 > ipc_entry_grow_rescan_entries_max) {
			ipc_entry_grow_rescan_entries_max = hi_mod - low_mod + 1;
		}
#endif
		goto rescan;
	}

	/* link new free entries onto the rest of the freelist */
	assert(nbase[free_index].ie_next == 0 &&
	    nbase[free_index].ie_object == IO_NULL);
	nbase[free_index].ie_next = ocount;

	assert(smr_serialized_load(&space->is_table) == otable);

	space->is_table_free += ncount - ocount;
	smr_serialized_store(&space->is_table, ntable);

	ipc_space_done_growing_and_unlock(space);

	/*
	 * Now we need to free the old table.
	 */
	ipc_space_retire_table(otable);
	is_write_lock(space);

	return KERN_SUCCESS;

no_space:
	ipc_space_set_at_max_limit(space);
	is_write_unlock(space);
	return KERN_NO_SPACE;
}

/*
 *	Routine:	ipc_entry_name_mask
 *	Purpose:
 *		Ensure a mach port name has the default ipc entry
 *		generation bits set.  This can be used to ensure that
 *		a name passed in by user space matches names generated
 *		by the kernel.
 *	Conditions:
 *		None.
 *	Returns:
 *		'name' input with default generation bits masked or added
 *		as appropriate.
 */
mach_port_name_t
ipc_entry_name_mask(mach_port_name_t name)
{
#ifndef NO_PORT_GEN
	static mach_port_name_t null_name =
	    MACH_PORT_MAKE(0, IE_BITS_GEN_MASK + IE_BITS_GEN_ONE);
	return name | null_name;
#else
	static mach_port_name_t null_name =
	    MACH_PORT_MAKE(0, ~(IE_BITS_GEN_MASK + IE_BITS_GEN_ONE));
	return name & ~null_name;
#endif
}
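
/*
 * Illustrative sketch: ipc_entry_name_mask() is how the kernel checks that
 * a user-supplied name carries the default generation bits (see the check
 * in ipc_entry_alloc_name() above).  The helper name below is a
 * placeholder, not part of this file's interface.
 */
__unused static bool
ipc_entry_example_name_is_canonical(mach_port_name_t name)
{
	/* true when the name already carries the default generation bits */
	return name == ipc_entry_name_mask(name);
}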