xref: /xnu-10002.1.13/bsd/dev/dtrace/dtrace_ptss.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 /*
2  * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/types.h>
30 #include <sys/proc.h>
31 #include <sys/proc_internal.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/dtrace_ptss.h>
35 
36 #include <mach/vm_map.h>
37 #include <mach/vm_param.h>
38 #include <mach/mach_vm.h>
39 
40 #include <kern/task.h>
41 
42 #include <vm/vm_map.h>
43 
44 /*
45  * This function requires the sprlock to be held
46  *
47  * In general, it will not block. If it needs to allocate a new
48  * page of memory, the underlying kernel kalloc may block.
49  */
50 struct dtrace_ptss_page_entry*
dtrace_ptss_claim_entry_locked(struct proc * p)51 dtrace_ptss_claim_entry_locked(struct proc* p)
52 {
53 	LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
54 
55 	struct dtrace_ptss_page_entry* entry = NULL;
56 
57 	while (TRUE) {
58 		struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
59 
60 		if (temp == NULL) {
61 			// Nothing on the free list. Allocate a new page, its okay if multiple threads race here.
62 			struct dtrace_ptss_page* page = dtrace_ptss_allocate_page(p);
63 
64 			// Make sure we actually got a page
65 			if (page == NULL) {
66 				return NULL;
67 			}
68 
69 			// Add the page to the page list
70 			page->next = p->p_dtrace_ptss_pages;
71 			p->p_dtrace_ptss_pages = page;
72 
73 			// CAS the entries onto the free list.
74 			do {
75 				page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = p->p_dtrace_ptss_free_list;
76 			} while (!OSCompareAndSwapPtr((void *)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next,
77 			    (void *)&page->entries[0],
78 			    (void * volatile *)&p->p_dtrace_ptss_free_list));
79 
80 			// Now that we've added to the free list, try again.
81 			continue;
82 		}
83 
84 		// Claim temp
85 		if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) {
86 			continue;
87 		}
88 
89 		// At this point, we own temp.
90 		entry = temp;
91 
92 		break;
93 	}
94 
95 	return entry;
96 }
97 
98 /*
99  * This function does not require any locks to be held on entry.
100  */
101 struct dtrace_ptss_page_entry*
dtrace_ptss_claim_entry(struct proc * p)102 dtrace_ptss_claim_entry(struct proc* p)
103 {
104 	// Verify no locks held on entry
105 	LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
106 	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
107 
108 	struct dtrace_ptss_page_entry* entry = NULL;
109 
110 	while (TRUE) {
111 		struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
112 
113 		if (temp == NULL) {
114 			lck_mtx_lock(&p->p_dtrace_sprlock);
115 			temp = dtrace_ptss_claim_entry_locked(p);
116 			lck_mtx_unlock(&p->p_dtrace_sprlock);
117 			return temp;
118 		}
119 
120 		// Claim temp
121 		if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) {
122 			continue;
123 		}
124 
125 		// At this point, we own temp.
126 		entry = temp;
127 
128 		break;
129 	}
130 
131 	return entry;
132 }
133 
134 /*
135  * This function does not require any locks to be held on entry.
136  *
137  * (PR-11138709) A NULL p->p_dtrace_ptss_pages means the entry can
138  * no longer be referenced safely. When found in this state, the chore
139  * of releasing an entry to the free list is ignored.
140  */
141 void
dtrace_ptss_release_entry(struct proc * p,struct dtrace_ptss_page_entry * e)142 dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e)
143 {
144 	if (p && p->p_dtrace_ptss_pages && e) {
145 		do {
146 			e->next = p->p_dtrace_ptss_free_list;
147 		} while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list));
148 	}
149 }
150 
151 /*
152  * This function allocates a new page in the target process's address space.
153  *
154  * It returns a dtrace_ptss_page that has its entries chained, with the last
155  * entries next field set to NULL. It does not add the page or the entries to
156  * the process's page/entry lists.
157  *
158  * This function does not require that any locks be held when it is invoked.
159  */
160 struct dtrace_ptss_page*
dtrace_ptss_allocate_page(struct proc * p)161 dtrace_ptss_allocate_page(struct proc* p)
162 {
163 	// Allocate the kernel side data
164 	struct dtrace_ptss_page* ptss_page = kalloc_type(struct dtrace_ptss_page, Z_ZERO | Z_WAITOK);
165 	if (ptss_page == NULL) {
166 		return NULL;
167 	}
168 
169 	// Now allocate a page in user space and set its protections to allow execute.
170 	task_t task = proc_task(p);
171 	vm_map_t map = get_task_map_reference(task);
172 	if (map == NULL) {
173 		goto err;
174 	}
175 
176 	mach_vm_size_t size = PAGE_MAX_SIZE;
177 	mach_vm_offset_t addr = 0;
178 	mach_vm_offset_t write_addr = 0;
179 	/*
180 	 * The embedded OS has extra permissions for writable and executable pages.
181 	 * To ensure correct permissions, we must set the page protections separately.
182 	 */
183 	vm_prot_t cur_protection = VM_PROT_READ | VM_PROT_EXECUTE;
184 	vm_prot_t max_protection = VM_PROT_READ | VM_PROT_EXECUTE;
185 	kern_return_t kr;
186 
187 	kr = mach_vm_map_kernel(map, &addr, size, 0,
188 	    VM_MAP_KERNEL_FLAGS_ANYWHERE(), IPC_PORT_NULL, 0, FALSE,
189 	    cur_protection, max_protection, VM_INHERIT_DEFAULT);
190 	if (kr != KERN_SUCCESS) {
191 		goto err;
192 	}
193 
194 	/*
195 	 * To ensure the page is properly marked as user debug, temporarily change
196 	 * the permissions to rw and then back again to rx. The VM will keep track
197 	 * of this remapping and on fault will pass PMAP_OPTIONS_XNU_USER_DEBUG
198 	 * properly to the PMAP layer.
199 	 */
200 	kr = mach_vm_protect(map, (mach_vm_offset_t)addr, (mach_vm_size_t)size, 0,
201 	    VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
202 	if (kr != KERN_SUCCESS) {
203 		goto err;
204 	}
205 
206 	kr = mach_vm_protect(map, (mach_vm_offset_t)addr, (mach_vm_size_t)size, 0,
207 	    VM_PROT_READ | VM_PROT_EXECUTE);
208 	if (kr != KERN_SUCCESS) {
209 		goto err;
210 	}
211 
212 	/*
213 	 * If on embedded, remap the scratch space as writable at another
214 	 * virtual address
215 	 */
216 	kr = mach_vm_remap_kernel(map, &write_addr, size, 0, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, map, addr, FALSE, &cur_protection, &max_protection, VM_INHERIT_DEFAULT);
217 	if (kr != KERN_SUCCESS || !(max_protection & VM_PROT_WRITE)) {
218 		goto err;
219 	}
220 
221 	kr = mach_vm_protect(map, (mach_vm_offset_t)write_addr, (mach_vm_size_t)size, 0, VM_PROT_READ | VM_PROT_WRITE);
222 	if (kr != KERN_SUCCESS) {
223 		goto err;
224 	}
225 
226 	// Chain the page entries.
227 	int i;
228 	for (i = 0; i < DTRACE_PTSS_ENTRIES_PER_PAGE; i++) {
229 		ptss_page->entries[i].addr = addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD);
230 		ptss_page->entries[i].write_addr = write_addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD);
231 		ptss_page->entries[i].next = &ptss_page->entries[i + 1];
232 	}
233 
234 	// The last entry should point to NULL
235 	ptss_page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = NULL;
236 
237 	vm_map_deallocate(map);
238 
239 	return ptss_page;
240 
241 err:
242 	kfree_type(struct dtrace_ptss_page, ptss_page);
243 
244 	if (map) {
245 		vm_map_deallocate(map);
246 	}
247 
248 	return NULL;
249 }
250 
251 /*
252  * This function frees an existing page in the target process's address space.
253  *
254  * It does not alter any of the process's page/entry lists.
255  *
256  * TODO: Inline in dtrace_ptrace_exec_exit?
257  */
258 void
dtrace_ptss_free_page(struct proc * p,struct dtrace_ptss_page * ptss_page)259 dtrace_ptss_free_page(struct proc* p, struct dtrace_ptss_page* ptss_page)
260 {
261 	// Grab the task and get a reference to its vm_map
262 	task_t task = proc_task(p);
263 	vm_map_t map = get_task_map_reference(task);
264 
265 	mach_vm_address_t addr = ptss_page->entries[0].addr;
266 	mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!!
267 
268 	// Silent failures, no point in checking return code.
269 	mach_vm_deallocate(map, addr, size);
270 
271 	mach_vm_address_t write_addr = ptss_page->entries[0].write_addr;
272 	mach_vm_deallocate(map, write_addr, size);
273 
274 	vm_map_deallocate(map);
275 }
276 
277 /*
278  * This function assumes that the target process has been
279  * suspended, and the proc_lock & sprlock is held
280  */
281 void
dtrace_ptss_enable(struct proc * p)282 dtrace_ptss_enable(struct proc* p)
283 {
284 	LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
285 	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_OWNED);
286 
287 	struct uthread* uth;
288 	/*
289 	 * XXX There has been a concern raised about holding the proc_lock
290 	 * while calling dtrace_ptss_claim_entry(), due to the fact
291 	 * that dtrace_ptss_claim_entry() can potentially malloc.
292 	 */
293 	TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) {
294 		uth->t_dtrace_scratch = dtrace_ptss_claim_entry_locked(p);
295 	}
296 }
297 
298 /*
299  * This function is not thread safe.
300  *
301  * It assumes the sprlock is held, and the proc_lock is not.
302  */
303 void
dtrace_ptss_exec_exit(struct proc * p)304 dtrace_ptss_exec_exit(struct proc* p)
305 {
306 	/*
307 	 * Should hold sprlock to touch the pages list. Must not
308 	 * hold the proc lock to avoid deadlock.
309 	 */
310 	LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
311 	LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
312 
313 	p->p_dtrace_ptss_free_list = NULL;
314 
315 	struct dtrace_ptss_page* temp = p->p_dtrace_ptss_pages;
316 	p->p_dtrace_ptss_pages = NULL;
317 
318 	while (temp != NULL) {
319 		struct dtrace_ptss_page* next = temp->next;
320 
321 		// Do we need to specifically mach_vm_deallocate the user pages?
322 		// This can be called when the process is exiting, I believe the proc's
323 		// vm_map_t may already be toast.
324 
325 		// Must be certain to free the kernel memory!
326 		kfree_type(struct dtrace_ptss_page, temp);
327 		temp = next;
328 	}
329 }
330 
331 /*
332  * This function is not thread safe.
333  *
334  * The child proc ptss fields are initialized to NULL at fork time.
335  * Pages allocated in the parent are copied as part of the vm_map copy, though.
336  * We need to deallocate those pages.
337  *
338  * Parent and child sprlock should be held, and proc_lock must NOT be held.
339  */
340 void
dtrace_ptss_fork(struct proc * parent,struct proc * child)341 dtrace_ptss_fork(struct proc* parent, struct proc* child)
342 {
343 	// The child should not have any pages/entries allocated at this point.
344 	// ASSERT(child->p_dtrace_ptss_pages == NULL);
345 	// ASSERT(child->p_dtrace_ptss_free_list == NULL);
346 
347 	/*
348 	 * The parent's sprlock should be held, to protect its pages list
349 	 * from changing while the child references it. The child's sprlock
350 	 * must also be held, because we are modifying its pages list.
351 	 * Finally, to prevent a deadlock with the fasttrap cleanup code,
352 	 * neither the parent or child proc_lock should be held.
353 	 */
354 	LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
355 	LCK_MTX_ASSERT(&parent->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
356 	LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
357 	LCK_MTX_ASSERT(&child->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
358 
359 	// Get page list from *PARENT*
360 	struct dtrace_ptss_page* temp = parent->p_dtrace_ptss_pages;
361 
362 	while (temp != NULL) {
363 		// Freeing the page in the *CHILD*
364 		dtrace_ptss_free_page(child, temp);
365 
366 		// Do not free the kernel memory, it belong to the parent.
367 		temp = temp->next;
368 	}
369 }
370