xref: /xnu-8019.80.24/osfmk/kern/restartable.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/task.h>
31 
32 #include <kern/ast.h>
33 #include <kern/kalloc.h>
34 #include <kern/kern_types.h>
35 #include <kern/mach_param.h>
36 #include <kern/machine.h>
37 #include <kern/misc_protos.h>
38 #include <kern/processor.h>
39 #include <kern/queue.h>
40 #include <kern/restartable.h>
41 #include <kern/task.h>
42 #include <kern/thread.h>
43 #include <kern/waitq.h>
44 
45 #include <os/hash.h>
46 #include <os/refcnt.h>
47 
48 /**
49  * @file osfmk/kern/restartable.c
50  *
51  * @brief
52  * This module implements restartable userspace functions.
53  *
54  * @discussion
55  * task_restartable_ranges_register() allows a task to configure
56  * the restartable ranges, only once per task,
57  * before it has made its second thread.
58  *
59  * task_restartable_ranges_synchronize() can later be used to trigger
60  * restarts for threads with a PC in a restartable region.
61  *
62  * It is implemented with an AST (AST_RESET_PCS) that will cause threads
63  * as they return to userspace to reset PCs in a restartable region
64  * to the recovery offset of this region.
65  *
66  * Because signal delivery would mask the proper saved PC for threads,
67  * sigreturn also forcefully sets the AST and will go through the logic
68  * every single time.
69  */
70 
71 typedef int (*cmpfunc_t)(const void *a, const void *b);
72 extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
73 
74 #define RR_RANGES_MAX   64
75 struct restartable_ranges {
76 	queue_chain_t            rr_link;
77 	os_refcnt_t              rr_ref;
78 	uint32_t                 rr_count;
79 	uint32_t                 rr_hash;
80 	task_restartable_range_t rr_ranges[RR_RANGES_MAX];
81 };
82 
83 #if DEBUG || DEVELOPMENT
84 #define RR_HASH_SIZE   256
85 #else
86 // Release kernel userspace should have shared caches and a single registration
87 #define RR_HASH_SIZE    16
88 #endif
89 
90 static queue_head_t rr_hash[RR_HASH_SIZE];
91 LCK_GRP_DECLARE(rr_lock_grp, "restartable ranges");
92 LCK_SPIN_DECLARE(rr_spinlock, &rr_lock_grp);
93 
94 #define rr_lock()   lck_spin_lock_grp(&rr_spinlock, &rr_lock_grp)
95 #define rr_unlock() lck_spin_unlock(&rr_spinlock);
96 
97 #pragma mark internals
98 
99 /**
100  * @function _ranges_cmp
101  *
102  * @brief
103  * Compares two ranges together.
104  */
105 static int
_ranges_cmp(const void * _r1,const void * _r2)106 _ranges_cmp(const void *_r1, const void *_r2)
107 {
108 	const task_restartable_range_t *r1 = _r1;
109 	const task_restartable_range_t *r2 = _r2;
110 
111 	if (r1->location != r2->location) {
112 		return r1->location < r2->location ? -1 : 1;
113 	}
114 	if (r1->length == r2->length) {
115 		return 0;
116 	}
117 	return r1->length < r2->length ? -1 : 1;
118 }
119 
120 /**
121  * @function _ranges_validate
122  *
123  * @brief
124  * Validates an array of PC ranges for wraps and intersections.
125  *
126  * @discussion
127  * This sorts and modifies the input.
128  *
129  * The ranges must:
130  * - not wrap around,
131  * - have a length/recovery offset within a page of the range start
132  *
133  * @returns
134  * - KERN_SUCCESS:          ranges are valid
135  * - KERN_INVALID_ARGUMENT: ranges are invalid
136  */
137 static kern_return_t
_ranges_validate(task_t task,task_restartable_range_t * ranges,uint32_t count)138 _ranges_validate(task_t task, task_restartable_range_t *ranges, uint32_t count)
139 {
140 	qsort(ranges, count, sizeof(task_restartable_range_t), _ranges_cmp);
141 	uint64_t limit = task_has_64Bit_data(task) ? UINT64_MAX : UINT32_MAX;
142 	uint64_t end, recovery;
143 
144 	if (count == 0) {
145 		return KERN_INVALID_ARGUMENT;
146 	}
147 
148 	for (size_t i = 0; i < count; i++) {
149 		if (ranges[i].length > TASK_RESTARTABLE_OFFSET_MAX ||
150 		    ranges[i].recovery_offs > TASK_RESTARTABLE_OFFSET_MAX) {
151 			return KERN_INVALID_ARGUMENT;
152 		}
153 		if (ranges[i].flags) {
154 			return KERN_INVALID_ARGUMENT;
155 		}
156 		if (os_add_overflow(ranges[i].location, ranges[i].length, &end)) {
157 			return KERN_INVALID_ARGUMENT;
158 		}
159 		if (os_add_overflow(ranges[i].location, ranges[i].recovery_offs, &recovery)) {
160 			return KERN_INVALID_ARGUMENT;
161 		}
162 		if (ranges[i].location > limit || end > limit || recovery > limit) {
163 			return KERN_INVALID_ARGUMENT;
164 		}
165 		if (i + 1 < count && end > ranges[i + 1].location) {
166 			return KERN_INVALID_ARGUMENT;
167 		}
168 	}
169 
170 	return KERN_SUCCESS;
171 }
172 
173 /**
174  * @function _ranges_lookup
175  *
176  * @brief
177  * Lookup the left side of a range for a given PC within a set of ranges.
178  *
179  * @returns
180  * - 0: no PC range found
181  * - the left-side of the range.
182  */
183 __attribute__((always_inline))
184 static mach_vm_address_t
_ranges_lookup(struct restartable_ranges * rr,mach_vm_address_t pc)185 _ranges_lookup(struct restartable_ranges *rr, mach_vm_address_t pc)
186 {
187 	task_restartable_range_t *ranges = rr->rr_ranges;
188 	uint32_t l = 0, r = rr->rr_count;
189 
190 	if (pc <= ranges[0].location) {
191 		return 0;
192 	}
193 	if (pc >= ranges[r - 1].location + ranges[r - 1].length) {
194 		return 0;
195 	}
196 
197 	while (l < r) {
198 		uint32_t i = (r + l) / 2;
199 		mach_vm_address_t location = ranges[i].location;
200 
201 		if (pc <= location) {
202 			/* if the PC is exactly at pc_start, no reset is needed */
203 			r = i;
204 		} else if (location + ranges[i].length <= pc) {
205 			/* if the PC is exactly at the end, it's out of the function */
206 			l = i + 1;
207 		} else {
208 			/* else it's strictly in the range, return the recovery pc */
209 			return location + ranges[i].recovery_offs;
210 		}
211 	}
212 
213 	return 0;
214 }
215 
216 /**
217  * @function _restartable_ranges_dispose
218  *
219  * @brief
220  * Helper to dispose of a range that has reached a 0 refcount.
221  */
222 __attribute__((noinline))
223 static void
_restartable_ranges_dispose(struct restartable_ranges * rr,bool hash_remove)224 _restartable_ranges_dispose(struct restartable_ranges *rr, bool hash_remove)
225 {
226 	if (hash_remove) {
227 		rr_lock();
228 		remqueue(&rr->rr_link);
229 		rr_unlock();
230 	}
231 	kfree_type(struct restartable_ranges, rr);
232 }
233 
234 /**
235  * @function _restartable_ranges_equals
236  *
237  * @brief
238  * Helper to compare two restartable ranges.
239  */
240 static bool
_restartable_ranges_equals(const struct restartable_ranges * rr1,const struct restartable_ranges * rr2)241 _restartable_ranges_equals(
242 	const struct restartable_ranges *rr1,
243 	const struct restartable_ranges *rr2)
244 {
245 	size_t rr1_size = rr1->rr_count * sizeof(task_restartable_range_t);
246 	return rr1->rr_hash == rr2->rr_hash &&
247 	       rr1->rr_count == rr2->rr_count &&
248 	       memcmp(rr1->rr_ranges, rr2->rr_ranges, rr1_size) == 0;
249 }
250 
251 /**
252  * @function _restartable_ranges_create
253  *
254  * @brief
255  * Helper to create a uniqued restartable range.
256  *
257  * @returns
258  * - KERN_SUCCESS
259  * - KERN_INVALID_ARGUMENT: the validation of the new ranges failed.
260  * - KERN_RESOURCE_SHORTAGE: too many ranges, out of memory
261  */
262 static kern_return_t
_restartable_ranges_create(task_t task,task_restartable_range_t * ranges,uint32_t count,struct restartable_ranges ** rr_storage)263 _restartable_ranges_create(task_t task, task_restartable_range_t *ranges,
264     uint32_t count, struct restartable_ranges **rr_storage)
265 {
266 	struct restartable_ranges *rr, *rr_found, *rr_base;
267 	queue_head_t *head;
268 	uint32_t base_count, total_count;
269 	size_t base_size, size;
270 	kern_return_t kr;
271 
272 	rr_base = *rr_storage;
273 	base_count = rr_base ? rr_base->rr_count : 0;
274 	base_size = sizeof(task_restartable_range_t) * base_count;
275 	size = sizeof(task_restartable_range_t) * count;
276 
277 	if (os_add_overflow(base_count, count, &total_count)) {
278 		return KERN_INVALID_ARGUMENT;
279 	}
280 	if (total_count > RR_RANGES_MAX) {
281 		return KERN_RESOURCE_SHORTAGE;
282 	}
283 
284 	rr = kalloc_type(struct restartable_ranges,
285 	    (zalloc_flags_t) (Z_WAITOK | Z_ZERO | Z_NOFAIL));
286 
287 	queue_chain_init(rr->rr_link);
288 	os_ref_init(&rr->rr_ref, NULL);
289 	rr->rr_count = total_count;
290 	if (base_size) {
291 		memcpy(rr->rr_ranges, rr_base->rr_ranges, base_size);
292 	}
293 	memcpy(rr->rr_ranges + base_count, ranges, size);
294 	kr = _ranges_validate(task, rr->rr_ranges, total_count);
295 	if (kr) {
296 		_restartable_ranges_dispose(rr, false);
297 		return kr;
298 	}
299 	rr->rr_hash = os_hash_jenkins(rr->rr_ranges,
300 	    rr->rr_count * sizeof(task_restartable_range_t));
301 
302 	head = &rr_hash[rr->rr_hash % RR_HASH_SIZE];
303 
304 	rr_lock();
305 	queue_iterate(head, rr_found, struct restartable_ranges *, rr_link) {
306 		if (_restartable_ranges_equals(rr, rr_found) &&
307 		os_ref_retain_try(&rr_found->rr_ref)) {
308 			goto found;
309 		}
310 	}
311 
312 	enqueue_tail(head, &rr->rr_link);
313 	rr_found = rr;
314 
315 found:
316 	if (rr_base && os_ref_release_relaxed(&rr_base->rr_ref) == 0) {
317 		remqueue(&rr_base->rr_link);
318 	} else {
319 		rr_base = NULL;
320 	}
321 	rr_unlock();
322 
323 	*rr_storage = rr_found;
324 
325 	if (rr_found != rr) {
326 		_restartable_ranges_dispose(rr, false);
327 	}
328 	if (rr_base) {
329 		_restartable_ranges_dispose(rr_base, false);
330 	}
331 	return KERN_SUCCESS;
332 }
333 
334 #pragma mark extern interfaces
335 
336 void
restartable_ranges_release(struct restartable_ranges * rr)337 restartable_ranges_release(struct restartable_ranges *rr)
338 {
339 	if (os_ref_release_relaxed(&rr->rr_ref) == 0) {
340 		_restartable_ranges_dispose(rr, true);
341 	}
342 }
343 
344 void
thread_reset_pcs_ast(task_t task,thread_t thread)345 thread_reset_pcs_ast(task_t task, thread_t thread)
346 {
347 	struct restartable_ranges *rr;
348 	mach_vm_address_t pc;
349 
350 	/*
351 	 * Because restartable_ranges are set while the task only has on thread
352 	 * and can't be mutated outside of this, no lock is required to read this.
353 	 */
354 	rr = task->restartable_ranges;
355 	if (thread->active && rr) {
356 		pc = _ranges_lookup(rr, machine_thread_pc(thread));
357 
358 		if (pc) {
359 			machine_thread_reset_pc(thread, pc);
360 		}
361 	}
362 }
363 
364 void
restartable_init(void)365 restartable_init(void)
366 {
367 	for (size_t i = 0; i < RR_HASH_SIZE; i++) {
368 		queue_head_init(rr_hash[i]);
369 	}
370 }
371 
372 #pragma mark MiG interfaces
373 
374 kern_return_t
task_restartable_ranges_register(task_t task,task_restartable_range_t * ranges,mach_msg_type_number_t count)375 task_restartable_ranges_register(
376 	task_t                    task,
377 	task_restartable_range_t *ranges,
378 	mach_msg_type_number_t    count)
379 {
380 	kern_return_t kr;
381 	thread_t th;
382 
383 	if (task != current_task()) {
384 		return KERN_FAILURE;
385 	}
386 
387 
388 	kr = _ranges_validate(task, ranges, count);
389 
390 	if (kr == KERN_SUCCESS) {
391 		task_lock(task);
392 
393 		queue_iterate(&task->threads, th, thread_t, task_threads) {
394 			if (th != current_thread()) {
395 				kr = KERN_NOT_SUPPORTED;
396 				break;
397 			}
398 		}
399 #if !DEBUG && !DEVELOPMENT
400 		/*
401 		 * For security reasons, on release kernels, only allow for this to be
402 		 * configured once.
403 		 *
404 		 * But to be able to test the feature we need to relax this for
405 		 * dev kernels.
406 		 */
407 		if (task->restartable_ranges) {
408 			kr = KERN_NOT_SUPPORTED;
409 		}
410 #endif
411 		if (kr == KERN_SUCCESS) {
412 			kr = _restartable_ranges_create(task, ranges, count,
413 			    &task->restartable_ranges);
414 		}
415 		task_unlock(task);
416 	}
417 
418 	return kr;
419 }
420 
421 kern_return_t
task_restartable_ranges_synchronize(task_t task)422 task_restartable_ranges_synchronize(task_t task)
423 {
424 	ast_gen_t gens[MAX_CPUS] = {0};
425 	thread_pri_floor_t token;
426 	thread_t thread;
427 
428 	if (task != current_task()) {
429 		return KERN_FAILURE;
430 	}
431 
432 	/*
433 	 * When initiating a GC, artificially raise the priority for the
434 	 * duration of sending ASTs, we want to be preemptible, but this
435 	 * sequence has to terminate in a timely fashion.
436 	 */
437 	token = thread_priority_floor_start();
438 
439 	task_lock(task);
440 
441 	if (task->restartable_ranges) {
442 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
443 			if (thread != current_thread()) {
444 				act_set_ast_reset_pcs(thread, gens);
445 			}
446 		}
447 	}
448 
449 	task_unlock(task);
450 
451 	/*
452 	 * For any core that was running one of our threads,
453 	 * make sure the AST has been acknowledged.
454 	 *
455 	 * threads that aren't on core can't fail to see the AST
456 	 * we set, because act_set_ast_reset_pcs() takes
457 	 * the thread_lock() and so does the scheduler.
458 	 */
459 	ast_generation_wait(gens);
460 
461 	thread_priority_floor_end(&token);
462 
463 	return KERN_SUCCESS;
464 }
465