1 /*
2 * Copyright (c) 2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/task.h>
31
32 #include <kern/ast.h>
33 #include <kern/kalloc.h>
34 #include <kern/kern_types.h>
35 #include <kern/mach_param.h>
36 #include <kern/machine.h>
37 #include <kern/misc_protos.h>
38 #include <kern/processor.h>
39 #include <kern/queue.h>
40 #include <kern/restartable.h>
41 #include <kern/task.h>
42 #include <kern/thread.h>
43 #include <kern/waitq.h>
44
45 #include <os/hash.h>
46 #include <os/refcnt.h>
47
48 /**
49 * @file osfmk/kern/restartable.c
50 *
51 * @brief
52 * This module implements restartable userspace functions.
53 *
54 * @discussion
 * task_restartable_ranges_register() allows a task to configure
 * its restartable ranges. This can be done only once per task,
 * and only before the task has created its second thread.
58 *
59 * task_restartable_ranges_synchronize() can later be used to trigger
60 * restarts for threads with a PC in a restartable region.
61 *
62 * It is implemented with an AST (AST_RESET_PCS) that will cause threads
63 * as they return to userspace to reset PCs in a restartable region
64 * to the recovery offset of this region.
65 *
66 * Because signal delivery would mask the proper saved PC for threads,
67 * sigreturn also forcefully sets the AST and will go through the logic
68 * every single time.
69 */
70
/* Comparison callback signature used by the in-kernel qsort(). */
typedef int (*cmpfunc_t)(const void *a, const void *b);
/* In-kernel qsort; declared here because no exported header provides it. */
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

/* Maximum number of ranges one restartable_ranges object can hold. */
#define RR_RANGES_MAX 64

/*
 * A refcounted, uniqued set of restartable PC ranges.
 *
 * Objects are interned in the global rr_hash table (see
 * _restartable_ranges_create()) so that identical registrations
 * share a single instance.
 */
struct restartable_ranges {
	queue_chain_t rr_link;   /* linkage in the rr_hash bucket (rr_spinlock) */
	os_refcnt_t rr_ref;      /* references; last release unhashes and frees */
	uint32_t rr_count;       /* number of valid entries in rr_ranges */
	uint32_t rr_hash;        /* Jenkins hash of rr_ranges, for uniquing */
	task_restartable_range_t rr_ranges[RR_RANGES_MAX]; /* sorted by location */
};
82
83 #if DEBUG || DEVELOPMENT
84 #define RR_HASH_SIZE 256
85 #else
86 // Release kernel userspace should have shared caches and a single registration
87 #define RR_HASH_SIZE 16
88 #endif
89
90 static queue_head_t rr_hash[RR_HASH_SIZE];
91 LCK_GRP_DECLARE(rr_lock_grp, "restartable ranges");
92 LCK_SPIN_DECLARE(rr_spinlock, &rr_lock_grp);
93
94 #define rr_lock() lck_spin_lock_grp(&rr_spinlock, &rr_lock_grp)
95 #define rr_unlock() lck_spin_unlock(&rr_spinlock);
96
97 #pragma mark internals
98
99 /**
100 * @function _ranges_cmp
101 *
102 * @brief
103 * Compares two ranges together.
104 */
105 static int
_ranges_cmp(const void * _r1,const void * _r2)106 _ranges_cmp(const void *_r1, const void *_r2)
107 {
108 const task_restartable_range_t *r1 = _r1;
109 const task_restartable_range_t *r2 = _r2;
110
111 if (r1->location != r2->location) {
112 return r1->location < r2->location ? -1 : 1;
113 }
114 if (r1->length == r2->length) {
115 return 0;
116 }
117 return r1->length < r2->length ? -1 : 1;
118 }
119
120 /**
121 * @function _ranges_validate
122 *
123 * @brief
124 * Validates an array of PC ranges for wraps and intersections.
125 *
126 * @discussion
127 * This sorts and modifies the input.
128 *
129 * The ranges must:
130 * - not wrap around,
131 * - have a length/recovery offset within a page of the range start
132 *
133 * @returns
134 * - KERN_SUCCESS: ranges are valid
135 * - KERN_INVALID_ARGUMENT: ranges are invalid
136 */
137 static kern_return_t
_ranges_validate(task_t task,task_restartable_range_t * ranges,uint32_t count)138 _ranges_validate(task_t task, task_restartable_range_t *ranges, uint32_t count)
139 {
140 qsort(ranges, count, sizeof(task_restartable_range_t), _ranges_cmp);
141 uint64_t limit = task_has_64Bit_data(task) ? UINT64_MAX : UINT32_MAX;
142 uint64_t end, recovery;
143
144 if (count == 0) {
145 return KERN_INVALID_ARGUMENT;
146 }
147
148 for (size_t i = 0; i < count; i++) {
149 if (ranges[i].length > TASK_RESTARTABLE_OFFSET_MAX ||
150 ranges[i].recovery_offs > TASK_RESTARTABLE_OFFSET_MAX) {
151 return KERN_INVALID_ARGUMENT;
152 }
153 if (ranges[i].flags) {
154 return KERN_INVALID_ARGUMENT;
155 }
156 if (os_add_overflow(ranges[i].location, ranges[i].length, &end)) {
157 return KERN_INVALID_ARGUMENT;
158 }
159 if (os_add_overflow(ranges[i].location, ranges[i].recovery_offs, &recovery)) {
160 return KERN_INVALID_ARGUMENT;
161 }
162 if (ranges[i].location > limit || end > limit || recovery > limit) {
163 return KERN_INVALID_ARGUMENT;
164 }
165 if (i + 1 < count && end > ranges[i + 1].location) {
166 return KERN_INVALID_ARGUMENT;
167 }
168 }
169
170 return KERN_SUCCESS;
171 }
172
/**
 * @function _ranges_lookup
 *
 * @brief
 * Lookup the recovery address for a given PC within a set of ranges.
 *
 * @discussion
 * Requires @c rr->rr_count >= 1 and @c rr_ranges sorted by location;
 * both are guaranteed by _ranges_validate() which every registration
 * goes through before the object is published.
 *
 * Boundary PCs are deliberately excluded: a PC exactly at a range's
 * start has not executed any of it yet, and a PC exactly at its end
 * has already left it — neither needs a reset.
 *
 * @returns
 * - 0: no PC range found
 * - the recovery address (location + recovery_offs) of the match.
 */
__attribute__((always_inline))
static mach_vm_address_t
_ranges_lookup(struct restartable_ranges *rr, mach_vm_address_t pc)
{
	task_restartable_range_t *ranges = rr->rr_ranges;
	uint32_t l = 0, r = rr->rr_count;

	/* Fast path: PC outside the span covered by all ranges. */
	if (pc <= ranges[0].location) {
		return 0;
	}
	if (pc >= ranges[r - 1].location + ranges[r - 1].length) {
		return 0;
	}

	/* Binary search for a range with location < pc < location + length. */
	while (l < r) {
		uint32_t i = (r + l) / 2;
		mach_vm_address_t location = ranges[i].location;

		if (pc <= location) {
			/* if the PC is exactly at pc_start, no reset is needed */
			r = i;
		} else if (location + ranges[i].length <= pc) {
			/* if the PC is exactly at the end, it's out of the function */
			l = i + 1;
		} else {
			/* else it's strictly in the range, return the recovery pc */
			return location + ranges[i].recovery_offs;
		}
	}

	return 0;
}
215
216 /**
217 * @function _restartable_ranges_dispose
218 *
219 * @brief
220 * Helper to dispose of a range that has reached a 0 refcount.
221 */
222 __attribute__((noinline))
223 static void
_restartable_ranges_dispose(struct restartable_ranges * rr,bool hash_remove)224 _restartable_ranges_dispose(struct restartable_ranges *rr, bool hash_remove)
225 {
226 if (hash_remove) {
227 rr_lock();
228 remqueue(&rr->rr_link);
229 rr_unlock();
230 }
231 kfree_type(struct restartable_ranges, rr);
232 }
233
234 /**
235 * @function _restartable_ranges_equals
236 *
237 * @brief
238 * Helper to compare two restartable ranges.
239 */
240 static bool
_restartable_ranges_equals(const struct restartable_ranges * rr1,const struct restartable_ranges * rr2)241 _restartable_ranges_equals(
242 const struct restartable_ranges *rr1,
243 const struct restartable_ranges *rr2)
244 {
245 size_t rr1_size = rr1->rr_count * sizeof(task_restartable_range_t);
246 return rr1->rr_hash == rr2->rr_hash &&
247 rr1->rr_count == rr2->rr_count &&
248 memcmp(rr1->rr_ranges, rr2->rr_ranges, rr1_size) == 0;
249 }
250
/**
 * @function _restartable_ranges_create
 *
 * @brief
 * Helper to create a uniqued restartable range.
 *
 * @discussion
 * Concatenates the ranges already held in @c *rr_storage (if any) with
 * @c ranges, validates the combined set, and interns it in the global
 * @c rr_hash table so that identical registrations share one refcounted
 * object. On success, @c *rr_storage is replaced with the (possibly
 * shared) result and the previous object loses one reference.
 *
 * @returns
 * - KERN_SUCCESS
 * - KERN_INVALID_ARGUMENT: the validation of the new ranges failed.
 * - KERN_RESOURCE_SHORTAGE: too many ranges, out of memory
 */
static kern_return_t
_restartable_ranges_create(task_t task, task_restartable_range_t *ranges,
    uint32_t count, struct restartable_ranges **rr_storage)
{
	struct restartable_ranges *rr, *rr_found, *rr_base;
	queue_head_t *head;
	uint32_t base_count, total_count;
	size_t base_size, size;
	kern_return_t kr;

	rr_base = *rr_storage;
	base_count = rr_base ? rr_base->rr_count : 0;
	base_size = sizeof(task_restartable_range_t) * base_count;
	size = sizeof(task_restartable_range_t) * count;

	/* The combined (existing + new) set must fit in one object. */
	if (os_add_overflow(base_count, count, &total_count)) {
		return KERN_INVALID_ARGUMENT;
	}
	if (total_count > RR_RANGES_MAX) {
		return KERN_RESOURCE_SHORTAGE;
	}

	rr = kalloc_type(struct restartable_ranges,
	    (zalloc_flags_t) (Z_WAITOK | Z_ZERO | Z_NOFAIL));

	queue_chain_init(rr->rr_link);
	os_ref_init(&rr->rr_ref, NULL);
	rr->rr_count = total_count;
	/* Copy the previously registered ranges, then append the new ones. */
	if (base_size) {
		memcpy(rr->rr_ranges, rr_base->rr_ranges, base_size);
	}
	memcpy(rr->rr_ranges + base_count, ranges, size);
	/* _ranges_validate() sorts rr_ranges; the hash is computed after. */
	kr = _ranges_validate(task, rr->rr_ranges, total_count);
	if (kr) {
		/* never published in the hash: dispose without unhashing */
		_restartable_ranges_dispose(rr, false);
		return kr;
	}
	rr->rr_hash = os_hash_jenkins(rr->rr_ranges,
	    rr->rr_count * sizeof(task_restartable_range_t));

	head = &rr_hash[rr->rr_hash % RR_HASH_SIZE];

	rr_lock();
	/* Uniquing: reuse an identical published set, but only if we can
	 * win a reference before it hits zero and gets disposed. */
	queue_iterate(head, rr_found, struct restartable_ranges *, rr_link) {
		if (_restartable_ranges_equals(rr, rr_found) &&
		    os_ref_retain_try(&rr_found->rr_ref)) {
			goto found;
		}
	}

	/* No match: publish the freshly built object. */
	enqueue_tail(head, &rr->rr_link);
	rr_found = rr;

found:
	/* Drop the reference held via *rr_storage; if that was the last
	 * one, unhash the old object while still holding the lock. */
	if (rr_base && os_ref_release_relaxed(&rr_base->rr_ref) == 0) {
		remqueue(&rr_base->rr_link);
	} else {
		rr_base = NULL;
	}
	rr_unlock();

	*rr_storage = rr_found;

	if (rr_found != rr) {
		/* an identical set was reused: free our unpublished copy */
		_restartable_ranges_dispose(rr, false);
	}
	if (rr_base) {
		/* old object was already unhashed above; just free it */
		_restartable_ranges_dispose(rr_base, false);
	}
	return KERN_SUCCESS;
}
333
334 #pragma mark extern interfaces
335
336 void
restartable_ranges_release(struct restartable_ranges * rr)337 restartable_ranges_release(struct restartable_ranges *rr)
338 {
339 if (os_ref_release_relaxed(&rr->rr_ref) == 0) {
340 _restartable_ranges_dispose(rr, true);
341 }
342 }
343
344 void
thread_reset_pcs_ast(task_t task,thread_t thread)345 thread_reset_pcs_ast(task_t task, thread_t thread)
346 {
347 struct restartable_ranges *rr;
348 mach_vm_address_t pc;
349
350 /*
351 * Because restartable_ranges are set while the task only has on thread
352 * and can't be mutated outside of this, no lock is required to read this.
353 */
354 rr = task->restartable_ranges;
355 if (thread->active && rr) {
356 pc = _ranges_lookup(rr, machine_thread_pc(thread));
357
358 if (pc) {
359 machine_thread_reset_pc(thread, pc);
360 }
361 }
362 }
363
364 void
restartable_init(void)365 restartable_init(void)
366 {
367 for (size_t i = 0; i < RR_HASH_SIZE; i++) {
368 queue_head_init(rr_hash[i]);
369 }
370 }
371
372 #pragma mark MiG interfaces
373
/**
 * @function task_restartable_ranges_register
 *
 * @brief
 * MiG handler: registers a set of restartable ranges for the calling task.
 *
 * @discussion
 * Only allowed on the current task, and only while it is single-threaded.
 * On release kernels the registration may happen only once per task.
 *
 * @returns
 * - KERN_SUCCESS on success
 * - KERN_FAILURE when @c task is not the current task
 * - KERN_INVALID_ARGUMENT when the ranges fail validation
 * - KERN_NOT_SUPPORTED when the task has other threads, or (release
 *   kernels only) has already registered ranges
 * - KERN_RESOURCE_SHORTAGE when there are too many ranges
 */
kern_return_t
task_restartable_ranges_register(
	task_t task,
	task_restartable_range_t *ranges,
	mach_msg_type_number_t count)
{
	kern_return_t kr;
	thread_t th;

	/* tasks may only register ranges on themselves */
	if (task != current_task()) {
		return KERN_FAILURE;
	}

	kr = _ranges_validate(task, ranges, count);

	if (kr == KERN_SUCCESS) {
		task_lock(task);

		/* the task must still be single-threaded: any thread other
		 * than the caller makes the registration unsafe */
		queue_iterate(&task->threads, th, thread_t, task_threads) {
			if (th != current_thread()) {
				kr = KERN_NOT_SUPPORTED;
				break;
			}
		}
#if !DEBUG && !DEVELOPMENT
		/*
		 * For security reasons, on release kernels, only allow for this to be
		 * configured once.
		 *
		 * But to be able to test the feature we need to relax this for
		 * dev kernels.
		 */
		if (task->restartable_ranges) {
			kr = KERN_NOT_SUPPORTED;
		}
#endif
		if (kr == KERN_SUCCESS) {
			kr = _restartable_ranges_create(task, ranges, count,
			    &task->restartable_ranges);
		}
		task_unlock(task);
	}

	return kr;
}
420
/**
 * @function task_restartable_ranges_synchronize
 *
 * @brief
 * MiG handler: arranges for every other thread of the task whose PC is
 * inside a restartable range to restart at that range's recovery address.
 *
 * @discussion
 * Works by setting AST_RESET_PCS on the task's other threads, then
 * waiting for any on-core thread to acknowledge the AST.
 *
 * @returns
 * - KERN_SUCCESS on success
 * - KERN_FAILURE when @c task is not the current task
 */
kern_return_t
task_restartable_ranges_synchronize(task_t task)
{
	/* per-CPU AST generation counters filled by act_set_ast_reset_pcs() */
	ast_gen_t gens[MAX_CPUS] = {0};
	thread_pri_floor_t token;
	thread_t thread;

	/* tasks may only synchronize their own threads */
	if (task != current_task()) {
		return KERN_FAILURE;
	}

	/*
	 * When initiating a GC, artificially raise the priority for the
	 * duration of sending ASTs, we want to be preemptible, but this
	 * sequence has to terminate in a timely fashion.
	 */
	token = thread_priority_floor_start();

	task_lock(task);

	/* nothing to do unless ranges were registered */
	if (task->restartable_ranges) {
		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			/* the calling thread is in the kernel, not in a range */
			if (thread != current_thread()) {
				act_set_ast_reset_pcs(thread, gens);
			}
		}
	}

	task_unlock(task);

	/*
	 * For any core that was running one of our threads,
	 * make sure the AST has been acknowledged.
	 *
	 * threads that aren't on core can't fail to see the AST
	 * we set, because act_set_ast_reset_pcs() takes
	 * the thread_lock() and so does the scheduler.
	 */
	ast_generation_wait(gens);

	thread_priority_floor_end(&token);

	return KERN_SUCCESS;
}
465