1 /*
2 * Copyright (c) 2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/sysctl.h>
30
31 #include <kern/cpu_data.h>
32
33 #if __arm64__
34 #include <arm/machine_routines.h>
35 #endif /* __arm64__ */
36
37 #if CONFIG_DEBUG_SYSCALL_REJECTION
38
39 #include <mach/mach_time.h>
40
41 #include <kern/bits.h>
42 #include <kern/clock.h>
43 #include <kern/exc_guard.h>
44 #include <kern/exception.h>
45 #include <kern/kalloc.h>
46 #include <kern/simple_lock.h>
47 #include <kern/startup.h>
48 #include <kern/syscall_sw.h>
49 #include <kern/task.h>
50
51 #include <pexpert/pexpert.h>
52
53 #include <sys/syscall.h>
54 #include <sys/sysent.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
57 #include <sys/user.h>
58 #include <sys/variant_internal.h>
59 #include <sys/reason.h>
60
61 #include <sys/kern_debug.h>
62
63 #define SYSCALL_REJECTION_MODE_IGNORE 0
64 #define SYSCALL_REJECTION_MODE_GUARD 1
65 #define SYSCALL_REJECTION_MODE_CRASH 2
66
67 TUNABLE_WRITEABLE(int, debug_syscall_rejection_mode, "syscall_rejection_mode",
68 #if DEVELOPMENT || DEBUG
69 SYSCALL_REJECTION_MODE_GUARD
70 #else
71 SYSCALL_REJECTION_MODE_IGNORE
72 #endif
73 );
74
75 static int
sysctl_debug_syscall_rejection_mode(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)76 sysctl_debug_syscall_rejection_mode(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
77 struct sysctl_req *req)
78 {
79 int error, changed;
80 int value = *(int *) arg1;
81
82 if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
83 return ENOTSUP;
84 }
85
86 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
87 if (!error && changed) {
88 debug_syscall_rejection_mode = value;
89 }
90 return error;
91 }
92
93 void
reset_debug_syscall_rejection_mode(void)94 reset_debug_syscall_rejection_mode(void)
95 {
96 if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
97 debug_syscall_rejection_mode = 0;
98 }
99 }
100
101 SYSCTL_PROC(_kern, OID_AUTO, debug_syscall_rejection_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
102 &debug_syscall_rejection_mode, 0, sysctl_debug_syscall_rejection_mode, "I", "0: ignore, 1: non-fatal, 2: crash");
103
104
105 static size_t const predefined_masks = 2; // 0: null mask (all 0), 1: all mask (all 1)
106
107 /*
108 * The number of masks is derived from the mask selector width:
109 *
110 * A selector is just made of an index into syscall_rejection_masks,
111 * with the exception of the highest bit, which indicates whether the
112 * mask is to be added as an "allow" mask or a "deny" mask.
113 * Additionally, predefined masks don't actually have storage and are
114 * handled specially, so syscall_rejection_masks starts with the first
115 * non-predefined mask (and is sized appropriately).
116 */
117 static size_t const syscall_rejection_mask_count = SYSCALL_REJECTION_SELECTOR_MASK_COUNT - predefined_masks;
118 static syscall_rejection_mask_t syscall_rejection_masks[syscall_rejection_mask_count];
119
120 #define SR_MASK_SIZE (BITMAP_SIZE(mach_trap_count + nsysent))
121
122 static LCK_GRP_DECLARE(syscall_rejection_lck_grp, "syscall rejection lock");
123 static LCK_MTX_DECLARE(syscall_rejection_mtx, &syscall_rejection_lck_grp);
124
125 bool
debug_syscall_rejection_handle(int syscall_mach_trap_number)126 debug_syscall_rejection_handle(int syscall_mach_trap_number)
127 {
128 uthread_t ut = current_uthread();
129 uint64_t const flags = ut->syscall_rejection_flags;
130 bool fatal = (bool)(flags & SYSCALL_REJECTION_FLAGS_FORCE_FATAL);
131
132 switch (debug_syscall_rejection_mode) {
133 case SYSCALL_REJECTION_MODE_IGNORE:
134 if (!fatal) {
135 /* ignore */
136 break;
137 }
138 OS_FALLTHROUGH;
139 case SYSCALL_REJECTION_MODE_CRASH:
140 fatal = true;
141 OS_FALLTHROUGH;
142 case SYSCALL_REJECTION_MODE_GUARD: {
143 if (flags & SYSCALL_REJECTION_FLAGS_ONCE) {
144 int const number = syscall_mach_trap_number < 0 ? -syscall_mach_trap_number : (mach_trap_count + syscall_mach_trap_number);
145
146 // don't trip on this system call again
147 bitmap_set(ut->syscall_rejection_mask, number);
148 bitmap_set(ut->syscall_rejection_once_mask, number);
149 }
150
151 mach_exception_code_t code = 0;
152 EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_REJECTED_SC);
153 EXC_GUARD_ENCODE_FLAVOR(code, 0);
154 EXC_GUARD_ENCODE_TARGET(code, syscall_mach_trap_number < 0);
155 mach_exception_subcode_t subcode =
156 syscall_mach_trap_number < 0 ? -syscall_mach_trap_number : syscall_mach_trap_number;
157
158 if (!fatal) {
159 task_violated_guard(code, subcode, NULL, TRUE);
160 } else {
161 thread_guard_violation(current_thread(), code, subcode, fatal);
162 }
163 break;
164 };
165 default:
166 /* ignore */
167 ;
168 }
169 return fatal;
170 }
171
172 void
rejected_syscall_guard_ast(thread_t t,mach_exception_data_type_t code,mach_exception_data_type_t subcode)173 rejected_syscall_guard_ast(
174 thread_t t,
175 mach_exception_data_type_t code,
176 mach_exception_data_type_t subcode)
177 {
178 const bool fatal = true;
179 /*
180 * Check if anyone has registered for Synchronous EXC_GUARD, if yes then,
181 * deliver it synchronously and then kill the process, else kill the process
182 * and deliver the exception via EXC_CORPSE_NOTIFY. Always kill the process if we are not in dev mode.
183 */
184
185 int flags = PX_DEBUG_NO_HONOR;
186 exception_info_t info = {
187 .os_reason = OS_REASON_GUARD,
188 .exception_type = EXC_GUARD,
189 .mx_code = code,
190 .mx_subcode = subcode,
191 };
192
193 if (task_exception_notify(EXC_GUARD, code, subcode, fatal) == KERN_SUCCESS) {
194 psignal_uthread(t, SIGSYS);
195 } else {
196 exit_with_mach_exception(current_proc(), info, flags);
197 }
198 }
199
200
201 static void
_syscall_rejection_apply_mask(syscall_rejection_mask_t dest,const syscall_rejection_mask_t src,bool apply_as_allow)202 _syscall_rejection_apply_mask(syscall_rejection_mask_t dest, const syscall_rejection_mask_t src, bool apply_as_allow)
203 {
204 assert(dest != NULL);
205 assert(src != NULL);
206
207 if (apply_as_allow) {
208 bitmap_or(dest, dest, src, mach_trap_count + nsysent);
209 } else {
210 bitmap_and_not(dest, dest, src, mach_trap_count + nsysent);
211 }
212 }
213
214 /*
215 * The masks to apply are passed to the kernel as packed selectors,
216 * which are just however many of the selector data type fit into one
217 * (or more) fields of the natural word size (i.e. a register). This
218 * avoids copying from user space.
219 *
220 * More specifically, at the time of this writing, a selector is 7
221 * bits wide, and there are two uint64_t arguments
222 * (args->packed_selectors<n>), so up to 18 selectors can be
223 * specified, which are then stuffed into the 128 bits of the
224 * arguments. If less than 18 masks are requested to be applied, the
225 * remaining selectors will just be left as 0, which naturally
226 * resolves as the "empty" or "NULL" mask that changes nothing.
227 *
228 * The libsyscall wrapper provides a more convenient interface where
229 * an array (up to 18 elements long) and its length are passed in,
230 * which the wrapper then packs into packed_selectors of the actual
231 * system call.
232 */
233
234 int
sys_debug_syscall_reject_config(struct proc * p __unused,struct debug_syscall_reject_config_args * args,int * retval)235 sys_debug_syscall_reject_config(struct proc *p __unused, struct debug_syscall_reject_config_args *args, int *retval)
236 {
237 int error = 0;
238
239 *retval = 0;
240
241 uthread_t ut = current_uthread();
242
243 bitmap_t mask[SR_MASK_SIZE / sizeof(bitmap_t)];
244 // syscall rejection masks are always reset to "deny all"
245 memset(mask, 0, SR_MASK_SIZE);
246
247 lck_mtx_lock(&syscall_rejection_mtx);
248
249 for (int i = 0;
250 i + SYSCALL_REJECTION_SELECTOR_BITS < (sizeof(args->packed_selectors1) + sizeof(args->packed_selectors2)) * 8;
251 i += SYSCALL_REJECTION_SELECTOR_BITS) {
252 #define s_left_shift(x, n) ((n) < 0 ? ((x) >> -(n)) : ((x) << (n)))
253
254 syscall_rejection_selector_t const selector = (syscall_rejection_selector_t)
255 (((i < 64 ? (args->packed_selectors1 >> i) : 0) |
256 (i > 64 - SYSCALL_REJECTION_SELECTOR_BITS ? s_left_shift(args->packed_selectors2, 64 - i) : 0)) & SYSCALL_REJECTION_SELECTOR_MASK);
257 bool const is_allow_mask = selector & SYSCALL_REJECTION_IS_ALLOW_MASK;
258 int const mask_index = selector & SYSCALL_REJECTION_INDEX_MASK;
259
260 if (mask_index == SYSCALL_REJECTION_NULL) {
261 // mask 0 is always empty (nothing to apply)
262 continue;
263 }
264
265 if (mask_index == SYSCALL_REJECTION_ALL) {
266 // mask 1 is always full (overrides everything)
267 memset(mask, is_allow_mask ? 0xff : 0x00, SR_MASK_SIZE);
268 continue;
269 }
270
271 syscall_rejection_mask_t mask_to_apply = syscall_rejection_masks[mask_index - predefined_masks];
272
273 if (mask_to_apply == NULL) {
274 error = ENOENT;
275 goto out_locked;
276 }
277
278 _syscall_rejection_apply_mask(mask, mask_to_apply, is_allow_mask);
279 }
280
281 /* Not RT-safe, but only necessary once. */
282 if (ut->syscall_rejection_mask == NULL) {
283 ut->syscall_rejection_mask = kalloc_data(SR_MASK_SIZE, Z_WAITOK);
284
285 if (ut->syscall_rejection_mask == NULL) {
286 error = ENOMEM;
287 goto out_locked;
288 }
289 }
290
291 memcpy(ut->syscall_rejection_mask, mask, SR_MASK_SIZE);
292
293 if ((args->flags & SYSCALL_REJECTION_FLAGS_ONCE)) {
294 if (ut->syscall_rejection_once_mask == NULL) {
295 ut->syscall_rejection_once_mask = kalloc_data(SR_MASK_SIZE, Z_WAITOK);
296
297 if (ut->syscall_rejection_once_mask == NULL) {
298 kfree_data(ut->syscall_rejection_mask, SR_MASK_SIZE);
299 ut->syscall_rejection_mask = NULL;
300 error = ENOMEM;
301 goto out_locked;
302 }
303
304 memset(ut->syscall_rejection_once_mask, 0, SR_MASK_SIZE);
305 } else {
306 // prevent the already hit syscalls from hitting again.
307 bitmap_or(ut->syscall_rejection_mask, ut->syscall_rejection_mask, ut->syscall_rejection_once_mask, mach_trap_count + nsysent);
308 }
309 }
310
311 out_locked:
312 lck_mtx_unlock(&syscall_rejection_mtx);
313
314 if (error == 0) {
315 ut->syscall_rejection_flags = args->flags;
316 }
317
318 if (error == ENOENT && debug_syscall_rejection_mode == SYSCALL_REJECTION_MODE_IGNORE) {
319 /* Existing code may rely on the system call failing
320 * gracefully if syscall rejection is currently off. */
321 error = 0;
322 }
323
324 return error;
325 }
326
327 /*
328 * debug_syscall_reject
329 *
330 * Compatibility interface to the old form of the system call.
331 */
332 int
debug_syscall_reject(struct proc * p,struct debug_syscall_reject_args * args,int * retval)333 debug_syscall_reject(struct proc *p, struct debug_syscall_reject_args *args, int *retval)
334 {
335 struct debug_syscall_reject_config_args new_args;
336
337 bzero(&new_args, sizeof(new_args));
338 new_args.packed_selectors1 = args->packed_selectors;
339 // packed_selectors2 left empty
340 new_args.flags = SYSCALL_REJECTION_FLAGS_DEFAULT;
341
342 return sys_debug_syscall_reject_config(p, &new_args, retval);
343 }
344
345
346 static bool
_syscall_rejection_add(syscall_rejection_mask_t dst,char const * name)347 _syscall_rejection_add(syscall_rejection_mask_t dst, char const *name)
348 {
349 /*
350 * Yes, this function is O(n+m), making the whole act of setting a
351 * mask O(l*(n+m)), but defining masks is done rarely enough (and
352 * i, n and m small enough) for this to not matter.
353 */
354
355 for (int i = 0; i < mach_trap_count; i++) {
356 if (strcmp(mach_syscall_name_table[i], name) == 0) {
357 bitmap_set(dst, i);
358 return true;
359 }
360 }
361
362 extern char const *syscallnames[];
363
364 for (int i = 0; i < nsysent; i++) {
365 if (strcmp(syscallnames[i], name) == 0) {
366 bitmap_set(dst, i + mach_trap_count);
367 return true;
368 }
369 }
370
371 printf("%s: trying to add non-existing syscall/mach trap '%s'\n", __func__, name);
372 return false;
373 }
374
375 /* Pretty much arbitrary, we just don't want userspace to pass
376 * unreasonably large buffers to parse. */
377 static size_t const max_input_size = 16 * PAGE_MAX_SIZE;
378
379 static int
_sysctl_debug_syscall_rejection_masks(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)380 _sysctl_debug_syscall_rejection_masks(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
381 struct sysctl_req *req)
382 {
383 size_t const max_name_len = 128;
384 char name[max_name_len];
385
386 if (req->newptr == 0) {
387 return 0;
388 }
389
390 if (req->newlen > max_input_size) {
391 return E2BIG;
392 }
393
394 size_t const len = req->newlen;
395 char *buf = kalloc_data(len + 1, Z_WAITOK);
396
397 if (buf == NULL) {
398 return ENOMEM;
399 }
400
401 /*
402 * sysctl_io_string always copies out the given buffer as the
403 * "old" value if requested. We could construct a text
404 * representation of existing masks, but this is not particularly
405 * interesting, so we just return the dummy string "<masks>".
406 */
407 strlcpy(buf, "<masks>", len + 1);
408 int changed = 0;
409 int error = sysctl_io_string(req, buf, len + 1, 0, &changed);
410
411 if (error != 0 || !changed) {
412 goto out;
413 }
414
415 char const *p = buf;
416
417 int id = 0;
418 int l = 0;
419 int n = sscanf(p, "%i: %n", &id, &l);
420
421 if (n != 1 || id < predefined_masks || id > syscall_rejection_mask_count + predefined_masks) {
422 printf("%s: invalid mask id %i (or conversion failed)\n", __FUNCTION__, id);
423 error = EINVAL;
424 goto out;
425 }
426
427 p += l;
428
429 syscall_rejection_mask_t new_mask = kalloc_data(SR_MASK_SIZE,
430 Z_WAITOK | Z_ZERO);
431 if (new_mask == NULL) {
432 printf("%s: allocating new mask for id %i failed\n", __FUNCTION__, id);
433 error = ENOMEM;
434 goto out;
435 }
436
437 error = 0;
438
439 while (p < buf + len && *p != 0) {
440 name[0] = 0;
441 n = sscanf(p, "%127s %n", name, &l);
442 if (n != 1 || name[0] == 0) {
443 error = EINVAL;
444 kfree_data(new_mask, SR_MASK_SIZE);
445 goto out;
446 }
447
448 if (!_syscall_rejection_add(new_mask, name)) {
449 error = ENOENT;
450 kfree_data(new_mask, SR_MASK_SIZE);
451 goto out;
452 }
453
454 p += l;
455 }
456
457
458 syscall_rejection_mask_t to_free = NULL;
459
460 lck_mtx_lock(&syscall_rejection_mtx);
461
462 syscall_rejection_mask_t *target_mask = &syscall_rejection_masks[id - predefined_masks];
463
464 to_free = *target_mask;
465 *target_mask = new_mask;
466
467 lck_mtx_unlock(&syscall_rejection_mtx);
468
469 kfree_data(to_free, SR_MASK_SIZE);
470 out:
471
472 kfree_data(buf, len + 1);
473 return error;
474 }
475
476 SYSCTL_PROC(_kern, OID_AUTO, syscall_rejection_masks, CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
477 0, 0, _sysctl_debug_syscall_rejection_masks, "A", "system call rejection masks");
478
479 #else /* CONFIG_DEBUG_SYSCALL_REJECTION */
480
481 #include <sys/kern_debug.h>
482
483 int
sys_debug_syscall_reject_config(struct proc * __unused p,struct debug_syscall_reject_config_args * __unused args,int __unused * ret)484 sys_debug_syscall_reject_config(struct proc * __unused p, struct debug_syscall_reject_config_args * __unused args, int __unused *ret)
485 {
486 /* not supported. */
487 return ENOTSUP;
488 }
489
490 int
debug_syscall_reject(struct proc * __unused p,struct debug_syscall_reject_args * __unused args,int * __unused retval)491 debug_syscall_reject(struct proc * __unused p, struct debug_syscall_reject_args * __unused args, int * __unused retval)
492 {
493 /* not supported. */
494 return ENOTSUP;
495 }
496
/*
 * Stub used when CONFIG_DEBUG_SYSCALL_REJECTION is off: there is no
 * rejection mode to reset, so this does nothing.
 */
void
reset_debug_syscall_rejection_mode(void)
{
	/* Not supported: intentionally empty. */
}
502
503 #endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
504
505 #if __arm64__ && (DEBUG || DEVELOPMENT)
506
/*
 * Busy-wait for the given number of nanoseconds, measured with
 * mach_absolute_time().
 *
 * The loop compares the elapsed time against the requested duration rather
 * than comparing "now" against (start + duration): the sum can wrap around
 * for large durations, which would end the spin immediately.
 */
static void
_spinfor(uint64_t nanoseconds)
{
	uint64_t duration = 0;
	nanoseconds_to_absolutetime(nanoseconds, &duration);

	uint64_t const start = mach_absolute_time();

	while (mach_absolute_time() - start < duration) {
		// Spinning.
	}
}
519
520 static int
_sysctl_debug_disable_interrupts_test(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)521 _sysctl_debug_disable_interrupts_test(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
522 struct sysctl_req *req)
523 {
524 int error = 0;
525
526 if (req->newptr == 0) {
527 goto out;
528 }
529
530 uint64_t val = 0;
531 error = sysctl_io_number(req, 0, sizeof(val), &val, NULL);
532
533 if (error != 0 || val == 0) {
534 goto out;
535 }
536
537 boolean_t istate = ml_set_interrupts_enabled(false);
538 _spinfor(val);
539 ml_set_interrupts_enabled(istate);
540
541 out:
542 return error;
543 }
544
545 static int
_sysctl_debug_disable_preemption_test(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)546 _sysctl_debug_disable_preemption_test(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
547 struct sysctl_req *req)
548 {
549 int error = 0;
550
551 if (req->newptr == 0) {
552 goto out;
553 }
554
555 uint64_t val = 0;
556 error = sysctl_io_number(req, 0, sizeof(val), &val, NULL);
557
558 if (error != 0 || val == 0) {
559 goto out;
560 }
561
562 disable_preemption();
563 _spinfor(val);
564 enable_preemption();
565
566 out:
567 return error;
568 }
569
570 SYSCTL_PROC(_kern, OID_AUTO, debug_disable_interrupts_test, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
571 0, 0, _sysctl_debug_disable_interrupts_test, "Q", "disable interrupts for specified number of nanoseconds, for testing");
572
573 SYSCTL_PROC(_kern, OID_AUTO, debug_disable_preemption_test, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
574 0, 0, _sysctl_debug_disable_preemption_test, "Q", "disable preemption for specified number of nanoseconds, for testing");
575
576 #endif /* __arm64__ && (DEBUG || DEVELOPMENT) */
577