xref: /xnu-10002.61.3/bsd/kern/kern_debug.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/sysctl.h>
30 
31 #include <kern/cpu_data.h>
32 
33 #if __arm64__
34 #include <arm/machine_routines.h>
35 #endif /* __arm64__ */
36 
37 #if CONFIG_DEBUG_SYSCALL_REJECTION
38 
39 #include <mach/mach_time.h>
40 
41 #include <kern/bits.h>
42 #include <kern/clock.h>
43 #include <kern/exc_guard.h>
44 #include <kern/exception.h>
45 #include <kern/kalloc.h>
46 #include <kern/simple_lock.h>
47 #include <kern/startup.h>
48 #include <kern/syscall_sw.h>
49 #include <kern/task.h>
50 
51 #include <pexpert/pexpert.h>
52 
53 #include <sys/syscall.h>
54 #include <sys/sysent.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
57 #include <sys/user.h>
58 #include <sys/variant_internal.h>
59 
60 #include <sys/kern_debug.h>
61 
62 #define SYSCALL_REJECTION_MODE_IGNORE   0
63 #define SYSCALL_REJECTION_MODE_GUARD    1
64 #define SYSCALL_REJECTION_MODE_CRASH    2
65 
66 TUNABLE_WRITEABLE(int, debug_syscall_rejection_mode, "syscall_rejection_mode",
67 #if DEVELOPMENT || DEBUG
68     SYSCALL_REJECTION_MODE_GUARD
69 #else
70     SYSCALL_REJECTION_MODE_IGNORE
71 #endif
72     );
73 
74 static int
sysctl_debug_syscall_rejection_mode(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)75 sysctl_debug_syscall_rejection_mode(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
76     struct sysctl_req *req)
77 {
78 	int error, changed;
79 	int value = *(int *) arg1;
80 
81 	if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
82 		return ENOTSUP;
83 	}
84 
85 	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
86 	if (!error && changed) {
87 		debug_syscall_rejection_mode = value;
88 	}
89 	return error;
90 }
91 
92 void
reset_debug_syscall_rejection_mode(void)93 reset_debug_syscall_rejection_mode(void)
94 {
95 	if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
96 		debug_syscall_rejection_mode = 0;
97 	}
98 }
99 
100 SYSCTL_PROC(_kern, OID_AUTO, debug_syscall_rejection_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
101     &debug_syscall_rejection_mode, 0, sysctl_debug_syscall_rejection_mode, "I", "0: ignore, 1: non-fatal, 2: crash");
102 
103 
104 static size_t const predefined_masks = 2; // 0: null mask (all 0), 1: all mask (all 1)
105 
106 /*
107  * The number of masks is derived from the mask selector width:
108  *
109  * A selector is just made of an index into syscall_rejection_masks,
110  * with the exception of the highest bit, which indicates whether the
111  * mask is to be added as an "allow" mask or a "deny" mask.
112  * Additionally, predefined masks don't actually have storage and are
113  * handled specially, so syscall_rejection_masks starts with the first
114  * non-predefined mask (and is sized appropriately).
115  */
116 static size_t const syscall_rejection_mask_count = SYSCALL_REJECTION_SELECTOR_MASK_COUNT - predefined_masks;
117 static syscall_rejection_mask_t syscall_rejection_masks[syscall_rejection_mask_count];
118 
119 #define SR_MASK_SIZE (BITMAP_SIZE(mach_trap_count + nsysent))
120 
121 static LCK_GRP_DECLARE(syscall_rejection_lck_grp, "syscall rejection lock");
122 static LCK_MTX_DECLARE(syscall_rejection_mtx, &syscall_rejection_lck_grp);
123 
124 bool
debug_syscall_rejection_handle(int syscall_mach_trap_number)125 debug_syscall_rejection_handle(int syscall_mach_trap_number)
126 {
127 	uthread_t ut = current_uthread();
128 	uint64_t const flags = ut->syscall_rejection_flags;
129 	bool fatal = (bool)(flags & SYSCALL_REJECTION_FLAGS_FORCE_FATAL);
130 
131 	switch (debug_syscall_rejection_mode) {
132 	case SYSCALL_REJECTION_MODE_IGNORE:
133 		if (!fatal) {
134 			/* ignore */
135 			break;
136 		}
137 		OS_FALLTHROUGH;
138 	case SYSCALL_REJECTION_MODE_CRASH:
139 		fatal = true;
140 		OS_FALLTHROUGH;
141 	case SYSCALL_REJECTION_MODE_GUARD: {
142 		if (flags & SYSCALL_REJECTION_FLAGS_ONCE) {
143 			int const number = syscall_mach_trap_number < 0 ? -syscall_mach_trap_number : (mach_trap_count + syscall_mach_trap_number);
144 
145 			// don't trip on this system call again
146 			bitmap_set(ut->syscall_rejection_mask, number);
147 			bitmap_set(ut->syscall_rejection_once_mask, number);
148 		}
149 
150 		mach_exception_code_t code = 0;
151 		EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_REJECTED_SC);
152 		EXC_GUARD_ENCODE_FLAVOR(code, 0);
153 		EXC_GUARD_ENCODE_TARGET(code, syscall_mach_trap_number < 0);
154 		mach_exception_subcode_t subcode =
155 		    syscall_mach_trap_number < 0 ? -syscall_mach_trap_number : syscall_mach_trap_number;
156 
157 		if (!fatal) {
158 			task_violated_guard(code, subcode, NULL, TRUE);
159 		} else {
160 			thread_guard_violation(current_thread(), code, subcode, fatal);
161 		}
162 		break;
163 	};
164 	default:
165 		/* ignore */
166 		;
167 	}
168 	return fatal;
169 }
170 
171 extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
172     mach_exception_data_type_t subcode);
173 
174 void
rejected_syscall_guard_ast(thread_t t,mach_exception_data_type_t code,mach_exception_data_type_t subcode)175 rejected_syscall_guard_ast(
176 	thread_t t,
177 	mach_exception_data_type_t code,
178 	mach_exception_data_type_t subcode)
179 {
180 	const bool fatal = true;
181 	/*
182 	 * Check if anyone has registered for Synchronous EXC_GUARD, if yes then,
183 	 * deliver it synchronously and then kill the process, else kill the process
184 	 * and deliver the exception via EXC_CORPSE_NOTIFY. Always kill the process if we are not in dev mode.
185 	 */
186 	if (task_exception_notify(EXC_GUARD, code, subcode, fatal) == KERN_SUCCESS) {
187 		psignal_uthread(t, SIGSYS);
188 	} else {
189 		exit_with_guard_exception(current_proc(), code, subcode);
190 	}
191 }
192 
193 
194 static void
_syscall_rejection_apply_mask(syscall_rejection_mask_t dest,const syscall_rejection_mask_t src,bool apply_as_allow)195 _syscall_rejection_apply_mask(syscall_rejection_mask_t dest, const syscall_rejection_mask_t src, bool apply_as_allow)
196 {
197 	assert(dest != NULL);
198 	assert(src != NULL);
199 
200 	if (apply_as_allow) {
201 		bitmap_or(dest, dest, src, mach_trap_count + nsysent);
202 	} else {
203 		bitmap_and_not(dest, dest, src, mach_trap_count + nsysent);
204 	}
205 }
206 
207 /*
208  * The masks to apply are passed to the kernel as packed selectors,
209  * which are just however many of the selector data type fit into one
210  * (or more) fields of the natural word size (i.e. a register). This
211  * avoids copying from user space.
212  *
213  * More specifically, at the time of this writing, a selector is 7
214  * bits wide, and there are two uint64_t arguments
215  * (args->packed_selectors<n>), so up to 18 selectors can be
216  * specified, which are then stuffed into the 128 bits of the
217  * arguments. If less than 18 masks are requested to be applied, the
218  * remaining selectors will just be left as 0, which naturally
219  * resolves as the "empty" or "NULL" mask that changes nothing.
220  *
221  * The libsyscall wrapper provides a more convenient interface where
222  * an array (up to 18 elements long) and its length are passed in,
223  * which the wrapper then packs into packed_selectors of the actual
224  * system call.
225  */
226 
227 int
sys_debug_syscall_reject_config(struct proc * p __unused,struct debug_syscall_reject_config_args * args,int * retval)228 sys_debug_syscall_reject_config(struct proc *p __unused, struct debug_syscall_reject_config_args *args, int *retval)
229 {
230 	int error = 0;
231 
232 	*retval = 0;
233 
234 	uthread_t ut = current_uthread();
235 
236 	bitmap_t mask[SR_MASK_SIZE / sizeof(bitmap_t)];
237 	// syscall rejection masks are always reset to "deny all"
238 	memset(mask, 0, SR_MASK_SIZE);
239 
240 	lck_mtx_lock(&syscall_rejection_mtx);
241 
242 	for (int i = 0;
243 	    i + SYSCALL_REJECTION_SELECTOR_BITS < (sizeof(args->packed_selectors1) + sizeof(args->packed_selectors2)) * 8;
244 	    i += SYSCALL_REJECTION_SELECTOR_BITS) {
245 #define s_left_shift(x, n) ((n) < 0 ? ((x) >> -(n)) : ((x) << (n)))
246 
247 		syscall_rejection_selector_t const selector = (syscall_rejection_selector_t)
248 		    (((i < 64 ? (args->packed_selectors1 >> i) : 0) |
249 		    (i > 64 - SYSCALL_REJECTION_SELECTOR_BITS ? s_left_shift(args->packed_selectors2, 64 - i) : 0)) & SYSCALL_REJECTION_SELECTOR_MASK);
250 		bool const is_allow_mask = selector & SYSCALL_REJECTION_IS_ALLOW_MASK;
251 		int const mask_index = selector & SYSCALL_REJECTION_INDEX_MASK;
252 
253 		if (mask_index == SYSCALL_REJECTION_NULL) {
254 			// mask 0 is always empty (nothing to apply)
255 			continue;
256 		}
257 
258 		if (mask_index == SYSCALL_REJECTION_ALL) {
259 			// mask 1 is always full (overrides everything)
260 			memset(mask, is_allow_mask ? 0xff : 0x00, SR_MASK_SIZE);
261 			continue;
262 		}
263 
264 		syscall_rejection_mask_t mask_to_apply = syscall_rejection_masks[mask_index - predefined_masks];
265 
266 		if (mask_to_apply == NULL) {
267 			error = ENOENT;
268 			goto out_locked;
269 		}
270 
271 		_syscall_rejection_apply_mask(mask, mask_to_apply, is_allow_mask);
272 	}
273 
274 	/* Not RT-safe, but only necessary once. */
275 	if (ut->syscall_rejection_mask == NULL) {
276 		ut->syscall_rejection_mask = kalloc_data(SR_MASK_SIZE, Z_WAITOK);
277 
278 		if (ut->syscall_rejection_mask == NULL) {
279 			error = ENOMEM;
280 			goto out_locked;
281 		}
282 	}
283 
284 	memcpy(ut->syscall_rejection_mask, mask, SR_MASK_SIZE);
285 
286 	if ((args->flags & SYSCALL_REJECTION_FLAGS_ONCE)) {
287 		if (ut->syscall_rejection_once_mask == NULL) {
288 			ut->syscall_rejection_once_mask = kalloc_data(SR_MASK_SIZE, Z_WAITOK);
289 
290 			if (ut->syscall_rejection_once_mask == NULL) {
291 				kfree_data(ut->syscall_rejection_mask, SR_MASK_SIZE);
292 				ut->syscall_rejection_mask = NULL;
293 				error = ENOMEM;
294 				goto out_locked;
295 			}
296 
297 			memset(ut->syscall_rejection_once_mask, 0, SR_MASK_SIZE);
298 		} else {
299 			// prevent the already hit syscalls from hitting again.
300 			bitmap_or(ut->syscall_rejection_mask, ut->syscall_rejection_mask, ut->syscall_rejection_once_mask, mach_trap_count + nsysent);
301 		}
302 	}
303 
304 out_locked:
305 	lck_mtx_unlock(&syscall_rejection_mtx);
306 
307 	if (error == 0) {
308 		ut->syscall_rejection_flags = args->flags;
309 	}
310 
311 	if (error == ENOENT && debug_syscall_rejection_mode == SYSCALL_REJECTION_MODE_IGNORE) {
312 		/* Existing code may rely on the system call failing
313 		 * gracefully if syscall rejection is currently off. */
314 		error = 0;
315 	}
316 
317 	return error;
318 }
319 
320 /*
321  * debug_syscall_reject
322  *
323  * Compatibility interface to the old form of the system call.
324  */
325 int
debug_syscall_reject(struct proc * p,struct debug_syscall_reject_args * args,int * retval)326 debug_syscall_reject(struct proc *p, struct debug_syscall_reject_args *args, int *retval)
327 {
328 	struct debug_syscall_reject_config_args new_args;
329 
330 	bzero(&new_args, sizeof(new_args));
331 	new_args.packed_selectors1 = args->packed_selectors;
332 	// packed_selectors2 left empty
333 	new_args.flags = SYSCALL_REJECTION_FLAGS_DEFAULT;
334 
335 	return sys_debug_syscall_reject_config(p, &new_args, retval);
336 }
337 
338 
339 static bool
_syscall_rejection_add(syscall_rejection_mask_t dst,char const * name)340 _syscall_rejection_add(syscall_rejection_mask_t dst, char const *name)
341 {
342 	/*
343 	 * Yes, this function is O(n+m), making the whole act of setting a
344 	 * mask O(l*(n+m)), but defining masks is done rarely enough (and
345 	 * i, n and m small enough) for this to not matter.
346 	 */
347 
348 	for (int i = 0; i < mach_trap_count; i++) {
349 		if (strcmp(mach_syscall_name_table[i], name) == 0) {
350 			bitmap_set(dst, i);
351 			return true;
352 		}
353 	}
354 
355 	extern char const *syscallnames[];
356 
357 	for (int i = 0; i < nsysent; i++) {
358 		if (strcmp(syscallnames[i], name) == 0) {
359 			bitmap_set(dst, i + mach_trap_count);
360 			return true;
361 		}
362 	}
363 
364 	printf("%s: trying to add non-existing syscall/mach trap '%s'\n", __func__, name);
365 	return false;
366 }
367 
368 /* Pretty much arbitrary, we just don't want userspace to pass
369  * unreasonably large buffers to parse. */
370 static size_t const max_input_size = 16 * PAGE_MAX_SIZE;
371 
372 static int
_sysctl_debug_syscall_rejection_masks(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)373 _sysctl_debug_syscall_rejection_masks(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
374     struct sysctl_req *req)
375 {
376 	size_t const max_name_len = 128;
377 	char name[max_name_len];
378 
379 	if (req->newptr == 0) {
380 		return 0;
381 	}
382 
383 	if (req->newlen > max_input_size) {
384 		return E2BIG;
385 	}
386 
387 	size_t const len = req->newlen;
388 	char *buf = kalloc_data(len + 1, Z_WAITOK);
389 
390 	if (buf == NULL) {
391 		return ENOMEM;
392 	}
393 
394 	/*
395 	 * sysctl_io_string always copies out the given buffer as the
396 	 * "old" value if requested.  We could construct a text
397 	 * representation of existing masks, but this is not particularly
398 	 * interesting, so we just return the dummy string "<masks>".
399 	 */
400 	strlcpy(buf, "<masks>", len + 1);
401 	int changed = 0;
402 	int error = sysctl_io_string(req, buf, len + 1, 0, &changed);
403 
404 	if (error != 0 || !changed) {
405 		goto out;
406 	}
407 
408 	char const *p = buf;
409 
410 	int id = 0;
411 	int l = 0;
412 	int n = sscanf(p, "%i: %n", &id, &l);
413 
414 	if (n != 1 || id < predefined_masks || id > syscall_rejection_mask_count + predefined_masks) {
415 		printf("%s: invalid mask id %i (or conversion failed)\n", __FUNCTION__, id);
416 		error = EINVAL;
417 		goto out;
418 	}
419 
420 	p += l;
421 
422 	syscall_rejection_mask_t new_mask = kalloc_data(SR_MASK_SIZE,
423 	    Z_WAITOK | Z_ZERO);
424 	if (new_mask == NULL) {
425 		printf("%s: allocating new mask for id %i failed\n", __FUNCTION__, id);
426 		error = ENOMEM;
427 		goto out;
428 	}
429 
430 	error = 0;
431 
432 	while (p < buf + len && *p != 0) {
433 		name[0] = 0;
434 		n = sscanf(p, "%127s %n", name, &l);
435 		if (n != 1 || name[0] == 0) {
436 			error = EINVAL;
437 			kfree_data(new_mask, SR_MASK_SIZE);
438 			goto out;
439 		}
440 
441 		if (!_syscall_rejection_add(new_mask, name)) {
442 			error = ENOENT;
443 			kfree_data(new_mask, SR_MASK_SIZE);
444 			goto out;
445 		}
446 
447 		p += l;
448 	}
449 
450 
451 	syscall_rejection_mask_t to_free = NULL;
452 
453 	lck_mtx_lock(&syscall_rejection_mtx);
454 
455 	syscall_rejection_mask_t *target_mask = &syscall_rejection_masks[id - predefined_masks];
456 
457 	to_free = *target_mask;
458 	*target_mask = new_mask;
459 
460 	lck_mtx_unlock(&syscall_rejection_mtx);
461 
462 	kfree_data(to_free, SR_MASK_SIZE);
463 out:
464 
465 	kfree_data(buf, len + 1);
466 	return error;
467 }
468 
469 SYSCTL_PROC(_kern, OID_AUTO, syscall_rejection_masks, CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
470     0, 0, _sysctl_debug_syscall_rejection_masks, "A", "system call rejection masks");
471 
472 #else /* CONFIG_DEBUG_SYSCALL_REJECTION */
473 
474 #include <sys/kern_debug.h>
475 
476 int
sys_debug_syscall_reject_config(struct proc * __unused p,struct debug_syscall_reject_config_args * __unused args,int __unused * ret)477 sys_debug_syscall_reject_config(struct proc * __unused p, struct debug_syscall_reject_config_args * __unused args, int __unused *ret)
478 {
479 	/* not supported. */
480 	return ENOTSUP;
481 }
482 
483 int
debug_syscall_reject(struct proc * __unused p,struct debug_syscall_reject_args * __unused args,int * __unused retval)484 debug_syscall_reject(struct proc * __unused p, struct debug_syscall_reject_args * __unused args, int * __unused retval)
485 {
486 	/* not supported. */
487 	return ENOTSUP;
488 }
489 
/*
 * Stub used when CONFIG_DEBUG_SYSCALL_REJECTION is off: there is no
 * rejection mode to reset, so this does nothing.
 */
void
reset_debug_syscall_rejection_mode(void)
{
	/* syscall rejection is not built into this kernel */
}
495 
496 #endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
497 
498 #if __arm64__ && (DEBUG || DEVELOPMENT)
499 
/*
 * Busy-wait until `nanoseconds` (converted to absolute-time units by
 * nanoseconds_to_absolutetime()) have elapsed on mach_absolute_time().
 */
static void
_spinfor(uint64_t nanoseconds)
{
	uint64_t duration = 0;

	nanoseconds_to_absolutetime(nanoseconds, &duration);

	uint64_t const deadline = mach_absolute_time() + duration;

	while (mach_absolute_time() < deadline) {
		// Spinning.
	}
}
512 
513 static int
_sysctl_debug_disable_interrupts_test(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)514 _sysctl_debug_disable_interrupts_test(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
515     struct sysctl_req *req)
516 {
517 	int error = 0;
518 
519 	if (req->newptr == 0) {
520 		goto out;
521 	}
522 
523 	uint64_t val = 0;
524 	error = sysctl_io_number(req, 0, sizeof(val), &val, NULL);
525 
526 	if (error != 0 || val == 0) {
527 		goto out;
528 	}
529 
530 	boolean_t istate = ml_set_interrupts_enabled(false);
531 	_spinfor(val);
532 	ml_set_interrupts_enabled(istate);
533 
534 out:
535 	return error;
536 }
537 
538 static int
_sysctl_debug_disable_preemption_test(struct sysctl_oid __unused * oidp,void * __unused arg1,int __unused arg2,struct sysctl_req * req)539 _sysctl_debug_disable_preemption_test(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
540     struct sysctl_req *req)
541 {
542 	int error = 0;
543 
544 	if (req->newptr == 0) {
545 		goto out;
546 	}
547 
548 	uint64_t val = 0;
549 	error = sysctl_io_number(req, 0, sizeof(val), &val, NULL);
550 
551 	if (error != 0 || val == 0) {
552 		goto out;
553 	}
554 
555 	disable_preemption();
556 	_spinfor(val);
557 	enable_preemption();
558 
559 out:
560 	return error;
561 }
562 
563 SYSCTL_PROC(_kern, OID_AUTO, debug_disable_interrupts_test, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
564     0, 0, _sysctl_debug_disable_interrupts_test, "Q", "disable interrupts for specified number of nanoseconds, for testing");
565 
566 SYSCTL_PROC(_kern, OID_AUTO, debug_disable_preemption_test, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
567     0, 0, _sysctl_debug_disable_preemption_test, "Q", "disable preemption for specified number of nanoseconds, for testing");
568 
569 #endif /* __arm64__ && (DEBUG || DEVELOPMENT) */
570