/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28 #include <kern/task.h>
29 #include <kern/thread.h>
30 #include <kern/assert.h>
31 #include <kern/clock.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/debug.h>
35 #include <mach/machine/thread_status.h>
36 #include <mach/thread_act.h>
37
38 #include <sys/kernel.h>
39 #include <sys/vm.h>
40 #include <sys/proc_internal.h>
41 #include <sys/syscall.h>
42 #include <sys/systm.h>
43 #include <sys/user.h>
44 #include <sys/errno.h>
45 #include <sys/kdebug.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/kauth.h>
49 #include <sys/systm.h>
50 #include <sys/bitstring.h>
51
52 #include <security/audit/audit.h>
53
54 #include <i386/seg.h>
55 #include <i386/machine_routines.h>
56 #include <mach/i386/syscall_sw.h>
57
58 #include <machine/pal_routines.h>
59
60 #if CONFIG_MACF
61 #include <security/mac_framework.h>
62 #endif
63
64 #if CONFIG_DTRACE
65 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
66 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
67 #endif
68
69 extern void unix_syscall(x86_saved_state_t *);
70 extern void unix_syscall64(x86_saved_state_t *);
71 extern void *find_user_regs(thread_t);
72
73 /* dynamically generated at build time based on syscalls.master */
74 extern const char *syscallnames[];
75
76 #define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
77 ((code) == SYS_kdebug_trace64) || \
78 ((code) == SYS_kdebug_trace_string))
79
80 /*
81 * Function: unix_syscall
82 *
83 * Inputs: regs - pointer to i386 save area
84 *
85 * Outputs: none
86 */
87 __attribute__((noreturn))
88 void
unix_syscall(x86_saved_state_t * state)89 unix_syscall(x86_saved_state_t *state)
90 {
91 thread_t thread;
92 void *vt;
93 unsigned int code, syscode;
94 const struct sysent *callp;
95
96 int error;
97 vm_offset_t params;
98 struct proc *p;
99 struct uthread *uthread;
100 x86_saved_state32_t *regs;
101 pid_t pid;
102
103 assert(is_saved_state32(state));
104 regs = saved_state32(state);
105 #if DEBUG
106 if (regs->eax == 0x800) {
107 thread_exception_return();
108 }
109 #endif
110 thread = current_thread();
111 uthread = get_bsdthread_info(thread);
112 p = current_proc();
113
114 uthread_reset_proc_refcount(uthread);
115
116 code = regs->eax & I386_SYSCALL_NUMBER_MASK;
117 syscode = (code < nsysent) ? code : SYS_invalid;
118 DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
119 code, syscallnames[syscode], (uint32_t)regs->eip);
120 params = (vm_offset_t) (regs->uesp + sizeof(int));
121
122 regs->efl &= ~(EFL_CF);
123
124 callp = &sysent[syscode];
125
126 if (__improbable(callp == sysent)) {
127 code = fuword(params);
128 params += sizeof(int);
129 syscode = (code < nsysent) ? code : SYS_invalid;
130 callp = &sysent[syscode];
131 }
132
133 vt = (void *)uthread->uu_arg;
134
135 if (callp->sy_arg_bytes != 0) {
136 #if CONFIG_REQUIRES_U32_MUNGING
137 sy_munge_t *mungerp;
138 #else
139 #error U32 syscalls on x86_64 kernel requires munging
140 #endif
141 uint32_t nargs;
142
143 assert((unsigned) callp->sy_arg_bytes <= sizeof(uthread->uu_arg));
144 nargs = callp->sy_arg_bytes;
145 error = copyin((user_addr_t) params, (char *) vt, nargs);
146 if (error) {
147 regs->eax = error;
148 regs->efl |= EFL_CF;
149 thread_exception_return();
150 /* NOTREACHED */
151 }
152
153 if (__probable(!code_is_kdebug_trace(code))) {
154 uint32_t *uip = vt;
155 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
156 uip[0], uip[1], uip[2], uip[3]);
157 }
158
159 #if CONFIG_REQUIRES_U32_MUNGING
160 mungerp = callp->sy_arg_munge32;
161
162 if (mungerp != NULL) {
163 (*mungerp)(vt);
164 }
165 #endif
166 } else {
167 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START);
168 }
169
170 /*
171 * Delayed binding of thread credential to process credential, if we
172 * are not running with an explicitly set thread credential.
173 */
174 kauth_cred_thread_update(thread, p);
175
176 uthread->uu_rval[0] = 0;
177 uthread->uu_rval[1] = 0;
178 uthread->uu_flag |= UT_NOTCANCELPT;
179 uthread->syscall_code = code;
180 pid = proc_pid(p);
181
182 #ifdef CONFIG_IOCOUNT_TRACE
183 uthread->uu_iocount = 0;
184 uthread->uu_vpindex = 0;
185 #endif
186
187 #if CONFIG_MACF
188 if (__improbable(proc_syscall_filter_mask(p) != NULL && !bitstr_test(proc_syscall_filter_mask(p), syscode))) {
189 error = mac_proc_check_syscall_unix(p, syscode);
190 if (error) {
191 goto skip_syscall;
192 }
193 }
194 #endif /* CONFIG_MACF */
195
196 AUDIT_SYSCALL_ENTER(code, p, uthread);
197 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
198 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
199
200 #if CONFIG_MACF
201 skip_syscall:
202 #endif /* CONFIG_MACF */
203
204 #ifdef CONFIG_IOCOUNT_TRACE
205 if (uthread->uu_iocount) {
206 printf("system call returned with uu_iocount(%d) != 0\n",
207 uthread->uu_iocount);
208 }
209 #endif
210 #if CONFIG_DTRACE
211 uthread->t_dtrace_errno = error;
212 #endif /* CONFIG_DTRACE */
213
214 if (__improbable(error == ERESTART)) {
215 /*
216 * Move the user's pc back to repeat the syscall:
217 * 5 bytes for a sysenter, or 2 for an int 8x.
218 * The SYSENTER_TF_CS covers single-stepping over a sysenter
219 * - see debug trap handler in idt.s/idt64.s
220 */
221
222 pal_syscall_restart(thread, state);
223 } else if (error != EJUSTRETURN) {
224 if (__improbable(error)) {
225 regs->eax = error;
226 regs->efl |= EFL_CF; /* carry bit */
227 } else { /* (not error) */
228 /*
229 * We split retval across two registers, in case the
230 * syscall had a 64-bit return value, in which case
231 * eax/edx matches the function call ABI.
232 */
233 regs->eax = uthread->uu_rval[0];
234 regs->edx = uthread->uu_rval[1];
235 }
236 }
237
238 DEBUG_KPRINT_SYSCALL_UNIX(
239 "unix_syscall: error=%d retval=(%u,%u)\n",
240 error, regs->eax, regs->edx);
241
242 uthread->uu_flag &= ~UT_NOTCANCELPT;
243 uthread->syscall_code = 0;
244
245 #if DEBUG || DEVELOPMENT
246 kern_allocation_name_t
247 prior __assert_only = thread_set_allocation_name(NULL);
248 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
249 #endif /* DEBUG || DEVELOPMENT */
250
251 if (__improbable(uthread->uu_lowpri_window)) {
252 /*
253 * task is marked as a low priority I/O type
254 * and the I/O we issued while in this system call
255 * collided with normal I/O operations... we'll
256 * delay in order to mitigate the impact of this
257 * task on the normal operation of the system
258 */
259 throttle_lowpri_io(1);
260 }
261 if (__probable(!code_is_kdebug_trace(code))) {
262 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
263 error, uthread->uu_rval[0], uthread->uu_rval[1], pid);
264 }
265
266 if (__improbable(callp->sy_call == (sy_call_t *)execve && !error)) {
267 pal_execve_return(thread);
268 }
269
270 uthread_assert_zero_proc_refcount(uthread);
271 thread_exception_return();
272 /* NOTREACHED */
273 }
274
275 __attribute__((noreturn))
276 void
unix_syscall64(x86_saved_state_t * state)277 unix_syscall64(x86_saved_state_t *state)
278 {
279 thread_t thread;
280 void *vt;
281 unsigned int code, syscode;
282 const struct sysent *callp;
283 int args_in_regs;
284 boolean_t args_start_at_rdi;
285 int error;
286 struct proc *p;
287 struct uthread *uthread;
288 x86_saved_state64_t *regs;
289 pid_t pid;
290
291 assert(is_saved_state64(state));
292 regs = saved_state64(state);
293 #if DEBUG
294 if (regs->rax == 0x2000800) {
295 thread_exception_return();
296 }
297 #endif
298 thread = current_thread();
299 uthread = get_bsdthread_info(thread);
300 p = current_proc();
301
302 uthread_reset_proc_refcount(uthread);
303
304 /* Verify that we are not being called from a task without a proc */
305 if (__improbable(p == NULL)) {
306 regs->rax = EPERM;
307 regs->isf.rflags |= EFL_CF;
308 task_terminate_internal(current_task());
309 thread_exception_return();
310 /* NOTREACHED */
311 }
312
313 code = regs->rax & SYSCALL_NUMBER_MASK;
314 syscode = (code < nsysent) ? code : SYS_invalid;
315 DEBUG_KPRINT_SYSCALL_UNIX(
316 "unix_syscall64: code=%d(%s) rip=%llx\n",
317 code, syscallnames[syscode], regs->isf.rip);
318 callp = &sysent[syscode];
319
320 vt = (void *)uthread->uu_arg;
321
322 if (__improbable(callp == sysent)) {
323 /*
324 * indirect system call... system call number
325 * passed as 'arg0'
326 */
327 code = regs->rdi;
328 syscode = (code < nsysent) ? code : SYS_invalid;
329 callp = &sysent[syscode];
330 args_start_at_rdi = FALSE;
331 args_in_regs = 5;
332 } else {
333 args_start_at_rdi = TRUE;
334 args_in_regs = 6;
335 }
336
337 if (callp->sy_narg != 0) {
338 assert(callp->sy_narg <= 8); /* size of uu_arg */
339
340 args_in_regs = MIN(args_in_regs, callp->sy_narg);
341 memcpy(vt, args_start_at_rdi ? ®s->rdi : ®s->rsi, args_in_regs * sizeof(syscall_arg_t));
342
343 if (!code_is_kdebug_trace(code)) {
344 uint64_t *uip = vt;
345
346 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
347 uip[0], uip[1], uip[2], uip[3]);
348 }
349
350 if (__improbable(callp->sy_narg > args_in_regs)) {
351 int copyin_count;
352
353 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);
354
355 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
356 if (error) {
357 regs->rax = error;
358 regs->isf.rflags |= EFL_CF;
359 thread_exception_return();
360 /* NOTREACHED */
361 }
362 }
363 } else {
364 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START);
365 }
366
367 /*
368 * Delayed binding of thread credential to process credential, if we
369 * are not running with an explicitly set thread credential.
370 */
371 kauth_cred_thread_update(thread, p);
372
373 uthread->uu_rval[0] = 0;
374 uthread->uu_rval[1] = 0;
375 uthread->uu_flag |= UT_NOTCANCELPT;
376 uthread->syscall_code = code;
377 pid = proc_pid(p);
378
379 #ifdef CONFIG_IOCOUNT_TRACE
380 uthread->uu_iocount = 0;
381 uthread->uu_vpindex = 0;
382 #endif
383
384 #if CONFIG_MACF
385 if (__improbable(proc_syscall_filter_mask(p) != NULL && !bitstr_test(proc_syscall_filter_mask(p), syscode))) {
386 error = mac_proc_check_syscall_unix(p, syscode);
387 if (error) {
388 goto skip_syscall;
389 }
390 }
391 #endif /* CONFIG_MACF */
392
393 AUDIT_SYSCALL_ENTER(code, p, uthread);
394 error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
395 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
396
397 #if CONFIG_MACF
398 skip_syscall:
399 #endif /* CONFIG_MACF */
400
401 #ifdef CONFIG_IOCOUNT_TRACE
402 if (uthread->uu_iocount) {
403 printf("system call returned with uu_iocount(%d) != 0\n",
404 uthread->uu_iocount);
405 }
406 #endif
407
408 #if CONFIG_DTRACE
409 uthread->t_dtrace_errno = error;
410 #endif /* CONFIG_DTRACE */
411
412 if (__improbable(error == ERESTART)) {
413 /*
414 * all system calls come through via the syscall instruction
415 * in 64 bit mode... its 2 bytes in length
416 * move the user's pc back to repeat the syscall:
417 */
418 pal_syscall_restart( thread, state );
419 } else if (error != EJUSTRETURN) {
420 if (__improbable(error)) {
421 regs->rax = error;
422 regs->isf.rflags |= EFL_CF; /* carry bit */
423 } else { /* (not error) */
424 switch (callp->sy_return_type) {
425 case _SYSCALL_RET_INT_T:
426 regs->rax = uthread->uu_rval[0];
427 regs->rdx = uthread->uu_rval[1];
428 break;
429 case _SYSCALL_RET_UINT_T:
430 regs->rax = ((u_int)uthread->uu_rval[0]);
431 regs->rdx = ((u_int)uthread->uu_rval[1]);
432 break;
433 case _SYSCALL_RET_OFF_T:
434 case _SYSCALL_RET_ADDR_T:
435 case _SYSCALL_RET_SIZE_T:
436 case _SYSCALL_RET_SSIZE_T:
437 case _SYSCALL_RET_UINT64_T:
438 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
439 regs->rdx = 0;
440 break;
441 case _SYSCALL_RET_NONE:
442 break;
443 default:
444 panic("unix_syscall: unknown return type");
445 break;
446 }
447 regs->isf.rflags &= ~EFL_CF;
448 }
449 }
450
451 DEBUG_KPRINT_SYSCALL_UNIX(
452 "unix_syscall64: error=%d retval=(%llu,%llu)\n",
453 error, regs->rax, regs->rdx);
454
455 uthread->uu_flag &= ~UT_NOTCANCELPT;
456 uthread->syscall_code = 0;
457
458 #if DEBUG || DEVELOPMENT
459 kern_allocation_name_t
460 prior __assert_only = thread_set_allocation_name(NULL);
461 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
462 #endif /* DEBUG || DEVELOPMENT */
463
464 if (__improbable(uthread->uu_lowpri_window)) {
465 /*
466 * task is marked as a low priority I/O type
467 * and the I/O we issued while in this system call
468 * collided with normal I/O operations... we'll
469 * delay in order to mitigate the impact of this
470 * task on the normal operation of the system
471 */
472 throttle_lowpri_io(1);
473 }
474 if (__probable(!code_is_kdebug_trace(code))) {
475 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
476 error, uthread->uu_rval[0], uthread->uu_rval[1], pid);
477 }
478
479 uthread_assert_zero_proc_refcount(uthread);
480 thread_exception_return();
481 /* NOTREACHED */
482 }
483
484
485 void
unix_syscall_return(int error)486 unix_syscall_return(int error)
487 {
488 thread_t thread;
489 struct uthread *uthread;
490 struct proc *p;
491 unsigned int code;
492 const struct sysent *callp;
493
494 thread = current_thread();
495 uthread = get_bsdthread_info(thread);
496
497 pal_register_cache_state(thread, DIRTY);
498
499 p = current_proc();
500
501 if (proc_is64bit(p)) {
502 x86_saved_state64_t *regs;
503
504 regs = saved_state64(find_user_regs(thread));
505
506 code = uthread->syscall_code;
507 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
508
509 #if CONFIG_DTRACE
510 if (callp->sy_call == dtrace_systrace_syscall) {
511 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
512 }
513 #endif /* CONFIG_DTRACE */
514 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
515
516 if (error == ERESTART) {
517 /*
518 * repeat the syscall
519 */
520 pal_syscall_restart( thread, find_user_regs(thread));
521 } else if (error != EJUSTRETURN) {
522 if (error) {
523 regs->rax = error;
524 regs->isf.rflags |= EFL_CF; /* carry bit */
525 } else { /* (not error) */
526 switch (callp->sy_return_type) {
527 case _SYSCALL_RET_INT_T:
528 regs->rax = uthread->uu_rval[0];
529 regs->rdx = uthread->uu_rval[1];
530 break;
531 case _SYSCALL_RET_UINT_T:
532 regs->rax = ((u_int)uthread->uu_rval[0]);
533 regs->rdx = ((u_int)uthread->uu_rval[1]);
534 break;
535 case _SYSCALL_RET_OFF_T:
536 case _SYSCALL_RET_ADDR_T:
537 case _SYSCALL_RET_SIZE_T:
538 case _SYSCALL_RET_SSIZE_T:
539 case _SYSCALL_RET_UINT64_T:
540 regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
541 regs->rdx = 0;
542 break;
543 case _SYSCALL_RET_NONE:
544 break;
545 default:
546 panic("unix_syscall: unknown return type");
547 break;
548 }
549 regs->isf.rflags &= ~EFL_CF;
550 }
551 }
552 DEBUG_KPRINT_SYSCALL_UNIX(
553 "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
554 error, regs->rax, regs->rdx);
555 } else {
556 x86_saved_state32_t *regs;
557
558 regs = saved_state32(find_user_regs(thread));
559
560 regs->efl &= ~(EFL_CF);
561
562 code = uthread->syscall_code;
563 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
564
565 #if CONFIG_DTRACE
566 if (callp->sy_call == dtrace_systrace_syscall) {
567 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
568 }
569 #endif /* CONFIG_DTRACE */
570 AUDIT_SYSCALL_EXIT(code, p, uthread, error);
571
572 if (error == ERESTART) {
573 pal_syscall_restart( thread, find_user_regs(thread));
574 } else if (error != EJUSTRETURN) {
575 if (error) {
576 regs->eax = error;
577 regs->efl |= EFL_CF; /* carry bit */
578 } else { /* (not error) */
579 regs->eax = uthread->uu_rval[0];
580 regs->edx = uthread->uu_rval[1];
581 }
582 }
583 DEBUG_KPRINT_SYSCALL_UNIX(
584 "unix_syscall_return: error=%d retval=(%u,%u)\n",
585 error, regs->eax, regs->edx);
586 }
587
588
589 uthread->uu_flag &= ~UT_NOTCANCELPT;
590 uthread->syscall_code = 0;
591
592 #if DEBUG || DEVELOPMENT
593 kern_allocation_name_t
594 prior __assert_only = thread_set_allocation_name(NULL);
595 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
596 #endif /* DEBUG || DEVELOPMENT */
597
598 if (uthread->uu_lowpri_window) {
599 /*
600 * task is marked as a low priority I/O type
601 * and the I/O we issued while in this system call
602 * collided with normal I/O operations... we'll
603 * delay in order to mitigate the impact of this
604 * task on the normal operation of the system
605 */
606 throttle_lowpri_io(1);
607 }
608 if (!code_is_kdebug_trace(code)) {
609 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
610 error, uthread->uu_rval[0], uthread->uu_rval[1], proc_getpid(p));
611 }
612
613 thread_exception_return();
614 /* NOTREACHED */
615 }
616