1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/ioctl.h>
79 #include <sys/file_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/socketvar.h>
82 #include <sys/uio_internal.h>
83 #include <sys/kernel.h>
84 #include <sys/guarded.h>
85 #include <sys/stat.h>
86 #include <sys/malloc.h>
87 #include <sys/sysproto.h>
88
89 #include <sys/mount_internal.h>
90 #include <sys/protosw.h>
91 #include <sys/ev.h>
92 #include <sys/user.h>
93 #include <sys/kdebug.h>
94 #include <sys/poll.h>
95 #include <sys/event.h>
96 #include <sys/eventvar.h>
97 #include <sys/proc.h>
98 #include <sys/kauth.h>
99
100 #include <machine/smp.h>
101 #include <mach/mach_types.h>
102 #include <kern/kern_types.h>
103 #include <kern/assert.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread.h>
106 #include <kern/clock.h>
107 #include <kern/ledger.h>
108 #include <kern/task.h>
109 #include <kern/telemetry.h>
110 #include <kern/waitq.h>
111 #include <kern/sched_hygiene.h>
112 #include <kern/sched_prim.h>
113 #include <kern/mpsc_queue.h>
114 #include <kern/debug.h>
115
116 #include <sys/mbuf.h>
117 #include <sys/domain.h>
118 #include <sys/socket.h>
119 #include <sys/socketvar.h>
120 #include <sys/errno.h>
121 #include <sys/syscall.h>
122 #include <sys/pipe.h>
123
124 #include <security/audit/audit.h>
125
126 #include <net/if.h>
127 #include <net/route.h>
128
129 #include <netinet/in.h>
130 #include <netinet/in_systm.h>
131 #include <netinet/ip.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/ip_var.h>
134 #include <netinet/ip6.h>
135 #include <netinet/tcp.h>
136 #include <netinet/tcp_fsm.h>
137 #include <netinet/tcp_seq.h>
138 #include <netinet/tcp_timer.h>
139 #include <netinet/tcp_var.h>
140 #include <netinet/tcpip.h>
141 #include <netinet/tcp_debug.h>
142 /* for wait queue based select */
143 #include <kern/waitq.h>
144 #include <sys/vnode_internal.h>
145 /* for remote time api*/
146 #include <kern/remote_time.h>
147 #include <os/log.h>
148 #include <sys/log_data.h>
149
150 #if CONFIG_MACF
151 #include <security/mac_framework.h>
152 #endif
153
154 #ifdef CONFIG_KDP_INTERACTIVE_DEBUGGING
155 #include <mach_debug/mach_debug_types.h>
156 #endif
157
158 /* for entitlement check */
159 #include <IOKit/IOBSD.h>
160 /*
161 * If you need accounting for KM_SELECT consider using
162 * KALLOC_HEAP_DEFINE to define a view.
163 */
164 #define KM_SELECT KHEAP_DEFAULT
165
166 /* XXX should be in a header file somewhere */
167 extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
168
169 int rd_uio(struct proc *p, int fdes, uio_t uio, int is_preadv, user_ssize_t *retval);
170 int wr_uio(struct proc *p, int fdes, uio_t uio, int is_pwritev, user_ssize_t *retval);
171 int do_uiowrite(struct proc *p, struct fileproc *fp, uio_t uio, int flags, user_ssize_t *retval);
172
173 __private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
174 user_addr_t bufp, user_size_t nbyte,
175 off_t offset, int flags, user_ssize_t *retval);
176 __private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
177 user_addr_t bufp, user_size_t nbyte,
178 off_t offset, int flags, user_ssize_t *retval);
179 static int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
180
181 /* Conflict wait queue for when selects collide (opaque type) */
182 struct waitq select_conflict_queue;
183
184 /*
185 * Init routine called from bsd_init.c
186 */
187 void select_waitq_init(void);
void
select_waitq_init(void)
{
	/* Global FIFO waitq shared by all selects that collide on a fileglob. */
	waitq_init(&select_conflict_queue, SYNC_POLICY_FIFO);
}
193
194 #define f_flag fp_glob->fg_flag
195 #define f_type fp_glob->fg_ops->fo_type
196 #define f_cred fp_glob->fg_cred
197 #define f_ops fp_glob->fg_ops
198
199 /*
200 * Validate if the file can be used for random access (pread, pwrite, etc).
201 *
202 * Conditions:
203 * proc_fdlock is held
204 *
205 * Returns: 0 Success
206 * ESPIPE
207 * ENXIO
208 */
209 static int
valid_for_random_access(struct fileproc * fp)210 valid_for_random_access(struct fileproc *fp)
211 {
212 if (__improbable(fp->f_type != DTYPE_VNODE)) {
213 return ESPIPE;
214 }
215
216 vnode_t vp = (struct vnode *)fp_get_data(fp);
217 if (__improbable(vnode_isfifo(vp))) {
218 return ESPIPE;
219 }
220
221 if (__improbable(vp->v_flag & VISTTY)) {
222 return ENXIO;
223 }
224
225 return 0;
226 }
227
228 /*
229 * Read system call.
230 *
231 * Returns: 0 Success
232 * preparefileread:EBADF
233 * preparefileread:ESPIPE
234 * preparefileread:ENXIO
235 * preparefileread:EBADF
236 * dofileread:???
237 */
int
read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
{
	/* Cancellation point: honor pending pthread cancellation, then defer
	 * to the non-cancelable implementation. */
	__pthread_testcancel(1);
	return read_nocancel(p, (struct read_nocancel_args *)uap, retval);
}
244
/*
 * read() implementation without the cancellation point.
 */
int
read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	struct vfs_context context;

	/* Validate fd, require FREAD, and take a reference on the fileproc. */
	if ((error = preparefileread(p, &fp, fd, 0))) {
		return error;
	}

	/* Charge the I/O against the credential the file was opened with,
	 * not the caller's current credential. */
	context = *(vfs_context_current());
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* offset of -1 with flags 0 means "read at the current file offset". */
	error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
	    (off_t)-1, 0, retval);

	fp_drop(p, fd, fp, 0);

	return error;
}
267
268 /*
269 * Pread system call
270 *
271 * Returns: 0 Success
272 * preparefileread:EBADF
273 * preparefileread:ESPIPE
274 * preparefileread:ENXIO
275 * preparefileread:EBADF
276 * dofileread:???
277 */
int
pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; the real work is in pread_nocancel(). */
	__pthread_testcancel(1);
	return pread_nocancel(p, (struct pread_nocancel_args *)uap, retval);
}
284
/*
 * pread() implementation without the cancellation point.
 */
int
pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp = NULL;     /* fp set by preparefileread() */
	int fd = uap->fd;
	int error;
	struct vfs_context context;

	/* check_for_pread == 1: additionally reject fifos (ESPIPE) and
	 * ttys (ENXIO), which cannot be read at an explicit offset. */
	if ((error = preparefileread(p, &fp, fd, 1))) {
		goto out;
	}

	/* I/O is performed with the credential the file was opened with. */
	context = *(vfs_context_current());
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* FOF_OFFSET: use the caller-supplied offset, do not move f_offset. */
	error = dofileread(&context, fp, uap->buf, uap->nbyte,
	    uap->offset, FOF_OFFSET, retval);

	fp_drop(p, fd, fp, 0);

	/* Trace the syscall args (offset split into hi/lo 32-bit halves). */
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
	    uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

out:
	return error;
}
311
312 /*
313 * Code common for read and pread
314 */
315
316 /*
317 * Returns: 0 Success
318 * EBADF
319 * ESPIPE
320 * ENXIO
321 * fp_lookup:EBADF
322 * valid_for_random_access:ESPIPE
323 * valid_for_random_access:ENXIO
324 */
/*
 * Look up and validate an fd for reading, returning a referenced fileproc.
 *
 * On success the fileproc reference is held and must be released by the
 * caller with fp_drop(); on failure no reference is held.
 *
 * Returns:	0			Success
 *		EBADF			fd not open for reading
 *		ESPIPE
 *		ENXIO
 *	fp_lookup:EBADF
 *	valid_for_random_access:ESPIPE
 *	valid_for_random_access:ENXIO
 */
static int
preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
{
	int error;
	struct fileproc *fp;

	AUDIT_ARG(fd, fd);

	/* Spin variant: we only do short, non-blocking work under the lock. */
	proc_fdlock_spin(p);

	/* locked == 1: fp_lookup/fp_drop assume the fdlock is already held. */
	error = fp_lookup(p, fd, &fp, 1);

	if (error) {
		proc_fdunlock(p);
		return error;
	}
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	if (check_for_pread) {
		/* pread/preadv only: reject files without random access. */
		if ((error = valid_for_random_access(fp))) {
			goto out;
		}
	}

	*fp_ret = fp;

	proc_fdunlock(p);
	return 0;

out:
	/* Error path: release the reference taken by fp_lookup(). */
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
361
362
363 /*
364 * Returns: 0 Success
365 * EINVAL
366 * fo_read:???
367 */
368 __private_extern__ int
dofileread(vfs_context_t ctx,struct fileproc * fp,user_addr_t bufp,user_size_t nbyte,off_t offset,int flags,user_ssize_t * retval)369 dofileread(vfs_context_t ctx, struct fileproc *fp,
370 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
371 user_ssize_t *retval)
372 {
373 uio_t auio;
374 user_ssize_t bytecnt;
375 int error = 0;
376 uio_stackbuf_t uio_buf[UIO_SIZEOF(1)];
377
378 if (nbyte > INT_MAX) {
379 return EINVAL;
380 }
381
382 if (vfs_context_is64bit(ctx)) {
383 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
384 &uio_buf[0], sizeof(uio_buf));
385 } else {
386 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
387 &uio_buf[0], sizeof(uio_buf));
388 }
389 if (uio_addiov(auio, bufp, nbyte) != 0) {
390 *retval = 0;
391 return EINVAL;
392 }
393
394 bytecnt = nbyte;
395
396 if ((error = fo_read(fp, auio, flags, ctx))) {
397 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
398 error == EINTR || error == EWOULDBLOCK)) {
399 error = 0;
400 }
401 }
402 bytecnt -= uio_resid(auio);
403
404 *retval = bytecnt;
405
406 return error;
407 }
408
409 /*
410 * Vector read.
411 *
412 * Returns: 0 Success
413 * EINVAL
414 * ENOMEM
415 * preparefileread:EBADF
416 * preparefileread:ESPIPE
417 * preparefileread:ENXIO
418 * preparefileread:EBADF
419 * copyin:EFAULT
420 * rd_uio:???
421 */
422 static int
readv_preadv_uio(struct proc * p,int fdes,user_addr_t user_iovp,int iovcnt,off_t offset,int is_preadv,user_ssize_t * retval)423 readv_preadv_uio(struct proc *p, int fdes,
424 user_addr_t user_iovp, int iovcnt, off_t offset, int is_preadv,
425 user_ssize_t *retval)
426 {
427 uio_t auio = NULL;
428 int error;
429 struct user_iovec *iovp;
430
431 /* Verify range before calling uio_create() */
432 if (iovcnt <= 0 || iovcnt > UIO_MAXIOV) {
433 return EINVAL;
434 }
435
436 /* allocate a uio large enough to hold the number of iovecs passed */
437 auio = uio_create(iovcnt, offset,
438 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
439 UIO_READ);
440
441 /* get location of iovecs within the uio. then copyin the iovecs from
442 * user space.
443 */
444 iovp = uio_iovsaddr(auio);
445 if (iovp == NULL) {
446 error = ENOMEM;
447 goto ExitThisRoutine;
448 }
449 error = copyin_user_iovec_array(user_iovp,
450 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
451 iovcnt, iovp);
452 if (error) {
453 goto ExitThisRoutine;
454 }
455
456 /* finalize uio_t for use and do the IO
457 */
458 error = uio_calculateresid(auio);
459 if (error) {
460 goto ExitThisRoutine;
461 }
462 error = rd_uio(p, fdes, auio, is_preadv, retval);
463
464 ExitThisRoutine:
465 if (auio != NULL) {
466 uio_free(auio);
467 }
468 return error;
469 }
470
471 /*
472 * Scatter read system call.
473 */
int
readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; work happens in readv_nocancel(). */
	__pthread_testcancel(1);
	return readv_nocancel(p, (struct readv_nocancel_args *)uap, retval);
}
480
int
readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
{
	/* offset 0 / is_preadv 0: read at the current file offset. */
	return readv_preadv_uio(p, uap->fd, uap->iovp, uap->iovcnt, 0, 0, retval);
}
486
487 /*
488 * Preadv system call
489 */
int
sys_preadv(struct proc *p, struct preadv_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; work happens in sys_preadv_nocancel(). */
	__pthread_testcancel(1);
	return sys_preadv_nocancel(p, (struct preadv_nocancel_args *)uap, retval);
}
496
int
sys_preadv_nocancel(struct proc *p, struct preadv_nocancel_args *uap, user_ssize_t *retval)
{
	/* is_preadv 1: use the caller-supplied offset (FOF_OFFSET). */
	return readv_preadv_uio(p, uap->fd, uap->iovp, uap->iovcnt, uap->offset, 1, retval);
}
502
503 /*
504 * Write system call
505 *
506 * Returns: 0 Success
507 * EBADF
508 * fp_lookup:EBADF
509 * dofilewrite:???
510 */
int
write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; work happens in write_nocancel(). */
	__pthread_testcancel(1);
	return write_nocancel(p, (struct write_nocancel_args *)uap, retval);
}
517
/*
 * write() implementation without the cancellation point.
 */
int
write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;

	AUDIT_ARG(fd, fd);

	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		return error;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else if (fp_isguarded(fp, GUARD_WRITE)) {
		/* Guarded fds may forbid write(); raise the guard exception
		 * under the fdlock as fp_guard_exception() requires. */
		proc_fdlock(p);
		error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
		proc_fdunlock(p);
	} else {
		/* I/O runs with the credential the file was opened with. */
		struct vfs_context context = *(vfs_context_current());
		context.vc_ucred = fp->fp_glob->fg_cred;

		/* offset -1 / flags 0: write at the current file offset. */
		error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
		    (off_t)-1, 0, retval);
	}
	fp_drop(p, fd, fp, 0);
	return error;
}
547
548 /*
549 * pwrite system call
550 *
551 * Returns: 0 Success
552 * EBADF
553 * ESPIPE
554 * ENXIO
555 * EINVAL
 * fp_get_ftype:EBADF
557 * dofilewrite:???
558 */
int
pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; work happens in pwrite_nocancel(). */
	__pthread_testcancel(1);
	return pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval);
}
565
/*
 * pwrite() implementation without the cancellation point.
 */
int
pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	vnode_t vp = (vnode_t)0;

	AUDIT_ARG(fd, fd);

	/* Only vnodes support positioned writes; anything else is ESPIPE. */
	error = fp_get_ftype(p, fd, DTYPE_VNODE, ESPIPE, &fp);
	if (error) {
		return error;
	}

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		/* falls through to errout below */
	} else if (fp_isguarded(fp, GUARD_WRITE)) {
		proc_fdlock(p);
		error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
		proc_fdunlock(p);
	} else {
		/* I/O runs with the credential the file was opened with. */
		struct vfs_context context = *vfs_context_current();
		context.vc_ucred = fp->fp_glob->fg_cred;

		/* Inline equivalent of valid_for_random_access(): fifos and
		 * ttys have no usable file offset. */
		vp = (vnode_t)fp_get_data(fp);
		if (vnode_isfifo(vp)) {
			error = ESPIPE;
			goto errout;
		}
		if ((vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto errout;
		}
		if (uap->offset == (off_t)-1) {
			error = EINVAL;
			goto errout;
		}

		error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
		    uap->offset, FOF_OFFSET, retval);
	}
errout:
	fp_drop(p, fd, fp, 0);

	/* Trace the syscall args (offset split into hi/lo 32-bit halves). */
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
	    uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

	return error;
}
616
617 /*
618 * Returns: 0 Success
619 * EINVAL
620 * <fo_write>:EPIPE
621 * <fo_write>:??? [indirect through struct fileops]
622 */
/*
 * Common write path for write() and pwrite(): build a single-iovec uio on
 * the stack and dispatch to the fileops write routine.
 *
 * Returns:	0			Success
 *		EINVAL
 *	<fo_write>:EPIPE
 *	<fo_write>:???			[indirect through struct fileops]
 */
__private_extern__ int
dofilewrite(vfs_context_t ctx, struct fileproc *fp,
    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
    user_ssize_t *retval)
{
	uio_t auio;
	int error = 0;
	user_ssize_t bytecnt;
	uio_stackbuf_t uio_buf[UIO_SIZEOF(1)];

	if (nbyte > INT_MAX) {
		*retval = 0;
		return EINVAL;
	}

	/* Address size in the uio must match the user process's ABI. */
	if (vfs_context_is64bit(ctx)) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
		    &uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
		    &uio_buf[0], sizeof(uio_buf));
	}
	if (uio_addiov(auio, bufp, nbyte) != 0) {
		*retval = 0;
		return EINVAL;
	}

	bytecnt = nbyte;
	if ((error = fo_write(fp, auio, flags, ctx))) {
		/* A partial transfer interrupted by a signal or would-block
		 * condition is reported as a successful short write. */
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK)) {
			error = 0;
		}
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
		    (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) == 0) {
			/* XXX Raise the signal on the thread? */
			psignal(vfs_context_proc(ctx), SIGPIPE);
		}
	}
	bytecnt -= uio_resid(auio);
	if (bytecnt) {
		/* Any bytes transferred marks the fileglob as written-to. */
		os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
	}
	*retval = bytecnt;

	return error;
}
671
672 /*
673 * Returns: 0 Success
674 * EBADF
675 * ESPIPE
676 * ENXIO
677 * fp_lookup:EBADF
678 * fp_guard_exception:???
679 * valid_for_random_access:ESPIPE
680 * valid_for_random_access:ENXIO
681 */
/*
 * Look up and validate an fd for writing, returning a referenced fileproc.
 *
 * On success the fileproc reference is held and must be released by the
 * caller with fp_drop(); on failure no reference is held.
 *
 * Returns:	0			Success
 *		EBADF			fd not open for writing
 *		ESPIPE
 *		ENXIO
 *	fp_lookup:EBADF
 *	fp_guard_exception:???
 *	valid_for_random_access:ESPIPE
 *	valid_for_random_access:ENXIO
 */
static int
preparefilewrite(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pwrite)
{
	int error;
	struct fileproc *fp;

	AUDIT_ARG(fd, fd);

	/* Spin variant: only short, non-blocking work under the lock. */
	proc_fdlock_spin(p);

	/* locked == 1: fp_lookup/fp_drop assume the fdlock is already held. */
	error = fp_lookup(p, fd, &fp, 1);

	if (error) {
		proc_fdunlock(p);
		return error;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto ExitThisRoutine;
	}
	if (fp_isguarded(fp, GUARD_WRITE)) {
		/* Guarded fd forbids write; fdlock is already held here. */
		error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
		goto ExitThisRoutine;
	}
	if (check_for_pwrite) {
		/* pwrite/pwritev only: reject files without random access. */
		if ((error = valid_for_random_access(fp))) {
			goto ExitThisRoutine;
		}
	}

	*fp_ret = fp;

	proc_fdunlock(p);
	return 0;

ExitThisRoutine:
	/* Error path: release the reference taken by fp_lookup(). */
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return error;
}
722
/*
 * Common implementation of writev() and pwritev(): copy in the user iovec
 * array, build a uio, and hand it to wr_uio().
 *
 * Returns:	0			Success
 *		EINVAL			bad iovcnt or negative offset
 *		ENOMEM
 *	copyin:EFAULT
 *	wr_uio:???
 */
static int
writev_prwritev_uio(struct proc *p, int fd,
    user_addr_t user_iovp, int iovcnt, off_t offset, int is_pwritev,
    user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	struct user_iovec *iovp;

	/* Verify range before calling uio_create() */
	if (iovcnt <= 0 || iovcnt > UIO_MAXIOV || offset < 0) {
		return EINVAL;
	}

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(iovcnt, offset,
	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	    UIO_WRITE);

	/* get location of iovecs within the uio. then copyin the iovecs from
	 * user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	error = copyin_user_iovec_array(user_iovp,
	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
	    iovcnt, iovp);
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	error = uio_calculateresid(auio);
	if (error) {
		goto ExitThisRoutine;
	}

	error = wr_uio(p, fd, auio, is_pwritev, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return error;
}
772
773 /*
774 * Gather write system call
775 */
int
writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; work happens in writev_nocancel(). */
	__pthread_testcancel(1);
	return writev_nocancel(p, (struct writev_nocancel_args *)uap, retval);
}
782
int
writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
{
	/* offset 0 / is_pwritev 0: write at the current file offset. */
	return writev_prwritev_uio(p, uap->fd, uap->iovp, uap->iovcnt, 0, 0, retval);
}
788
789 /*
790 * Pwritev system call
791 */
int
sys_pwritev(struct proc *p, struct pwritev_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; work happens in sys_pwritev_nocancel(). */
	__pthread_testcancel(1);
	return sys_pwritev_nocancel(p, (struct pwritev_nocancel_args *)uap, retval);
}
798
int
sys_pwritev_nocancel(struct proc *p, struct pwritev_nocancel_args *uap, user_ssize_t *retval)
{
	/* is_pwritev 1: use the caller-supplied offset (FOF_OFFSET). */
	return writev_prwritev_uio(p, uap->fd, uap->iovp, uap->iovcnt, uap->offset, 1, retval);
}
804
/*
 * Returns: 0 Success
 * preparefilewrite:EBADF
 * preparefilewrite:ESPIPE
 * preparefilewrite:ENXIO
 * preparefilewrite:???
 * fo_write:???
 */
int
wr_uio(struct proc *p, int fd, uio_t uio, int is_pwritev, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int flags;

	/* Validate fd for writing and take a reference on the fileproc. */
	if ((error = preparefilewrite(p, &fp, fd, is_pwritev))) {
		return error;
	}

	/* pwritev uses the uio's embedded offset; writev the file offset. */
	flags = is_pwritev ? FOF_OFFSET : 0;
	error = do_uiowrite(p, fp, uio, flags, retval);

	fp_drop(p, fd, fp, 0);

	return error;
}
831
/*
 * Perform a uio-based write through the fileops, with the same partial
 * transfer and SIGPIPE semantics as dofilewrite().
 */
int
do_uiowrite(struct proc *p, struct fileproc *fp, uio_t uio, int flags, user_ssize_t *retval)
{
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	/* Remember the requested byte count to compute the transfer size. */
	count = uio_resid(uio);

	/* I/O runs with the credential the file was opened with. */
	context.vc_ucred = fp->f_cred;
	error = fo_write(fp, uio, flags, &context);
	if (error) {
		/* A partial transfer interrupted by a signal or would-block
		 * condition is reported as a successful short write. */
		if (uio_resid(uio) != count && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK)) {
			error = 0;
		}
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
		    (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) == 0) {
			psignal(p, SIGPIPE);
		}
	}
	count -= uio_resid(uio);
	if (count) {
		/* Any bytes transferred marks the fileglob as written-to. */
		os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
	}
	*retval = count;

	return error;
}
862
863 /*
864 * Returns: 0 Success
865 * preparefileread:EBADF
866 * preparefileread:ESPIPE
867 * preparefileread:ENXIO
868 * fo_read:???
869 */
int
rd_uio(struct proc *p, int fdes, uio_t uio, int is_preadv, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	/* Validate fd for reading and take a reference on the fileproc. */
	if ((error = preparefileread(p, &fp, fdes, is_preadv))) {
		return error;
	}

	/* Remember the requested byte count to compute the transfer size. */
	count = uio_resid(uio);

	/* I/O runs with the credential the file was opened with. */
	context.vc_ucred = fp->f_cred;

	/* preadv uses the uio's embedded offset; readv the file offset. */
	int flags = is_preadv ? FOF_OFFSET : 0;
	error = fo_read(fp, uio, flags, &context);

	if (error) {
		/* A partial transfer interrupted by a signal or would-block
		 * condition is reported as a successful short read. */
		if (uio_resid(uio) != count && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK)) {
			error = 0;
		}
	}
	*retval = count - uio_resid(uio);

	fp_drop(p, fdes, fp, 0);

	return error;
}
901
902 /*
903 * Ioctl system call
904 *
905 * Returns: 0 Success
906 * EBADF
907 * ENOTTY
908 * ENOMEM
909 * ESRCH
910 * copyin:EFAULT
 * copyout:EFAULT
912 * fp_lookup:EBADF Bad file descriptor
913 * fo_ioctl:???
914 */
int
ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
{
	struct fileproc *fp = NULL;
	int error = 0;
	u_int size = 0;
	caddr_t datap = NULL, memp = NULL;
	boolean_t is64bit = FALSE;
	int tmp = 0;
#define STK_PARAMS 128
	/* Small argument buffers live on the stack; larger ones are heap
	 * allocated below. */
	char stkbuf[STK_PARAMS] = {};
	int fd = uap->fd;
	u_long com = uap->com;
	struct vfs_context context = *vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(addr, uap->data);

	is64bit = proc_is64bit(p);
#if CONFIG_AUDIT
	if (is64bit) {
		AUDIT_ARG(value64, com);
	} else {
		AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com));
	}
#endif /* CONFIG_AUDIT */

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		return ENOTTY;
	}
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)kalloc_data(size, Z_WAITOK);
		if (memp == 0) {
			return ENOMEM;
		}
		datap = memp;
	} else {
		datap = &stkbuf[0];
	}
	if (com & IOC_IN) {
		if (size) {
			/* Copy the in-parameter from user space before
			 * taking any locks or looking up the fd. */
			error = copyin(uap->data, datap, size);
			if (error) {
				goto out_nofp;
			}
		} else {
			/* XXX - IOC_IN and no size? we should probably return an error here!! */
			if (is64bit) {
				*(user_addr_t *)datap = uap->data;
			} else {
				*(uint32_t *)datap = (uint32_t)uap->data;
			}
		}
	} else if ((com & IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(datap, size);
	} else if (com & IOC_VOID) {
		/* XXX - this is odd since IOC_VOID means no parameters */
		if (is64bit) {
			*(user_addr_t *)datap = uap->data;
		} else {
			*(uint32_t *)datap = (uint32_t)uap->data;
		}
	}

	/* fdlock is held across the fo_ioctl dispatch below. */
	proc_fdlock(p);
	error = fp_lookup(p, fd, &fp, 1);
	if (error) {
		proc_fdunlock(p);
		goto out_nofp;
	}

	AUDIT_ARG(file, p, fp);

	/* ioctl requires the fd be open for reading or writing. */
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	context.vc_ucred = fp->fp_glob->fg_cred;

#if CONFIG_MACF
	error = mac_file_check_ioctl(context.vc_ucred, fp->fp_glob, com);
	if (error) {
		goto out;
	}
#endif

	/* Generic commands handled here; everything else goes to fo_ioctl. */
	switch (com) {
	case FIONCLEX:
		fp->fp_flags &= ~FP_CLOEXEC;
		break;

	case FIOCLEX:
		fp->fp_flags |= FP_CLOEXEC;
		break;

	case FIONBIO:
		// FIXME (rdar://54898652)
		//
		// this code is broken if fcntl(F_SETFL), ioctl() are
		// called concurrently for the same fileglob.
		if ((tmp = *(int *)datap)) {
			os_atomic_or(&fp->f_flag, FNONBLOCK, relaxed);
		} else {
			os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
		}
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
		break;

	case FIOASYNC:
		// FIXME (rdar://54898652)
		//
		// this code is broken if fcntl(F_SETFL), ioctl() are
		// called concurrently for the same fileglob.
		if ((tmp = *(int *)datap)) {
			os_atomic_or(&fp->f_flag, FASYNC, relaxed);
		} else {
			os_atomic_andnot(&fp->f_flag, FASYNC, relaxed);
		}
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
		break;

	case FIOSETOWN:
		tmp = *(int *)datap;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets store the pgid directly. */
			((struct socket *)fp_get_data(fp))->so_pgid = tmp;
			break;
		}
		if (fp->f_type == DTYPE_PIPE) {
			error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
			break;
		}
		if (tmp <= 0) {
			/* Negative values name a process group id directly. */
			tmp = -tmp;
		} else {
			/* Positive values name a pid; translate to its pgid. */
			struct proc *p1 = proc_find(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrpid;
			proc_rele(p1);
		}
		error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			*(int *)datap = ((struct socket *)fp_get_data(fp))->so_pgid;
			break;
		}
		error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
		/* TIOCGPGRP returns a pgrp; FIOGETOWN negates it. */
		*(int *)datap = -*(int *)datap;
		break;

	default:
		error = fo_ioctl(fp, com, datap, &context);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com & IOC_OUT) && size) {
			error = copyout(datap, uap->data, (u_int)size);
		}
		break;
	}
out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);

out_nofp:
	if (memp) {
		kfree_data(memp, size);
	}
	return error;
}
1100
1101 int selwait;
1102 #define SEL_FIRSTPASS 1
1103 #define SEL_SECONDPASS 2
1104 extern int selcontinue(int error);
1105 extern int selprocess(int error, int sel_pass);
1106 static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata,
1107 int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset);
1108 static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
1109 static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup);
1110 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd, int lim);
1111 static int select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval);
1112
1113 /*
1114 * This is used for the special device nodes that do not implement
1115 * a proper kevent filter (see filt_specattach).
1116 *
1117 * In order to enable kevents on those, the spec_filtops will pretend
1118 * to call select, and try to sniff the selrecord(), if it observes one,
1119 * the knote is attached, which pairs with selwakeup() or selthreadclear().
1120 *
1121 * The last issue remaining, is that we need to serialize filt_specdetach()
1122 * with this, but it really can't know the "selinfo" or any locking domain.
 * To make up for this, we protect knote list operations with a global lock,
 * which gives us a safe shared locking domain.
1125 *
1126 * Note: It is a little distasteful, but we really have very few of those.
1127 * The big problem here is that sharing a lock domain without
1128 * any kind of shared knowledge is a little complicated.
1129 *
1130 * 1. filters can really implement their own kqueue integration
1131 * to side step this,
1132 *
1133 * 2. There's an opportunity to pick a private lock in selspec_attach()
1134 * because both the selinfo and the knote are locked at that time.
1135 * The cleanup story is however a little complicated.
1136 */
1137 static LCK_GRP_DECLARE(selspec_grp, "spec_filtops");
1138 static LCK_SPIN_DECLARE(selspec_lock, &selspec_grp);
1139
1140 /*
1141 * The "primitive" lock is held.
1142 * The knote lock is held.
1143 */
1144 void
selspec_attach(struct knote * kn,struct selinfo * si)1145 selspec_attach(struct knote *kn, struct selinfo *si)
1146 {
1147 struct selinfo *cur = os_atomic_load(&kn->kn_hook, relaxed);
1148
1149 if (cur == NULL) {
1150 si->si_flags |= SI_SELSPEC;
1151 lck_spin_lock(&selspec_lock);
1152 kn->kn_hook = si;
1153 KNOTE_ATTACH(&si->si_note, kn);
1154 lck_spin_unlock(&selspec_lock);
1155 } else {
1156 /*
1157 * selspec_attach() can be called from e.g. filt_spectouch()
1158 * which might be called before any event was dequeued.
1159 *
1160 * It is hence not impossible for the knote already be hooked.
1161 *
1162 * Note that selwakeup_internal() could possibly
1163 * already have cleared this pointer. This is a race
1164 * that filt_specprocess will debounce.
1165 */
1166 assert(si->si_flags & SI_SELSPEC);
1167 assert(cur == si);
1168 }
1169 }
1170
1171 /*
1172 * The "primitive" lock is _not_ held.
1173 * The knote lock is held.
1174 */
1175 void
selspec_detach(struct knote * kn)1176 selspec_detach(struct knote *kn)
1177 {
1178 /*
1179 * kn_hook always becomes non NULL under the knote lock.
1180 * Seeing "NULL" can't be a false positive.
1181 */
1182 if (kn->kn_hook == NULL) {
1183 return;
1184 }
1185
1186 lck_spin_lock(&selspec_lock);
1187 if (kn->kn_hook) {
1188 struct selinfo *sip = kn->kn_hook;
1189
1190 kn->kn_hook = NULL;
1191 KNOTE_DETACH(&sip->si_note, kn);
1192 }
1193 lck_spin_unlock(&selspec_lock);
1194 }
1195
1196 /*
1197 * Select system call.
1198 *
1199 * Returns: 0 Success
1200 * EINVAL Invalid argument
1201 * EAGAIN Nonconformant error if allocation fails
1202 */
1203 int
select(struct proc * p,struct select_args * uap,int32_t * retval)1204 select(struct proc *p, struct select_args *uap, int32_t *retval)
1205 {
1206 __pthread_testcancel(1);
1207 return select_nocancel(p, (struct select_nocancel_args *)uap, retval);
1208 }
1209
1210 int
select_nocancel(struct proc * p,struct select_nocancel_args * uap,int32_t * retval)1211 select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
1212 {
1213 uint64_t timeout = 0;
1214
1215 if (uap->tv) {
1216 int err;
1217 struct timeval atv;
1218 if (IS_64BIT_PROCESS(p)) {
1219 struct user64_timeval atv64;
1220 err = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1221 /* Loses resolution - assume timeout < 68 years */
1222 atv.tv_sec = (__darwin_time_t)atv64.tv_sec;
1223 atv.tv_usec = atv64.tv_usec;
1224 } else {
1225 struct user32_timeval atv32;
1226 err = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
1227 atv.tv_sec = atv32.tv_sec;
1228 atv.tv_usec = atv32.tv_usec;
1229 }
1230 if (err) {
1231 return err;
1232 }
1233
1234 if (itimerfix(&atv)) {
1235 err = EINVAL;
1236 return err;
1237 }
1238
1239 clock_absolutetime_interval_to_deadline(tvtoabstime(&atv), &timeout);
1240 }
1241
1242 return select_internal(p, uap, timeout, retval);
1243 }
1244
1245 int
pselect(struct proc * p,struct pselect_args * uap,int32_t * retval)1246 pselect(struct proc *p, struct pselect_args *uap, int32_t *retval)
1247 {
1248 __pthread_testcancel(1);
1249 return pselect_nocancel(p, (struct pselect_nocancel_args *)uap, retval);
1250 }
1251
/*
 * pselect_nocancel
 *
 * Core of pselect(): optionally installs a temporary signal mask and an
 * absolute deadline, then runs the common select path (select_internal),
 * restoring the original mask on direct (non-EINTR) return.
 */
int
pselect_nocancel(struct proc *p, struct pselect_nocancel_args *uap, int32_t *retval)
{
	int err;
	struct uthread *ut;
	uint64_t timeout = 0;

	if (uap->ts) {
		struct timespec ts;

		/* copy in the timespec using the caller's ABI layout */
		if (IS_64BIT_PROCESS(p)) {
			struct user64_timespec ts64;
			err = copyin(uap->ts, (caddr_t)&ts64, sizeof(ts64));
			ts.tv_sec = (__darwin_time_t)ts64.tv_sec;
			ts.tv_nsec = (long)ts64.tv_nsec;
		} else {
			struct user32_timespec ts32;
			err = copyin(uap->ts, (caddr_t)&ts32, sizeof(ts32));
			ts.tv_sec = ts32.tv_sec;
			ts.tv_nsec = ts32.tv_nsec;
		}
		if (err) {
			return err;
		}

		if (!timespec_is_valid(&ts)) {
			return EINVAL;
		}
		/* convert the relative interval into an absolute deadline */
		clock_absolutetime_interval_to_deadline(tstoabstime(&ts), &timeout);
	}

	ut = current_uthread();

	if (uap->mask != USER_ADDR_NULL) {
		/* save current mask, then copyin and set new mask */
		sigset_t newset;
		err = copyin(uap->mask, &newset, sizeof(sigset_t));
		if (err) {
			return err;
		}
		/* UT_SAS_OLDMASK tells the signal path a saved mask is pending */
		ut->uu_oldmask = ut->uu_sigmask;
		ut->uu_flag |= UT_SAS_OLDMASK;
		/* sigcantmask (SIGKILL/SIGSTOP) can never be blocked */
		ut->uu_sigmask = (newset & ~sigcantmask);
	}

	err = select_internal(p, (struct select_nocancel_args *)uap, timeout, retval);

	if (err != EINTR && ut->uu_flag & UT_SAS_OLDMASK) {
		/*
		 * Restore old mask (direct return case). NOTE: EINTR can also be returned
		 * if the thread is cancelled. In that case, we don't reset the signal
		 * mask to its original value (which usually happens in the signal
		 * delivery path). This behavior is permitted by POSIX.
		 */
		ut->uu_sigmask = ut->uu_oldmask;
		ut->uu_oldmask = 0;
		ut->uu_flag &= ~UT_SAS_OLDMASK;
	}

	return err;
}
1313
1314 void
select_cleanup_uthread(struct _select * sel)1315 select_cleanup_uthread(struct _select *sel)
1316 {
1317 kfree_data(sel->ibits, 2 * sel->nbytes);
1318 sel->ibits = sel->obits = NULL;
1319 sel->nbytes = 0;
1320 }
1321
1322 static int
select_grow_uthread_cache(struct _select * sel,uint32_t nbytes)1323 select_grow_uthread_cache(struct _select *sel, uint32_t nbytes)
1324 {
1325 uint32_t *buf;
1326
1327 buf = kalloc_data(2 * nbytes, Z_WAITOK | Z_ZERO);
1328 if (buf) {
1329 select_cleanup_uthread(sel);
1330 sel->ibits = buf;
1331 sel->obits = buf + nbytes / sizeof(uint32_t);
1332 sel->nbytes = nbytes;
1333 return true;
1334 }
1335 return false;
1336 }
1337
1338 static void
select_bzero_uthread_cache(struct _select * sel)1339 select_bzero_uthread_cache(struct _select *sel)
1340 {
1341 bzero(sel->ibits, sel->nbytes * 2);
1342 }
1343
1344 /*
1345 * Generic implementation of {,p}select. Care: we type-pun uap across the two
1346 * syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
1347 * are identical. The 5th (timeout) argument points to different types, so we
1348 * unpack in the syscall-specific code, but the generic code still does a null
1349 * check on this argument to determine if a timeout was specified.
1350 */
static int
select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval)
{
	int error = 0;
	u_int ni, nw;
	thread_t th_act;
	struct uthread *uth;
	struct _select *sel;
	struct _select_data *seldata;
	int count = 0;
	size_t sz = 0;

	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	seldata = &uth->uu_save.uus_select_data;
	*retval = 0;

	/* stash call state in the uthread so selprocess()/selcontinue() can find it */
	seldata->args = uap;
	seldata->retval = retval;
	seldata->wqp = NULL;
	seldata->count = 0;

	if (uap->nd < 0) {
		return EINVAL;
	}

	/* clamp nd to the size of the process's open file table */
	if (uap->nd > p->p_fd.fd_nfiles) {
		uap->nd = p->p_fd.fd_nfiles; /* forgiving; slightly wrong */
	}
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * if the previously allocated space for the bits is smaller than
	 * what is requested or no space has yet been allocated for this
	 * thread, allocate enough space now.
	 *
	 * Note: If this process fails, select() will return EAGAIN; this
	 * is the same thing poll() returns in a no-memory situation, but
	 * it is not a POSIX compliant error code for select().
	 */
	if (sel->nbytes < (3 * ni)) {
		if (!select_grow_uthread_cache(sel, 3 * ni)) {
			return EAGAIN;
		}
	} else {
		select_bzero_uthread_cache(sel);
	}

	/*
	 * get the bits from the user address space
	 * (three vectors: read (in), write (ou), exception (ex))
	 */
#define getbits(name, x) \
	do { \
	        if (uap->name && (error = copyin(uap->name, \
	                (caddr_t)&sel->ibits[(x) * nw], ni))) \
	                goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	seldata->abstime = timeout;

	/* count the fds being selected and take an fp_iocount reference on each */
	if ((error = selcount(p, sel->ibits, uap->nd, &count))) {
		goto continuation;
	}

	/*
	 * We need an array of waitq pointers. This is due to the new way
	 * in which waitqs are linked to sets. When a thread selects on a
	 * file descriptor, a waitq (embedded in a selinfo structure) is
	 * added to the thread's local waitq set. There is no longer any
	 * way to directly iterate over all members of a given waitq set.
	 * The process of linking a waitq into a set may allocate a link
	 * table object. Because we can't iterate over all the waitqs to
	 * which our thread waitq set belongs, we need a way of removing
	 * this link object!
	 *
	 * Thus we need a buffer which will hold one waitq pointer
	 * per FD being selected. During the tear-down phase we can use
	 * these pointers to dis-associate the underlying selinfo's waitq
	 * from our thread's waitq set.
	 *
	 * Because we also need to allocate a waitq set for this thread,
	 * we use a bare buffer pointer to hold all the memory. Note that
	 * this memory is cached in the thread pointer and not reaped until
	 * the thread exits. This is generally OK because threads that
	 * call select tend to keep calling select repeatedly.
	 */
	sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t));
	if (sz > uth->uu_wqstate_sz) {
		/* (re)allocate a buffer to hold waitq pointers */
		if (uth->uu_wqset) {
			if (waitq_set_is_valid(uth->uu_wqset)) {
				waitq_set_deinit(uth->uu_wqset);
			}
			kheap_free(KM_SELECT, uth->uu_wqset, uth->uu_wqstate_sz);
		} else if (uth->uu_wqstate_sz && !uth->uu_wqset) {
			panic("select: thread structure corrupt! "
			    "uu_wqstate_sz:%ld, wqstate_buf == NULL",
			    uth->uu_wqstate_sz);
		}
		uth->uu_wqstate_sz = sz;
		uth->uu_wqset = kheap_alloc(KM_SELECT, sz, Z_WAITOK);
		if (!uth->uu_wqset) {
			panic("can't allocate %ld bytes for wqstate buffer",
			    uth->uu_wqstate_sz);
		}
		waitq_set_init(uth->uu_wqset, SYNC_POLICY_FIFO);
	}

	/* the set may have been deinit'ed by a prior selprocess() teardown */
	if (!waitq_set_is_valid(uth->uu_wqset)) {
		waitq_set_init(uth->uu_wqset, SYNC_POLICY_FIFO);
	}

	/* the last chunk of our buffer is an array of waitq pointers */
	seldata->wqp = (uint64_t *)((char *)(uth->uu_wqset) + ALIGN(sizeof(struct waitq_set)));
	bzero(seldata->wqp, sz - ALIGN(sizeof(struct waitq_set)));

	seldata->count = count;

continuation:

	if (error) {
		/*
		 * We have already cleaned up any state we established,
		 * either locally or as a result of selcount(). We don't
		 * need to wait_subqueue_unlink_all(), since we haven't set
		 * anything at this point.
		 */
		return error;
	}

	return selprocess(0, SEL_FIRSTPASS);
}
1490
/*
 * selcontinue
 *
 * Continuation entry point: invoked when the thread resumes after blocking
 * in selprocess() (via tsleep1), re-running the scan as the second pass.
 */
int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}
1496
1497
1498 /*
1499 * selprocess
1500 *
1501 * Parameters: error The error code from our caller
1502 * sel_pass The pass we are on
1503 */
int
selprocess(int error, int sel_pass)
{
	u_int ni, nw;
	thread_t th_act;
	struct uthread *uth;
	struct proc *p;
	struct select_nocancel_args *uap;
	int *retval;
	struct _select *sel;
	struct _select_data *seldata;
	int unwind = 1;
	int prepost = 0;
	int somewakeup = 0;
	int doretry = 0;
	wait_result_t wait_result;

	/* recover the select state stashed in the uthread by select_internal() */
	p = current_proc();
	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	seldata = &uth->uu_save.uus_select_data;
	uap = seldata->args;
	retval = seldata->retval;

	/* nothing to unwind if the first pass errored before setup, or no fds */
	if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) {
		unwind = 0;
	}
	if (seldata->count == 0) {
		unwind = 0;
	}
retry:
	if (error != 0) {
		goto done;
	}

	OSBitOrAtomic(P_SELECT, &p->p_flag);

	/* skip scans if the select is just for timeouts */
	if (seldata->count) {
		error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, uth->uu_wqset);
		if (error || *retval) {
			goto done;
		}
		if (prepost || somewakeup) {
			/*
			 * If we woke due to a prepost or a partial wakeup,
			 * someone else may already have consumed the data;
			 * go back and select again if time permits.
			 */
			prepost = 0;
			somewakeup = 0;
			doretry = 1;
		}
	}

	/* a timed select that has reached its deadline is done */
	if (uap->tv) {
		uint64_t now;

		clock_get_uptime(&now);
		if (now >= seldata->abstime) {
			goto done;
		}
	}

	if (doretry) {
		/* cleanup obits and try again */
		doretry = 0;
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	/*
	 * To effect a poll, the timeout argument should be
	 * non-nil, pointing to a zero-valued timeval structure.
	 */
	if (uap->tv && seldata->abstime == 0) {
		goto done;
	}

	/* No spurious wakeups due to collisions, no need to check for them */
	if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);

	/* if the select is just for timeout skip check */
	if (seldata->count && (sel_pass == SEL_SECONDPASS)) {
		panic("selprocess: 2nd pass assertwaiting");
	}

	/* waitq_set has waitqueue as first element */
	wait_result = waitq_assert_wait64_leeway((struct waitq *)uth->uu_wqset,
	    NO_EVENT64, THREAD_ABORTSAFE,
	    TIMEOUT_URGENCY_USER_NORMAL,
	    seldata->abstime,
	    TIMEOUT_NO_LEEWAY);
	if (wait_result != THREAD_AWAKENED) {
		/* there are no preposted events; block (resumes in selcontinue) */
		error = tsleep1(NULL, PSOCK | PCATCH,
		    "select", 0, selcontinue);
	} else {
		prepost = 1;
		error = 0;
	}

	if (error == 0) {
		sel_pass = SEL_SECONDPASS;
		if (!prepost) {
			somewakeup = 1;
		}
		goto retry;
	}
done:
	if (unwind) {
		/* drop the fp_iocount refs and waitq links taken during setup */
		seldrop(p, sel->ibits, uap->nd, seldata->count);
		waitq_set_deinit(uth->uu_wqset);
		/*
		 * zero out the waitq pointer array to avoid use-after free
		 * errors in the selcount error path (seldrop_locked) if/when
		 * the thread re-calls select().
		 */
		bzero((void *)uth->uu_wqset, uth->uu_wqstate_sz);
	}
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* select is not restarted after signals... */
	if (error == ERESTART) {
		error = EINTR;
	}
	if (error == EWOULDBLOCK) {
		/* a plain timeout is a successful return of zero ready fds */
		error = 0;
	}
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/* copy the output bit vectors back to user space on success */
#define putbits(name, x) \
	do { \
	        if (uap->name && (error2 = \
	                copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
	                error = error2; \
	} while (0)

	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

	if (error != EINTR && sel_pass == SEL_SECONDPASS && uth->uu_flag & UT_SAS_OLDMASK) {
		/* restore signal mask - continuation case */
		uth->uu_sigmask = uth->uu_oldmask;
		uth->uu_oldmask = 0;
		uth->uu_flag &= ~UT_SAS_OLDMASK;
	}

	return error;
}
1666
1667
1668 /**
1669 * remove the fileproc's underlying waitq from the supplied waitq set;
1670 * clear FP_INSELECT when appropriate
1671 *
1672 * Parameters:
1673 * fp File proc that is potentially currently in select
1674 * wqset Waitq set to which the fileproc may belong
1675 * (usually this is the thread's private waitq set)
1676 * Conditions:
1677 * proc_fdlock is held
1678 */
static void
selunlinkfp(struct fileproc *fp, uint64_t wqp_id, struct waitq_set *wqset)
{
	int valid_set = waitq_set_is_valid(wqset);
	int valid_q = !!wqp_id;

	/*
	 * This could be called (from selcount error path) before we setup
	 * the thread's wqset. Check the wqset passed in, and only unlink if
	 * the set is valid.
	 */

	/* unlink the underlying waitq from the input set (thread waitq set) */
	if (valid_q && valid_set) {
		waitq_unlink_by_prepost_id(wqp_id, wqset);
	}

	/*
	 * We can always remove the conflict queue from our thread's set: this
	 * will not affect other threads that potentially need to be awoken on
	 * the conflict queue during a fileproc_drain - those sets will still
	 * be linked with the global conflict queue, and the last waiter
	 * on the fp clears the CONFLICT marker.
	 */
	if (valid_set && (fp->fp_flags & FP_SELCONFLICT)) {
		waitq_unlink(&select_conflict_queue, wqset);
	}

	/*
	 * Only clear FP_INSELECT if this thread's wqset is the one recorded
	 * on the fileproc; another thread may have taken over the slot.
	 */
	if (valid_set && (fp->fp_flags & FP_INSELECT)) {
		if (fp->fp_guard_attrs) {
			/* guarded fds keep their wqset in the guard structure */
			if (fp->fp_guard->fpg_wset == wqset) {
				fp->fp_guard->fpg_wset = NULL;
				fp->fp_flags &= ~FP_INSELECT;
			}
		} else {
			if (fp->fp_wset == wqset) {
				fp->fp_wset = NULL;
				fp->fp_flags &= ~FP_INSELECT;
			}
		}
	}
}
1721
1722 /**
1723 * connect a fileproc to the given wqset, potentially bridging to a waitq
1724 * pointed to indirectly by wq_data
1725 *
1726 * Parameters:
1727 * fp File proc potentially currently in select
1728 * wq_data Pointer to a pointer to a waitq (could be NULL)
1729 * wqset Waitq set to which the fileproc should now belong
1730 * (usually this is the thread's private waitq set)
1731 *
1732 * Conditions:
1733 * proc_fdlock is held
1734 */
static uint64_t
sellinkfp(struct fileproc *fp, void **wq_data, struct waitq_set *wqset)
{
	struct waitq *f_wq = NULL;

	if ((fp->fp_flags & FP_INSELECT) == 0) {
		/* first selector: record our wqset on the fileproc */
		if (fp->fp_guard_attrs) {
			fp->fp_guard->fpg_wset = wqset;
		} else {
			fp->fp_wset = wqset;
		}
		fp->fp_flags |= FP_INSELECT;
	} else {
		/*
		 * Another thread is already selecting this fp: fall back to
		 * the global conflict queue so all selectors can be woken.
		 */
		fp->fp_flags |= FP_SELCONFLICT;
		waitq_link(&select_conflict_queue, wqset, WAITQ_SHOULD_LOCK, NULL);
	}

	/*
	 * The wq_data parameter has potentially been set by selrecord called
	 * from a subsystems fo_select() function. If the subsystem does not
	 * call selrecord, then wq_data will be NULL
	 *
	 * Use memcpy to get the value into a proper pointer because
	 * wq_data most likely points to a stack variable that could be
	 * unaligned on 32-bit systems.
	 */
	if (wq_data) {
		memcpy(&f_wq, wq_data, sizeof(f_wq));
		if (!waitq_is_valid(f_wq)) {
			f_wq = NULL;
		}
	}

	/* handles NULL f_wq */
	return waitq_get_prepost_id(f_wq);
}
1771
1772
1773 /*
1774 * selscan
1775 *
1776 * Parameters: p Process performing the select
1777 * sel The per-thread select context structure
1778 * nfd The number of file descriptors to scan
1779 * retval The per thread system call return area
1780 * sel_pass Which pass this is; allowed values are
1781 * SEL_FIRSTPASS and SEL_SECONDPASS
1782 * wqset The per thread wait queue set
1783 *
1784 * Returns: 0 Success
1785 * EIO Invalid p->p_fd field XXX Obsolete?
1786 * EBADF One of the files in the bit vector is
1787 * invalid.
1788 */
static int
selscan(struct proc *p, struct _select *sel, struct _select_data * seldata,
    int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset)
{
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;              /* count of bits */
	int nc = 0;             /* bit vector offset (nc'th bit) */
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	waitq_ref_t reserved_link, *rl_ptr = NULL;
	int count;
	struct vfs_context context = *vfs_context_current();

	ibits = sel->ibits;
	obits = sel->obits;

	nw = howmany(nfd, NFDBITS);

	count = seldata->count;

	nc = 0;
	if (!count) {
		/* no fds to scan: report zero ready descriptors */
		*retval = 0;
		return 0;
	}

	if (sel_pass == SEL_FIRSTPASS) {
		/*
		 * Make sure the waitq-set is all clean:
		 *
		 * select loops until it finds at least one event, however it
		 * doesn't mean that the event that woke up select is still
		 * fired by the time the second pass runs, and then
		 * select_internal will loop back to a first pass.
		 */
		waitq_set_reset_anon_prepost(wqset);
	}

	proc_fdlock(p);
	/* iterate the three vectors: read, write, exception */
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		optr = (u_int32_t *)&obits[msk * nw];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i / NFDBITS];

			/* visit each set bit (fd) in this word */
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1U << j);

				fp = fp_get_noref_locked(p, fd);
				if (fp == NULL) {
					/*
					 * If we abort because of a bad
					 * fd, let the caller unwind...
					 */
					proc_fdunlock(p);
					return EBADF;
				}
				if (sel_pass == SEL_SECONDPASS) {
					/* second pass tears down first-pass links */
					reserved_link = WAITQ_REF_NULL;
					rl_ptr = NULL;
					selunlinkfp(fp, seldata->wqp[nc], wqset);
				} else {
					/* first pass: pre-reserve a link for selrecord() */
					reserved_link = waitq_link_reserve();
					rl_ptr = &reserved_link;
					waitq_set_lazy_init_link(wqset);
				}

				context.vc_ucred = fp->f_cred;

				/*
				 * stash this value b/c fo_select may replace
				 * reserved_link with a pointer to a waitq object
				 */
				waitq_ref_t rsvd = reserved_link;

				/* The select; set the bit, if true */
				if (fp->f_ops && fp->f_type
				    && fo_select(fp, flag[msk], rl_ptr, &context)) {
					optr[fd / NFDBITS] |= (1U << (fd % NFDBITS));
					n++;
				}
				if (sel_pass == SEL_FIRSTPASS) {
					/*
					 * If the fp's supporting selinfo structure was linked
					 * to this thread's waitq set, then 'reserved_link'
					 * will have been updated by selrecord to be a pointer
					 * to the selinfo's waitq.
					 */
					if (reserved_link.wqr_value == rsvd.wqr_value) {
						waitq_link_release(reserved_link);
						rl_ptr = NULL; /* fo_select never called selrecord() */
					}

					/*
					 * Hook up the thread's waitq set either to
					 * the fileproc structure, or to the global
					 * conflict queue: but only on the first
					 * select pass.
					 */
					seldata->wqp[nc] = sellinkfp(fp, (void **)rl_ptr, wqset);
				}
				nc++;
			}
		}
	}
	proc_fdunlock(p);

	*retval = n;
	return 0;
}
1904
1905 static int poll_callback(struct kevent_qos_s *, kevent_ctx_t);
1906
1907 int
poll(struct proc * p,struct poll_args * uap,int32_t * retval)1908 poll(struct proc *p, struct poll_args *uap, int32_t *retval)
1909 {
1910 __pthread_testcancel(1);
1911 return poll_nocancel(p, (struct poll_nocancel_args *)uap, retval);
1912 }
1913
1914
/*
 * poll_nocancel
 *
 * Implements poll() on top of a private kqueue: each pollfd entry is
 * translated into one or more one-shot kevents, then kqueue_scan waits
 * for and translates results back (via poll_callback) into revents.
 */
int
poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
{
	struct pollfd *fds = NULL;
	struct kqueue *kq = NULL;
	int error = 0;
	u_int nfds = uap->nfds;
	u_int rfds = 0;         /* count of fds with revents set (incl. POLLNVAL) */
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE);
	size_t ni = nfds * sizeof(struct pollfd);

	/*
	 * This is kinda bogus. We have fd limits, but that is not
	 * really related to the size of the pollfd array. Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits. We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	if (nfds > OPEN_MAX ||
	    (nfds > nofile && (proc_suser(p) || nfds > FD_SETSIZE))) {
		return EINVAL;
	}

	kq = kqueue_alloc(p);
	if (kq == NULL) {
		return EAGAIN;
	}

	if (nfds) {
		fds = (struct pollfd *)kalloc_data(ni, Z_WAITOK);
		if (NULL == fds) {
			error = EAGAIN;
			goto out;
		}

		error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
		if (error) {
			goto out;
		}
	}

	/* JMM - all this P_SELECT stuff is bogus */
	OSBitOrAtomic(P_SELECT, &p->p_flag);
	for (u_int i = 0; i < nfds; i++) {
		short events = fds[i].events;
		__assert_only int rc;

		/* per spec, ignore fd values below zero */
		if (fds[i].fd < 0) {
			fds[i].revents = 0;
			continue;
		}

		/* convert the poll event into a kqueue kevent */
		struct kevent_qos_s kev = {
			.ident = fds[i].fd,
			.flags = EV_ADD | EV_ONESHOT | EV_POLL,
			.udata = CAST_USER_ADDR_T(&fds[i])
		};

		/* Handle input events */
		if (events & (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP)) {
			kev.filter = EVFILT_READ;
			if (events & (POLLPRI | POLLRDBAND)) {
				kev.flags |= EV_OOBAND;
			}
			rc = kevent_register(kq, &kev, NULL);
			assert((rc & FILTER_REGISTER_WAIT) == 0);
		}

		/* Handle output events */
		if ((kev.flags & EV_ERROR) == 0 &&
		    (events & (POLLOUT | POLLWRNORM | POLLWRBAND))) {
			kev.filter = EVFILT_WRITE;
			rc = kevent_register(kq, &kev, NULL);
			assert((rc & FILTER_REGISTER_WAIT) == 0);
		}

		/* Handle BSD extension vnode events */
		if ((kev.flags & EV_ERROR) == 0 &&
		    (events & (POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE))) {
			kev.filter = EVFILT_VNODE;
			kev.fflags = 0;
			if (events & POLLEXTEND) {
				kev.fflags |= NOTE_EXTEND;
			}
			if (events & POLLATTRIB) {
				kev.fflags |= NOTE_ATTRIB;
			}
			if (events & POLLNLINK) {
				kev.fflags |= NOTE_LINK;
			}
			if (events & POLLWRITE) {
				kev.fflags |= NOTE_WRITE;
			}
			rc = kevent_register(kq, &kev, NULL);
			assert((rc & FILTER_REGISTER_WAIT) == 0);
		}

		/* a registration failure is reported to the caller as POLLNVAL */
		if (kev.flags & EV_ERROR) {
			fds[i].revents = POLLNVAL;
			rfds++;
		} else {
			fds[i].revents = 0;
		}
	}

	/*
	 * Did we have any trouble registering?
	 * If user space passed 0 FDs, then respect any timeout value passed.
	 * This is an extremely inefficient sleep. If user space passed one or
	 * more FDs, and we had trouble registering _all_ of them, then bail
	 * out. If a subset of the provided FDs failed to register, then we
	 * will still call the kqueue_scan function.
	 */
	if (nfds && (rfds == nfds)) {
		goto done;
	}

	/* scan for, and possibly wait for, the kevents to trigger */
	kevent_ctx_t kectx = kevent_get_context(current_thread());
	*kectx = (struct kevent_ctx_s){
		.kec_process_noutputs = rfds,
		.kec_process_flags = KEVENT_FLAG_POLL,
		.kec_deadline = 0, /* wait forever */
	};

	/*
	 * If any events have trouble registering, an event has fired and we
	 * shouldn't wait for events in kqueue_scan.
	 */
	if (rfds) {
		kectx->kec_process_flags |= KEVENT_FLAG_IMMEDIATE;
	} else if (uap->timeout != -1) {
		/* poll timeout is in milliseconds */
		clock_interval_to_deadline(uap->timeout, NSEC_PER_MSEC,
		    &kectx->kec_deadline);
	}

	error = kqueue_scan(kq, kectx->kec_process_flags, kectx, poll_callback);
	rfds = kectx->kec_process_noutputs;

done:
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* poll is not restarted after signals... */
	if (error == ERESTART) {
		error = EINTR;
	}
	if (error == 0) {
		/* write the updated revents back to user space */
		error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
		*retval = rfds;
	}

out:
	kfree_data(fds, ni);

	kqueue_dealloc(kq);
	return error;
}
2073
2074 static int
poll_callback(struct kevent_qos_s * kevp,kevent_ctx_t kectx)2075 poll_callback(struct kevent_qos_s *kevp, kevent_ctx_t kectx)
2076 {
2077 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
2078 short prev_revents = fds->revents;
2079 short mask = 0;
2080
2081 /* convert the results back into revents */
2082 if (kevp->flags & EV_EOF) {
2083 fds->revents |= POLLHUP;
2084 }
2085 if (kevp->flags & EV_ERROR) {
2086 fds->revents |= POLLERR;
2087 }
2088
2089 switch (kevp->filter) {
2090 case EVFILT_READ:
2091 if (fds->revents & POLLHUP) {
2092 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND);
2093 } else {
2094 mask = (POLLIN | POLLRDNORM);
2095 if (kevp->flags & EV_OOBAND) {
2096 mask |= (POLLPRI | POLLRDBAND);
2097 }
2098 }
2099 fds->revents |= (fds->events & mask);
2100 break;
2101
2102 case EVFILT_WRITE:
2103 if (!(fds->revents & POLLHUP)) {
2104 fds->revents |= (fds->events & (POLLOUT | POLLWRNORM | POLLWRBAND));
2105 }
2106 break;
2107
2108 case EVFILT_VNODE:
2109 if (kevp->fflags & NOTE_EXTEND) {
2110 fds->revents |= (fds->events & POLLEXTEND);
2111 }
2112 if (kevp->fflags & NOTE_ATTRIB) {
2113 fds->revents |= (fds->events & POLLATTRIB);
2114 }
2115 if (kevp->fflags & NOTE_LINK) {
2116 fds->revents |= (fds->events & POLLNLINK);
2117 }
2118 if (kevp->fflags & NOTE_WRITE) {
2119 fds->revents |= (fds->events & POLLWRITE);
2120 }
2121 break;
2122 }
2123
2124 if (fds->revents != 0 && prev_revents == 0) {
2125 kectx->kec_process_noutputs++;
2126 }
2127
2128 return 0;
2129 }
2130
/*
 * seltrue
 *
 * Generic select handler for devices that are always ready:
 * unconditionally reports the descriptor as selectable.
 */
int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{
	return 1;
}
2136
2137 /*
2138 * selcount
2139 *
2140 * Count the number of bits set in the input bit vector, and establish an
2141 * outstanding fp->fp_iocount for each of the descriptors which will be in
2142 * use in the select operation.
2143 *
2144 * Parameters: p The process doing the select
2145 * ibits The input bit vector
2146 * nfd The number of fd's in the vector
2147 * countp Pointer to where to store the bit count
2148 *
2149 * Returns: 0 Success
2150 * EIO Bad per process open file table
2151 * EBADF One of the bits in the input bit vector
2152 * references an invalid fd
2153 *
2154 * Implicit: *countp (modified) Count of fd's
2155 *
2156 * Notes: This function is the first pass under the proc_fdlock() that
2157 * permits us to recognize invalid descriptors in the bit vector;
 * they may, however, not remain valid through the drop and
2159 * later reacquisition of the proc_fdlock().
2160 */
static int
selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
{
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;              /* number of fds counted (and referenced) so far */
	u_int32_t *iptr;
	u_int nw;
	int error = 0;
	int need_wakeup = 0;

	nw = howmany(nfd, NFDBITS);

	proc_fdlock(p);
	/* walk the three input vectors: read, write, exception */
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i / NFDBITS];
			/* visit each set bit (fd) in this word */
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1U << j);

				fp = fp_get_noref_locked(p, fd);
				if (fp == NULL) {
					*countp = 0;
					error = EBADF;
					goto bad;
				}
				/* pin the fileproc across the upcoming scan */
				os_ref_retain_locked(&fp->fp_iocount);
				n++;
			}
		}
	}
	proc_fdunlock(p);

	*countp = n;
	return 0;

bad:
	/* drop the references taken before the bad fd was found */
	if (n == 0) {
		goto out;
	}
	/* Ignore error return; it's already EBADF */
	(void)seldrop_locked(p, ibits, nfd, n, &need_wakeup);

out:
	proc_fdunlock(p);
	if (need_wakeup) {
		wakeup(&p->p_fd.fd_fpdrainwait);
	}
	return error;
}
2213
2214
2215 /*
2216 * seldrop_locked
2217 *
2218 * Drop outstanding wait queue references set up during selscan(); drop the
2219 * outstanding per fileproc fp_iocount picked up during the selcount().
2220 *
2221 * Parameters: p Process performing the select
 *		ibits			Input bit vector of fd's
2223 * nfd Number of fd's
2224 * lim Limit to number of vector entries to
2225 * consider, or -1 for "all"
 *		inselect		True if called from the select() path
 *					(stale: not a parameter of the current
 *					signature)
 *		need_wakeup		Pointer to flag to set to do a wakeup
 *					if fp_iocount on any descriptor goes to 0
2229 *
2230 * Returns: 0 Success
2231 * EBADF One or more fds in the bit vector
2232 * were invalid, but the rest
2233 * were successfully dropped
2234 *
 * Notes:	An fd may become bad while the proc_fdlock() is not held,
2236 * if a multithreaded application closes the fd out from under
2237 * the in progress select. In this case, we still have to
2238 * clean up after the set up on the remaining fds.
2239 */
static int
seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup)
{
	int msk, i, j, nc, fd;
	u_int32_t bits;
	struct fileproc *fp;
	u_int32_t *iptr;
	u_int nw;
	int error = 0;
	uthread_t uth = current_uthread();
	struct _select_data *seldata;

	*need_wakeup = 0;

	/* number of 32-bit words per fd_set (read/write/except vectors) */
	nw = howmany(nfd, NFDBITS);
	seldata = &uth->uu_save.uus_select_data;

	nc = 0;
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i / NFDBITS];
			/* visit each set bit in this word; ffs() is 1-based */
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1U << j);
				/*
				 * If we've already dropped as many as were
				 * counted/scanned, then we are done.
				 */
				if (nc >= lim) {
					goto done;
				}

				/*
				 * We took an I/O reference in selcount,
				 * so the fp can't possibly be NULL.
				 */
				fp = fp_get_noref_locked_with_iocount(p, fd);
				/* detach this fd's waitq from the thread's select set */
				selunlinkfp(fp,
				    seldata->wqp ? seldata->wqp[nc] : 0,
				    uth->uu_wqset);

				nc++;

				const os_ref_count_t refc = os_ref_release_locked(&fp->fp_iocount);
				if (0 == refc) {
					panic("fp_iocount overdecrement!");
				}

				if (1 == refc) {
					/*
					 * The last iocount is responsible for clearing
					 * the selconflict flag - even if we didn't set it -
					 * and is also responsible for waking up anyone
					 * waiting on iocounts to drain.
					 */
					if (fp->fp_flags & FP_SELCONFLICT) {
						fp->fp_flags &= ~FP_SELCONFLICT;
					}
					if (p->p_fd.fd_fpdrainwait) {
						p->p_fd.fd_fpdrainwait = 0;
						*need_wakeup = 1;
					}
				}
			}
		}
	}
done:
	return error;
}
2309
2310
2311 static int
seldrop(struct proc * p,u_int32_t * ibits,int nfd,int lim)2312 seldrop(struct proc *p, u_int32_t *ibits, int nfd, int lim)
2313 {
2314 int error;
2315 int need_wakeup = 0;
2316
2317 proc_fdlock(p);
2318 error = seldrop_locked(p, ibits, nfd, lim, &need_wakeup);
2319 proc_fdunlock(p);
2320 if (need_wakeup) {
2321 wakeup(&p->p_fd.fd_fpdrainwait);
2322 }
2323 return error;
2324 }
2325
2326 /*
2327 * Record a select request.
2328 */
void
selrecord(__unused struct proc *selector, struct selinfo *sip, void *s_data)
{
	struct uthread * ut = current_uthread();

	/* need to look at collisions */

	/* do not record if this is second pass of select */
	if (!s_data) {
		return;
	}

	/* lazily initialize the selinfo's waitq on first use */
	if ((sip->si_flags & SI_INITED) == 0) {
		waitq_init(&sip->si_waitq, SYNC_POLICY_FIFO);
		sip->si_flags |= SI_INITED;
	}

	if (ut->uu_wqset == SELSPEC_RECORD_MARKER) {
		/* selspec path: invoke the caller-supplied hook instead of linking */
		((selspec_record_hook_t)s_data)(sip);
	} else {
		/* on input, s_data points to the 64-bit ID of a reserved link object */
		waitq_ref_t *reserved_link = (waitq_ref_t *)s_data;

		sip->si_flags |= SI_RECORDED;

		/* note: this checks for pre-existing linkage */
		waitq_link(&sip->si_waitq, ut->uu_wqset,
		    WAITQ_SHOULD_LOCK, reserved_link);

		/*
		 * Always consume the reserved link.
		 * We can always call waitq_link_release() safely because if
		 * waitq_link is successful, it consumes the link and resets the
		 * value to 0, in which case our call to release becomes a no-op.
		 * If waitq_link fails, then the following release call will actually
		 * release the reserved link object.
		 */
		waitq_link_release(*reserved_link);
		*reserved_link = WAITQ_REF_NULL;

		/*
		 * Use the s_data pointer as an output parameter as well
		 * This avoids changing the prototype for this function which is
		 * used by many kexts. We need to surface the waitq object
		 * associated with the selinfo we just added to the thread's select
		 * set. New waitq sets do not have back-pointers to set members, so
		 * the only way to clear out set linkage objects is to go from the
		 * waitq to the set.
		 */
		*(void **)s_data = &sip->si_waitq;
	}
}
2381
/*
 * Common wakeup path shared by selwakeup() and selthreadclear().
 *
 * hint is passed through to KNOTE(); wr is the wait_result_t delivered to
 * threads blocked on the selinfo's waitq (THREAD_AWAKENED for a normal
 * wakeup, THREAD_RESTART when the object is being revoked).
 */
static void
selwakeup_internal(struct selinfo *sip, long hint, wait_result_t wr)
{
	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_RECORDED) {
		/* wake every thread whose select set is linked to this waitq */
		waitq_wakeup64_all(&sip->si_waitq, NO_EVENT64,
		    wr, WAITQ_ALL_PRIORITIES);
		sip->si_flags &= ~SI_RECORDED;
	}

	if (sip->si_flags & SI_SELSPEC) {
		/*
		 * The "primitive" lock is held.
		 * The knote lock is not held.
		 *
		 * All knotes will transition their kn_hook to NULL.
		 */
		lck_spin_lock(&selspec_lock);
		KNOTE(&sip->si_note, hint);
		klist_init(&sip->si_note);
		lck_spin_unlock(&selspec_lock);
		sip->si_flags &= ~SI_SELSPEC;
	}

	if (hint == NOTE_REVOKE) {
		/*
		 * Higher level logic may have a handle on this waitq's
		 * prepost ID, but that's OK because the waitq_deinit
		 * will remove/invalidate the prepost object
		 * (as well as mark the waitq invalid).
		 *
		 * This de-couples us from any callers that may have
		 * a handle to this waitq via the prepost ID.
		 */
		waitq_deinit(&sip->si_waitq);
		sip->si_flags &= ~SI_INITED;
	} else {
		/*
		 * selinfo users might never call selthreadclear()
		 * (for example pipes didn't use to).
		 *
		 * Fortunately, the waitq will always be unhooked
		 * from the select sets cleanly, and when `waitq_unlink`
		 * removes the waitq from the last set it is in,
		 * it clears the prepost, which avoids a leak.
		 *
		 * This is why it is "OK" to have selinfos for which
		 * waitq_deinit() is never called.
		 */
	}
}
2436
2437
/*
 * Normal select wakeup: wake all waiters recorded on this selinfo.
 */
void
selwakeup(struct selinfo *sip)
{
	selwakeup_internal(sip, 0, THREAD_AWAKENED);
}
2443
/*
 * Revoke the selinfo: waiters are restarted (THREAD_RESTART) and the
 * underlying waitq is deinitialized via the NOTE_REVOKE hint.
 */
void
selthreadclear(struct selinfo *sip)
{
	selwakeup_internal(sip, NOTE_REVOKE, THREAD_RESTART);
}
2449
2450
2451 /*
2452 * gethostuuid
2453 *
2454 * Description: Get the host UUID from IOKit and return it to user space.
2455 *
2456 * Parameters: uuid_buf Pointer to buffer to receive UUID
 *		timeout			Timespec for timeout
2458 *
2459 * Returns: 0 Success
2460 * EWOULDBLOCK Timeout is too short
2461 * copyout:EFAULT Bad user buffer
2462 * mac_system_check_info:EPERM Client not allowed to perform this operation
2463 *
2464 * Notes: A timeout seems redundant, since if it's tolerable to not
2465 * have a system UUID in hand, then why ask for one?
2466 */
int
gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
{
	kern_return_t kret;
	int error;
	mach_timespec_t mach_ts;	/* for IOKit call */
	__darwin_uuid_t uuid_kern = {};	/* for IOKit call */

	/*
	 * Check entitlement: holders of the private entitlement bypass the
	 * MAC check; on non-macOS targets everyone else must pass it.
	 */
	if (!IOCurrentTaskHasEntitlement("com.apple.private.getprivatesysid")) {
#if !defined(XNU_TARGET_OS_OSX)
#if CONFIG_MACF
		if ((error = mac_system_check_info(kauth_cred_get(), "hw.uuid")) != 0) {
			/* EPERM invokes userspace upcall if present */
			return error;
		}
#endif
#endif
	}

	/* Convert the 32/64 bit timespec into a mach_timespec_t */
	if (proc_is64bit(p)) {
		struct user64_timespec ts;
		error = copyin(uap->timeoutp, &ts, sizeof(ts));
		if (error) {
			return error;
		}
		/* narrowing casts: mach_timespec_t carries 32-bit fields */
		mach_ts.tv_sec = (unsigned int)ts.tv_sec;
		mach_ts.tv_nsec = (clock_res_t)ts.tv_nsec;
	} else {
		struct user32_timespec ts;
		error = copyin(uap->timeoutp, &ts, sizeof(ts));
		if (error) {
			return error;
		}
		mach_ts.tv_sec = ts.tv_sec;
		mach_ts.tv_nsec = ts.tv_nsec;
	}

	/* Call IOKit with the stack buffer to get the UUID */
	kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);

	/*
	 * If we get it, copy out the data to the user buffer; note that a
	 * uuid_t is an array of characters, so this is size invariant for
	 * 32 vs. 64 bit.
	 */
	if (kret == KERN_SUCCESS) {
		error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
	} else {
		/* no UUID available within the timeout */
		error = EWOULDBLOCK;
	}

	return error;
}
2522
2523 /*
2524 * ledger
2525 *
2526 * Description: Omnibus system call for ledger operations
2527 */
2528 int
ledger(struct proc * p,struct ledger_args * args,__unused int32_t * retval)2529 ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
2530 {
2531 #if !CONFIG_MACF
2532 #pragma unused(p)
2533 #endif
2534 int rval, pid, len, error;
2535 #ifdef LEDGER_DEBUG
2536 struct ledger_limit_args lla;
2537 #endif
2538 task_t task;
2539 proc_t proc;
2540
2541 /* Finish copying in the necessary args before taking the proc lock */
2542 error = 0;
2543 len = 0;
2544 if (args->cmd == LEDGER_ENTRY_INFO) {
2545 error = copyin(args->arg3, (char *)&len, sizeof(len));
2546 } else if (args->cmd == LEDGER_TEMPLATE_INFO) {
2547 error = copyin(args->arg2, (char *)&len, sizeof(len));
2548 } else if (args->cmd == LEDGER_LIMIT)
2549 #ifdef LEDGER_DEBUG
2550 { error = copyin(args->arg2, (char *)&lla, sizeof(lla));}
2551 #else
2552 { return EINVAL; }
2553 #endif
2554 else if ((args->cmd < 0) || (args->cmd > LEDGER_MAX_CMD)) {
2555 return EINVAL;
2556 }
2557
2558 if (error) {
2559 return error;
2560 }
2561 if (len < 0) {
2562 return EINVAL;
2563 }
2564
2565 rval = 0;
2566 if (args->cmd != LEDGER_TEMPLATE_INFO) {
2567 pid = (int)args->arg1;
2568 proc = proc_find(pid);
2569 if (proc == NULL) {
2570 return ESRCH;
2571 }
2572
2573 #if CONFIG_MACF
2574 error = mac_proc_check_ledger(p, proc, args->cmd);
2575 if (error) {
2576 proc_rele(proc);
2577 return error;
2578 }
2579 #endif
2580
2581 task = proc->task;
2582 }
2583
2584 switch (args->cmd) {
2585 #ifdef LEDGER_DEBUG
2586 case LEDGER_LIMIT: {
2587 if (!kauth_cred_issuser(kauth_cred_get())) {
2588 rval = EPERM;
2589 }
2590 rval = ledger_limit(task, &lla);
2591 proc_rele(proc);
2592 break;
2593 }
2594 #endif
2595 case LEDGER_INFO: {
2596 struct ledger_info info = {};
2597
2598 rval = ledger_info(task, &info);
2599 proc_rele(proc);
2600 if (rval == 0) {
2601 rval = copyout(&info, args->arg2,
2602 sizeof(info));
2603 }
2604 break;
2605 }
2606
2607 case LEDGER_ENTRY_INFO: {
2608 void *buf;
2609 int sz;
2610
2611 #if CONFIG_MEMORYSTATUS
2612 task_ledger_settle_dirty_time(task);
2613 #endif /* CONFIG_MEMORYSTATUS */
2614
2615 rval = ledger_get_task_entry_info_multiple(task, &buf, &len);
2616 proc_rele(proc);
2617 if ((rval == 0) && (len >= 0)) {
2618 sz = len * sizeof(struct ledger_entry_info);
2619 rval = copyout(buf, args->arg2, sz);
2620 kfree_data(buf, sz);
2621 }
2622 if (rval == 0) {
2623 rval = copyout(&len, args->arg3, sizeof(len));
2624 }
2625 break;
2626 }
2627
2628 case LEDGER_TEMPLATE_INFO: {
2629 void *buf;
2630 int sz;
2631
2632 rval = ledger_template_info(&buf, &len);
2633 if ((rval == 0) && (len >= 0)) {
2634 sz = len * sizeof(struct ledger_template_info);
2635 rval = copyout(buf, args->arg1, sz);
2636 kfree_data(buf, sz);
2637 }
2638 if (rval == 0) {
2639 rval = copyout(&len, args->arg2, sizeof(len));
2640 }
2641 break;
2642 }
2643
2644 default:
2645 panic("ledger syscall logic error -- command type %d", args->cmd);
2646 proc_rele(proc);
2647 rval = EINVAL;
2648 }
2649
2650 return rval;
2651 }
2652
2653 int
telemetry(__unused struct proc * p,struct telemetry_args * args,__unused int32_t * retval)2654 telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval)
2655 {
2656 int error = 0;
2657
2658 switch (args->cmd) {
2659 #if CONFIG_TELEMETRY
2660 case TELEMETRY_CMD_TIMER_EVENT:
2661 error = telemetry_timer_event(args->deadline, args->interval, args->leeway);
2662 break;
2663 case TELEMETRY_CMD_PMI_SETUP:
2664 error = telemetry_pmi_setup((enum telemetry_pmi)args->deadline, args->interval);
2665 break;
2666 #endif /* CONFIG_TELEMETRY */
2667 case TELEMETRY_CMD_VOUCHER_NAME:
2668 if (thread_set_voucher_name((mach_port_name_t)args->deadline)) {
2669 error = EINVAL;
2670 }
2671 break;
2672
2673 default:
2674 error = EINVAL;
2675 break;
2676 }
2677
2678 return error;
2679 }
2680
2681 /*
2682 * Logging
2683 *
2684 * Description: syscall to access kernel logging from userspace
2685 *
2686 * Args:
2687 * tag - used for syncing with userspace on the version.
2688 * flags - flags used by the syscall.
2689 * buffer - userspace address of string to copy.
2690 * size - size of buffer.
2691 */
2692 int
log_data(__unused struct proc * p,struct log_data_args * args,int * retval)2693 log_data(__unused struct proc *p, struct log_data_args *args, int *retval)
2694 {
2695 unsigned int tag = args->tag;
2696 unsigned int flags = args->flags;
2697 user_addr_t buffer = args->buffer;
2698 unsigned int size = args->size;
2699 int ret = 0;
2700 *retval = 0;
2701
2702 /* Only DEXTs are suppose to use this syscall. */
2703 if (!task_is_driver(current_task())) {
2704 return EPERM;
2705 }
2706
2707 /*
2708 * Tag synchronize the syscall version with userspace.
2709 * Tag == 0 => flags == OS_LOG_TYPE
2710 */
2711 if (tag != 0) {
2712 return EINVAL;
2713 }
2714
2715 /*
2716 * OS_LOG_TYPE are defined in libkern/os/log.h
2717 * In userspace they are defined in libtrace/os/log.h
2718 */
2719 if (flags != OS_LOG_TYPE_DEFAULT &&
2720 flags != OS_LOG_TYPE_INFO &&
2721 flags != OS_LOG_TYPE_DEBUG &&
2722 flags != OS_LOG_TYPE_ERROR &&
2723 flags != OS_LOG_TYPE_FAULT) {
2724 return EINVAL;
2725 }
2726
2727 if (size == 0) {
2728 return EINVAL;
2729 }
2730
2731 /* truncate to OS_LOG_DATA_MAX_SIZE */
2732 if (size > OS_LOG_DATA_MAX_SIZE) {
2733 printf("%s: WARNING msg is going to be truncated from %u to %u\n",
2734 __func__, size, OS_LOG_DATA_MAX_SIZE);
2735 size = OS_LOG_DATA_MAX_SIZE;
2736 }
2737
2738 char *log_msg = (char *)kalloc_data(size, Z_WAITOK);
2739 if (!log_msg) {
2740 return ENOMEM;
2741 }
2742
2743 if (copyin(buffer, log_msg, size) != 0) {
2744 ret = EFAULT;
2745 goto out;
2746 }
2747 log_msg[size - 1] = '\0';
2748
2749 /*
2750 * This will log to dmesg and logd.
2751 * The call will fail if the current
2752 * process is not a driverKit process.
2753 */
2754 os_log_driverKit(&ret, OS_LOG_DEFAULT, (os_log_type_t)flags, "%s", log_msg);
2755
2756 out:
2757 if (log_msg != NULL) {
2758 kfree_data(log_msg, size);
2759 }
2760
2761 return ret;
2762 }
2763
2764 #if DEVELOPMENT || DEBUG
2765 static int
2766 sysctl_waitq_set_nelem SYSCTL_HANDLER_ARGS
2767 {
2768 #pragma unused(oidp, arg1, arg2)
2769 int nelem;
2770
2771 /* Read only */
2772 if (req->newptr != USER_ADDR_NULL) {
2773 return EPERM;
2774 }
2775
2776 nelem = sysctl_helper_waitq_set_nelem();
2777
2778 return SYSCTL_OUT(req, &nelem, sizeof(nelem));
2779 }
2780
2781 SYSCTL_PROC(_kern, OID_AUTO, n_ltable_entries, CTLFLAG_RD | CTLFLAG_LOCKED,
2782 0, 0, sysctl_waitq_set_nelem, "I", "ltable elementis currently used");
2783
2784
2785 static int
2786 sysctl_mpsc_test_pingpong SYSCTL_HANDLER_ARGS
2787 {
2788 #pragma unused(oidp, arg1, arg2)
2789 uint64_t value = 0;
2790 int error;
2791
2792 error = SYSCTL_IN(req, &value, sizeof(value));
2793 if (error) {
2794 return error;
2795 }
2796
2797 if (error == 0 && req->newptr) {
2798 error = mpsc_test_pingpong(value, &value);
2799 if (error == 0) {
2800 error = SYSCTL_OUT(req, &value, sizeof(value));
2801 }
2802 }
2803
2804 return error;
2805 }
2806 SYSCTL_PROC(_kern, OID_AUTO, mpsc_test_pingpong, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2807 0, 0, sysctl_mpsc_test_pingpong, "Q", "MPSC tests: pingpong");
2808
2809 #endif /* DEVELOPMENT || DEBUG */
2810
/* Remote Time API: parent node for the machdep.remotetime.* sysctls below */
SYSCTL_NODE(_machdep, OID_AUTO, remotetime, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "Remote time api");
2813
2814 #if DEVELOPMENT || DEBUG
2815 #if CONFIG_MACH_BRIDGE_SEND_TIME
2816 extern _Atomic uint32_t bt_init_flag;
2817 extern uint32_t mach_bridge_timer_enable(uint32_t, int);
2818
2819 SYSCTL_INT(_machdep_remotetime, OID_AUTO, bridge_timer_init_flag,
2820 CTLFLAG_RD | CTLFLAG_LOCKED, &bt_init_flag, 0, "");
2821
/*
 * machdep.remotetime.bridge_timer_enable
 *
 * Reads return the bridge-timer enable state; writes of 0 or 1 change it.
 * Everything is gated on bt_init_flag: until the bridge timer has been
 * initialized, reads report 0 and writes are silently ignored.
 */
static int sysctl_mach_bridge_timer_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	uint32_t value = 0;
	int error = 0;
	/* User is querying buffer size */
	if (req->oldptr == USER_ADDR_NULL && req->newptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(value);
		return 0;
	}
	if (os_atomic_load(&bt_init_flag, acquire)) {
		if (req->newptr) {
			int new_value = 0;
			error = SYSCTL_IN(req, &new_value, sizeof(new_value));
			if (error) {
				return error;
			}
			/* only 0 (disable) and 1 (enable) are accepted */
			if (new_value == 0 || new_value == 1) {
				value = mach_bridge_timer_enable(new_value, 1);
			} else {
				return EPERM;
			}
		} else {
			/* query path; second argument 0 appears to mean "no change" — confirm */
			value = mach_bridge_timer_enable(0, 0);
		}
	}
	error = SYSCTL_OUT(req, &value, sizeof(value));
	return error;
}

SYSCTL_PROC(_machdep_remotetime, OID_AUTO, bridge_timer_enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_mach_bridge_timer_enable, "I", "");
2855
2856 #endif /* CONFIG_MACH_BRIDGE_SEND_TIME */
2857
2858 static int sysctl_mach_bridge_remote_time SYSCTL_HANDLER_ARGS
2859 {
2860 #pragma unused(oidp, arg1, arg2)
2861 uint64_t ltime = 0, rtime = 0;
2862 if (req->oldptr == USER_ADDR_NULL) {
2863 req->oldidx = sizeof(rtime);
2864 return 0;
2865 }
2866 if (req->newptr) {
2867 int error = SYSCTL_IN(req, <ime, sizeof(ltime));
2868 if (error) {
2869 return error;
2870 }
2871 }
2872 rtime = mach_bridge_remote_time(ltime);
2873 return SYSCTL_OUT(req, &rtime, sizeof(rtime));
2874 }
2875 SYSCTL_PROC(_machdep_remotetime, OID_AUTO, mach_bridge_remote_time,
2876 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2877 0, 0, sysctl_mach_bridge_remote_time, "Q", "");
2878
2879 #endif /* DEVELOPMENT || DEBUG */
2880
2881 #if CONFIG_MACH_BRIDGE_RECV_TIME
2882 extern struct bt_params bt_params_get_latest(void);
2883
2884 static int sysctl_mach_bridge_conversion_params SYSCTL_HANDLER_ARGS
2885 {
2886 #pragma unused(oidp, arg1, arg2)
2887 struct bt_params params = {};
2888 if (req->oldptr == USER_ADDR_NULL) {
2889 req->oldidx = sizeof(struct bt_params);
2890 return 0;
2891 }
2892 if (req->newptr) {
2893 return EPERM;
2894 }
2895 params = bt_params_get_latest();
2896 return SYSCTL_OUT(req, ¶ms, MIN(sizeof(params), req->oldlen));
2897 }
2898
2899 SYSCTL_PROC(_machdep_remotetime, OID_AUTO, conversion_params,
2900 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0,
2901 0, sysctl_mach_bridge_conversion_params, "S,bt_params", "");
2902
2903 #endif /* CONFIG_MACH_BRIDGE_RECV_TIME */
2904
2905 #if DEVELOPMENT || DEBUG
2906
2907 #include <pexpert/pexpert.h>
2908 extern int32_t sysctl_get_bound_cpuid(void);
2909 extern kern_return_t sysctl_thread_bind_cpuid(int32_t cpuid);
/*
 * kern.sched_thread_bind_cpu
 *
 * Reads return the calling thread's bound CPU id; writes bind the calling
 * thread to the given CPU.  Gated on the "enable_skstb" boot-arg so it is
 * only usable in test configurations.
 */
static int
sysctl_kern_sched_thread_bind_cpu SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)

	/*
	 * DO NOT remove this bootarg guard or make this non-development.
	 * This kind of binding should only be used for tests and
	 * experiments in a custom configuration, never shipping code.
	 */

	if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
		return ENOENT;
	}

	int32_t cpuid = sysctl_get_bound_cpuid();

	int32_t new_value;
	int changed;
	int error = sysctl_io_number(req, cpuid, sizeof cpuid, &new_value, &changed);
	if (error) {
		return error;
	}

	if (changed) {
		kern_return_t kr = sysctl_thread_bind_cpuid(new_value);

		/* map Mach errors onto errno values for sysctl callers */
		if (kr == KERN_NOT_SUPPORTED) {
			return ENOTSUP;
		}

		if (kr == KERN_INVALID_VALUE) {
			return ERANGE;
		}
	}

	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cpu, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_kern_sched_thread_bind_cpu, "I", "");
2951
2952 #if __AMP__
2953 extern char sysctl_get_bound_cluster_type(void);
2954 extern void sysctl_thread_bind_cluster_type(char cluster_type);
2955 static int
2956 sysctl_kern_sched_thread_bind_cluster_type SYSCTL_HANDLER_ARGS
2957 {
2958 #pragma unused(oidp, arg1, arg2)
2959 char buff[4];
2960
2961 if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
2962 return ENOENT;
2963 }
2964
2965 int error = SYSCTL_IN(req, buff, 1);
2966 if (error) {
2967 return error;
2968 }
2969 char cluster_type = buff[0];
2970
2971 if (!req->newptr) {
2972 goto out;
2973 }
2974
2975 sysctl_thread_bind_cluster_type(cluster_type);
2976 out:
2977 cluster_type = sysctl_get_bound_cluster_type();
2978 buff[0] = cluster_type;
2979
2980 return SYSCTL_OUT(req, buff, 1);
2981 }
2982
2983 SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
2984 0, 0, sysctl_kern_sched_thread_bind_cluster_type, "A", "");
2985
2986 extern char sysctl_get_task_cluster_type(void);
2987 extern void sysctl_task_set_cluster_type(char cluster_type);
2988 static int
2989 sysctl_kern_sched_task_set_cluster_type SYSCTL_HANDLER_ARGS
2990 {
2991 #pragma unused(oidp, arg1, arg2)
2992 char buff[4];
2993
2994 if (!PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
2995 return ENOENT;
2996 }
2997
2998 int error = SYSCTL_IN(req, buff, 1);
2999 if (error) {
3000 return error;
3001 }
3002 char cluster_type = buff[0];
3003
3004 if (!req->newptr) {
3005 goto out;
3006 }
3007
3008 sysctl_task_set_cluster_type(cluster_type);
3009 out:
3010 cluster_type = sysctl_get_task_cluster_type();
3011 buff[0] = cluster_type;
3012
3013 return SYSCTL_OUT(req, buff, 1);
3014 }
3015
3016 SYSCTL_PROC(_kern, OID_AUTO, sched_task_set_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
3017 0, 0, sysctl_kern_sched_task_set_cluster_type, "A", "");
3018
3019 extern kern_return_t thread_bind_cluster_id(thread_t thread, uint32_t cluster_id, thread_bind_option_t options);
3020 extern uint32_t thread_bound_cluster_id(thread_t);
3021 static int
3022 sysctl_kern_sched_thread_bind_cluster_id SYSCTL_HANDLER_ARGS
3023 {
3024 #pragma unused(oidp, arg1, arg2)
3025 if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
3026 return ENOENT;
3027 }
3028
3029 thread_t self = current_thread();
3030 uint32_t old_value = thread_bound_cluster_id(self);
3031 uint32_t new_value;
3032
3033 int error = SYSCTL_IN(req, &new_value, sizeof(new_value));
3034 if (error) {
3035 return error;
3036 }
3037 if (new_value != old_value) {
3038 /*
3039 * This sysctl binds the thread to the cluster without any flags,
3040 * which means it will be hard bound and not check eligibility.
3041 */
3042 thread_bind_cluster_id(self, new_value, 0);
3043 }
3044 return SYSCTL_OUT(req, &old_value, sizeof(old_value));
3045 }
3046
3047 SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cluster_id, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3048 0, 0, sysctl_kern_sched_thread_bind_cluster_id, "I", "");
3049
3050 #if CONFIG_SCHED_EDGE
3051
3052 extern int sched_edge_restrict_ut;
3053 SYSCTL_INT(_kern, OID_AUTO, sched_edge_restrict_ut, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_restrict_ut, 0, "Edge Scheduler Restrict UT Threads");
3054 extern int sched_edge_restrict_bg;
3055 SYSCTL_INT(_kern, OID_AUTO, sched_edge_restrict_bg, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_restrict_ut, 0, "Edge Scheduler Restrict BG Threads");
3056 extern int sched_edge_migrate_ipi_immediate;
3057 SYSCTL_INT(_kern, OID_AUTO, sched_edge_migrate_ipi_immediate, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_migrate_ipi_immediate, 0, "Edge Scheduler uses immediate IPIs for migration event based on execution latency");
3058
3059 #endif /* CONFIG_SCHED_EDGE */
3060
3061 #endif /* __AMP__ */
3062
3063 #if INTERRUPT_MASKED_DEBUG
3064
3065 SYSCTL_INT(_kern, OID_AUTO, interrupt_masked_threshold_mt, CTLFLAG_RW | CTLFLAG_LOCKED,
3066 &interrupt_masked_timeout, 0,
3067 "Interrupt masked duration after which a tracepoint is emitted or the device panics (in mach timebase units)");
3068
3069 SYSCTL_INT(_kern, OID_AUTO, interrupt_masked_debug_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
3070 &interrupt_masked_debug_mode, 0,
3071 "Enable interrupt masked tracing or panic (0: off, 1: trace, 2: panic)");
3072
3073 #endif /* INTERRUPT_MASKED_DEBUG */
3074
3075 #if SCHED_PREEMPTION_DISABLE_DEBUG
3076
3077 SYSCTL_QUAD(_kern, OID_AUTO, sched_preemption_disable_threshold_mt, CTLFLAG_RW | CTLFLAG_LOCKED,
3078 &sched_preemption_disable_threshold_mt,
3079 "Preemption disablement duration after which a tracepoint is emitted or the device panics (in mach timebase units)");
3080
3081 SYSCTL_INT(_kern, OID_AUTO, sched_preemption_disable_debug_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
3082 &sched_preemption_disable_debug_mode, 0,
3083 "Enable preemption disablement tracing or panic (0: off, 1: trace, 2: panic)");
3084
3085 PERCPU_DECL(uint64_t, preemption_disable_max_mt);
3086
/*
 * kern.sched_preemption_disable_stats
 *
 * Reads return the per-CPU maximum preemption-disable durations (one
 * uint64 per CPU); any write resets all the per-CPU maxima to 0.
 */
static int
sysctl_sched_preemption_disable_stats(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
	uint64_t stats[MAX_CPUS]; // maximum per CPU

	/*
	 * No synchronization here. The individual values are pretty much
	 * independent, and reading/writing them is atomic.
	 */

	static_assert(__LP64__); /* below is racy on armv7k, reminder to change if needed there. */

	int cpu = 0;
	percpu_foreach(max_stat, preemption_disable_max_mt) {
		stats[cpu++] = *max_stat;
	}

	if (req->newlen > 0) {
		// writing just resets all stats.
		percpu_foreach(max_stat, preemption_disable_max_mt) {
			*max_stat = 0;
		}
	}

	/* cpu now holds the number of CPUs actually visited */
	return sysctl_io_opaque(req, stats, cpu * sizeof(uint64_t), NULL);
}

SYSCTL_PROC(_kern, OID_AUTO, sched_preemption_disable_stats,
    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_sched_preemption_disable_stats, "I", "Preemption disablement statistics");
3117
3118 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
3119
3120
3121 /* used for testing by exception_tests */
3122 extern uint32_t ipc_control_port_options;
3123 SYSCTL_INT(_kern, OID_AUTO, ipc_control_port_options,
3124 CTLFLAG_RD | CTLFLAG_LOCKED, &ipc_control_port_options, 0, "");
3125
3126 #endif /* DEVELOPMENT || DEBUG */
3127
3128 extern uint32_t task_exc_guard_default;
3129
3130 SYSCTL_INT(_kern, OID_AUTO, task_exc_guard_default,
3131 CTLFLAG_RD | CTLFLAG_LOCKED, &task_exc_guard_default, 0, "");
3132
3133
3134 static int
3135 sysctl_kern_tcsm_available SYSCTL_HANDLER_ARGS
3136 {
3137 #pragma unused(oidp, arg1, arg2)
3138 uint32_t value = machine_csv(CPUVN_CI) ? 1 : 0;
3139
3140 if (req->newptr) {
3141 return EINVAL;
3142 }
3143
3144 return SYSCTL_OUT(req, &value, sizeof(value));
3145 }
3146 SYSCTL_PROC(_kern, OID_AUTO, tcsm_available,
3147 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY,
3148 0, 0, sysctl_kern_tcsm_available, "I", "");
3149
3150
/*
 * kern.tcsm_enable
 *
 * Writing a non-zero value marks the calling thread no-SMT and calls
 * machine_tecs() — but only when machine_csv(CPUVN_CI) reports the
 * mitigation is applicable.  There is no way to undo the opt-in via this
 * interface.  Reads return the thread's no-SMT state as captured *before*
 * any write took effect.
 */
static int
sysctl_kern_tcsm_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	uint32_t soflags = 0;
	/* capture the pre-write state; this is what gets returned below */
	uint32_t old_value = thread_get_no_smt() ? 1 : 0;

	int error = SYSCTL_IN(req, &soflags, sizeof(soflags));
	if (error) {
		return error;
	}

	if (soflags && machine_csv(CPUVN_CI)) {
		thread_set_no_smt(true);
		machine_tecs(current_thread());
	}

	return SYSCTL_OUT(req, &old_value, sizeof(old_value));
}
SYSCTL_PROC(_kern, OID_AUTO, tcsm_enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY,
    0, 0, sysctl_kern_tcsm_enable, "I", "");
3173
3174
3175 #if DEVELOPMENT || DEBUG
3176 extern void sysctl_task_set_no_smt(char no_smt);
3177 extern char sysctl_task_get_no_smt(void);
3178
3179 static int
3180 sysctl_kern_sched_task_set_no_smt SYSCTL_HANDLER_ARGS
3181 {
3182 #pragma unused(oidp, arg1, arg2)
3183 char buff[4];
3184
3185 int error = SYSCTL_IN(req, buff, 1);
3186 if (error) {
3187 return error;
3188 }
3189 char no_smt = buff[0];
3190
3191 if (!req->newptr) {
3192 goto out;
3193 }
3194
3195 sysctl_task_set_no_smt(no_smt);
3196 out:
3197 no_smt = sysctl_task_get_no_smt();
3198 buff[0] = no_smt;
3199
3200 return SYSCTL_OUT(req, buff, 1);
3201 }
3202
3203 SYSCTL_PROC(_kern, OID_AUTO, sched_task_set_no_smt, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
3204 0, 0, sysctl_kern_sched_task_set_no_smt, "A", "");
3205
3206 static int
sysctl_kern_sched_thread_set_no_smt(__unused struct sysctl_oid * oidp,__unused void * arg1,__unused int arg2,struct sysctl_req * req)3207 sysctl_kern_sched_thread_set_no_smt(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3208 {
3209 int new_value, changed;
3210 int old_value = thread_get_no_smt() ? 1 : 0;
3211 int error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
3212
3213 if (changed) {
3214 thread_set_no_smt(!!new_value);
3215 }
3216
3217 return error;
3218 }
3219
3220 SYSCTL_PROC(_kern, OID_AUTO, sched_thread_set_no_smt,
3221 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
3222 0, 0, sysctl_kern_sched_thread_set_no_smt, "I", "");
3223
3224
3225 static int
3226 sysctl_kern_debug_get_preoslog SYSCTL_HANDLER_ARGS
3227 {
3228 #pragma unused(oidp, arg1, arg2)
3229 static bool oneshot_executed = false;
3230 size_t preoslog_size = 0;
3231 const char *preoslog = NULL;
3232
3233 // DumpPanic pases a non-zero write value when it needs oneshot behaviour
3234 if (req->newptr) {
3235 uint8_t oneshot = 0;
3236 int error = SYSCTL_IN(req, &oneshot, sizeof(oneshot));
3237 if (error) {
3238 return error;
3239 }
3240
3241 if (oneshot) {
3242 if (!OSCompareAndSwap8(false, true, &oneshot_executed)) {
3243 return EPERM;
3244 }
3245 }
3246 }
3247
3248 preoslog = sysctl_debug_get_preoslog(&preoslog_size);
3249 if (preoslog == NULL || preoslog_size == 0) {
3250 return 0;
3251 }
3252
3253 if (req->oldptr == USER_ADDR_NULL) {
3254 req->oldidx = preoslog_size;
3255 return 0;
3256 }
3257
3258 return SYSCTL_OUT(req, preoslog, preoslog_size);
3259 }
3260
/*
 * kern.preoslog: opaque sysctl exposing the pre-OS log buffer via
 * sysctl_kern_debug_get_preoslog; writes arm the handler's oneshot gate.
 */
SYSCTL_PROC(_kern, OID_AUTO, preoslog, CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_kern_debug_get_preoslog, "-", "");
3263
3264 static int
3265 sysctl_kern_task_set_filter_msg_flag SYSCTL_HANDLER_ARGS
3266 {
3267 #pragma unused(oidp, arg1, arg2)
3268 int new_value, changed;
3269 int old_value = task_get_filter_msg_flag(current_task()) ? 1 : 0;
3270 int error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
3271
3272 if (changed) {
3273 task_set_filter_msg_flag(current_task(), !!new_value);
3274 }
3275
3276 return error;
3277 }
3278
/*
 * kern.task_set_filter_msg_flag: integer RW sysctl backed by
 * sysctl_kern_task_set_filter_msg_flag (operates on the calling task).
 */
SYSCTL_PROC(_kern, OID_AUTO, task_set_filter_msg_flag, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_kern_task_set_filter_msg_flag, "I", "");
3281
3282 #if CONFIG_PROC_RESOURCE_LIMITS
3283
3284 extern mach_port_name_t current_task_get_fatal_port_name(void);
3285
3286 static int
3287 sysctl_kern_task_get_fatal_port SYSCTL_HANDLER_ARGS
3288 {
3289 #pragma unused(oidp, arg1, arg2)
3290 int port = 0;
3291 int flag = 0;
3292
3293 if (req->oldptr == USER_ADDR_NULL) {
3294 req->oldidx = sizeof(mach_port_t);
3295 return 0;
3296 }
3297
3298 int error = SYSCTL_IN(req, &flag, sizeof(flag));
3299 if (error) {
3300 return error;
3301 }
3302
3303 if (flag == 1) {
3304 port = (int)current_task_get_fatal_port_name();
3305 }
3306 return SYSCTL_OUT(req, &port, sizeof(port));
3307 }
3308
/*
 * machdep.task_get_fatal_port: write 1, then read back the calling task's
 * fatal port name (see sysctl_kern_task_get_fatal_port).
 * NOTE(review): registered under _machdep although the handler carries a
 * sysctl_kern_ prefix — confirm the parent node is intentional.
 */
SYSCTL_PROC(_machdep, OID_AUTO, task_get_fatal_port, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_kern_task_get_fatal_port, "I", "");
3311
3312 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
3313
3314 extern unsigned int ipc_table_max_entries(void);
3315
3316 static int
3317 sysctl_mach_max_port_table_size SYSCTL_HANDLER_ARGS
3318 {
3319 #pragma unused(oidp, arg1, arg2)
3320 int old_value = ipc_table_max_entries();
3321 int error = sysctl_io_number(req, old_value, sizeof(int), NULL, NULL);
3322
3323 return error;
3324 }
3325
/*
 * machdep.max_port_table_size: reports the IPC port table capacity.
 * NOTE(review): flagged CTLFLAG_RW, but the handler supplies no new-value
 * buffer to sysctl_io_number — confirm whether CTLFLAG_RD was intended.
 */
SYSCTL_PROC(_machdep, OID_AUTO, max_port_table_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_mach_max_port_table_size, "I", "");
3328
3329 #endif /* DEVELOPMENT || DEBUG */
3330
3331 #if defined(CONFIG_KDP_INTERACTIVE_DEBUGGING) && defined(CONFIG_KDP_COREDUMP_ENCRYPTION)
3332
3333 #define COREDUMP_ENCRYPTION_KEY_ENTITLEMENT "com.apple.private.coredump-encryption-key"
3334
3335 static int
3336 sysctl_coredump_encryption_key_update SYSCTL_HANDLER_ARGS
3337 {
3338 kern_return_t ret = KERN_SUCCESS;
3339 int error = 0;
3340 struct kdp_core_encryption_key_descriptor key_descriptor = { MACH_CORE_FILEHEADER_V2_FLAG_NEXT_COREFILE_KEY_FORMAT_NIST_P256, 0, NULL };
3341
3342 /* Need to be root and have entitlement */
3343 if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement(COREDUMP_ENCRYPTION_KEY_ENTITLEMENT)) {
3344 return EPERM;
3345 }
3346
3347 // Sanity-check the given key length
3348 if (req->newlen > UINT16_MAX) {
3349 return EINVAL;
3350 }
3351
3352 // It is allowed for the caller to pass in a NULL buffer. This indicates that they want us to forget about any public key
3353 // we might have.
3354 if (req->newptr) {
3355 key_descriptor.kcekd_size = (uint16_t) req->newlen;
3356
3357 ret = kmem_alloc(kernel_map, (vm_offset_t*) &(key_descriptor.kcekd_key), key_descriptor.kcekd_size, VM_KERN_MEMORY_DIAG);
3358 if (KERN_SUCCESS != ret) {
3359 return ENOMEM;
3360 }
3361
3362 error = SYSCTL_IN(req, key_descriptor.kcekd_key, key_descriptor.kcekd_size);
3363 if (error) {
3364 return error;
3365 }
3366 }
3367
3368 // If successful, kdp_core will take ownership of the 'kcekd_key' pointer
3369 ret = IOProvideCoreFileAccess(kdp_core_handle_new_encryption_key, (void *)&key_descriptor);
3370 if (KERN_SUCCESS != ret) {
3371 printf("Failed to handle the new encryption key. Error 0x%x", ret);
3372 if (key_descriptor.kcekd_key) {
3373 kmem_free(kernel_map, (vm_offset_t) key_descriptor.kcekd_key, key_descriptor.kcekd_size);
3374 }
3375 return EFAULT;
3376 }
3377
3378 return 0;
3379 }
3380
/*
 * kern.coredump_encryption_key: write-only (CTLFLAG_WR) and hidden from
 * sysctl listings (CTLFLAG_MASKED); installs — or, with an empty write,
 * discards — the coredump public encryption key.
 */
SYSCTL_PROC(_kern, OID_AUTO, coredump_encryption_key, CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_coredump_encryption_key_update, "-", "Set a new encryption key for coredumps");
3383
3384 #endif /* CONFIG_KDP_INTERACTIVE_DEBUGGING && CONFIG_KDP_COREDUMP_ENCRYPTION*/
3385