1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)kern_proc.c 8.4 (Berkeley) 1/4/94
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69 /* HISTORY
70 * 04-Aug-97 Umesh Vaishampayan ([email protected])
71 * Added current_proc_EXTERNAL() function for the use of kernel
 *	loadable modules.
73 *
74 * 05-Jun-95 Mac Gillon (mgillon) at NeXT
75 * New version based on 3.3NS and 4.4
76 */
77
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/kernel.h>
82 #include <sys/proc_internal.h>
83 #include <sys/acct.h>
84 #include <sys/wait.h>
85 #include <sys/file_internal.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/lock.h>
89 #include <sys/mbuf.h>
90 #include <sys/ioctl.h>
91 #include <sys/tty.h>
92 #include <sys/signalvar.h>
93 #include <sys/syslog.h>
94 #include <sys/sysctl.h>
95 #include <sys/sysproto.h>
96 #include <sys/kauth.h>
97 #include <sys/codesign.h>
98 #include <sys/kernel_types.h>
99 #include <sys/ubc.h>
100 #include <kern/kalloc.h>
101 #include <kern/task.h>
102 #include <kern/coalition.h>
103 #include <sys/coalition.h>
104 #include <kern/assert.h>
105 #include <kern/sched_prim.h>
106 #include <vm/vm_protos.h>
107 #include <vm/vm_map.h> /* vm_map_switch_protect() */
108 #include <vm/vm_pageout.h>
109 #include <mach/task.h>
110 #include <mach/message.h>
111 #include <sys/priv.h>
112 #include <sys/proc_info.h>
113 #include <sys/bsdtask_info.h>
114 #include <sys/persona.h>
115 #include <sys/sysent.h>
116 #include <sys/reason.h>
117 #include <sys/proc_require.h>
118 #include <IOKit/IOBSD.h> /* IOTaskHasEntitlement() */
119 #include <kern/ipc_kobject.h> /* ipc_kobject_set_kobjidx() */
120 #include <kern/ast.h> /* proc_filedesc_ast */
121 #include <libkern/amfi/amfi.h>
122 #include <mach-o/loader.h>
123
124 #ifdef CONFIG_32BIT_TELEMETRY
125 #include <sys/kasl.h>
126 #endif /* CONFIG_32BIT_TELEMETRY */
127
128 #if CONFIG_CSR
129 #include <sys/csr.h>
130 #endif
131
132 #include <sys/kern_memorystatus.h>
133
134 #if CONFIG_MACF
135 #include <security/mac_framework.h>
136 #include <security/mac_mach_internal.h>
137 #endif
138
139 #include <libkern/crypto/sha1.h>
140
141 #ifdef CONFIG_32BIT_TELEMETRY
142 #define MAX_32BIT_EXEC_SIG_SIZE 160
143 #endif /* CONFIG_32BIT_TELEMETRY */
144
/*
 * Structure associated with user cacheing.
 *
 * One entry per uid that currently owns processes; entries live on the
 * uihashtbl chains (see UIHASH()) and are maintained by chgproccnt()
 * under the proc list lock.
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;    /* hash chain linkage */
	uid_t   ui_uid;                 /* uid this entry describes */
	size_t  ui_proccnt;             /* number of processes owned by ui_uid */
};
153 #define UIHASH(uid) (&uihashtbl[(uid) & uihash])
154 static LIST_HEAD(uihashhead, uidinfo) * uihashtbl;
155 static u_long uihash; /* size of hash table - 1 */
156
157 /*
158 * Other process lists
159 */
160 #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash])
161 static SECURITY_READ_ONLY_LATE(struct proc_hp *) pidhashtbl;
162 static SECURITY_READ_ONLY_LATE(u_long) pidhash;
163 #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash])
164 static SECURITY_READ_ONLY_LATE(struct pgrp_hp *) pgrphashtbl;
165 static SECURITY_READ_ONLY_LATE(u_long) pgrphash;
166 SECURITY_READ_ONLY_LATE(struct sesshashhead *) sesshashtbl;
167 SECURITY_READ_ONLY_LATE(u_long) sesshash;
168
169 #if PROC_REF_DEBUG
170 /* disable panics on leaked proc refs across syscall boundary */
171 static TUNABLE(bool, proc_ref_tracking_disabled, "-disable_procref_tracking", false);
172 #endif
173
174 struct proclist allproc = LIST_HEAD_INITIALIZER(allproc);
175 struct proclist zombproc = LIST_HEAD_INITIALIZER(zombproc);
176 extern struct tty cons;
177
178 extern int cs_debug;
179
180 #if DEVELOPMENT || DEBUG
181 static TUNABLE(bool, syscallfilter_disable, "-disable_syscallfilter", false);
182 #endif // DEVELOPMENT || DEBUG
183
184 #if DEBUG
185 #define __PROC_INTERNAL_DEBUG 1
186 #endif
187 #if CONFIG_COREDUMP
188 /* Name to give to core files */
189 #if defined(XNU_TARGET_OS_BRIDGE)
190 __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/private/var/internal/%N.core"};
191 #elif defined(XNU_TARGET_OS_OSX)
192 __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/cores/core.%P"};
193 #else
194 __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/private/var/cores/%N.core"};
195 #endif
196 #endif
197
198 #if PROC_REF_DEBUG
199 #include <kern/backtrace.h>
200 #endif
201
202 static LCK_MTX_DECLARE_ATTR(proc_klist_mlock, &proc_mlock_grp, &proc_lck_attr);
203
204 ZONE_DEFINE(pgrp_zone, "pgrp",
205 sizeof(struct pgrp), ZC_ZFREE_CLEARMEM);
206 ZONE_DEFINE(session_zone, "session",
207 sizeof(struct session), ZC_ZFREE_CLEARMEM);
208 ZONE_DEFINE_ID(ZONE_ID_PROC_RO, "proc_ro", struct proc_ro,
209 ZC_READONLY | ZC_ZFREE_CLEARMEM);
210
211 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
212
213 static void orphanpg(struct pgrp * pg);
214 void proc_name_kdp(proc_t t, char * buf, int size);
215 boolean_t proc_binary_uuid_kdp(task_t task, uuid_t uuid);
216 boolean_t current_thread_aborted(void);
217 int proc_threadname_kdp(void * uth, char * buf, size_t size);
218 void proc_starttime_kdp(void * p, unaligned_u64 *tv_sec, unaligned_u64 *tv_usec, unaligned_u64 *abstime);
219 void proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
220 char * proc_name_address(void * p);
221 char * proc_longname_address(void *);
222
223 static void pgrp_destroy(struct pgrp *pgrp);
224 static void pgrp_replace(proc_t p, struct pgrp *pgrp);
225 static int csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaddittoken);
226 static boolean_t proc_parent_is_currentproc(proc_t p);
227
228 extern void task_filedesc_ast(task_t task, int current_size, int soft_limit, int hard_limit);
229
230 struct fixjob_iterargs {
231 struct pgrp * pg;
232 struct session * mysession;
233 int entering;
234 };
235
236 int fixjob_callback(proc_t, void *);
237
238 uint64_t
get_current_unique_pid(void)239 get_current_unique_pid(void)
240 {
241 proc_t p = current_proc();
242
243 if (p) {
244 return proc_uniqueid(p);
245 } else {
246 return 0;
247 }
248 }
249
/*
 * Initialize global process hashing structures.
 *
 * Table sizes scale with maxproc; hashinit() returns the table and
 * stores the lookup mask through its last parameter (used by the
 * PIDHASH/PGRPHASH/UIHASH macros).
 */
static void
procinit(void)
{
	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
	sesshashtbl = hashinit(maxproc / 4, M_PROC, &sesshash);
	uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
}
/* runs in early boot, before any process can be created */
STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, procinit);
262
/*
 * Change the count associated with number of processes
 * a given user is using. This routine protects the uihash
 * with the list lock
 *
 * Returns the new count for `uid`.  A new hash entry is allocated
 * outside the lock and the lookup retried, so the lock is never held
 * across an allocation.
 */
size_t
chgproccnt(uid_t uid, int diff)
{
	struct uidinfo *uip;
	struct uidinfo *newuip = NULL;  /* pre-allocated entry for the retry path */
	struct uihashhead *uipp;
	size_t retval;

again:
	proc_list_lock();
	uipp = UIHASH(uid);
	/* look for an existing entry for this uid on its hash chain */
	for (uip = uipp->lh_first; uip != 0; uip = uip->ui_hash.le_next) {
		if (uip->ui_uid == uid) {
			break;
		}
	}
	if (uip) {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt > 0) {
			retval = uip->ui_proccnt;
			proc_list_unlock();
			goto out;
		}
		/* count dropped to zero: unhash now, free outside the lock */
		LIST_REMOVE(uip, ui_hash);
		retval = 0;
		proc_list_unlock();
		kfree_type(struct uidinfo, uip);
		goto out;
	}
	/* no entry exists: a decrement means the books are off */
	if (diff <= 0) {
		if (diff == 0) {
			retval = 0;
			proc_list_unlock();
			goto out;
		}
		panic("chgproccnt: lost user");
	}
	if (newuip != NULL) {
		/* insert the entry pre-allocated on a previous pass */
		uip = newuip;
		newuip = NULL;
		LIST_INSERT_HEAD(uipp, uip, ui_hash);
		uip->ui_uid = uid;
		uip->ui_proccnt = diff;
		retval = diff;
		proc_list_unlock();
		goto out;
	}
	/*
	 * Need a new entry but cannot allocate while holding the list
	 * lock: drop it, allocate, and redo the lookup from scratch.
	 */
	proc_list_unlock();
	newuip = kalloc_type(struct uidinfo, Z_WAITOK | Z_NOFAIL);
	goto again;
out:
	/* free the pre-allocated entry if it ended up unused */
	kfree_type(struct uidinfo, newuip);
	return retval;
}
322
323 /*
324 * Is p an inferior of the current process?
325 */
326 int
inferior(proc_t p)327 inferior(proc_t p)
328 {
329 int retval = 0;
330
331 proc_list_lock();
332 for (; p != current_proc(); p = p->p_pptr) {
333 if (proc_getpid(p) == 0) {
334 goto out;
335 }
336 }
337 retval = 1;
338 out:
339 proc_list_unlock();
340 return retval;
341 }
342
343 /*
344 * Is p an inferior of t ?
345 */
346 int
isinferior(proc_t p,proc_t t)347 isinferior(proc_t p, proc_t t)
348 {
349 int retval = 0;
350 int nchecked = 0;
351 proc_t start = p;
352
353 /* if p==t they are not inferior */
354 if (p == t) {
355 return 0;
356 }
357
358 proc_list_lock();
359 for (; p != t; p = p->p_pptr) {
360 nchecked++;
361
362 /* Detect here if we're in a cycle */
363 if ((proc_getpid(p) == 0) || (p->p_pptr == start) || (nchecked >= nprocs)) {
364 goto out;
365 }
366 }
367 retval = 1;
368 out:
369 proc_list_unlock();
370 return retval;
371 }
372
373 int
proc_isinferior(int pid1,int pid2)374 proc_isinferior(int pid1, int pid2)
375 {
376 proc_t p = PROC_NULL;
377 proc_t t = PROC_NULL;
378 int retval = 0;
379
380 if (((p = proc_find(pid1)) != (proc_t)0) && ((t = proc_find(pid2)) != (proc_t)0)) {
381 retval = isinferior(p, t);
382 }
383
384 if (p != PROC_NULL) {
385 proc_rele(p);
386 }
387 if (t != PROC_NULL) {
388 proc_rele(t);
389 }
390
391 return retval;
392 }
393
394 /*
395 * Returns process identity of a given process. Calling this function is not
396 * racy for a current process or if a reference to the process is held.
397 */
398 struct proc_ident
proc_ident(proc_t p)399 proc_ident(proc_t p)
400 {
401 struct proc_ident ident = {
402 .p_pid = proc_pid(p),
403 .p_uniqueid = proc_uniqueid(p),
404 .p_idversion = proc_pidversion(p),
405 };
406
407 return ident;
408 }
409
410 proc_t
proc_find_ident(struct proc_ident const * ident)411 proc_find_ident(struct proc_ident const *ident)
412 {
413 proc_t proc = PROC_NULL;
414
415 proc = proc_find(ident->p_pid);
416 if (proc == PROC_NULL) {
417 return PROC_NULL;
418 }
419
420 if (proc_uniqueid(proc) != ident->p_uniqueid ||
421 proc_pidversion(proc) != ident->p_idversion) {
422 proc_rele(proc);
423 return PROC_NULL;
424 }
425
426 return proc;
427 }
428
/*
 * Resets the per-uthread proc ref accounting to zero.  With
 * PROC_REF_DEBUG, also releases any recorded backtraces and scrubs the
 * tracking slots.
 */
void
uthread_reset_proc_refcount(uthread_t uth)
{
	uth->uu_proc_refcount = 0;

#if PROC_REF_DEBUG
	/* tracking may be disabled via the -disable_procref_tracking boot-arg */
	if (proc_ref_tracking_disabled) {
		return;
	}

	struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
	uint32_t n = uth->uu_proc_ref_info->upri_pindex;

	uth->uu_proc_ref_info->upri_pindex = 0;

	if (n) {
		/* drop the backtrace refs and clear the used slots */
		for (unsigned i = 0; i < n; i++) {
			btref_put(upri->upri_proc_stacks[i]);
		}
		bzero(upri->upri_proc_stacks, sizeof(btref_t) * n);
		bzero(upri->upri_proc_ps, sizeof(proc_t) * n);
	}
#endif
}
453
454 #if PROC_REF_DEBUG
/*
 * Allocates the per-uthread proc ref tracking state
 * (PROC_REF_DEBUG builds only).
 */
void
uthread_init_proc_refcount(uthread_t uth)
{
	if (proc_ref_tracking_disabled) {
		return;
	}

	uth->uu_proc_ref_info = kalloc_type(struct uthread_proc_ref_info,
	    Z_ZERO | Z_WAITOK | Z_NOFAIL);
}
465
/*
 * Frees the per-uthread proc ref tracking state, releasing any
 * backtraces still recorded in it (PROC_REF_DEBUG builds only).
 */
void
uthread_destroy_proc_refcount(uthread_t uth)
{
	if (proc_ref_tracking_disabled) {
		return;
	}

	struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
	uint32_t n = uth->uu_proc_ref_info->upri_pindex;

	/* drop the refs on all recorded backtraces before freeing */
	for (unsigned i = 0; i < n; i++) {
		btref_put(upri->upri_proc_stacks[i]);
	}

	kfree_type(struct uthread_proc_ref_info, uth->uu_proc_ref_info);
}
482
/*
 * Panics if the uthread still holds any proc refs
 * (PROC_REF_DEBUG builds only).
 */
void
uthread_assert_zero_proc_refcount(uthread_t uth)
{
	if (proc_ref_tracking_disabled) {
		return;
	}

	if (__improbable(uth->uu_proc_refcount != 0)) {
		panic("Unexpected non zero uu_proc_refcount = %d (%p)",
		    uth->uu_proc_refcount, uth);
	}
}
495 #endif
496
/*
 * Returns whether the proc has been marked dead, i.e. whether
 * P_REF_DEAD is set in its refcount bits.
 */
bool
proc_list_exited(proc_t p)
{
	return os_ref_get_raw_mask(&p->p_refcount) & P_REF_DEAD;
}
502
503 #if CONFIG_DEBUG_SYSCALL_REJECTION
504 uint64_t*
uthread_get_syscall_rejection_mask(void * uthread)505 uthread_get_syscall_rejection_mask(void *uthread)
506 {
507 uthread_t uth = (uthread_t) uthread;
508 return uth->syscall_rejection_mask;
509 }
510 #endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
511
/*
 * Accounts for a proc ref being taken (count == 1) or dropped
 * (count == -1) by the current uthread.  With PROC_REF_DEBUG, also
 * records the proc and a backtrace of the taker for leak debugging.
 */
#if PROC_REF_DEBUG
__attribute__((noinline))
#endif /* PROC_REF_DEBUG */
static void
record_procref(proc_t p __unused, int count)
{
	uthread_t uth;

	uth = current_uthread();
	uth->uu_proc_refcount += count;

#if PROC_REF_DEBUG
	if (proc_ref_tracking_disabled) {
		return;
	}
	struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;

	/* only the first NUM_PROC_REFS_TO_TRACK refs are recorded */
	if (upri->upri_pindex < NUM_PROC_REFS_TO_TRACK) {
		upri->upri_proc_stacks[upri->upri_pindex] =
		    btref_get(__builtin_frame_address(0), BTREF_GET_NOWAIT);
		upri->upri_proc_ps[upri->upri_pindex] = p;
		upri->upri_pindex++;
	}
#endif /* PROC_REF_DEBUG */
}
537
/*!
 * @function proc_ref_try_fast()
 *
 * @brief
 * Tries to take a proc ref, unless it is in flux (being made, or dead).
 *
 * @returns
 * - the new refcount value (including bits) on success,
 * - 0 on failure.
 */
static inline uint32_t
proc_ref_try_fast(proc_t p)
{
	uint32_t bits;

	proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);

	/* fails (returns 0) when P_REF_NEW or P_REF_DEAD is set */
	bits = os_ref_retain_try_mask(&p->p_refcount, P_REF_BITS,
	    P_REF_NEW | P_REF_DEAD, NULL);
	if (bits) {
		record_procref(p, 1);
	}
	return bits;
}
562
/*!
 * @function proc_ref_wait()
 *
 * @brief
 * Waits for the specified bits to clear, on the specified event.
 *
 * @discussion
 * If @c locked is true, the caller holds the proc_list_lock(), which is
 * dropped while blocked and re-taken before returning.
 */
__attribute__((noinline))
static void
proc_ref_wait(proc_t p, event_t event, proc_ref_bits_t mask, bool locked)
{
	/* arm the wait before re-checking the bits, to avoid a missed wakeup */
	assert_wait(event, THREAD_UNINT | THREAD_WAIT_NOREPORT);

	if (os_ref_get_raw_mask(&p->p_refcount) & mask) {
		uthread_t uth = current_uthread();

		if (locked) {
			proc_list_unlock();
		}
		/* publish wait channel/message for debugging tools */
		uth->uu_wchan = event;
		uth->uu_wmesg = "proc_refwait";
		thread_block(THREAD_CONTINUE_NULL);
		uth->uu_wchan = NULL;
		uth->uu_wmesg = NULL;
		if (locked) {
			proc_list_lock();
		}
	} else {
		/* bits already cleared: cancel the armed wait */
		clear_wait(current_thread(), THREAD_AWAKENED);
	}
}
593
/*!
 * @function proc_ref_wait_for_exec()
 *
 * @brief
 * Routine called by processes trying to acquire a ref while
 * an exec is in flight.
 *
 * @discussion
 * This function is called with a proc ref held on the proc,
 * which will be given up until the @c P_REF_*_EXEC flags clear.
 *
 * @param p the proc, the caller owns a proc ref
 * @param bits the result of @c proc_ref_try_fast() prior to calling this.
 * @param locked whether the caller holds the @c proc_list_lock().
 *
 * @returns p with a proc ref on success, PROC_NULL if the proc died.
 */
__attribute__((noinline))
static proc_t
proc_ref_wait_for_exec(proc_t p, uint32_t bits, int locked)
{
	const proc_ref_bits_t mask = P_REF_WILL_EXEC | P_REF_IN_EXEC;

	/*
	 * the proc is in the middle of exec,
	 * trade our ref for a "wait ref",
	 * and wait for the proc_refwake_did_exec() call.
	 *
	 * Note: it's very unlikely that we'd loop back into the wait,
	 * it would only happen if the target proc would be
	 * in exec again by the time we woke up.
	 */
	os_ref_retain_raw(&p->p_waitref, &p_refgrp);

	do {
		/* give up the proc ref so the exec's drain can finish */
		proc_rele(p);
		proc_ref_wait(p, &p->p_waitref, mask, locked);
		bits = proc_ref_try_fast(p);
	} while (__improbable(bits & mask));

	proc_wait_release(p);

	return bits ? p : PROC_NULL;
}
636
/*
 * Returns whether a proc_ref() caller must block until the in-flight
 * exec on this proc completes, given the bits proc_ref_try_fast()
 * returned.
 */
static inline bool
proc_ref_needs_wait_for_exec(uint32_t bits)
{
	if (__probable((bits & (P_REF_WILL_EXEC | P_REF_IN_EXEC)) == 0)) {
		return false;
	}

	if (bits & P_REF_IN_EXEC) {
		return true;
	}

	/*
	 * procs can't have outstanding refs while execing.
	 *
	 * In order to achieve that, proc_refdrain_will_exec()
	 * will drain outstanding references. It signals its intent
	 * with the P_REF_WILL_EXEC flag, and moves to P_REF_IN_EXEC
	 * when this is achieved.
	 *
	 * Most threads will block in proc_ref() when any of those
	 * flags is set. However, threads that already have
	 * an outstanding ref on this proc might want another
	 * before dropping them. To avoid deadlocks, we need
	 * to let threads with any outstanding reference take one
	 * when only P_REF_WILL_EXEC is set (which causes exec
	 * to be delayed).
	 *
	 * Note: the current thread will _always_ appear like it holds
	 * one ref due to having taken one speculatively.
	 */
	assert(current_uthread()->uu_proc_refcount >= 1);
	return current_uthread()->uu_proc_refcount == 1;
}
670
/*
 * Drops a proc ref taken with proc_ref() / proc_self() /
 * proc_ref_try_fast().  Always returns 0.
 */
int
proc_rele(proc_t p)
{
	uint32_t o_bits, n_bits;

	proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);

	os_atomic_rmw_loop(&p->p_refcount, o_bits, n_bits, release, {
		/* drop one ref; the count lives above the P_REF_* flag bits */
		n_bits = o_bits - (1u << P_REF_BITS);
		if ((n_bits >> P_REF_BITS) == 1) {
			/* only one ref left: any pending drain is complete */
			n_bits &= ~P_REF_DRAINING;
		}
	});
	record_procref(p, -1);

	/*
	 * p might be freed after this point.
	 */

	if (__improbable((o_bits & P_REF_DRAINING) && !(n_bits & P_REF_DRAINING))) {
		/*
		 * This wakeup can cause spurious ones,
		 * but proc_refdrain() can deal with those.
		 *
		 * Because the proc_zone memory is sequestered,
		 * this is safe to wakeup a possible "freed" address.
		 */
		wakeup(&p->p_refcount);
	}
	return 0;
}
702
/*
 * Returns a ref on the current process, or PROC_NULL if it is dying.
 */
proc_t
proc_self(void)
{
	proc_t p = current_proc();

	/*
	 * Do not go through the logic of "wait for exec", it is meaningless.
	 * Only fail taking a ref for oneself if the proc is about to die.
	 */
	return proc_ref_try_fast(p) ? p : PROC_NULL;
}
714
/*
 * Takes a ref on p, or returns PROC_NULL if p is dying (or still being
 * created).  If p is in the middle of an exec, blocks until the exec
 * completes — except for callers that already hold a ref on p
 * (see proc_ref_needs_wait_for_exec()).
 *
 * @param locked whether the caller holds the proc_list_lock().
 */
proc_t
proc_ref(proc_t p, int locked)
{
	uint32_t bits;

	bits = proc_ref_try_fast(p);
	if (__improbable(!bits)) {
		return PROC_NULL;
	}

	if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
		return proc_ref_wait_for_exec(p, bits, locked);
	}

	return p;
}
731
/*
 * Really frees a proc.  Invoked through hazard_retire() once the last
 * wait ref is dropped (see proc_wait_release()).
 */
static void
proc_free(void *_p)
{
	proc_t p = _p;
	proc_t pn = hazard_ptr_serialized_load(&p->p_hash);

	if (pn) {
		/* release the reference taken in phash_remove_locked() */
		proc_wait_release(pn);
	}
	zfree_id(ZONE_ID_PROC, p);
}
744
/*
 * Drops a "wait ref" (p_waitref).  On the last release the proc is
 * retired through the hazard pointer machinery, which eventually calls
 * proc_free().
 */
void
proc_wait_release(proc_t p)
{
	if (__probable(os_ref_release_raw(&p->p_waitref, &p_refgrp) == 0)) {
		hazard_retire(p, sizeof(*p), proc_free);
	}
}
752
/*
 * Finds an exited (zombie) process by pid and takes exclusive control
 * of the unreaped zombie by setting P_LIST_WAITING.  If another thread
 * already controls it, sleeps and retries.  Returns PROC_NULL when the
 * pid is not found or the proc has not exited.
 */
proc_t
proc_find_zombref(int pid)
{
	proc_t p;

	proc_list_lock();

again:
	p = phash_find_locked(pid);

	/* should we bail? */
	if ((p == PROC_NULL) || !proc_list_exited(p)) {
		proc_list_unlock();
		return PROC_NULL;
	}

	/* If someone else is controlling the (unreaped) zombie - wait */
	if ((p->p_listflag & P_LIST_WAITING) != 0) {
		(void)msleep(&p->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
		goto again;
	}
	p->p_listflag |= P_LIST_WAITING;

	proc_list_unlock();

	return p;
}
780
/*
 * Releases the exclusive control over a zombie taken by
 * proc_find_zombref() and wakes any threads waiting for it.
 */
void
proc_drop_zombref(proc_t p)
{
	proc_list_lock();
	if ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
		p->p_listflag &= ~P_LIST_WAITING;
		wakeup(&p->p_stat);
	}
	proc_list_unlock();
}
791
792
/*
 * Waits until the caller's ref is the only one left on an exited proc.
 * Setting P_REF_DRAINING makes proc_rele() wake us on &p->p_refcount
 * when the count drops to one.
 */
void
proc_refdrain(proc_t p)
{
	uint32_t bits = os_ref_get_raw_mask(&p->p_refcount);

	assert(proc_list_exited(p));

	while ((bits >> P_REF_BITS) > 1) {
		/* re-assert P_REF_DRAINING before each wait (cleared by proc_rele) */
		if (os_atomic_cmpxchgv(&p->p_refcount, bits,
		    bits | P_REF_DRAINING, &bits, relaxed)) {
			proc_ref_wait(p, &p->p_refcount, P_REF_DRAINING, false);
		}
	}
}
807
/*
 * Drains outstanding proc refs in preparation for exec, then marks the
 * proc P_REF_IN_EXEC.  Returns `p` with a fresh proc ref held for the
 * caller.
 */
proc_t
proc_refdrain_will_exec(proc_t p)
{
	const proc_ref_bits_t will_exec_mask = P_REF_WILL_EXEC | P_REF_DRAINING;

	/*
	 * All the calls to proc_ref will wait
	 * for the flag to get cleared before returning a ref.
	 *
	 * (except for the case documented in proc_ref_needs_wait_for_exec()).
	 */

	if (p == initproc) {
		/* Do not wait in ref drain for launchd exec */
		os_atomic_or(&p->p_refcount, P_REF_IN_EXEC, relaxed);
	} else {
		for (;;) {
			uint32_t o_ref, n_ref;

			os_atomic_rmw_loop(&p->p_refcount, o_ref, n_ref, relaxed, {
				if ((o_ref >> P_REF_BITS) == 1) {
					/*
					 * We drained successfully,
					 * move on to P_REF_IN_EXEC
					 */
					n_ref = o_ref & ~will_exec_mask;
					n_ref |= P_REF_IN_EXEC;
				} else {
					/*
					 * Outstanding refs exist,
					 * mark our desire to stall
					 * proc_ref() callers with
					 * P_REF_WILL_EXEC.
					 */
					n_ref = o_ref | will_exec_mask;
				}
			});

			if (n_ref & P_REF_IN_EXEC) {
				break;
			}

			/* still refs outstanding: wait for proc_rele() to wake us */
			proc_ref_wait(p, &p->p_refcount, P_REF_DRAINING, false);
		}
	}

	/* Return a ref to the caller */
	os_ref_retain_mask(&p->p_refcount, P_REF_BITS, NULL);
	record_procref(p, 1);

	return p;
}
860
/*
 * Exec completed: clears P_REF_IN_EXEC and wakes the threads blocked
 * in proc_ref_wait_for_exec() on &p->p_waitref.
 */
void
proc_refwake_did_exec(proc_t p)
{
	os_atomic_andnot(&p->p_refcount, P_REF_IN_EXEC, release);
	wakeup(&p->p_waitref);
}
867
/*
 * Takes a "parent ref" (p_parentref) on the parent of p, which keeps
 * the parent from completing its child drain while the ref is held.
 *
 * Returns the parent on success, or PROC_NULL when the parent is gone,
 * a zombie, or has already drained its children.  Retries up to 5
 * times while a child drain is in progress.
 */
proc_t
proc_parentholdref(proc_t p)
{
	proc_t parent = PROC_NULL;
	proc_t pp;
	int loopcnt = 0;


	proc_list_lock();
loop:
	pp = p->p_pptr;
	if ((pp == PROC_NULL) || (pp->p_stat == SZOMB) || ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED))) {
		parent = PROC_NULL;
		goto out;
	}

	if ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == P_LIST_CHILDDRSTART) {
		/* drain started but not finished: wait for it to settle */
		pp->p_listflag |= P_LIST_CHILDDRWAIT;
		msleep(&pp->p_childrencnt, &proc_list_mlock, 0, "proc_parent", 0);
		loopcnt++;
		if (loopcnt == 5) {
			/* give up after 5 attempts */
			parent = PROC_NULL;
			goto out;
		}
		goto loop;
	}

	if ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == 0) {
		pp->p_parentref++;
		parent = pp;
		goto out;
	}

out:
	proc_list_unlock();
	return parent;
}
/*
 * Drops a parent ref taken with proc_parentholdref().  When the last
 * ref goes away, wakes a waiter in proc_childdrainstart() if any.
 * Always returns 0.
 *
 * @param listlocked whether the caller already holds the proc list lock.
 */
int
proc_parentdropref(proc_t p, int listlocked)
{
	if (listlocked == 0) {
		proc_list_lock();
	}

	if (p->p_parentref > 0) {
		p->p_parentref--;
		if ((p->p_parentref == 0) && ((p->p_listflag & P_LIST_PARENTREFWAIT) == P_LIST_PARENTREFWAIT)) {
			p->p_listflag &= ~P_LIST_PARENTREFWAIT;
			wakeup(&p->p_parentref);
		}
	} else {
		panic("proc_parentdropref -ve ref");
	}
	if (listlocked == 0) {
		proc_list_unlock();
	}

	return 0;
}
927
/*
 * Marks p as starting to drain its children (P_LIST_CHILDDRSTART) and
 * blocks until all outstanding parent refs on p have been dropped.
 * The caller is expected to hold the proc list lock (it is handed to
 * msleep() below).
 */
void
proc_childdrainstart(proc_t p)
{
#if __PROC_INTERNAL_DEBUG
	if ((p->p_listflag & P_LIST_CHILDDRSTART) == P_LIST_CHILDDRSTART) {
		panic("proc_childdrainstart: childdrain already started");
	}
#endif
	p->p_listflag |= P_LIST_CHILDDRSTART;
	/* wait for all that hold parentrefs to drop */
	while (p->p_parentref > 0) {
		p->p_listflag |= P_LIST_PARENTREFWAIT;
		msleep(&p->p_parentref, &proc_list_mlock, 0, "proc_childdrainstart", 0);
	}
}
943
944
945 void
proc_childdrainend(proc_t p)946 proc_childdrainend(proc_t p)
947 {
948 #if __PROC_INTERNAL_DEBUG
949 if (p->p_childrencnt > 0) {
950 panic("exiting: children stil hanging around");
951 }
952 #endif
953 p->p_listflag |= P_LIST_CHILDDRAINED;
954 if ((p->p_listflag & (P_LIST_CHILDLKWAIT | P_LIST_CHILDDRWAIT)) != 0) {
955 p->p_listflag &= ~(P_LIST_CHILDLKWAIT | P_LIST_CHILDDRWAIT);
956 wakeup(&p->p_childrencnt);
957 }
958 }
959
/*
 * Releases the final ref on a proc being freed and sanity-checks that
 * nothing else is outstanding: only P_REF_DEAD may remain in the
 * refcount bits, and (debug builds) no children or parent refs may be
 * left behind.
 */
void
proc_checkdeadrefs(__unused proc_t p)
{
	uint32_t bits;

	bits = os_ref_release_raw_mask(&p->p_refcount, P_REF_BITS, NULL);
	if (bits != P_REF_DEAD) {
		panic("proc being freed and unexpected refcount %p:%d:0x%x", p,
		    bits >> P_REF_BITS, bits & P_REF_MASK);
	}
#if __PROC_INTERNAL_DEBUG
	if (p->p_childrencnt != 0) {
		panic("proc being freed and pending children cnt %p:%d", p, p->p_childrencnt);
	}
	if (p->p_parentref != 0) {
		panic("proc being freed and pending parentrefs %p:%d", p, p->p_parentref);
	}
#endif
}
979
980
/*
 * Panics unless `proc` is a valid proc allocated from the proc zone.
 * PROC_REQUIRE_ALLOW_NULL lets PROC_NULL pass, and
 * PROC_REQUIRE_ALLOW_KERNPROC lets &proc0 pass, skipping the zone check.
 */
__attribute__((always_inline, visibility("hidden")))
void
proc_require(proc_t proc, proc_require_flags_t flags)
{
	if ((flags & PROC_REQUIRE_ALLOW_NULL) && proc == PROC_NULL) {
		return;
	}
	if ((flags & PROC_REQUIRE_ALLOW_KERNPROC) && proc == &proc0) {
		return;
	}
	zone_id_require(ZONE_ID_PROC, sizeof(struct proc), proc);
}
993
994 pid_t
proc_getpid(proc_t p)995 proc_getpid(proc_t p)
996 {
997 if (p == &proc0) {
998 return 0;
999 }
1000
1001 return p->p_pid;
1002 }
1003
1004 int
proc_pid(proc_t p)1005 proc_pid(proc_t p)
1006 {
1007 if (p != NULL) {
1008 proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
1009 return proc_getpid(p);
1010 }
1011 return -1;
1012 }
1013
1014 int
proc_ppid(proc_t p)1015 proc_ppid(proc_t p)
1016 {
1017 if (p != NULL) {
1018 proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
1019 return p->p_ppid;
1020 }
1021 return -1;
1022 }
1023
1024 int
proc_original_ppid(proc_t p)1025 proc_original_ppid(proc_t p)
1026 {
1027 if (p != NULL) {
1028 proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
1029 return p->p_original_ppid;
1030 }
1031 return -1;
1032 }
1033
1034 int
proc_starttime(proc_t p,struct timeval * tv)1035 proc_starttime(proc_t p, struct timeval *tv)
1036 {
1037 if (p != NULL && tv != NULL) {
1038 tv->tv_sec = p->p_start.tv_sec;
1039 tv->tv_usec = p->p_start.tv_usec;
1040 return 0;
1041 }
1042 return EINVAL;
1043 }
1044
/*
 * Returns the pid of the current process.
 */
int
proc_selfpid(void)
{
	return proc_getpid(current_proc());
}
1050
/*
 * Returns the parent pid of the current process.
 */
int
proc_selfppid(void)
{
	return current_proc()->p_ppid;
}
1056
/*
 * Returns the code-signing flags of the current process.
 */
uint64_t
proc_selfcsflags(void)
{
	return proc_getcsflags(current_proc());
}
1062
1063 int
proc_csflags(proc_t p,uint64_t * flags)1064 proc_csflags(proc_t p, uint64_t *flags)
1065 {
1066 if (p && flags) {
1067 proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
1068 *flags = proc_getcsflags(p);
1069 return 0;
1070 }
1071 return EINVAL;
1072 }
1073
1074 boolean_t
proc_is_simulated(const proc_t p)1075 proc_is_simulated(const proc_t p)
1076 {
1077 #ifdef XNU_TARGET_OS_OSX
1078 if (p != NULL) {
1079 switch (proc_platform(p)) {
1080 case PLATFORM_IOSSIMULATOR:
1081 case PLATFORM_TVOSSIMULATOR:
1082 case PLATFORM_WATCHOSSIMULATOR:
1083 return TRUE;
1084 default:
1085 return FALSE;
1086 }
1087 }
1088 #else /* !XNU_TARGET_OS_OSX */
1089 (void)p;
1090 #endif
1091 return FALSE;
1092 }
1093
1094 uint32_t
proc_platform(const proc_t p)1095 proc_platform(const proc_t p)
1096 {
1097 if (p != NULL) {
1098 return proc_get_ro(p)->p_platform_data.p_platform;
1099 }
1100 return (uint32_t)-1;
1101 }
1102
1103 uint32_t
proc_min_sdk(proc_t p)1104 proc_min_sdk(proc_t p)
1105 {
1106 if (p != NULL) {
1107 return proc_get_ro(p)->p_platform_data.p_min_sdk;
1108 }
1109 return (uint32_t)-1;
1110 }
1111
1112 uint32_t
proc_sdk(proc_t p)1113 proc_sdk(proc_t p)
1114 {
1115 if (p != NULL) {
1116 return proc_get_ro(p)->p_platform_data.p_sdk;
1117 }
1118 return (uint32_t)-1;
1119 }
1120
/*
 * Sets the platform / min_sdk / sdk triple of a process.  The triple
 * lives in the read-only proc data, so it is updated through
 * zalloc_ro_update_field().
 */
void
proc_setplatformdata(proc_t p, uint32_t platform, uint32_t min_sdk, uint32_t sdk)
{
	proc_ro_t ro;
	struct proc_platform_ro_data platform_data;

	/* build the new value locally, then publish it in a single update */
	ro = proc_get_ro(p);
	platform_data = ro->p_platform_data;
	platform_data.p_platform = platform;
	platform_data.p_min_sdk = min_sdk;
	platform_data.p_sdk = sdk;

	zalloc_ro_update_field(ZONE_ID_PROC_RO, ro, p_platform_data, &platform_data);
}
1135
1136 #if CONFIG_DTRACE
/* DTrace helper: pid of the current process. */
int
dtrace_proc_selfpid(void)
{
	return proc_selfpid();
}
1142
/* DTrace helper: parent pid of the current process. */
int
dtrace_proc_selfppid(void)
{
	return proc_selfppid();
}
1148
/* DTrace helper: real uid of the current process. */
uid_t
dtrace_proc_selfruid(void)
{
	return current_proc()->p_ruid;
}
1154 #endif /* CONFIG_DTRACE */
1155
1156 /*!
1157 * @function proc_parent()
1158 *
1159 * @brief
1160 * Returns a ref on the parent of @c p.
1161 *
1162 * @discussion
1163 * Returns a reference on the parent, or @c PROC_NULL
1164 * if both @c p and its parent are zombies.
1165 *
1166 * If the parent is currently dying, then this function waits
1167 * for the situation to be resolved.
1168 *
1169 * This function never returns @c PROC_NULL if @c p isn't
1170 * a zombie (@c p_stat is @c SZOMB) yet.
1171 */
1172 proc_t
proc_parent(proc_t p)1173 proc_parent(proc_t p)
1174 {
1175 proc_t parent;
1176 proc_t pp;
1177
1178 proc_list_lock();
1179 loop:
1180 pp = p->p_pptr;
1181 parent = proc_ref(pp, true);
1182 if (parent == PROC_NULL && ((pp->p_listflag & P_LIST_CHILDDRAINED) == 0)) {
1183 /*
1184 * If we can't get a reference on the parent,
1185 * wait for all children to have been reparented.
1186 */
1187 pp->p_listflag |= P_LIST_CHILDLKWAIT;
1188 msleep(&pp->p_childrencnt, &proc_list_mlock, 0, "proc_parent", 0);
1189 goto loop;
1190 }
1191 proc_list_unlock();
1192 return parent;
1193 }
1194
1195 static boolean_t
proc_parent_is_currentproc(proc_t p)1196 proc_parent_is_currentproc(proc_t p)
1197 {
1198 boolean_t ret = FALSE;
1199
1200 proc_list_lock();
1201 if (p->p_pptr == current_proc()) {
1202 ret = TRUE;
1203 }
1204
1205 proc_list_unlock();
1206 return ret;
1207 }
1208
1209 void
proc_name(int pid,char * buf,int size)1210 proc_name(int pid, char * buf, int size)
1211 {
1212 proc_t p;
1213
1214 if (size <= 0) {
1215 return;
1216 }
1217
1218 bzero(buf, size);
1219
1220 if ((p = proc_find(pid)) != PROC_NULL) {
1221 strlcpy(buf, &p->p_comm[0], size);
1222 proc_rele(p);
1223 }
1224 }
1225
/*
 * Copy a process name into @buf for kernel debugging (kdp/stackshot).
 * When the destination can hold more than p_comm, the long name
 * (p_name) is preferred; otherwise the short p_comm is copied.
 */
void
proc_name_kdp(proc_t p, char * buf, int size)
{
	if (p == PROC_NULL) {
		return;
	}

	if ((size_t)size > sizeof(p->p_comm)) {
		strlcpy(buf, &p->p_name[0], MIN((int)sizeof(p->p_name), size));
	} else {
		strlcpy(buf, &p->p_comm[0], MIN((int)sizeof(p->p_comm), size));
	}
}
1239
1240 boolean_t
proc_binary_uuid_kdp(task_t task,uuid_t uuid)1241 proc_binary_uuid_kdp(task_t task, uuid_t uuid)
1242 {
1243 proc_t p = get_bsdtask_info(task);
1244 if (p == PROC_NULL) {
1245 return FALSE;
1246 }
1247
1248 proc_getexecutableuuid(p, uuid, sizeof(uuid_t));
1249
1250 return TRUE;
1251 }
1252
/*
 * Copy a uthread's name into @buf for kdp/stackshot.
 * Returns 0 on success, -1 when @buf is smaller than MAXTHREADNAMESIZE
 * (nothing is copied in that case).
 */
int
proc_threadname_kdp(void * uth, char * buf, size_t size)
{
	if (size < MAXTHREADNAMESIZE) {
		/* this is really just a protective measure for the future in
		 * case the thread name size in stackshot gets out of sync with
		 * the BSD max thread name size. Note that bsd_getthreadname
		 * doesn't take input buffer size into account. */
		return -1;
	}

	if (uth != NULL) {
		bsd_getthreadname(uth, buf);
	}
	return 0;
}
1269
1270
/* note that this function is generally going to be called from stackshot,
 * and the arguments will be coming from a struct which is declared packed
 * thus the input arguments will in general be unaligned. We have to handle
 * that here. */
/*
 * Report @p's start time: tv_sec/tv_usec from p_start, and the mach
 * absolute start time from p_stats->ps_start (0 if p_stats is gone).
 * Any output pointer may be NULL to skip that value.
 */
void
proc_starttime_kdp(void *p, unaligned_u64 *tv_sec, unaligned_u64 *tv_usec, unaligned_u64 *abstime)
{
	proc_t pp = (proc_t)p;
	if (pp != PROC_NULL) {
		if (tv_sec != NULL) {
			*tv_sec = pp->p_start.tv_sec;
		}
		if (tv_usec != NULL) {
			*tv_usec = pp->p_start.tv_usec;
		}
		if (abstime != NULL) {
			if (pp->p_stats != NULL) {
				*abstime = pp->p_stats->ps_start;
			} else {
				*abstime = 0;
			}
		}
	}
}
1295
1296 void
proc_archinfo_kdp(void * p,cpu_type_t * cputype,cpu_subtype_t * cpusubtype)1297 proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype)
1298 {
1299 proc_t pp = (proc_t)p;
1300 if (pp != PROC_NULL) {
1301 *cputype = pp->p_cputype;
1302 *cpusubtype = pp->p_cpusubtype;
1303 }
1304 }
1305
/* Return a pointer into the proc's own short-name (p_comm) storage;
 * no copy is made, so the proc must stay alive while the pointer is used. */
char *
proc_name_address(void *p)
{
	return &((proc_t)p)->p_comm[0];
}
1311
/* Return a pointer into the proc's own long-name (p_name) storage;
 * no copy is made, so the proc must stay alive while the pointer is used. */
char *
proc_longname_address(void *p)
{
	return &((proc_t)p)->p_name[0];
}
1317
1318 char *
proc_best_name(proc_t p)1319 proc_best_name(proc_t p)
1320 {
1321 if (p->p_name[0] != '\0') {
1322 return &p->p_name[0];
1323 }
1324 return &p->p_comm[0];
1325 }
1326
/* Copy the current process's long name (p_name) into @buf,
 * NUL-terminated, at most @size bytes. */
void
proc_selfname(char * buf, int size)
{
	proc_t p;

	if ((p = current_proc()) != (proc_t)0) {
		strlcpy(buf, &p->p_name[0], size);
	}
}
1336
1337 void
proc_signal(int pid,int signum)1338 proc_signal(int pid, int signum)
1339 {
1340 proc_t p;
1341
1342 if ((p = proc_find(pid)) != PROC_NULL) {
1343 psignal(p, signum);
1344 proc_rele(p);
1345 }
1346 }
1347
1348 int
proc_issignal(int pid,sigset_t mask)1349 proc_issignal(int pid, sigset_t mask)
1350 {
1351 proc_t p;
1352 int error = 0;
1353
1354 if ((p = proc_find(pid)) != PROC_NULL) {
1355 error = proc_pendingsignals(p, mask);
1356 proc_rele(p);
1357 }
1358
1359 return error;
1360 }
1361
1362 int
proc_noremotehang(proc_t p)1363 proc_noremotehang(proc_t p)
1364 {
1365 int retval = 0;
1366
1367 if (p) {
1368 retval = p->p_flag & P_NOREMOTEHANG;
1369 }
1370 return retval? 1: 0;
1371 }
1372
1373 int
proc_exiting(proc_t p)1374 proc_exiting(proc_t p)
1375 {
1376 int retval = 0;
1377
1378 if (p) {
1379 retval = p->p_lflag & P_LEXIT;
1380 }
1381 return retval? 1: 0;
1382 }
1383
1384 int
proc_in_teardown(proc_t p)1385 proc_in_teardown(proc_t p)
1386 {
1387 int retval = 0;
1388
1389 if (p) {
1390 retval = p->p_lflag & P_LPEXIT;
1391 }
1392 return retval? 1: 0;
1393 }
1394
/* Always returns 0: this kernel does not track an lvfork count
 * (the parameter is unused). */
int
proc_lvfork(proc_t p __unused)
{
	return 0;
}
1400
1401 int
proc_increment_ru_oublock(proc_t p,long * origvalp)1402 proc_increment_ru_oublock(proc_t p, long *origvalp)
1403 {
1404 long origval;
1405
1406 if (p && p->p_stats) {
1407 origval = OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
1408 if (origvalp) {
1409 *origvalp = origval;
1410 }
1411 return 0;
1412 }
1413
1414 return EINVAL;
1415 }
1416
/*
 * Return 1 when the current thread's abort should be treated as an
 * aborted signal for @p: the thread is aborted and @p is NOT in the
 * middle of a core-generating exit (i.e. not all of: AXSIG set, the
 * current thread is the exit thread, ps_sig is a valid signal number,
 * and that signal has the SA_CORE property). Always 0 for kernproc.
 */
int
proc_isabortedsignal(proc_t p)
{
	if ((p != kernproc) && current_thread_aborted() &&
	    (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
	    (p->p_sigacts.ps_sig < 1) || (p->p_sigacts.ps_sig >= NSIG) ||
	    !hassigprop(p->p_sigacts.ps_sig, SA_CORE))) {
		return 1;
	}

	return 0;
}
1429
1430 int
proc_forcequota(proc_t p)1431 proc_forcequota(proc_t p)
1432 {
1433 int retval = 0;
1434
1435 if (p) {
1436 retval = p->p_flag & P_FORCEQUOTA;
1437 }
1438 return retval? 1: 0;
1439 }
1440
/*
 * Check whether @p's credential has superuser privilege.
 * Returns the suser() result (0 on success, error otherwise); takes a
 * temporary ref on the cred and lets suser() update p_acflag accounting.
 */
int
proc_suser(proc_t p)
{
	kauth_cred_t my_cred;
	int error;

	my_cred = kauth_cred_proc_ref(p);
	error = suser(my_cred, &p->p_acflag);
	kauth_cred_unref(&my_cred);
	return error;
}
1452
/* Return the Mach task backing @proc (no reference is taken). */
task_t
proc_task(proc_t proc)
{
	return (task_t)proc->task;
}
1458
/* Associate Mach task @task with @proc (plain store; caller manages
 * any required synchronization/ownership). */
void
proc_set_task(proc_t proc, task_t task)
{
	proc->task = task;
}
1464
1465 /*
1466 * Obtain the first thread in a process
1467 *
1468 * XXX This is a bad thing to do; it exists predominantly to support the
1469 * XXX use of proc_t's in places that should really be using
1470 * XXX thread_t's instead. This maintains historical behaviour, but really
1471 * XXX needs an audit of the context (proxy vs. not) to clean up.
1472 */
1473 thread_t
proc_thread(proc_t proc)1474 proc_thread(proc_t proc)
1475 {
1476 LCK_MTX_ASSERT(&proc->p_mlock, LCK_MTX_ASSERT_OWNED);
1477
1478 uthread_t uth = TAILQ_FIRST(&proc->p_uthlist);
1479
1480 if (uth != NULL) {
1481 return get_machthread(uth);
1482 }
1483
1484 return NULL;
1485 }
1486
/* Return @p's credential from its read-only data (borrowed, no ref taken);
 * kauth_cred_require() validates the pointer. */
kauth_cred_t
proc_ucred(proc_t p)
{
	return kauth_cred_require(proc_get_ro(p)->p_ucred);
}
1492
/* Return the BSD uthread for the currently running Mach thread. */
struct uthread *
current_uthread(void)
{
	return get_bsdthread_info(current_thread());
}
1498
1499
/* Return nonzero when @p is a 64-bit (LP64) process. */
int
proc_is64bit(proc_t p)
{
	return IS_64BIT_PROCESS(p);
}
1505
/* Return nonzero when @p's task uses a 64-bit data model.
 * @p must have a task (asserted). */
int
proc_is64bit_data(proc_t p)
{
	assert(p->task);
	return (int)task_get_64bit_data(p->task);
}
1512
1513 int
proc_isinitproc(proc_t p)1514 proc_isinitproc(proc_t p)
1515 {
1516 if (initproc == NULL) {
1517 return 0;
1518 }
1519 return p == initproc;
1520 }
1521
/* Return @p's pid-reuse generation counter from its read-only data. */
int
proc_pidversion(proc_t p)
{
	return proc_get_ro(p)->p_idversion;
}
1527
/* Set @p's pid-reuse generation counter via the read-only zone update path. */
void
proc_setpidversion(proc_t p, int idversion)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p), p_idversion,
	    &idversion);
}
1534
/* Return the persona id associated with @p. */
uint32_t
proc_persona_id(proc_t p)
{
	return (uint32_t)persona_id_from_proc(p);
}
1540
/* Return @p's cached effective uid (p_uid). */
uint32_t
proc_getuid(proc_t p)
{
	return p->p_uid;
}
1546
/* Return @p's cached effective gid (p_gid). */
uint32_t
proc_getgid(proc_t p)
{
	return p->p_gid;
}
1552
1553 uint64_t
proc_uniqueid(proc_t p)1554 proc_uniqueid(proc_t p)
1555 {
1556 if (p == &proc0) {
1557 return 0;
1558 }
1559
1560 return proc_get_ro(p)->p_uniqueid;
1561 }
1562
1563 uint64_t proc_uniqueid_task(void *p_arg, void *t);
1564 /*
1565 * During exec, two tasks point at the proc. This function is used
1566 * to gives tasks a unique ID; we make the matching task have the
1567 * proc's uniqueid, and any other task gets the high-bit flipped.
1568 * (We need to try to avoid returning UINT64_MAX, which is the
1569 * which is the uniqueid of a task without a proc. (e.g. while exiting))
1570 *
1571 * Only used by get_task_uniqueid(); do not add additional callers.
1572 */
uint64_t
proc_uniqueid_task(void *p_arg, void *t)
{
	proc_t p = p_arg;
	uint64_t uniqueid = proc_uniqueid(p);
	/* matching task keeps the proc's id; any other task flips bit 63 */
	return uniqueid ^ (__probable(t == (void *)p->task) ? 0 : (1ull << 63));
}
1580
/* Return the unique id of @p's parent, captured at fork time. */
uint64_t
proc_puniqueid(proc_t p)
{
	return p->p_puniqueid;
}
1586
/* Fill @ids with @p's coalition ids (one per coalition type);
 * all-zero when coalitions are compiled out. */
void
proc_coalitionids(__unused proc_t p, __unused uint64_t ids[COALITION_NUM_TYPES])
{
#if CONFIG_COALITIONS
	task_coalition_ids(p->task, ids);
#else
	memset(ids, 0, sizeof(uint64_t[COALITION_NUM_TYPES]));
#endif
	return;
}
1597
/* Return the was_throttled counter accumulated on @p. */
uint64_t
proc_was_throttled(proc_t p)
{
	return p->was_throttled;
}
1603
/* Return the did_throttle counter accumulated on @p. */
uint64_t
proc_did_throttle(proc_t p)
{
	return p->did_throttle;
}
1609
/* Fetch the code-directory hash of @p's executable from its text vnode.
 * Returns the vn_getcdhash() error code (0 on success). */
int
proc_getcdhash(proc_t p, unsigned char *cdhash)
{
	return vn_getcdhash(p->p_textvp, p->p_textoff, cdhash);
}
1615
/* Return @p's code-signing flags from its read-only data. */
uint64_t
proc_getcsflags(proc_t p)
{
	return proc_get_ro(p)->p_csflags;
}
1621
/* Replace @p's code-signing flags (truncated to 32 bits) through the
 * read-only zone update path; kernproc's flags are never modified. */
void
proc_csflags_update(proc_t p, uint64_t flags)
{
	uint32_t csflags = (uint32_t)flags;

	if (p != kernproc) {
		zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p),
		    p_csflags, &csflags);
	}
}
1632
/* Set the given bits in @p's code-signing flags
 * (read-modify-write; callers serialize as needed). */
void
proc_csflags_set(proc_t p, uint64_t flags)
{
	proc_csflags_update(p, proc_getcsflags(p) | (uint32_t)flags);
}
1638
/* Clear the given bits in @p's code-signing flags
 * (read-modify-write; callers serialize as needed). */
void
proc_csflags_clear(proc_t p, uint64_t flags)
{
	proc_csflags_update(p, proc_getcsflags(p) & ~(uint32_t)flags);
}
1644
/* Return @p's syscall filter mask pointer from its read-only data. */
uint8_t *
proc_syscall_filter_mask(proc_t p)
{
	return proc_get_ro(p)->syscall_filter_mask;
}
1650
/* Install @mask as @p's syscall filter mask (stores the pointer itself
 * via the read-only zone update path). */
void
proc_syscall_filter_mask_set(proc_t p, uint8_t *mask)
{
	zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p),
	    syscall_filter_mask, &mask);
}
1657
/* Return the low 16 bits of @p's exit status (wait(2)-style encoding). */
int
proc_exitstatus(proc_t p)
{
	return p->p_xstat & 0xffff;
}
1663
/* Copy @uuid into @p's executable-UUID field (p_uuid). */
void
proc_setexecutableuuid(proc_t p, const unsigned char *uuid)
{
	memcpy(p->p_uuid, uuid, sizeof(p->p_uuid));
}
1669
/* Return a borrowed pointer to @p's executable-UUID storage. */
const unsigned char *
proc_executableuuid_addr(proc_t p)
{
	return &p->p_uuid[0];
}
1675
1676 void
proc_getexecutableuuid(proc_t p,unsigned char * uuidbuf,unsigned long size)1677 proc_getexecutableuuid(proc_t p, unsigned char *uuidbuf, unsigned long size)
1678 {
1679 if (size >= sizeof(uuid_t)) {
1680 memcpy(uuidbuf, proc_executableuuid_addr(p), sizeof(uuid_t));
1681 }
1682 }
1683
/* Swap @p's credential to @cred; the old cred's reference is dropped
 * by kauth_cred_set() after the new one is published. */
void
proc_set_ucred(proc_t p, kauth_cred_t cred)
{
	kauth_cred_t my_cred = proc_ucred(p);

	/* update the field first so the proc never points to a freed cred. */
	zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p), p_ucred, &cred);

	kauth_cred_set(&my_cred, cred);
}
1694
/*
 * Atomically transform @p's credential with @update_cred.
 *
 * The block may be invoked multiple times: if another thread changed the
 * cred between the unlocked computation and the locked compare, the new
 * candidate is dropped and the update is retried against the fresh cred.
 * When @setugid is true and the cred changed, P_SUGID is set and the
 * security token is refreshed. Returns true iff the cred was replaced.
 */
bool
proc_update_label(proc_t p, bool setugid,
    kauth_cred_t (^update_cred)(kauth_cred_t))
{
	kauth_cred_t cur_cred;
	kauth_cred_t new_cred;
	bool changed = false;

	cur_cred = kauth_cred_proc_ref(p);
retry:
	new_cred = update_cred(cur_cred);
	if (new_cred != cur_cred) {
		proc_ucred_lock(p);

		/* Compare again under the lock. */
		if (__improbable(proc_ucred(p) != cur_cred)) {
			proc_ucred_unlock(p);
			kauth_cred_unref(&new_cred);
			cur_cred = kauth_cred_proc_ref(p);
			goto retry;
		}

		proc_set_ucred(p, new_cred);
		proc_update_creds_onproc(p);
		proc_ucred_unlock(p);

		if (setugid) {
			OSBitOrAtomic(P_SUGID, &p->p_flag);
			set_security_token(p);
		}

		changed = true;
	}

	/* drops either the unchanged candidate or our ref on the old cred */
	kauth_cred_unref(&new_cred);
	return changed;
}
1732
1733 /* Return vnode for executable with an iocount. Must be released with vnode_put() */
1734 vnode_t
proc_getexecutablevnode(proc_t p)1735 proc_getexecutablevnode(proc_t p)
1736 {
1737 vnode_t tvp = p->p_textvp;
1738
1739 if (tvp != NULLVP) {
1740 if (vnode_getwithref(tvp) == 0) {
1741 return tvp;
1742 }
1743 }
1744
1745 return NULLVP;
1746 }
1747
/*
 * Return @p's controlling TTY vnode through @vp with an iocount held
 * (caller must vnode_put()).
 *
 * Returns 0 on success, ENOENT when the session has no tty, EINVAL on
 * bad arguments, a missing pgrp, or a tty vnode that was recycled (vid
 * mismatch). The vnode/vid pair is snapshotted under the session lock
 * and revalidated with vnode_getwithvid() outside it.
 */
int
proc_gettty(proc_t p, vnode_t *vp)
{
	struct session *procsp;
	struct pgrp *pg;
	int err = EINVAL;

	if (!p || !vp) {
		return EINVAL;
	}

	if ((pg = proc_pgrp(p, &procsp)) != PGRP_NULL) {
		session_lock(procsp);
		vnode_t ttyvp = procsp->s_ttyvp;
		int ttyvid = procsp->s_ttyvid;
		session_unlock(procsp);

		if (ttyvp) {
			if (vnode_getwithvid(ttyvp, ttyvid) == 0) {
				*vp = ttyvp;
				err = 0;
			}
		} else {
			err = ENOENT;
		}

		pgrp_rele(pg);
	}

	return err;
}
1779
1780 int
proc_gettty_dev(proc_t p,dev_t * devp)1781 proc_gettty_dev(proc_t p, dev_t *devp)
1782 {
1783 struct pgrp *pg;
1784 dev_t dev = NODEV;
1785
1786 if ((pg = proc_pgrp(p, NULL)) != PGRP_NULL) {
1787 dev = os_atomic_load(&pg->pg_session->s_ttydev, relaxed);
1788 pgrp_rele(pg);
1789 }
1790
1791 if (dev == NODEV) {
1792 return EINVAL;
1793 }
1794
1795 *devp = dev;
1796 return 0;
1797 }
1798
/*
 * Copy the current process's exec arguments (argc followed by the raw
 * argument strings from the top of the user stack) into @buf.
 *
 * With @buf == NULL, *buflen is set to the required size.
 * With @buf != NULL, *buflen is the capacity; argc always goes in the
 * first sizeof(p_argc) bytes and the string area is truncated to fit.
 * Returns 0 on success, EINVAL on bad parameters or missing user stack,
 * or a copyin() error.
 */
int
proc_selfexecutableargs(uint8_t *buf, size_t *buflen)
{
	proc_t p = current_proc();

	// buflen must always be provided
	if (buflen == NULL) {
		return EINVAL;
	}

	// If a buf is provided, there must be at least enough room to fit argc
	if (buf && *buflen < sizeof(p->p_argc)) {
		return EINVAL;
	}

	if (!p->user_stack) {
		return EINVAL;
	}

	if (buf == NULL) {
		*buflen = p->p_argslen + sizeof(p->p_argc);
		return 0;
	}

	// Copy in argc to the first 4 bytes
	memcpy(buf, &p->p_argc, sizeof(p->p_argc));

	if (*buflen > sizeof(p->p_argc) && p->p_argslen > 0) {
		// See memory layout comment in kern_exec.c:exec_copyout_strings()
		// We want to copy starting from `p_argslen` bytes away from top of stack
		return copyin(p->user_stack - p->p_argslen,
		           buf + sizeof(p->p_argc),
		           MIN(p->p_argslen, *buflen - sizeof(p->p_argc)));
	} else {
		return 0;
	}
}
1836
/* Return the file offset of @p's executable within its text vnode. */
off_t
proc_getexecutableoffset(proc_t p)
{
	return p->p_textoff;
}
1842
1843 void
bsd_set_dependency_capable(task_t task)1844 bsd_set_dependency_capable(task_t task)
1845 {
1846 proc_t p = get_bsdtask_info(task);
1847
1848 if (p) {
1849 OSBitOrAtomic(P_DEPENDENCY_CAPABLE, &p->p_flag);
1850 }
1851 }
1852
1853
1854 #ifndef __arm__
1855 int
IS_64BIT_PROCESS(proc_t p)1856 IS_64BIT_PROCESS(proc_t p)
1857 {
1858 if (p && (p->p_flag & P_LP64)) {
1859 return 1;
1860 } else {
1861 return 0;
1862 }
1863 }
1864 #endif
1865
1866 /*
1867 * Locate a process by number
1868 */
1869 proc_t
phash_find_locked(pid_t pid)1870 phash_find_locked(pid_t pid)
1871 {
1872 proc_t p;
1873
1874 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1875
1876 if (!pid) {
1877 return kernproc;
1878 }
1879
1880 for (p = hazard_ptr_serialized_load(PIDHASH(pid)); p;
1881 p = hazard_ptr_serialized_load(&p->p_hash)) {
1882 if (p->p_proc_ro && p->p_pid == pid) {
1883 break;
1884 }
1885 }
1886
1887 return p;
1888 }
1889
/* Insert @p at the head of its pid hash chain. Caller holds
 * proc_list_mlock (asserted); the hazard-pointer stores keep lockless
 * readers (proc_find) safe. */
void
phash_insert_locked(pid_t pid, struct proc *p)
{
	struct proc_hp *head = PIDHASH(pid);

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	hazard_ptr_serialized_store_relaxed(&p->p_hash,
	    hazard_ptr_serialized_load(head));
	hazard_ptr_serialized_store(head, p);
}
1901
/* Unlink @p from its pid hash chain under proc_list_mlock.
 * The successor gets an extra waitref so @p's p_hash stays valid for
 * concurrent lockless readers still traversing through @p. */
void
phash_remove_locked(pid_t pid, struct proc *p)
{
	struct proc_hp *prev = PIDHASH(pid);
	struct proc *pn;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	while ((pn = hazard_ptr_serialized_load(prev)) != p) {
		prev = &pn->p_hash;
	}

	/*
	 * Now that the proc is no longer in the hash,
	 * it needs to keep its p_hash value alive.
	 */
	pn = hazard_ptr_serialized_load(&p->p_hash);
	if (pn) {
		os_ref_retain_raw(&pn->p_waitref, &p_refgrp);
	}
	hazard_ptr_serialized_store_relaxed(prev, pn);
}
1924
/*
 * Look up a live process by pid without taking proc_list_mlock.
 *
 * Traverses the pid hash chain under hazard-pointer guards, then tries
 * to take a fast reference. If the proc is mid-exec the caller may be
 * made to wait. Returns a referenced proc (release with proc_rele()) or
 * PROC_NULL. pid 0 returns a ref on kernproc.
 */
proc_t
proc_find(int pid)
{
	struct proc_hp *hp = PIDHASH(pid);
	proc_t p = PROC_NULL;
	hazard_guard_array_t g;
	uint32_t bits = 0;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	if (!pid) {
		return proc_ref(kernproc, false);
	}

	g = hazard_guard_get_n(0, 3);

	/*
	 * Note: In theory, reusing a guard needs to use hazard_guard_reacquire(),
	 *       however, using 3 guards helps us being smarter:
	 *
	 *       If one considers the sequence of guards being acquired be:
	 *       <n>, <n+1>, <n+2>, <n+3> ...
	 *
	 *       then the pointer acquired at step <n> is used to acquire
	 *       <n+1> but no longer used once <n+2> has been acquired.
	 *
	 *       Acquiring <n+2> has a full barrier which we can hence
	 *       piggy back on, and make the <n+3> reuse of the same guard
	 *       as <n> be an "acquire" instead of a "re-acquire".
	 *
	 *       This unrolling is good for the CPU too since it can help it
	 *       speculate through values/barriers anyway.
	 */
	for (;;) {
		/* stop on a match (must be fully created: p_proc_ro set) or chain end */
		p = hazard_guard_acquire(&g[0], hp);
		if (p == PROC_NULL ||
		    (p->p_pid == pid && p->p_proc_ro != NULL)) {
			break;
		}
		hp = &p->p_hash;

		p = hazard_guard_acquire(&g[1], hp);
		if (p == PROC_NULL ||
		    (p->p_pid == pid && p->p_proc_ro != NULL)) {
			break;
		}
		hp = &p->p_hash;

		p = hazard_guard_acquire(&g[2], hp);
		if (p == PROC_NULL ||
		    (p->p_pid == pid && p->p_proc_ro != NULL)) {
			break;
		}
		hp = &p->p_hash;
	}

	if (p) {
		bits = proc_ref_try_fast(p);
	}

	hazard_guard_put_n(g, 3);

	if (__improbable(!bits)) {
		return PROC_NULL;
	}

	if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
		p = proc_ref_wait_for_exec(p, bits, false);
	}

	if (p != PROC_NULL) {
		/*
		 * pair with proc_refwake_did_exec() to be able to observe
		 * a fully formed proc_ro structure.
		 */
		os_atomic_thread_fence(acquire);
	}

	return p;
}
2005
2006 proc_t
proc_find_locked(int pid)2007 proc_find_locked(int pid)
2008 {
2009 proc_t p = PROC_NULL;
2010
2011 p = phash_find_locked(pid);
2012 if (p != PROC_NULL) {
2013 p = proc_ref(p, true);
2014 }
2015
2016 return p;
2017 }
2018
2019 proc_t
proc_findthread(thread_t thread)2020 proc_findthread(thread_t thread)
2021 {
2022 proc_t p = PROC_NULL;
2023
2024 proc_list_lock();
2025 {
2026 p = (proc_t)(get_bsdthreadtask_info(thread));
2027 }
2028 p = proc_ref(p, true);
2029 proc_list_unlock();
2030 return p;
2031 }
2032
2033
2034 /*
2035 * Locate a zombie by PID
2036 */
2037 __private_extern__ proc_t
pzfind(pid_t pid)2038 pzfind(pid_t pid)
2039 {
2040 proc_t p;
2041
2042
2043 proc_list_lock();
2044
2045 LIST_FOREACH(p, &zombproc, p_list) {
2046 if (proc_getpid(p) == pid) {
2047 break;
2048 }
2049 }
2050
2051 proc_list_unlock();
2052
2053 return p;
2054 }
2055
2056 /*
2057 * Acquire a pgrp ref, if and only if the pgrp is non empty.
2058 */
2059 static inline bool
pg_ref_try(struct pgrp * pgrp)2060 pg_ref_try(struct pgrp *pgrp)
2061 {
2062 return os_ref_retain_try_mask(&pgrp->pg_refcount, PGRP_REF_BITS,
2063 PGRP_REF_EMPTY, &p_refgrp);
2064 }
2065
2066 /*
2067 * Unconditionally acquire a pgrp ref,
2068 * regardless of whether the pgrp is empty or not.
2069 */
2070 static inline struct pgrp *
pg_ref(struct pgrp * pgrp)2071 pg_ref(struct pgrp *pgrp)
2072 {
2073 os_ref_retain_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp);
2074 return pgrp;
2075 }
2076
2077 /*
2078 * Locate a process group by number
2079 */
2080 struct pgrp *
pghash_find_locked(pid_t pgid)2081 pghash_find_locked(pid_t pgid)
2082 {
2083 struct pgrp *pgrp;
2084
2085 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2086
2087 for (pgrp = hazard_ptr_serialized_load(PGRPHASH(pgid)); pgrp;
2088 pgrp = hazard_ptr_serialized_load(&pgrp->pg_hash)) {
2089 if (pgrp->pg_id == pgid) {
2090 break;
2091 }
2092 }
2093
2094 return pgrp;
2095 }
2096
/* Insert @pgrp at the head of its pgid hash chain. Caller holds
 * proc_list_mlock (asserted); hazard-pointer stores keep lockless
 * readers (pgrp_find) safe. */
void
pghash_insert_locked(pid_t pgid, struct pgrp *pgrp)
{
	struct pgrp_hp *head = PGRPHASH(pgid);

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	hazard_ptr_serialized_store_relaxed(&pgrp->pg_hash,
	    hazard_ptr_serialized_load(head));
	hazard_ptr_serialized_store(head, pgrp);
}
2108
/* Unlink @pgrp from its pgid hash chain under proc_list_mlock.
 * The successor gets an extra hashref so @pgrp's pg_hash stays valid
 * for concurrent lockless readers; released in pgrp_free/pgrp_destroy. */
static void
pghash_remove_locked(pid_t pgid, struct pgrp *pgrp)
{
	struct pgrp_hp *prev = PGRPHASH(pgid);
	struct pgrp *pgn;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	while ((pgn = hazard_ptr_serialized_load(prev)) != pgrp) {
		prev = &pgn->pg_hash;
	}

	/*
	 * Now that the process group is out of the hash,
	 * we need to protect its "next" for readers until
	 * its death.
	 */
	pgn = hazard_ptr_serialized_load(&pgrp->pg_hash);
	if (pgn) {
		os_ref_retain_raw(&pgn->pg_hashref, &p_refgrp);
	}
	hazard_ptr_serialized_store_relaxed(prev, pgn);
}
2132
/*
 * Look up a process group by id without taking proc_list_mlock.
 *
 * Traverses the pgid hash under hazard-pointer guards (same 3-guard
 * unrolling trick as proc_find()), then takes a ref only if the group
 * is non-empty. Returns a referenced pgrp (release with pgrp_rele())
 * or PGRP_NULL.
 */
struct pgrp *
pgrp_find(pid_t pgid)
{
	struct pgrp_hp *hp = PGRPHASH(pgid);
	struct pgrp *pgrp = PGRP_NULL;
	hazard_guard_array_t g;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	g = hazard_guard_get_n(0, 3);

	for (;;) {
		pgrp = hazard_guard_acquire(&g[0], hp);
		if (pgrp == PGRP_NULL || pgrp->pg_id == pgid) {
			break;
		}
		hp = &pgrp->pg_hash;

		pgrp = hazard_guard_acquire(&g[1], hp);
		if (pgrp == PGRP_NULL || pgrp->pg_id == pgid) {
			break;
		}
		hp = &pgrp->pg_hash;

		pgrp = hazard_guard_acquire(&g[2], hp);
		if (pgrp == PGRP_NULL || pgrp->pg_id == pgid) {
			break;
		}
		hp = &pgrp->pg_hash;
	}

	/* an empty pgrp is being torn down; treat as not found */
	if (pgrp && !pg_ref_try(pgrp)) {
		pgrp = PGRP_NULL;
	}

	hazard_guard_put_n(g, 3);

	return pgrp;
}
2172
/* consumes one ref from pgrp */
/* Add @p to @pgrp's member list (after @parent when given, since the
 * parent is known to be in the same group), clear the EMPTY refcount
 * bit if this is the first member, and publish p->p_pgrp last so
 * lockless readers see consistent pgrpid/sessionid values. */
static void
pgrp_add_member(struct pgrp *pgrp, struct proc *parent, struct proc *p)
{
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	pgrp_lock(pgrp);
	if (LIST_EMPTY(&pgrp->pg_members)) {
		os_atomic_andnot(&pgrp->pg_refcount, PGRP_REF_EMPTY, relaxed);
	}
	if (parent != PROC_NULL) {
		assert(pgrp == hazard_ptr_serialized_load(&parent->p_pgrp));
		LIST_INSERT_AFTER(parent, p, p_pglist);
	} else {
		LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
	}
	pgrp_unlock(pgrp);

	p->p_pgrpid = pgrp->pg_id;
	p->p_sessionid = pgrp->pg_session->s_sid;
	hazard_ptr_serialized_store(&p->p_pgrp, pgrp);
}
2195
/* returns one ref from pgrp */
/* Remove @p from @pgrp's member list; sets the EMPTY refcount bit when
 * the last member leaves so pg_ref_try() starts failing. */
static void
pgrp_del_member(struct pgrp *pgrp, struct proc *p)
{
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	pgrp_lock(pgrp);
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&pgrp->pg_members)) {
		os_atomic_or(&pgrp->pg_refcount, PGRP_REF_EMPTY, relaxed);
	}
	pgrp_unlock(pgrp);
}
2209
/* Drop one reference on @pgrp (NULL tolerated); destroys the group
 * when the last reference goes away. */
void
pgrp_rele(struct pgrp * pgrp)
{
	if (pgrp == PGRP_NULL) {
		return;
	}

	if (os_ref_release_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp) == 0) {
		pgrp_destroy(pgrp);
	}
}
2221
/* Allocate and initialize a session led by @leader: sid is the
 * leader's pid, no controlling tty, refcount starts at one. */
struct session *
session_alloc(proc_t leader)
{
	struct session *sess;

	sess = zalloc_flags(session_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	lck_mtx_init(&sess->s_mlock, &proc_mlock_grp, &proc_lck_attr);
	sess->s_leader = leader;
	sess->s_sid = proc_getpid(leader);
	sess->s_ttypgrpid = NO_PID;
	os_atomic_init(&sess->s_ttydev, NODEV);
	os_ref_init_mask(&sess->s_refcount, SESSION_REF_BITS,
	    &p_refgrp, S_DEFAULT);

	return sess;
}
2238
/* Install @tp as @sessp's controlling tty (session lock held,
 * asserted). Takes a hold on @tp and returns the previous tty, whose
 * hold the caller now owns. */
struct tty *
session_set_tty_locked(struct session *sessp, struct tty *tp)
{
	struct tty *old;

	LCK_MTX_ASSERT(&sessp->s_mlock, LCK_MTX_ASSERT_OWNED);

	old = sessp->s_ttyp;
	ttyhold(tp);
	sessp->s_ttyp = tp;
	os_atomic_store(&sessp->s_ttydev, tp->t_dev, relaxed);

	return old;
}
2253
/* Detach the controlling tty from @sessp (session lock held, asserted)
 * and reset all tty-related fields. Returns the old tty; the caller
 * owns its outstanding hold. */
struct tty *
session_clear_tty_locked(struct session *sessp)
{
	struct tty *tp = sessp->s_ttyp;

	LCK_MTX_ASSERT(&sessp->s_mlock, LCK_MTX_ASSERT_OWNED);
	sessp->s_ttyvp = NULLVP;
	sessp->s_ttyvid = 0;
	sessp->s_ttyp = TTY_NULL;
	sessp->s_ttypgrpid = NO_PID;
	os_atomic_store(&sessp->s_ttydev, NODEV, relaxed);

	return tp;
}
2268
__attribute__((noinline))
/* Tear down @sess after its last reference dropped: unhash it under
 * the proc list lock and free it. The tty must already be gone. */
static void
session_destroy(struct session *sess)
{
	proc_list_lock();
	LIST_REMOVE(sess, s_hash);
	proc_list_unlock();

	/*
	 * Either the TTY was closed,
	 * or proc_exit() destroyed it when the leader went away
	 */
	assert(sess->s_ttyp == TTY_NULL);

	lck_mtx_destroy(&sess->s_mlock, &proc_mlock_grp);
	zfree(session_zone, sess);
}
2286
/* Take an additional reference on @sess and return it. */
struct session *
session_ref(struct session *sess)
{
	os_ref_retain_mask(&sess->s_refcount, SESSION_REF_BITS, &p_refgrp);
	return sess;
}
2293
/* Drop one reference on @sess; destroys it when the count hits zero. */
void
session_rele(struct session *sess)
{
	if (os_ref_release_mask(&sess->s_refcount, SESSION_REF_BITS, &p_refgrp) == 0) {
		session_destroy(sess);
	}
}
2301
2302
2303 /*
2304 * Make a new process ready to become a useful member of society by making it
2305 * visible in all the right places and initialize its own lists to empty.
2306 *
2307 * Parameters: parent The parent of the process to insert
2308 * child The child process to insert
2309 *
2310 * Returns: (void)
2311 *
2312 * Notes: Insert a child process into the parents children list, assign
2313 * the child the parent process pointer and PPID of the parent...
2314 */
2315 void
pinsertchild(proc_t parent,proc_t child)2316 pinsertchild(proc_t parent, proc_t child)
2317 {
2318 LIST_INIT(&child->p_children);
2319 child->p_pptr = parent;
2320 child->p_ppid = proc_getpid(parent);
2321 child->p_original_ppid = proc_getpid(parent);
2322 child->p_puniqueid = proc_uniqueid(parent);
2323 child->p_xhighbits = 0;
2324
2325 proc_list_lock();
2326 #if CONFIG_MEMORYSTATUS
2327 memorystatus_add(child, TRUE);
2328 #endif
2329
2330 parent->p_childrencnt++;
2331 LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
2332
2333 LIST_INSERT_HEAD(&allproc, child, p_list);
2334 /* mark the completion of proc creation */
2335 os_atomic_andnot(&child->p_refcount, P_REF_NEW, relaxed);
2336
2337 proc_list_unlock();
2338 }
2339
2340 /*
2341 * Move p to a new or existing process group (and session)
2342 *
2343 * Returns: 0 Success
2344 * ESRCH No such process
2345 */
2346 int
enterpgrp(proc_t p,pid_t pgid,int mksess)2347 enterpgrp(proc_t p, pid_t pgid, int mksess)
2348 {
2349 struct pgrp *pgrp;
2350 struct pgrp *mypgrp;
2351 struct session *procsp;
2352
2353 pgrp = pgrp_find(pgid);
2354 mypgrp = proc_pgrp(p, &procsp);
2355
2356 #if DIAGNOSTIC
2357 if (pgrp != NULL && mksess) { /* firewalls */
2358 panic("enterpgrp: setsid into non-empty pgrp");
2359 }
2360 if (SESS_LEADER(p, mypgrp->pg_session)) {
2361 panic("enterpgrp: session leader attempted setpgrp");
2362 }
2363 #endif
2364 if (pgrp == PGRP_NULL) {
2365 struct session *sess;
2366 pid_t savepid = proc_getpid(p);
2367 proc_t np = PROC_NULL;
2368
2369 /*
2370 * new process group
2371 */
2372 #if DIAGNOSTIC
2373 if (proc_getpid(p) != pgid) {
2374 panic("enterpgrp: new pgrp and pid != pgid");
2375 }
2376 #endif
2377 if ((np = proc_find(savepid)) == NULL || np != p) {
2378 if (np != PROC_NULL) {
2379 proc_rele(np);
2380 }
2381 pgrp_rele(mypgrp);
2382 return ESRCH;
2383 }
2384 proc_rele(np);
2385
2386 pgrp = pgrp_alloc(pgid, PGRP_REF_EMPTY);
2387
2388 if (mksess) {
2389 /*
2390 * new session
2391 */
2392 sess = session_alloc(p);
2393
2394 bcopy(mypgrp->pg_session->s_login, sess->s_login,
2395 sizeof(sess->s_login));
2396 os_atomic_andnot(&p->p_flag, P_CONTROLT, relaxed);
2397 } else {
2398 sess = session_ref(procsp);
2399 }
2400
2401 proc_list_lock();
2402 pgrp->pg_session = sess;
2403 p->p_sessionid = sess->s_sid;
2404 pghash_insert_locked(pgid, pgrp);
2405 if (mksess) {
2406 LIST_INSERT_HEAD(SESSHASH(sess->s_sid), sess, s_hash);
2407 }
2408 proc_list_unlock();
2409 } else if (pgrp == mypgrp) {
2410 pgrp_rele(pgrp);
2411 pgrp_rele(mypgrp);
2412 return 0;
2413 }
2414
2415 /*
2416 * Adjust eligibility of affected pgrps to participate in job control.
2417 * Increment eligibility counts before decrementing, otherwise we
2418 * could reach 0 spuriously during the first call.
2419 */
2420 fixjobc(p, pgrp, 1);
2421 fixjobc(p, mypgrp, 0);
2422
2423 pgrp_rele(mypgrp);
2424 pgrp_replace(p, pgrp);
2425
2426 return 0;
2427 }
2428
2429 /*
2430 * remove process from process group
2431 */
/*
 * Detach p from its process group with the proc list lock held.
 * Returns the old group with its membership-derived reference still
 * outstanding; the caller is responsible for releasing it.
 */
struct pgrp *
pgrp_leave_locked(proc_t p)
{
	struct pgrp *pg;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	pg = hazard_ptr_serialized_load(&p->p_pgrp);
	pgrp_del_member(pg, p);
	/* Mark the pgrp id dead before clearing the hazard pointer. */
	p->p_pgrpid = PGRPID_DEAD;
	hazard_ptr_clear(&p->p_pgrp);

	return pg;
}
2446
/*
 * Add `child` to `parent`'s process group with the proc list lock held
 * (fork path).  Returns the group with a new reference for the child's
 * membership.
 */
struct pgrp *
pgrp_enter_locked(struct proc *parent, struct proc *child)
{
	struct pgrp *pgrp;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	pgrp = pg_ref(hazard_ptr_serialized_load(&parent->p_pgrp));
	pgrp_add_member(pgrp, parent, child);
	return pgrp;
}
2458
2459 /*
2460 * delete a process group
2461 */
/*
 * Hazard-pointer retire callback for a process group: free the pgrp and
 * drop the hash-chain reference it may still hold on its successor.
 */
static void
pgrp_free(void *_pg)
{
	struct pgrp *pg = _pg;
	struct pgrp *pgn = hazard_ptr_serialized_load(&pg->pg_hash);

	if (pgn && os_ref_release_raw(&pgn->pg_hashref, &p_refgrp) == 0) {
		/* release the reference taken in pghash_remove_locked() */
		hazard_retire(pgn, sizeof(*pgn), pgrp_free);
	}
	zfree(pgrp_zone, pg);
}
2474
/*
 * Tear down an empty process group: unhash it, drop its session
 * reference, and retire it through the hazard-pointer machinery once
 * the last hash reference goes away.
 */
__attribute__((noinline))
static void
pgrp_destroy(struct pgrp *pgrp)
{
	struct session *sess;

	assert(LIST_EMPTY(&pgrp->pg_members));
	assert(os_ref_get_raw_mask(&pgrp->pg_refcount) & PGRP_REF_EMPTY);

	proc_list_lock();
	pghash_remove_locked(pgrp->pg_id, pgrp);
	proc_list_unlock();

	/* Detach from the session before the pgrp memory can be reused. */
	sess = pgrp->pg_session;
	pgrp->pg_session = SESSION_NULL;
	session_rele(sess);

	lck_mtx_destroy(&pgrp->pg_mlock, &proc_mlock_grp);
	if (os_ref_release_raw(&pgrp->pg_hashref, &p_refgrp) == 0) {
		hazard_retire(pgrp, sizeof(*pgrp), pgrp_free);
	}
}
2497
2498
2499 /*
2500 * Adjust pgrp jobc counters when specified process changes process group.
2501 * We count the number of processes in each process group that "qualify"
2502 * the group for terminal job control (those with a parent in a different
2503 * process group of the same session). If that count reaches zero, the
2504 * process group becomes orphaned. Check both the specified process'
2505 * process group and that of its children.
2506 * entering == 0 => p is leaving specified group.
2507 * entering == 1 => p is entering specified group.
2508 */
/*
 * Per-child callback for fixjobc(): adjust the child's process group
 * jobc count if the child qualifies its group for job control (same
 * session, different group).  Dropping the count to zero orphans the
 * group; orphanpg() must be called with the pgrp lock dropped.
 */
int
fixjob_callback(proc_t p, void * arg)
{
	struct fixjob_iterargs *fp;
	struct pgrp * pg, *hispg;
	struct session * mysession, *hissess;
	int entering;

	fp = (struct fixjob_iterargs *)arg;
	pg = fp->pg;
	mysession = fp->mysession;
	entering = fp->entering;

	hispg = proc_pgrp(p, &hissess);

	if (hispg != pg && hissess == mysession) {
		pgrp_lock(hispg);
		if (entering) {
			hispg->pg_jobc++;
			pgrp_unlock(hispg);
		} else if (--hispg->pg_jobc == 0) {
			/* group just became orphaned; signal stopped members */
			pgrp_unlock(hispg);
			orphanpg(hispg);
		} else {
			pgrp_unlock(hispg);
		}
	}
	pgrp_rele(hispg);

	return PROC_RETURNED;
}
2540
/*
 * Recompute job-control eligibility counts when p enters (entering=1)
 * or leaves (entering=0) pgrp.  Checks whether p qualifies its own
 * group (parent in a different group of the same session), then walks
 * p's children to adjust their groups' counts.
 */
void
fixjobc(proc_t p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp = PGRP_NULL;
	struct session *hissess = SESSION_NULL;
	struct session *mysession = pgrp->pg_session;
	proc_t parent;
	struct fixjob_iterargs fjarg;
	boolean_t proc_parent_self;

	/*
	 * Check if p's parent is current proc, if yes then no need to take
	 * a ref; calling proc_parent with current proc as parent may
	 * deadlock if current proc is exiting.
	 */
	proc_parent_self = proc_parent_is_currentproc(p);
	if (proc_parent_self) {
		parent = current_proc();
	} else {
		parent = proc_parent(p);
	}

	if (parent != PROC_NULL) {
		hispgrp = proc_pgrp(parent, &hissess);
		if (!proc_parent_self) {
			proc_rele(parent);
		}
	}

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	if (hispgrp != pgrp && hissess == mysession) {
		pgrp_lock(pgrp);
		if (entering) {
			pgrp->pg_jobc++;
			pgrp_unlock(pgrp);
		} else if (--pgrp->pg_jobc == 0) {
			/* count hit zero: pgrp is now orphaned */
			pgrp_unlock(pgrp);
			orphanpg(pgrp);
		} else {
			pgrp_unlock(pgrp);
		}
	}

	/* pgrp_rele tolerates PGRP_NULL when there was no parent. */
	pgrp_rele(hispgrp);

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	fjarg.pg = pgrp;
	fjarg.mysession = mysession;
	fjarg.entering = entering;
	proc_childrenwalk(p, fixjob_callback, &fjarg);
}
2599
2600 /*
2601 * The pidlist_* routines support the functions in this file that
2602 * walk lists of processes applying filters and callouts to the
2603 * elements of the list.
2604 *
2605 * A prior implementation used a single linear array, which can be
2606 * tricky to allocate on large systems. This implementation creates
2607 * an SLIST of modestly sized arrays of PIDS_PER_ENTRY elements.
2608 *
2609 * The array should be sized large enough to keep the overhead of
2610 * walking the list low, but small enough that blocking allocations of
2611 * pidlist_entry_t structures always succeed.
2612 */
2613
#define PIDS_PER_ENTRY 1021

/* One chunk of a pid snapshot; sized so blocking allocations succeed. */
typedef struct pidlist_entry {
	SLIST_ENTRY(pidlist_entry) pe_link;     /* link to next chunk */
	u_int pe_nused;                         /* pe_pid[] slots in use */
	pid_t pe_pid[PIDS_PER_ENTRY];
} pidlist_entry_t;

/* A pid snapshot: list of chunks, fill cursor, and total capacity. */
typedef struct {
	SLIST_HEAD(, pidlist_entry) pl_head;
	struct pidlist_entry *pl_active;        /* chunk currently being filled */
	u_int pl_nalloc;                        /* total pid slots allocated */
} pidlist_t;
2627
2628 static __inline__ pidlist_t *
pidlist_init(pidlist_t * pl)2629 pidlist_init(pidlist_t *pl)
2630 {
2631 SLIST_INIT(&pl->pl_head);
2632 pl->pl_active = NULL;
2633 pl->pl_nalloc = 0;
2634 return pl;
2635 }
2636
2637 static u_int
pidlist_alloc(pidlist_t * pl,u_int needed)2638 pidlist_alloc(pidlist_t *pl, u_int needed)
2639 {
2640 while (pl->pl_nalloc < needed) {
2641 pidlist_entry_t *pe = kalloc_type(pidlist_entry_t,
2642 Z_WAITOK | Z_ZERO | Z_NOFAIL);
2643 SLIST_INSERT_HEAD(&pl->pl_head, pe, pe_link);
2644 pl->pl_nalloc += (sizeof(pe->pe_pid) / sizeof(pe->pe_pid[0]));
2645 }
2646 return pl->pl_nalloc;
2647 }
2648
2649 static void
pidlist_free(pidlist_t * pl)2650 pidlist_free(pidlist_t *pl)
2651 {
2652 pidlist_entry_t *pe;
2653 while (NULL != (pe = SLIST_FIRST(&pl->pl_head))) {
2654 SLIST_FIRST(&pl->pl_head) = SLIST_NEXT(pe, pe_link);
2655 kfree_type(pidlist_entry_t, pe);
2656 }
2657 pl->pl_nalloc = 0;
2658 }
2659
2660 static __inline__ void
pidlist_set_active(pidlist_t * pl)2661 pidlist_set_active(pidlist_t *pl)
2662 {
2663 pl->pl_active = SLIST_FIRST(&pl->pl_head);
2664 assert(pl->pl_active);
2665 }
2666
2667 static void
pidlist_add_pid(pidlist_t * pl,pid_t pid)2668 pidlist_add_pid(pidlist_t *pl, pid_t pid)
2669 {
2670 pidlist_entry_t *pe = pl->pl_active;
2671 if (pe->pe_nused >= sizeof(pe->pe_pid) / sizeof(pe->pe_pid[0])) {
2672 if (NULL == (pe = SLIST_NEXT(pe, pe_link))) {
2673 panic("pidlist allocation exhausted");
2674 }
2675 pl->pl_active = pe;
2676 }
2677 pe->pe_pid[pe->pe_nused++] = pid;
2678 }
2679
/* Current capacity (in pids) of the pidlist. */
static __inline__ u_int
pidlist_nalloc(const pidlist_t *pl)
{
	return pl->pl_nalloc;
}
2685
2686 /*
2687 * A process group has become orphaned; if there are any stopped processes in
2688 * the group, hang-up all process in that group.
2689 */
static void
orphanpg(struct pgrp *pgrp)
{
	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
	u_int pid_count_available = 0;
	proc_t p;

	/* allocate outside of the pgrp_lock */
	for (;;) {
		pgrp_lock(pgrp);

		boolean_t should_iterate = FALSE;
		pid_count_available = 0;

		/* Count members and check whether any is stopped. */
		PGMEMBERS_FOREACH(pgrp, p) {
			pid_count_available++;
			if (p->p_stat == SSTOP) {
				should_iterate = TRUE;
			}
		}
		if (pid_count_available == 0 || !should_iterate) {
			pgrp_unlock(pgrp);
			goto out; /* no orphaned processes OR nothing stopped */
		}
		/* Retry until the snapshot buffer covers the membership. */
		if (pidlist_nalloc(pl) >= pid_count_available) {
			break;
		}
		pgrp_unlock(pgrp);

		pidlist_alloc(pl, pid_count_available);
	}
	pidlist_set_active(pl);

	/* Snapshot member pids under the pgrp lock. */
	u_int pid_count = 0;
	PGMEMBERS_FOREACH(pgrp, p) {
		pidlist_add_pid(pl, proc_pid(p));
		if (++pid_count >= pid_count_available) {
			break;
		}
	}
	pgrp_unlock(pgrp);

	/* Deliver SIGHUP/SIGCONT to each member, lock dropped. */
	const pidlist_entry_t *pe;
	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
		for (u_int i = 0; i < pe->pe_nused; i++) {
			const pid_t pid = pe->pe_pid[i];
			if (0 == pid) {
				continue; /* skip kernproc */
			}
			p = proc_find(pid);
			if (!p) {
				continue;
			}
			proc_transwait(p, 0);
			pt_setrunnable(p);
			psignal(p, SIGHUP);
			psignal(p, SIGCONT);
			proc_rele(p);
		}
	}
out:
	pidlist_free(pl);
}
2753
2754 boolean_t
proc_is_translated(proc_t p __unused)2755 proc_is_translated(proc_t p __unused)
2756 {
2757 return 0;
2758 }
2759
/* Compatibility stub: always returns 0 in this configuration. */
int
proc_is_classic(proc_t p __unused)
{
	return 0;
}
2765
2766 bool
proc_is_exotic(proc_t p)2767 proc_is_exotic(
2768 proc_t p)
2769 {
2770 if (p == NULL) {
2771 return false;
2772 }
2773 return task_is_exotic(proc_task(p));
2774 }
2775
2776 bool
proc_is_alien(proc_t p)2777 proc_is_alien(
2778 proc_t p)
2779 {
2780 if (p == NULL) {
2781 return false;
2782 }
2783 return task_is_alien(proc_task(p));
2784 }
2785
2786 /* XXX Why does this function exist? Need to kill it off... */
/* XXX Why does this function exist? Need to kill it off... */
/* Exported trampoline to current_proc() for external consumers. */
proc_t
current_proc_EXTERNAL(void)
{
	return current_proc();
}
2792
2793 int
proc_is_forcing_hfs_case_sensitivity(proc_t p)2794 proc_is_forcing_hfs_case_sensitivity(proc_t p)
2795 {
2796 return (p->p_vfs_iopolicy & P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY) ? 1 : 0;
2797 }
2798
2799 bool
proc_ignores_content_protection(proc_t p)2800 proc_ignores_content_protection(proc_t p)
2801 {
2802 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_CONTENT_PROTECTION;
2803 }
2804
2805 bool
proc_ignores_node_permissions(proc_t p)2806 proc_ignores_node_permissions(proc_t p)
2807 {
2808 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS;
2809 }
2810
2811 bool
proc_skip_mtime_update(proc_t p)2812 proc_skip_mtime_update(proc_t p)
2813 {
2814 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_SKIP_MTIME_UPDATE;
2815 }
2816
2817 bool
proc_allow_low_space_writes(proc_t p)2818 proc_allow_low_space_writes(proc_t p)
2819 {
2820 return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_ALLOW_LOW_SPACE_WRITES;
2821 }
2822
2823
2824 #if CONFIG_COREDUMP
2825 /*
2826 * proc_core_name(name, uid, pid)
2827 * Expand the name described in corefilename, using name, uid, and pid.
2828 * corefilename is a printf-like string, with three format specifiers:
2829 * %N name of process ("name")
2830 * %P process id (pid)
2831 * %U user id (uid)
2832 * For example, "%N.core" is the default; they can be disabled completely
2833 * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
2834 * This is controlled by the sysctl variable kern.corefile (see above).
2835 */
__private_extern__ int
proc_core_name(const char *name, uid_t uid, pid_t pid, char *cf_name,
    size_t cf_name_len)
{
	const char *format, *appendstr;
	char id_buf[11];                /* Buffer for pid/uid -- max 4B */
	size_t i, l, n;                 /* i: format index, n: output length */

	if (cf_name == NULL) {
		goto toolong;
	}

	/* Expand %N/%P/%U from the kern.corefile template into cf_name. */
	format = corefilename;
	for (i = 0, n = 0; n < cf_name_len && format[i]; i++) {
		switch (format[i]) {
		case '%':       /* Format character */
			i++;
			switch (format[i]) {
			case '%':
				appendstr = "%";
				break;
			case 'N':       /* process name */
				appendstr = name;
				break;
			case 'P':       /* process id */
				snprintf(id_buf, sizeof(id_buf), "%u", pid);
				appendstr = id_buf;
				break;
			case 'U':       /* user id */
				snprintf(id_buf, sizeof(id_buf), "%u", uid);
				appendstr = id_buf;
				break;
			case '\0':      /* format string ended in % symbol */
				goto endofstring;
			default:
				appendstr = "";
				log(LOG_ERR,
				    "Unknown format character %c in `%s'\n",
				    format[i], format);
			}
			l = strlen(appendstr);
			if ((n + l) >= cf_name_len) {
				goto toolong;
			}
			bcopy(appendstr, cf_name + n, l);
			n += l;
			break;
		default:
			/* literal character from the template */
			cf_name[n++] = format[i];
		}
	}
	/* Loop may also exit because the output buffer filled up. */
	if (format[i] != '\0') {
		goto toolong;
	}
	return 0;
toolong:
	log(LOG_ERR, "pid %ld (%s), uid (%u): corename is too long\n",
	    (long)pid, name, (uint32_t)uid);
	return 1;
endofstring:
	log(LOG_ERR, "pid %ld (%s), uid (%u): unexpected end of string after %% token\n",
	    (long)pid, name, (uint32_t)uid);
	return 1;
}
2900 #endif /* CONFIG_COREDUMP */
2901
2902 /* Code Signing related routines */
2903
/*
 * csops(2): perform a code-signing operation on a process.  Thin
 * wrapper around csops_internal() with no audit token supplied.
 */
int
csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
{
	return csops_internal(uap->pid, uap->ops, uap->useraddr,
	           uap->usersize, USER_ADDR_NULL);
}
2910
2911 int
csops_audittoken(__unused proc_t p,struct csops_audittoken_args * uap,__unused int32_t * retval)2912 csops_audittoken(__unused proc_t p, struct csops_audittoken_args *uap, __unused int32_t *retval)
2913 {
2914 if (uap->uaudittoken == USER_ADDR_NULL) {
2915 return EINVAL;
2916 }
2917 return csops_internal(uap->pid, uap->ops, uap->useraddr,
2918 uap->usersize, uap->uaudittoken);
2919 }
2920
/*
 * Copy a code-signing blob out to userspace.  If the blob is missing,
 * a zeroed 8-byte header is copied instead.  If the user buffer is too
 * small for the blob, only a fake header advertising the required
 * length is copied out and ERANGE is returned so the caller can retry
 * with a larger buffer.
 */
static int
csops_copy_token(const void *start, size_t length, user_size_t usize, user_addr_t uaddr)
{
	char fakeheader[8] = { 0 };
	int error;

	if (usize < sizeof(fakeheader)) {
		return ERANGE;
	}

	/* if no blob, fill in zero header */
	if (NULL == start) {
		start = fakeheader;
		length = sizeof(fakeheader);
	} else if (usize < length) {
		/* ... if input too short, copy out length of entitlement */
		uint32_t length32 = htonl((uint32_t)length);
		memcpy(&fakeheader[4], &length32, sizeof(length32));

		error = copyout(fakeheader, uaddr, sizeof(fakeheader));
		if (error == 0) {
			return ERANGE; /* input buffer too short, ERANGE signals that */
		}
		return error;
	}
	return copyout(start, uaddr, length);
}
2948
/*
 * Backend for csops(2)/csops_audittoken(2): perform code-signing
 * operation `ops` on process `pid` (0 means the caller), exchanging
 * data with userspace via uaddr/usersize.  When uaudittoken is given,
 * its pid/idversion fields must match the target process.  Mutating
 * operations generally require root unless performed on self, and are
 * additionally mediated by MAC policy when CONFIG_MACF is set.
 *
 * Returns: 0 on success, or an errno (ESRCH, EPERM, EINVAL, ERANGE,
 * EBADEXEC, ENOENT, ENOTSUP, or a copyin/copyout error).
 */
static int
csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaudittoken)
{
	size_t usize = (size_t)CAST_DOWN(size_t, usersize);
	proc_t pt;
	int forself;
	int error;
	vnode_t tvp;
	off_t toff;
	unsigned char cdhash[SHA1_RESULTLEN];
	audit_token_t token;
	unsigned int upid = 0, uidversion = 0;

	forself = error = 0;

	/* pid 0 is shorthand for "the calling process". */
	if (pid == 0) {
		pid = proc_selfpid();
	}
	if (pid == proc_selfpid()) {
		forself = 1;
	}


	/* Read-only queries are open to all; the rest need root or self. */
	switch (ops) {
	case CS_OPS_STATUS:
	case CS_OPS_CDHASH:
	case CS_OPS_PIDOFFSET:
	case CS_OPS_ENTITLEMENTS_BLOB:
	case CS_OPS_DER_ENTITLEMENTS_BLOB:
	case CS_OPS_IDENTITY:
	case CS_OPS_BLOB:
	case CS_OPS_TEAMID:
	case CS_OPS_CLEAR_LV:
		break;          /* not restricted to root */
	default:
		if (forself == 0 && kauth_cred_issuser(kauth_cred_get()) != TRUE) {
			return EPERM;
		}
		break;
	}

	pt = proc_find(pid);
	if (pt == PROC_NULL) {
		return ESRCH;
	}

	upid = proc_getpid(pt);
	uidversion = proc_pidversion(pt);
	if (uaudittoken != USER_ADDR_NULL) {
		error = copyin(uaudittoken, &token, sizeof(audit_token_t));
		if (error != 0) {
			goto out;
		}
		/* verify the audit token pid/idversion matches with proc */
		if ((token.val[5] != upid) || (token.val[7] != uidversion)) {
			error = ESRCH;
			goto out;
		}
	}

#if CONFIG_MACF
	/* MAC hook: set-type ops vs. get-type ops are checked separately. */
	switch (ops) {
	case CS_OPS_MARKINVALID:
	case CS_OPS_MARKHARD:
	case CS_OPS_MARKKILL:
	case CS_OPS_MARKRESTRICT:
	case CS_OPS_SET_STATUS:
	case CS_OPS_CLEARINSTALLER:
	case CS_OPS_CLEARPLATFORM:
	case CS_OPS_CLEAR_LV:
		if ((error = mac_proc_check_set_cs_info(current_proc(), pt, ops))) {
			goto out;
		}
		break;
	default:
		if ((error = mac_proc_check_get_cs_info(current_proc(), pt, ops))) {
			goto out;
		}
	}
#endif

	switch (ops) {
	case CS_OPS_STATUS: {
		/* Return the process' csflags plus synthesized status bits. */
		uint32_t retflags;

		proc_lock(pt);
		retflags = (uint32_t)proc_getcsflags(pt);
		if (cs_process_enforcement(pt)) {
			retflags |= CS_ENFORCEMENT;
		}
		if (csproc_get_platform_binary(pt)) {
			retflags |= CS_PLATFORM_BINARY;
		}
		if (csproc_get_platform_path(pt)) {
			retflags |= CS_PLATFORM_PATH;
		}
		//Don't return CS_REQUIRE_LV if we turned it on with CS_FORCED_LV but still report CS_FORCED_LV
		if ((proc_getcsflags(pt) & CS_FORCED_LV) == CS_FORCED_LV) {
			retflags &= (~CS_REQUIRE_LV);
		}
		proc_unlock(pt);

		if (uaddr != USER_ADDR_NULL) {
			error = copyout(&retflags, uaddr, sizeof(uint32_t));
		}
		break;
	}
	case CS_OPS_MARKINVALID:
		/* Invalidate the signature; honor CS_KILL by killing. */
		proc_lock(pt);
		if ((proc_getcsflags(pt) & CS_VALID) == CS_VALID) {       /* is currently valid */
			proc_csflags_clear(pt, CS_VALID);       /* set invalid */
			cs_process_invalidated(pt);
			if ((proc_getcsflags(pt) & CS_KILL) == CS_KILL) {
				proc_csflags_set(pt, CS_KILLED);
				proc_unlock(pt);
				if (cs_debug) {
					printf("CODE SIGNING: marked invalid by pid %d: "
					    "p=%d[%s] honoring CS_KILL, final status 0x%x\n",
					    proc_selfpid(), proc_getpid(pt), pt->p_comm,
					    (unsigned int)proc_getcsflags(pt));
				}
				psignal(pt, SIGKILL);
			} else {
				proc_unlock(pt);
			}
		} else {
			proc_unlock(pt);
		}

		break;

	case CS_OPS_MARKHARD:
		proc_lock(pt);
		proc_csflags_set(pt, CS_HARD);
		if ((proc_getcsflags(pt) & CS_VALID) == 0) {
			/* @@@ allow? reject? kill? @@@ */
			proc_unlock(pt);
			error = EINVAL;
			goto out;
		} else {
			proc_unlock(pt);
		}
		break;

	case CS_OPS_MARKKILL:
		/* Arm kill-on-invalidate; kill now if already invalid. */
		proc_lock(pt);
		proc_csflags_set(pt, CS_KILL);
		if ((proc_getcsflags(pt) & CS_VALID) == 0) {
			proc_unlock(pt);
			psignal(pt, SIGKILL);
		} else {
			proc_unlock(pt);
		}
		break;

	case CS_OPS_PIDOFFSET:
		toff = pt->p_textoff;
		proc_rele(pt);
		error = copyout(&toff, uaddr, sizeof(toff));
		return error;

	case CS_OPS_CDHASH:

		/* pt already holds a reference on its p_textvp */
		tvp = pt->p_textvp;
		toff = pt->p_textoff;

		if (tvp == NULLVP || usize != SHA1_RESULTLEN) {
			proc_rele(pt);
			return EINVAL;
		}

		error = vn_getcdhash(tvp, toff, cdhash);
		proc_rele(pt);

		if (error == 0) {
			error = copyout(cdhash, uaddr, sizeof(cdhash));
		}

		return error;

	case CS_OPS_ENTITLEMENTS_BLOB: {
		/* Copy out the XML form of the entitlements blob. */
		void *start;
		size_t length;
		struct cs_blob* blob;

		proc_lock(pt);
		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
			proc_unlock(pt);
			error = EINVAL;
			goto out;
		}
		blob = csproc_get_blob(pt);
		proc_unlock(pt);

		if (!blob) {
			error = EBADEXEC;
			goto out;
		}

		void* osent = csblob_os_entitlements_get(blob);
		if (!osent) {
			goto out;
		}
		CS_GenericBlob* xmlblob = NULL;
		if (amfi->OSEntitlements_get_xml(osent, &xmlblob)) {
			start = (void*)xmlblob;
			length = (size_t)ntohl(xmlblob->length);
		} else {
			goto out;
		}

		/* xmlblob is a fresh allocation; free it after the copyout. */
		error = csops_copy_token(start, length, usize, uaddr);
		kfree_data(start, length);
		goto out;
	}
	case CS_OPS_DER_ENTITLEMENTS_BLOB: {
		/* Copy out the DER entitlements, transmuting if necessary. */
		const void *start;
		size_t length;
		struct cs_blob* blob;

		proc_lock(pt);
		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
			proc_unlock(pt);
			error = EINVAL;
			goto out;
		}
		blob = csproc_get_blob(pt);
		proc_unlock(pt);

		if (!blob) {
			error = EBADEXEC;
			goto out;
		}

		error = csblob_get_der_entitlements(blob, (const CS_GenericBlob **)&start, &length);
		if (error || start == NULL) {
			if (amfi && csblob_os_entitlements_get(blob)) {
				void* osent = csblob_os_entitlements_get(blob);

				const CS_GenericBlob* transmuted = NULL;
				if (amfi->OSEntitlements_get_transmuted(osent, &transmuted)) {
					start = transmuted;
					length = (size_t)ntohl(transmuted->length);
				} else {
					goto out;
				}
			} else {
				goto out;
			}
		}

		error = csops_copy_token(start, length, usize, uaddr);
		goto out;
	}
	case CS_OPS_MARKRESTRICT:
		proc_lock(pt);
		proc_csflags_set(pt, CS_RESTRICT);
		proc_unlock(pt);
		break;

	case CS_OPS_SET_STATUS: {
		uint32_t flags;

		if (usize < sizeof(flags)) {
			error = ERANGE;
			break;
		}

		error = copyin(uaddr, &flags, sizeof(flags));
		if (error) {
			break;
		}

		/* only allow setting a subset of all code sign flags */
		flags &=
		    CS_HARD | CS_EXEC_SET_HARD |
		    CS_KILL | CS_EXEC_SET_KILL |
		    CS_RESTRICT |
		    CS_REQUIRE_LV |
		    CS_ENFORCEMENT | CS_EXEC_SET_ENFORCEMENT;

		proc_lock(pt);
		if (proc_getcsflags(pt) & CS_VALID) {
			/* turning on enforcement also flips the VM map flag */
			if ((flags & CS_ENFORCEMENT) &&
			    !(proc_getcsflags(pt) & CS_ENFORCEMENT)) {
				vm_map_cs_enforcement_set(get_task_map(pt->task), TRUE);
			}
			proc_csflags_set(pt, flags);
		} else {
			error = EINVAL;
		}
		proc_unlock(pt);

		break;
	}
	case CS_OPS_CLEAR_LV: {
		/*
		 * This option is used to remove library validation from
		 * a running process. This is used in plugin architectures
		 * when a program needs to load untrusted libraries. This
		 * allows the process to maintain library validation as
		 * long as possible, then drop it only when required.
		 * Once a process has loaded the untrusted library,
		 * relying on library validation in the future will
		 * not be effective. An alternative is to re-exec
		 * your application without library validation, or
		 * fork an untrusted child.
		 */
#if !defined(XNU_TARGET_OS_OSX)
		// We only support dropping library validation on macOS
		error = ENOTSUP;
#else
		/*
		 * if we have the flag set, and the caller wants
		 * to remove it, and they're entitled to, then
		 * we remove it from the csflags
		 *
		 * NOTE: We are fine to poke into the task because
		 * we get a ref to pt when we do the proc_find
		 * at the beginning of this function.
		 *
		 * We also only allow altering ourselves.
		 */
		if (forself == 1 && IOTaskHasEntitlement(pt->task, CLEAR_LV_ENTITLEMENT)) {
			proc_lock(pt);
			proc_csflags_clear(pt, CS_REQUIRE_LV | CS_FORCED_LV);
			proc_unlock(pt);
			error = 0;
		} else {
			error = EPERM;
		}
#endif
		break;
	}
	case CS_OPS_BLOB: {
		void *start;
		size_t length;

		proc_lock(pt);
		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
			proc_unlock(pt);
			error = EINVAL;
			break;
		}
		proc_unlock(pt);
		// Don't need to lock here as not accessing CSFLAGS
		error = cs_blob_get(pt, &start, &length);
		if (error) {
			goto out;
		}

		error = csops_copy_token(start, length, usize, uaddr);
		goto out;
	}
	case CS_OPS_IDENTITY:
	case CS_OPS_TEAMID: {
		const char *identity;
		uint8_t fakeheader[8];
		uint32_t idlen;
		size_t length;

		/*
		 * Make identity have a blob header to make it
		 * easier on userland to guess the identity
		 * length.
		 */
		if (usize < sizeof(fakeheader)) {
			error = ERANGE;
			break;
		}
		memset(fakeheader, 0, sizeof(fakeheader));

		proc_lock(pt);
		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
			proc_unlock(pt);
			error = EINVAL;
			break;
		}
		identity = ops == CS_OPS_TEAMID ? csproc_get_teamid(pt) : cs_identity_get(pt);
		proc_unlock(pt);

		if (identity == NULL) {
			error = ENOENT;
			goto out;
		}

		length = strlen(identity) + 1;         /* include NUL */
		idlen = htonl((uint32_t)(length + sizeof(fakeheader)));
		memcpy(&fakeheader[4], &idlen, sizeof(idlen));

		error = copyout(fakeheader, uaddr, sizeof(fakeheader));
		if (error) {
			goto out;
		}

		/* Copy the identity string after the header if there is room. */
		if (usize < sizeof(fakeheader) + length) {
			error = ERANGE;
		} else if (usize > sizeof(fakeheader)) {
			error = copyout(identity, uaddr + sizeof(fakeheader), length);
		}
		goto out;
	}

	case CS_OPS_CLEARINSTALLER:
		proc_lock(pt);
		proc_csflags_clear(pt, CS_INSTALLER | CS_DATAVAULT_CONTROLLER | CS_EXEC_INHERIT_SIP);
		proc_unlock(pt);
		break;

	case CS_OPS_CLEARPLATFORM:
#if DEVELOPMENT || DEBUG
		if (cs_process_global_enforcement()) {
			error = ENOTSUP;
			break;
		}

#if CONFIG_CSR
		if (csr_check(CSR_ALLOW_APPLE_INTERNAL) != 0) {
			error = ENOTSUP;
			break;
		}
#endif

		proc_lock(pt);
		proc_csflags_clear(pt, CS_PLATFORM_BINARY | CS_PLATFORM_PATH);
		csproc_clear_platform_binary(pt);
		proc_unlock(pt);
		break;
#else
		error = ENOTSUP;
		break;
#endif /* !DEVELOPMENT || DEBUG */

	default:
		error = EINVAL;
		break;
	}
out:
	proc_rele(pt);
	return error;
}
3391
/*
 * Snapshot the pids of processes selected by `flags` (all procs and/or
 * zombies) and `filterfn`, without holding the proc list lock across
 * callouts, then invoke `callout` on each pid still resolvable.  The
 * callout's return value (PROC_RETURNED[_DONE] / PROC_CLAIMED[_DONE])
 * controls whether the reference is dropped here and whether iteration
 * stops early.
 */
void
proc_iterate(
	unsigned int flags,
	proc_iterate_fn_t callout,
	void *arg,
	proc_iterate_fn_t filterfn,
	void *filterarg)
{
	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
	u_int pid_count_available = 0;

	assert(callout != NULL);

	/* allocate outside of the proc_list_lock */
	for (;;) {
		proc_list_lock();
		pid_count_available = nprocs + 1; /* kernel_task not counted in nprocs */
		assert(pid_count_available > 0);
		if (pidlist_nalloc(pl) >= pid_count_available) {
			break;
		}
		proc_list_unlock();

		pidlist_alloc(pl, pid_count_available);
	}
	pidlist_set_active(pl);

	/* filter pids into the pid_list */

	u_int pid_count = 0;
	if (flags & PROC_ALLPROCLIST) {
		proc_t p;
		ALLPROC_FOREACH(p) {
			/* ignore processes that are being forked */
			if (p->p_stat == SIDL) {
				continue;
			}
			if ((filterfn != NULL) && (filterfn(p, filterarg) == 0)) {
				continue;
			}
			pidlist_add_pid(pl, proc_pid(p));
			if (++pid_count >= pid_count_available) {
				break;
			}
		}
	}

	if ((pid_count < pid_count_available) &&
	    (flags & PROC_ZOMBPROCLIST)) {
		proc_t p;
		ZOMBPROC_FOREACH(p) {
			if ((filterfn != NULL) && (filterfn(p, filterarg) == 0)) {
				continue;
			}
			pidlist_add_pid(pl, proc_pid(p));
			if (++pid_count >= pid_count_available) {
				break;
			}
		}
	}

	proc_list_unlock();

	/* call callout on processes in the pid_list */

	const pidlist_entry_t *pe;
	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
		for (u_int i = 0; i < pe->pe_nused; i++) {
			const pid_t pid = pe->pe_pid[i];
			proc_t p = proc_find(pid);
			if (p) {
				if ((flags & PROC_NOWAITTRANS) == 0) {
					proc_transwait(p, 0);
				}
				const int callout_ret = callout(p, arg);

				switch (callout_ret) {
				case PROC_RETURNED_DONE:
					proc_rele(p);
					OS_FALLTHROUGH;
				case PROC_CLAIMED_DONE:
					goto out;

				case PROC_RETURNED:
					proc_rele(p);
					OS_FALLTHROUGH;
				case PROC_CLAIMED:
					break;
				default:
					panic("%s: callout =%d for pid %d",
					    __func__, callout_ret, pid);
					break;
				}
			} else if (flags & PROC_ZOMBPROCLIST) {
				/* pid may have exited; try the zombie list. */
				p = proc_find_zombref(pid);
				if (!p) {
					continue;
				}
				const int callout_ret = callout(p, arg);

				switch (callout_ret) {
				case PROC_RETURNED_DONE:
					proc_drop_zombref(p);
					OS_FALLTHROUGH;
				case PROC_CLAIMED_DONE:
					goto out;

				case PROC_RETURNED:
					proc_drop_zombref(p);
					OS_FALLTHROUGH;
				case PROC_CLAIMED:
					break;
				default:
					panic("%s: callout =%d for zombie %d",
					    __func__, callout_ret, pid);
					break;
				}
			}
		}
	}
out:
	pidlist_free(pl);
}
3515
/*
 * Shutdown-time scan: repeatedly walk the allproc list from the top,
 * invoking `callout` on the first process passing `filterfn`, then
 * restarting the walk.  Restarting after every callout keeps the scan
 * safe against list mutation while the lock is dropped.
 */
void
proc_rebootscan(
	proc_iterate_fn_t callout,
	void *arg,
	proc_iterate_fn_t filterfn,
	void *filterarg)
{
	proc_t p;

	assert(callout != NULL);

	proc_shutdown_exitcount = 0;

restart_foreach:

	proc_list_lock();

	ALLPROC_FOREACH(p) {
		if ((filterfn != NULL) && filterfn(p, filterarg) == 0) {
			continue;
		}
		/* may fail if p is already exiting; skip it then */
		p = proc_ref(p, true);
		if (!p) {
			continue;
		}

		proc_list_unlock();

		proc_transwait(p, 0);
		(void)callout(p, arg);
		proc_rele(p);

		goto restart_foreach;
	}

	proc_list_unlock();
}
3553
/*
 * Invoke `callout` on each live child of `parent`.  Child pids are
 * snapshotted under the proc list lock, then resolved one by one with
 * the lock dropped.  Callout return values follow the proc_iterate()
 * convention (PROC_RETURNED[_DONE] / PROC_CLAIMED[_DONE]).
 */
void
proc_childrenwalk(
	proc_t parent,
	proc_iterate_fn_t callout,
	void *arg)
{
	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
	u_int pid_count_available = 0;

	assert(parent != NULL);
	assert(callout != NULL);

	/* allocate outside the lock; retry until capacity suffices */
	for (;;) {
		proc_list_lock();
		pid_count_available = parent->p_childrencnt;
		if (pid_count_available == 0) {
			proc_list_unlock();
			goto out;
		}
		if (pidlist_nalloc(pl) >= pid_count_available) {
			break;
		}
		proc_list_unlock();

		pidlist_alloc(pl, pid_count_available);
	}
	pidlist_set_active(pl);

	u_int pid_count = 0;
	proc_t p;
	PCHILDREN_FOREACH(parent, p) {
		/* skip children still mid-fork */
		if (p->p_stat == SIDL) {
			continue;
		}
		pidlist_add_pid(pl, proc_pid(p));
		if (++pid_count >= pid_count_available) {
			break;
		}
	}

	proc_list_unlock();

	const pidlist_entry_t *pe;
	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
		for (u_int i = 0; i < pe->pe_nused; i++) {
			const pid_t pid = pe->pe_pid[i];
			p = proc_find(pid);
			if (!p) {
				continue;
			}
			const int callout_ret = callout(p, arg);

			switch (callout_ret) {
			case PROC_RETURNED_DONE:
				proc_rele(p);
				OS_FALLTHROUGH;
			case PROC_CLAIMED_DONE:
				goto out;

			case PROC_RETURNED:
				proc_rele(p);
				OS_FALLTHROUGH;
			case PROC_CLAIMED:
				break;
			default:
				panic("%s: callout =%d for pid %d",
				    __func__, callout_ret, pid);
				break;
			}
		}
	}
out:
	pidlist_free(pl);
}
3628
/*
 * Invoke `callout` on each member of `pgrp` passing `filterfn`.
 * Member pids are snapshotted under the pgrp lock, then resolved with
 * the lock dropped; members that changed group in the meantime are
 * skipped.  Callout return values follow the proc_iterate()
 * convention.
 */
void
pgrp_iterate(
	struct pgrp *pgrp,
	proc_iterate_fn_t callout,
	void * arg,
	bool (^filterfn)(proc_t))
{
	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
	u_int pid_count_available = 0;
	proc_t p;

	assert(pgrp != NULL);
	assert(callout != NULL);

	for (;;) {
		pgrp_lock(pgrp);
		/*
		 * each member has one ref + some transient holders,
		 * this is a good enough approximation
		 */
		pid_count_available = os_ref_get_count_mask(&pgrp->pg_refcount,
		    PGRP_REF_BITS);
		if (pidlist_nalloc(pl) >= pid_count_available) {
			break;
		}
		pgrp_unlock(pgrp);

		pidlist_alloc(pl, pid_count_available);
	}
	pidlist_set_active(pl);

	const pid_t pgid = pgrp->pg_id;
	u_int pid_count = 0;

	PGMEMBERS_FOREACH(pgrp, p) {
		if ((filterfn != NULL) && (filterfn(p) == 0)) {
			continue;
		}
		pidlist_add_pid(pl, proc_pid(p));
		if (++pid_count >= pid_count_available) {
			break;
		}
	}

	pgrp_unlock(pgrp);

	const pidlist_entry_t *pe;
	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
		for (u_int i = 0; i < pe->pe_nused; i++) {
			const pid_t pid = pe->pe_pid[i];
			if (0 == pid) {
				continue; /* skip kernproc */
			}
			p = proc_find(pid);
			if (!p) {
				continue;
			}
			/* raced with a group change: no longer a member */
			if (p->p_pgrpid != pgid) {
				proc_rele(p);
				continue;
			}
			const int callout_ret = callout(p, arg);

			switch (callout_ret) {
			case PROC_RETURNED:
				proc_rele(p);
				OS_FALLTHROUGH;
			case PROC_CLAIMED:
				break;
			case PROC_RETURNED_DONE:
				proc_rele(p);
				OS_FALLTHROUGH;
			case PROC_CLAIMED_DONE:
				goto out;

			default:
				panic("%s: callout =%d for pid %d",
				    __func__, callout_ret, pid);
			}
		}
	}

out:
	pidlist_free(pl);
}
3714
/*
 * Move `p` from its current process group to `newpg`.
 *
 * Consumes the caller's +1 ref on `newpg`; the ref previously held on
 * the old group is dropped here after the switch.  The swap is done
 * under the proc list lock so that proc_pgrp()'s slow path observes a
 * consistent p->p_pgrp.
 */
static void
pgrp_replace(struct proc *p, struct pgrp *newpg)
{
	struct pgrp *oldpg;

	proc_list_lock();
	oldpg = hazard_ptr_serialized_load(&p->p_pgrp);
	pgrp_del_member(oldpg, p);
	pgrp_add_member(newpg, PROC_NULL, p);
	proc_list_unlock();

	pgrp_rele(oldpg);
}
3729
/*
 * Allocate and initialize a new process group with id `pgid`.
 *
 * `bits` seeds the masked refcount (caller-supplied initial ref state).
 * Returns a fully initialized, empty group; allocation cannot fail
 * (Z_WAITOK | Z_NOFAIL).
 */
struct pgrp *
pgrp_alloc(pid_t pgid, pggrp_ref_bits_t bits)
{
	struct pgrp *pgrp = zalloc_flags(pgrp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	os_ref_init_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp, bits);
	os_ref_init_raw(&pgrp->pg_hashref, &p_refgrp);
	LIST_INIT(&pgrp->pg_members);
	lck_mtx_init(&pgrp->pg_mlock, &proc_mlock_grp, &proc_lck_attr);
	pgrp->pg_id = pgid;

	return pgrp;
}
3743
/* Acquire the process group's member-list mutex. */
void
pgrp_lock(struct pgrp * pgrp)
{
	lck_mtx_lock(&pgrp->pg_mlock);
}
3749
/* Release the process group's member-list mutex. */
void
pgrp_unlock(struct pgrp * pgrp)
{
	lck_mtx_unlock(&pgrp->pg_mlock);
}
3755
/*
 * Look up a session by id in the session hash.
 *
 * Caller must hold the lock protecting the session hash.  Returns the
 * matching session, or NULL when the hash chain is exhausted without a
 * match.  No reference is taken.
 */
struct session *
session_find_locked(pid_t sessid)
{
	struct session *sess;

	LIST_FOREACH(sess, SESSHASH(sessid), s_hash) {
		if (sess->s_sid == sessid) {
			break;
		}
	}

	return sess;
}
3769
/* Acquire the session's mutex. */
void
session_lock(struct session * sess)
{
	lck_mtx_lock(&sess->s_mlock);
}
3775
3776
/* Release the session's mutex. */
void
session_unlock(struct session * sess)
{
	lck_mtx_unlock(&sess->s_mlock);
}
3782
/*
 * proc_pgrp
 *
 * Return a +1 ref on the process group of `p` (PGRP_NULL if `p` is
 * NULL or has no group).  If `sessp` is non-NULL it is filled with the
 * group's session pointer (no ref taken) or SESSION_NULL.
 *
 * Fast path: publish a hazard pointer to p->p_pgrp and try to take a
 * ref without any lock.  If the try-ref fails we raced with
 * pgrp_replace(); fall back to the proc list lock, under which the
 * pointer is stable and pg_ref() cannot fail.
 */
struct pgrp *
proc_pgrp(proc_t p, struct session **sessp)
{
	struct pgrp *pgrp = PGRP_NULL;
	hazard_guard_t g;
	bool success = false;

	if (__probable(p != PROC_NULL)) {
		g = hazard_guard_get(0);
		pgrp = hazard_guard_acquire(g, &p->p_pgrp);
		/* NULL pgrp counts as success: there is simply no group */
		success = pgrp == PGRP_NULL || pg_ref_try(pgrp);
		hazard_guard_put(g);

		if (__improbable(!success)) {
			/*
			 * We caught the process in the middle of pgrp_replace(),
			 * go the slow, never failing way.
			 */
			proc_list_lock();
			pgrp = pg_ref(hazard_ptr_serialized_load(&p->p_pgrp));
			proc_list_unlock();
		}
	}

	if (sessp) {
		*sessp = pgrp ? pgrp->pg_session : SESSION_NULL;
	}
	return pgrp;
}
3812
/*
 * Return a +1 ref on the foreground process group of `tp`, or
 * PGRP_NULL when the tty has none.
 */
struct pgrp *
tty_pgrp_locked(struct tty *tp)
{
	struct pgrp *pg = PGRP_NULL;

	/* either the tty_lock() or the proc_list_lock() must be held */

	if (tp->t_pgrp) {
		pg = pg_ref(tp->t_pgrp);
	}

	return pg;
}
3826
/*
 * proc_transstart
 *
 * Enter the "in transition" critical section on `p` (used around
 * exec/fork-style transitions).  Blocks while another thread holds the
 * transition, unless `non_blocking` is set or the holder has already
 * committed (P_LTRANSCOMMIT), in which case EDEADLK is returned.
 *
 * `locked` != 0 means the caller already holds the proc lock.
 * Returns 0 on success with this thread recorded as the holder.
 */
int
proc_transstart(proc_t p, int locked, int non_blocking)
{
	if (locked == 0) {
		proc_lock(p);
	}
	while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) {
		if (((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT) || non_blocking) {
			if (locked == 0) {
				proc_unlock(p);
			}
			return EDEADLK;
		}
		/* flag that someone is waiting, then sleep on p_lflag */
		p->p_lflag |= P_LTRANSWAIT;
		msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL);
	}
	p->p_lflag |= P_LINTRANSIT;
	p->p_transholder = current_thread();
	if (locked == 0) {
		proc_unlock(p);
	}
	return 0;
}
3850
/*
 * Mark the current transition on `p` as committed (point of no return).
 * Must be called by the thread that owns the transition.  Wakes any
 * waiters so they can observe the commit and bail with EDEADLK.
 * `locked` != 0 means the caller already holds the proc lock.
 */
void
proc_transcommit(proc_t p, int locked)
{
	if (locked == 0) {
		proc_lock(p);
	}

	assert((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT);
	assert(p->p_transholder == current_thread());
	p->p_lflag |= P_LTRANSCOMMIT;

	if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) {
		p->p_lflag &= ~P_LTRANSWAIT;
		wakeup(&p->p_lflag);
	}
	if (locked == 0) {
		proc_unlock(p);
	}
}
3870
/*
 * Leave the "in transition" critical section on `p`, clearing both the
 * transit and commit flags and waking any threads blocked in
 * proc_transstart()/proc_transwait().
 * `locked` != 0 means the caller already holds the proc lock.
 */
void
proc_transend(proc_t p, int locked)
{
	if (locked == 0) {
		proc_lock(p);
	}

	p->p_lflag &= ~(P_LINTRANSIT | P_LTRANSCOMMIT);
	p->p_transholder = NULL;

	if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) {
		p->p_lflag &= ~P_LTRANSWAIT;
		wakeup(&p->p_lflag);
	}
	if (locked == 0) {
		proc_unlock(p);
	}
}
3889
/*
 * Wait until `p` is no longer in transition.
 *
 * Returns EDEADLK instead of sleeping when the transition has been
 * committed and `p` is the calling process itself (waiting would
 * deadlock: the committed transition is this process's own exec).
 * `locked` != 0 means the caller already holds the proc lock.
 * Returns 0 once the transition has ended.
 */
int
proc_transwait(proc_t p, int locked)
{
	if (locked == 0) {
		proc_lock(p);
	}
	while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) {
		if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT && current_proc() == p) {
			if (locked == 0) {
				proc_unlock(p);
			}
			return EDEADLK;
		}
		p->p_lflag |= P_LTRANSWAIT;
		msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL);
	}
	if (locked == 0) {
		proc_unlock(p);
	}
	return 0;
}
3911
/* Acquire the global lock protecting per-proc knote lists. */
void
proc_klist_lock(void)
{
	lck_mtx_lock(&proc_klist_mlock);
}
3917
/* Release the global lock protecting per-proc knote lists. */
void
proc_klist_unlock(void)
{
	lck_mtx_unlock(&proc_klist_mlock);
}
3923
/* Post `hint` to all knotes attached to `p` (EVFILT_PROC events). */
void
proc_knote(struct proc * p, long hint)
{
	proc_klist_lock();
	KNOTE(&p->p_klist, hint);
	proc_klist_unlock();
}
3931
/*
 * Detach every knote from `p`'s klist, clearing each knote's back
 * pointer first so no knote references the proc after it is reaped.
 */
void
proc_knote_drain(struct proc *p)
{
	struct knote *kn = NULL;

	/*
	 * Clear the proc's klist to avoid references after the proc is reaped.
	 */
	proc_klist_lock();
	while ((kn = SLIST_FIRST(&p->p_klist))) {
		kn->kn_proc = PROC_NULL;
		KNOTE_DETACH(&p->p_klist, kn);
	}
	proc_klist_unlock();
}
3947
/* Set the P_LREGISTER flag on `p` under the proc lock. */
void
proc_setregister(proc_t p)
{
	proc_lock(p);
	p->p_lflag |= P_LREGISTER;
	proc_unlock(p);
}
3955
/* Clear the P_LREGISTER flag on `p` under the proc lock. */
void
proc_resetregister(proc_t p)
{
	proc_lock(p);
	p->p_lflag &= ~P_LREGISTER;
	proc_unlock(p);
}
3963
/*
 * Report whether `p` opted into the pthread JIT allowlist, and via
 * `late_out` whether it requested late freezing of the allowlist.
 */
bool
proc_get_pthread_jit_allowlist(proc_t p, bool *late_out)
{
	bool ret = false;

	proc_lock(p);
	ret = (p->p_lflag & P_LPTHREADJITALLOWLIST);
	*late_out = (p->p_lflag & P_LPTHREADJITFREEZELATE);
	proc_unlock(p);

	return ret;
}
3976
/*
 * Mark `p` as using the pthread JIT allowlist; `late` additionally
 * requests late freezing of the allowlist.
 */
void
proc_set_pthread_jit_allowlist(proc_t p, bool late)
{
	proc_lock(p);
	p->p_lflag |= P_LPTHREADJITALLOWLIST;
	if (late) {
		p->p_lflag |= P_LPTHREADJITFREEZELATE;
	}
	proc_unlock(p);
}
3987
/* Return the cached process group id of `p` (no locking). */
pid_t
proc_pgrpid(proc_t p)
{
	return p->p_pgrpid;
}
3993
/* Return the cached session id of `p` (no locking). */
pid_t
proc_sessionid(proc_t p)
{
	return p->p_sessionid;
}
3999
4000 pid_t
proc_selfpgrpid()4001 proc_selfpgrpid()
4002 {
4003 return current_proc()->p_pgrpid;
4004 }
4005
4006
/*
 * Return the low-swap control/action state word (p_pcaction) for `pid`
 * via `pcontrolp` (may be NULL to just probe existence).
 * Returns ESRCH when no such process exists, else 0.
 */
int
proc_getpcontrol(int pid, int * pcontrolp)
{
	proc_t p;

	p = proc_find(pid);
	if (p == PROC_NULL) {
		return ESRCH;
	}
	if (pcontrolp != NULL) {
		*pcontrolp = p->p_pcaction;
	}

	proc_rele(p);
	return 0;
}
4024
/*
 * Apply the low-swap policy action recorded for `p` (throttle, suspend
 * or kill), unless an action has already been taken.  Always returns
 * PROC_RETURNED so it is usable as a proc_iterate callout.
 */
int
proc_dopcontrol(proc_t p)
{
	int pcontrol;
	os_reason_t kill_reason;

	proc_lock(p);

	pcontrol = PROC_CONTROL_STATE(p);

	/* only act once: PROC_ACTION_STATE records that we already did */
	if (PROC_ACTION_STATE(p) == 0) {
		switch (pcontrol) {
		case P_PCTHROTTLE:
			PROC_SETACTION_STATE(p);
			proc_unlock(p);
			printf("low swap: throttling pid %d (%s)\n", proc_getpid(p), p->p_comm);
			break;

		case P_PCSUSP:
			PROC_SETACTION_STATE(p);
			proc_unlock(p);
			printf("low swap: suspending pid %d (%s)\n", proc_getpid(p), p->p_comm);
			task_suspend(p->task);
			break;

		case P_PCKILL:
			PROC_SETACTION_STATE(p);
			proc_unlock(p);
			printf("low swap: killing pid %d (%s)\n", proc_getpid(p), p->p_comm);
			kill_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_LOWSWAP);
			psignal_with_reason(p, SIGKILL, kill_reason);
			break;

		default:
			proc_unlock(p);
		}
	} else {
		proc_unlock(p);
	}

	return PROC_RETURNED;
}
4067
4068
4069 /*
4070 * Resume a throttled or suspended process. This is an internal interface that's only
4071 * used by the user level code that presents the GUI when we run out of swap space and
4072 * hence is restricted to processes with superuser privileges.
4073 */
4074
/*
 * Undo a previously-taken low-swap action on `pid` (unthrottle or
 * resume).  Restricted to the designated resource-control owner
 * process or superuser.  A "kill" action cannot be undone and is
 * logged and ignored.  Returns 0, ESRCH, or a permission error.
 */
int
proc_resetpcontrol(int pid)
{
	proc_t p;
	int pcontrol;
	int error;
	proc_t self = current_proc();

	/* if the process has been validated to handle resource control or root is valid one */
	if (((self->p_lflag & P_LVMRSRCOWNER) == 0) && (error = suser(kauth_cred_get(), 0))) {
		return error;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		return ESRCH;
	}

	proc_lock(p);

	pcontrol = PROC_CONTROL_STATE(p);

	/* only undo if an action was actually taken */
	if (PROC_ACTION_STATE(p) != 0) {
		switch (pcontrol) {
		case P_PCTHROTTLE:
			PROC_RESETACTION_STATE(p);
			proc_unlock(p);
			printf("low swap: unthrottling pid %d (%s)\n", proc_getpid(p), p->p_comm);
			break;

		case P_PCSUSP:
			PROC_RESETACTION_STATE(p);
			proc_unlock(p);
			printf("low swap: resuming pid %d (%s)\n", proc_getpid(p), p->p_comm);
			task_resume(p->task);
			break;

		case P_PCKILL:
			/* Huh? */
			PROC_SETACTION_STATE(p);
			proc_unlock(p);
			printf("low swap: attempt to unkill pid %d (%s) ignored\n", proc_getpid(p), p->p_comm);
			break;

		default:
			proc_unlock(p);
		}
	} else {
		proc_unlock(p);
	}

	proc_rele(p);
	return 0;
}
4129
4130
4131
/*
 * Accounting gathered by proc_pcontrol_filter() over all procs during
 * a low-swap scan.  Prefixes:
 *   pcs_  - procs with a pcontrol action set that has NOT been taken yet
 *   npcs_ - procs with no pcontrol action
 *   apcs_ - procs whose action was already taken
 * "size" figures come from get_task_compressed() per task.
 */
struct no_paging_space {
	uint64_t pcs_max_size;		/* largest actionable proc's compressed usage */
	uint64_t pcs_uniqueid;		/* its unique id (guards against pid reuse) */
	int pcs_pid;
	int pcs_proc_count;
	uint64_t pcs_total_size;

	uint64_t npcs_max_size;		/* largest non-actionable proc's compressed usage */
	uint64_t npcs_uniqueid;
	int npcs_pid;
	int npcs_proc_count;
	uint64_t npcs_total_size;

	int apcs_proc_count;		/* procs already acted upon */
	uint64_t apcs_total_size;
};
4148
4149
/*
 * proc_iterate filter: accumulate per-category compressed-memory
 * statistics for `p` into the struct no_paging_space passed as `arg`,
 * tracking the largest proc in each category.  Always returns 0 (the
 * filter never selects, it only accounts).
 */
static int
proc_pcontrol_filter(proc_t p, void *arg)
{
	struct no_paging_space *nps;
	uint64_t compressed;

	nps = (struct no_paging_space *)arg;

	compressed = get_task_compressed(p->task);

	if (PROC_CONTROL_STATE(p)) {
		if (PROC_ACTION_STATE(p) == 0) {
			/* actionable, not yet acted upon */
			if (compressed > nps->pcs_max_size) {
				nps->pcs_pid = proc_getpid(p);
				nps->pcs_uniqueid = proc_uniqueid(p);
				nps->pcs_max_size = compressed;
			}
			nps->pcs_total_size += compressed;
			nps->pcs_proc_count++;
		} else {
			/* action already taken */
			nps->apcs_total_size += compressed;
			nps->apcs_proc_count++;
		}
	} else {
		/* no pcontrol action configured */
		if (compressed > nps->npcs_max_size) {
			nps->npcs_pid = proc_getpid(p);
			nps->npcs_uniqueid = proc_uniqueid(p);
			nps->npcs_max_size = compressed;
		}
		nps->npcs_total_size += compressed;
		nps->npcs_proc_count++;
	}
	return 0;
}
4184
4185
/* No-op proc_iterate callout: all work happens in the filter above. */
static int
proc_pcontrol_null(__unused proc_t p, __unused void *arg)
{
	return PROC_RETURNED;
}
4191
4192
4193 /*
4194 * Deal with the low on compressor pool space condition... this function
4195 * gets called when we are approaching the limits of the compressor pool or
4196 * we are unable to create a new swap file.
 * Since this eventually creates a memory deadlock situation, we need to take action to free up
4198 * memory resources (both compressed and uncompressed) in order to prevent the system from hanging completely.
4199 * There are 2 categories of processes to deal with. Those that have an action
4200 * associated with them by the task itself and those that do not. Actionable
4201 * tasks can have one of three categories specified: ones that
4202 * can be killed immediately, ones that should be suspended, and ones that should
4203 * be throttled. Processes that do not have an action associated with them are normally
4204 * ignored unless they are utilizing such a large percentage of the compressor pool (currently 50%)
4205 * that only by killing them can we hope to put the system back into a usable state.
4206 */
4207
#define NO_PAGING_SPACE_DEBUG 0	/* set to 1 for verbose accounting printfs */

extern uint64_t vm_compressor_pages_compressed(void);

/* time of the last low-swap action; throttles actions to one per 5s */
struct timeval last_no_space_action = {.tv_sec = 0, .tv_usec = 0};

#define MB_SIZE (1024 * 1024ULL)	/* bytes per megabyte, for log output */
boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);

extern int32_t max_kill_priority;
4218
4219 int
no_paging_space_action()4220 no_paging_space_action()
4221 {
4222 proc_t p;
4223 struct no_paging_space nps;
4224 struct timeval now;
4225 os_reason_t kill_reason;
4226
4227 /*
4228 * Throttle how often we come through here. Once every 5 seconds should be plenty.
4229 */
4230 microtime(&now);
4231
4232 if (now.tv_sec <= last_no_space_action.tv_sec + 5) {
4233 return 0;
4234 }
4235
4236 /*
4237 * Examine all processes and find the biggest (biggest is based on the number of pages this
4238 * task has in the compressor pool) that has been marked to have some action
4239 * taken when swap space runs out... we also find the biggest that hasn't been marked for
4240 * action.
4241 *
4242 * If the biggest non-actionable task is over the "dangerously big" threashold (currently 50% of
4243 * the total number of pages held by the compressor, we go ahead and kill it since no other task
4244 * can have any real effect on the situation. Otherwise, we go after the actionable process.
4245 */
4246 bzero(&nps, sizeof(nps));
4247
4248 proc_iterate(PROC_ALLPROCLIST, proc_pcontrol_null, (void *)NULL, proc_pcontrol_filter, (void *)&nps);
4249
4250 #if NO_PAGING_SPACE_DEBUG
4251 printf("low swap: npcs_proc_count = %d, npcs_total_size = %qd, npcs_max_size = %qd\n",
4252 nps.npcs_proc_count, nps.npcs_total_size, nps.npcs_max_size);
4253 printf("low swap: pcs_proc_count = %d, pcs_total_size = %qd, pcs_max_size = %qd\n",
4254 nps.pcs_proc_count, nps.pcs_total_size, nps.pcs_max_size);
4255 printf("low swap: apcs_proc_count = %d, apcs_total_size = %qd\n",
4256 nps.apcs_proc_count, nps.apcs_total_size);
4257 #endif
4258 if (nps.npcs_max_size > (vm_compressor_pages_compressed() * 50) / 100) {
4259 /*
4260 * for now we'll knock out any task that has more then 50% of the pages
4261 * held by the compressor
4262 */
4263 if ((p = proc_find(nps.npcs_pid)) != PROC_NULL) {
4264 if (nps.npcs_uniqueid == proc_uniqueid(p)) {
4265 /*
4266 * verify this is still the same process
4267 * in case the proc exited and the pid got reused while
4268 * we were finishing the proc_iterate and getting to this point
4269 */
4270 last_no_space_action = now;
4271
4272 printf("low swap: killing largest compressed process with pid %d (%s) and size %llu MB\n", proc_getpid(p), p->p_comm, (nps.pcs_max_size / MB_SIZE));
4273 kill_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_LOWSWAP);
4274 psignal_with_reason(p, SIGKILL, kill_reason);
4275
4276 proc_rele(p);
4277
4278 return 0;
4279 }
4280
4281 proc_rele(p);
4282 }
4283 }
4284
4285 /*
4286 * We have some processes within our jetsam bands of consideration and hence can be killed.
4287 * So we will invoke the memorystatus thread to go ahead and kill something.
4288 */
4289 if (memorystatus_get_proccnt_upto_priority(max_kill_priority) > 0) {
4290 last_no_space_action = now;
4291 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
4292 return 1;
4293 }
4294
4295 /*
4296 * No eligible processes to kill. So let's suspend/kill the largest
4297 * process depending on its policy control specifications.
4298 */
4299
4300 if (nps.pcs_max_size > 0) {
4301 if ((p = proc_find(nps.pcs_pid)) != PROC_NULL) {
4302 if (nps.pcs_uniqueid == proc_uniqueid(p)) {
4303 /*
4304 * verify this is still the same process
4305 * in case the proc exited and the pid got reused while
4306 * we were finishing the proc_iterate and getting to this point
4307 */
4308 last_no_space_action = now;
4309
4310 proc_dopcontrol(p);
4311
4312 proc_rele(p);
4313
4314 return 1;
4315 }
4316
4317 proc_rele(p);
4318 }
4319 }
4320 last_no_space_action = now;
4321
4322 printf("low swap: unable to find any eligible processes to take action on\n");
4323
4324 return 0;
4325 }
4326
/*
 * proc_trace_log syscall handler: ask the target task (identified by
 * pid + uniqueid to guard against pid reuse) to send its trace memory.
 * Requires PRIV_PROC_TRACE_INSPECT.  Returns 0, EPERM, ENOENT or
 * EINVAL.
 */
int
proc_trace_log(__unused proc_t p, struct proc_trace_log_args *uap, __unused int *retval)
{
	int ret = 0;
	proc_t target_proc = PROC_NULL;
	pid_t target_pid = uap->pid;
	uint64_t target_uniqueid = uap->uniqueid;
	task_t target_task = NULL;

	if (priv_check_cred(kauth_cred_get(), PRIV_PROC_TRACE_INSPECT, 0)) {
		ret = EPERM;
		goto out;
	}
	target_proc = proc_find(target_pid);
	if (target_proc != PROC_NULL) {
		if (target_uniqueid != proc_uniqueid(target_proc)) {
			ret = ENOENT;	/* pid was reused; not the proc the caller meant */
			goto out;
		}

		target_task = proc_task(target_proc);
		if (task_send_trace_memory(target_task, target_pid, target_uniqueid)) {
			ret = EINVAL;
			goto out;
		}
	} else {
		ret = ENOENT;
	}

out:
	if (target_proc != PROC_NULL) {
		proc_rele(target_proc);
	}
	return ret;
}
4362
#if VM_SCAN_FOR_SHADOW_CHAIN
/*
 * Debug aid: walk every proc's VM map and return the deepest object
 * shadow-chain length found, as reported by vm_map_shadow_max().
 * Procs in SIDL or without a task/map are skipped.
 */
extern int vm_map_shadow_max(vm_map_t map);
int proc_shadow_max(void);
int
proc_shadow_max(void)
{
	int retval, max;
	proc_t p;
	task_t task;
	vm_map_t map;

	max = 0;
	proc_list_lock();
	for (p = allproc.lh_first; (p != 0); p = p->p_list.le_next) {
		if (p->p_stat == SIDL) {
			continue;
		}
		task = p->task;
		if (task == NULL) {
			continue;
		}
		map = get_task_map(task);
		if (map == NULL) {
			continue;
		}
		retval = vm_map_shadow_max(map);
		if (retval > max) {
			max = retval;
		}
	}
	proc_list_unlock();
	return max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
4397
4398 void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid);
4399 void
proc_set_responsible_pid(proc_t target_proc,pid_t responsible_pid)4400 proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid)
4401 {
4402 if (target_proc != NULL) {
4403 target_proc->p_responsible_pid = responsible_pid;
4404 }
4405 return;
4406 }
4407
4408 int
proc_chrooted(proc_t p)4409 proc_chrooted(proc_t p)
4410 {
4411 int retval = 0;
4412
4413 if (p) {
4414 proc_fdlock(p);
4415 retval = (p->p_fd.fd_rdir != NULL) ? 1 : 0;
4416 proc_fdunlock(p);
4417 }
4418
4419 return retval;
4420 }
4421
4422 boolean_t
proc_send_synchronous_EXC_RESOURCE(proc_t p)4423 proc_send_synchronous_EXC_RESOURCE(proc_t p)
4424 {
4425 if (p == PROC_NULL) {
4426 return FALSE;
4427 }
4428
4429 /* Send sync EXC_RESOURCE if the process is traced */
4430 if (ISSET(p->p_lflag, P_LTRACED)) {
4431 return TRUE;
4432 }
4433 return FALSE;
4434 }
4435
4436 #if CONFIG_MACF
4437 size_t
proc_get_syscall_filter_mask_size(int which)4438 proc_get_syscall_filter_mask_size(int which)
4439 {
4440 switch (which) {
4441 case SYSCALL_MASK_UNIX:
4442 return nsysent;
4443 case SYSCALL_MASK_MACH:
4444 return mach_trap_count;
4445 case SYSCALL_MASK_KOBJ:
4446 return mach_kobj_count;
4447 default:
4448 return 0;
4449 }
4450 }
4451
/*
 * Install (or clear, when maskptr is NULL) the syscall filter bitmask
 * of class `which` for `p`.  `masklen` must match the table size for
 * that class (see proc_get_syscall_filter_mask_size).  Returns 0 or
 * EINVAL.  On DEVELOPMENT/DEBUG kernels the syscallfilter_disable
 * boot-arg turns this into a logged no-op.
 */
int
proc_set_syscall_filter_mask(proc_t p, int which, unsigned char *maskptr, size_t masklen)
{
#if DEVELOPMENT || DEBUG
	if (syscallfilter_disable) {
		printf("proc_set_syscall_filter_mask: attempt to set policy for pid %d, but disabled by boot-arg\n", proc_pid(p));
		return 0;
	}
#endif // DEVELOPMENT || DEBUG

	switch (which) {
	case SYSCALL_MASK_UNIX:
		if (maskptr != NULL && masklen != nsysent) {
			return EINVAL;
		}
		proc_syscall_filter_mask_set(p, maskptr);
		break;
	case SYSCALL_MASK_MACH:
		if (maskptr != NULL && masklen != (size_t)mach_trap_count) {
			return EINVAL;
		}
		mac_task_set_mach_filter_mask(p->task, maskptr);
		break;
	case SYSCALL_MASK_KOBJ:
		if (maskptr != NULL && masklen != (size_t)mach_kobj_count) {
			return EINVAL;
		}
		mac_task_set_kobj_filter_mask(p->task, maskptr);
		break;
	default:
		return EINVAL;
	}

	return 0;
}
4487
/*
 * Register mach/kobject syscall-filter callback functions.  The caller
 * must pass the expected interface version.  Returns EINVAL on version
 * mismatch, EPERM if the MACF registration is refused, else 0.
 */
int
proc_set_syscall_filter_callbacks(syscall_filter_cbs_t cbs)
{
	if (cbs->version != SYSCALL_FILTER_CALLBACK_VERSION) {
		return EINVAL;
	}

	/* XXX register unix filter callback instead of using MACF hook. */

	if (cbs->mach_filter_cbfunc || cbs->kobj_filter_cbfunc) {
		if (mac_task_register_filter_callbacks(cbs->mach_filter_cbfunc,
		    cbs->kobj_filter_cbfunc) != 0) {
			return EPERM;
		}
	}

	return 0;
}
4506
4507 int
proc_set_syscall_filter_index(int which,int num,int index)4508 proc_set_syscall_filter_index(int which, int num, int index)
4509 {
4510 switch (which) {
4511 case SYSCALL_MASK_KOBJ:
4512 if (ipc_kobject_set_kobjidx(num, index) != 0) {
4513 return ENOENT;
4514 }
4515 break;
4516 default:
4517 return EINVAL;
4518 }
4519
4520 return 0;
4521 }
4522 #endif /* CONFIG_MACF */
4523
/*
 * Set the mach message filtering flag on `p`'s task.
 * Returns EINVAL for a NULL proc, else 0.
 */
int
proc_set_filter_message_flag(proc_t p, boolean_t flag)
{
	if (p == PROC_NULL) {
		return EINVAL;
	}

	task_set_filter_msg_flag(proc_task(p), flag);

	return 0;
}
4535
/*
 * Read the mach message filtering flag of `p`'s task into `*flag`.
 * Returns EINVAL for a NULL proc or NULL out-pointer, else 0.
 */
int
proc_get_filter_message_flag(proc_t p, boolean_t *flag)
{
	if (p == PROC_NULL || flag == NULL) {
		return EINVAL;
	}

	*flag = task_get_filter_msg_flag(proc_task(p));

	return 0;
}
4547
4548 bool
proc_is_traced(proc_t p)4549 proc_is_traced(proc_t p)
4550 {
4551 bool ret = FALSE;
4552 assert(p != PROC_NULL);
4553 proc_lock(p);
4554 if (p->p_lflag & P_LTRACED) {
4555 ret = TRUE;
4556 }
4557 proc_unlock(p);
4558 return ret;
4559 }
4560
#ifdef CONFIG_32BIT_TELEMETRY
/*
 * Emit a MessageTracer telemetry event recording that `p` is a 32-bit
 * executable.  The signature is "procname,parentname,teamid,identity",
 * with team id and identity taken from the executable's code signature
 * when available.
 */
void
proc_log_32bit_telemetry(proc_t p)
{
	/* Gather info */
	char signature_buf[MAX_32BIT_EXEC_SIG_SIZE] = { 0 };
	char * signature_cur_end = &signature_buf[0];
	char * signature_buf_end = &signature_buf[MAX_32BIT_EXEC_SIG_SIZE - 1];
	int bytes_printed = 0;

	const char * teamid = NULL;
	const char * identity = NULL;
	struct cs_blob * csblob = NULL;

	proc_list_lock();

	/*
	 * Get proc name and parent proc name; if the parent execs, we'll get a
	 * garbled name.
	 */
	bytes_printed = scnprintf(signature_cur_end,
	    signature_buf_end - signature_cur_end,
	    "%s,%s,", p->p_name,
	    (p->p_pptr ? p->p_pptr->p_name : ""));

	if (bytes_printed > 0) {
		signature_cur_end += bytes_printed;
	}

	proc_list_unlock();

	/* Get developer info. */
	vnode_t v = proc_getexecutablevnode(p);

	if (v) {
		csblob = csvnode_get_blob(v, 0);

		if (csblob) {
			teamid = csblob_get_teamid(csblob);
			identity = csblob_get_identity(csblob);
		}
	}

	/* fall back to empty fields so the signature stays parseable */
	if (teamid == NULL) {
		teamid = "";
	}

	if (identity == NULL) {
		identity = "";
	}

	bytes_printed = scnprintf(signature_cur_end,
	    signature_buf_end - signature_cur_end,
	    "%s,%s", teamid, identity);

	if (bytes_printed > 0) {
		signature_cur_end += bytes_printed;
	}

	if (v) {
		vnode_put(v);
	}

	/*
	 * We may want to rate limit here, although the SUMMARIZE key should
	 * help us aggregate events in userspace.
	 */

	/* Emit log */
	kern_asl_msg(LOG_DEBUG, "messagetracer", 3,
	    /* 0 */ "com.apple.message.domain", "com.apple.kernel.32bit_exec",
	    /* 1 */ "com.apple.message.signature", signature_buf,
	    /* 2 */ "com.apple.message.summarize", "YES",
	    NULL);
}
#endif /* CONFIG_32BIT_TELEMETRY */
4637
#if CONFIG_PROC_RESOURCE_LIMITS
/*
 * Install soft/hard open-file-count notification limits on `p`.
 * A soft limit at or above the hard limit is meaningless and is
 * dropped (set to 0).  Immediately re-checks the current fd count
 * against the new limits.  Always returns 0.
 */
int
proc_set_filedesc_limits(proc_t p, int soft_limit, int hard_limit)
{
	struct filedesc *fdp = &p->p_fd;
	int retval = 0;

	proc_fdlock(p);

	if (hard_limit > 0) {
		if (soft_limit >= hard_limit) {
			soft_limit = 0;	/* soft >= hard is redundant; disable it */
		}
	}
	fdp->fd_nfiles_soft_limit = soft_limit;
	fdp->fd_nfiles_hard_limit = hard_limit;
	/* Make sure that current fd_nfiles hasn't already exceeded these limits */
	fd_check_limit_exceeded(fdp);

	proc_fdunlock(p);

	return retval;
}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
4662
/*
 * AST handler for file-descriptor limit notifications on the current
 * task.  Decides (under the fd lock) which of the soft/hard limit
 * events still needs to be delivered — each fires at most once — then
 * calls task_filedesc_ast() outside the lock to deliver it.
 * Compiled out unless CONFIG_PROC_RESOURCE_LIMITS.
 */
void
proc_filedesc_ast(__unused task_t task)
{
#if CONFIG_PROC_RESOURCE_LIMITS
	int current_size, soft_limit, hard_limit;
	assert(task == current_task());
	proc_t p = get_bsdtask_info(task);
	struct filedesc *fdp = &p->p_fd;

	proc_fdlock(p);
	current_size = fdp->fd_nfiles_open;
	hard_limit = fdp->fd_nfiles_hard_limit;
	soft_limit = fdp->fd_nfiles_soft_limit;

	/*
	 * Check if the thread sending the soft limit notification arrives after
	 * the one that sent the hard limit notification
	 */

	if (hard_limit > 0 && current_size >= hard_limit) {
		if (fd_hard_limit_already_notified(fdp)) {
			soft_limit = hard_limit = 0;	/* nothing left to send */
		} else {
			fd_hard_limit_notified(fdp);
			soft_limit = 0;	/* hard event supersedes soft */
		}
	} else if (soft_limit > 0 && current_size >= soft_limit) {
		if (fd_soft_limit_already_notified(fdp)) {
			soft_limit = hard_limit = 0;
		} else {
			fd_soft_limit_notified(fdp);
			hard_limit = 0;
		}
	}

	proc_fdunlock(p);

	if (hard_limit || soft_limit) {
		task_filedesc_ast(task, current_size, soft_limit, hard_limit);
	}
#endif /* CONFIG_PROC_RESOURCE_LIMITS */
}
4705
/*
 * Allocate a read-only proc/task structure in the RO zone, optionally
 * seeding the proc side (p/p_data) and/or task side (t/t_data).  The
 * element is zero-filled; a single RO update publishes the seeded
 * fields.  Either owner may be NULL and attached later via
 * proc_ro_ref_proc()/proc_ro_ref_task().
 */
proc_ro_t
proc_ro_alloc(proc_t p, proc_ro_data_t p_data, task_t t, task_ro_data_t t_data)
{
	proc_ro_t pr;
	struct proc_ro pr_local = {};

	pr = (proc_ro_t)zalloc_ro(ZONE_ID_PROC_RO, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	if (p != PROC_NULL) {
		pr_local.pr_proc = p;
		pr_local.proc_data = *p_data;
	}

	if (t != TASK_NULL) {
		pr_local.pr_task = t;
		pr_local.task_data = *t_data;
	}

	if ((p != PROC_NULL) || (t != TASK_NULL)) {
		zalloc_ro_update_elem(ZONE_ID_PROC_RO, pr, &pr_local);
	}

	return pr;
}
4730
/* Return a proc_ro element to the RO zone. */
void
proc_ro_free(proc_ro_t pr)
{
	zfree_ro(ZONE_ID_PROC_RO, pr);
}
4736
/*
 * Attach proc `p` (with its RO data) as the proc owner of `pr`.
 * Panics if a proc owner is already set.
 */
static proc_ro_t
proc_ro_ref_proc(proc_ro_t pr, proc_t p, proc_ro_data_t p_data)
{
	struct proc_ro pr_local;

	if (pr->pr_proc != PROC_NULL) {
		panic("%s: proc_ro already has an owning proc", __func__);
	}

	pr_local = *pr;
	pr_local.pr_proc = p;
	pr_local.proc_data = *p_data;

	/* make sure readers of the proc_ro always see initialized data */
	os_atomic_thread_fence(release);
	zalloc_ro_update_elem(ZONE_ID_PROC_RO, pr, &pr_local);

	return pr;
}
4756
/*
 * Attach task `t` (with its RO data) as the task owner of `pr`.
 * Panics if a task owner is already set.
 *
 * NOTE(review): unlike proc_ro_ref_proc(), no release fence precedes
 * the RO update here — presumably the callers' context makes it
 * unnecessary, but worth confirming.
 */
proc_ro_t
proc_ro_ref_task(proc_ro_t pr, task_t t, task_ro_data_t t_data)
{
	struct proc_ro pr_local;

	if (pr->pr_task != TASK_NULL) {
		panic("%s: proc_ro already has an owning task", __func__);
	}

	pr_local = *pr;
	pr_local.pr_task = t;
	pr_local.task_data = *t_data;

	zalloc_ro_update_elem(ZONE_ID_PROC_RO, pr, &pr_local);

	return pr;
}
4774
/*
 * Swap `p` onto a new proc_ro element, carrying its current RO proc
 * data across.  The old element is released from the proc side and
 * freed if the task side no longer references it.  All under the proc
 * list lock.
 */
void
proc_switch_ro(proc_t p, proc_ro_t new_ro)
{
	proc_ro_t old_ro;

	proc_list_lock();

	old_ro = p->p_proc_ro;

	p->p_proc_ro = proc_ro_ref_proc(new_ro, p, &old_ro->proc_data);
	old_ro = proc_ro_release_proc(old_ro);

	if (old_ro != NULL) {
		proc_ro_free(old_ro);
	}

	proc_list_unlock();
}
4793
/*
 * Drop the proc side's ownership of `pr`.  Returns `pr` when this was
 * the last owner (caller must free it), or NULL when the task side
 * still owns it (the proc back-pointer is just cleared).
 */
proc_ro_t
proc_ro_release_proc(proc_ro_t pr)
{
	/*
	 * No need to take a lock in here: when called in the racy case
	 * (reap_child_locked vs task_deallocate_internal), the proc list is
	 * already held. All other callsites are not racy.
	 */
	if (pr->pr_task == TASK_NULL) {
		/* We're dropping the last ref. */
		return pr;
	} else if (pr->pr_proc != PROC_NULL) {
		/* Task still has a ref, so just clear the proc owner. */
		zalloc_ro_clear_field(ZONE_ID_PROC_RO, pr, pr_proc);
	}

	return NULL;
}
4812
/*
 * Drop the task side's ownership of `pr`.  Returns `pr` when this was
 * the last owner (caller must free it), or NULL when the proc side
 * still owns it (the task back-pointer is just cleared).
 */
proc_ro_t
proc_ro_release_task(proc_ro_t pr)
{
	/*
	 * We take the proc list here to avoid a race between a child proc being
	 * reaped and the associated task being deallocated.
	 *
	 * This function is only ever called in the racy case
	 * (task_deallocate_internal), so we always take the lock.
	 */
	proc_list_lock();

	if (pr->pr_proc == PROC_NULL) {
		/* We're dropping the last ref. */
		proc_list_unlock();
		return pr;
	} else if (pr->pr_task != TASK_NULL) {
		/* Proc still has a ref, so just clear the task owner. */
		zalloc_ro_clear_field(ZONE_ID_PROC_RO, pr, pr_task);
	}

	proc_list_unlock();
	return NULL;
}
4837
/* Out-of-line panic for a corrupted proc_ro->proc back-reference. */
__abortlike
static void
panic_proc_ro_proc_backref_mismatch(proc_t p, proc_ro_t ro)
{
	panic("proc_ro->proc backref mismatch: p=%p, ro=%p, "
	    "proc_ro_proc(ro)=%p", p, ro, proc_ro_proc(ro));
}
4845
/*
 * Return the validated proc_ro of `p`: verifies the element lives in
 * the RO zone and that its back-pointer matches `p`, panicking on any
 * mismatch (defense against memory corruption / forged pointers).
 */
proc_ro_t
proc_get_ro(proc_t p)
{
	proc_ro_t ro = p->p_proc_ro;

	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
	if (__improbable(proc_ro_proc(ro) != p)) {
		panic_proc_ro_proc_backref_mismatch(p, ro);
	}

	return ro;
}
4858
/*
 * Return the proc_ro of the current thread's proc, falling back to
 * kernproc's when the thread has no associated proc (kernel threads).
 */
proc_ro_t
current_proc_ro(void)
{
	if (__improbable(current_thread_ro()->tro_proc == NULL)) {
		return kernproc->p_proc_ro;
	}

	return current_thread_ro()->tro_proc_ro;
}
4868
/* Return the proc that owns `pr` (PROC_NULL if unowned on the proc side). */
proc_t
proc_ro_proc(proc_ro_t pr)
{
	return pr->pr_proc;
}
4874
/* Return the task that owns `pr` (TASK_NULL if unowned on the task side). */
task_t
proc_ro_task(proc_ro_t pr)
{
	return pr->pr_task;
}
4880