1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved. The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36 /*-
37 * Copyright (c) 1982, 1986, 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 * (c) UNIX System Laboratories, Inc.
40 * All or some portions of this file are derived from material licensed
41 * to the University of California by American Telephone and Telegraph
42 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
43 * the permission of UNIX System Laboratories, Inc.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 * notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 * notice, this list of conditions and the following disclaimer in the
52 * documentation and/or other materials provided with the distribution.
53 * 3. All advertising materials mentioning features or use of this software
54 * must display the following acknowledgement:
55 * This product includes software developed by the University of
56 * California, Berkeley and its contributors.
57 * 4. Neither the name of the University nor the names of its contributors
58 * may be used to endorse or promote products derived from this software
59 * without specific prior written permission.
60 *
61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71 * SUCH DAMAGE.
72 *
73 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
74 */
75 /*
76 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
77 * support for mandatory and extensible security protections. This notice
78 * is included in support of clause 2.2 (b) of the Apple Public License,
79 * Version 2.0.
80 */
81 #include <machine/reg.h>
82 #include <machine/cpu_capabilities.h>
83
84 #include <sys/cdefs.h>
85 #include <sys/param.h>
86 #include <sys/systm.h>
87 #include <sys/filedesc.h>
88 #include <sys/kernel.h>
89 #include <sys/proc_internal.h>
90 #include <sys/kauth.h>
91 #include <sys/user.h>
92 #include <sys/socketvar.h>
93 #include <sys/malloc.h>
94 #include <sys/namei.h>
95 #include <sys/mount_internal.h>
96 #include <sys/vnode_internal.h>
97 #include <sys/file_internal.h>
98 #include <sys/stat.h>
99 #include <sys/uio_internal.h>
100 #include <sys/acct.h>
101 #include <sys/exec.h>
102 #include <sys/kdebug.h>
103 #include <sys/signal.h>
104 #include <sys/aio_kern.h>
105 #include <sys/lockdown_mode.h>
106 #include <sys/sysproto.h>
107 #include <sys/sysctl.h>
108 #include <sys/persona.h>
109 #include <sys/reason.h>
110 #if SYSV_SHM
111 #include <sys/shm_internal.h> /* shmexec() */
112 #endif
113 #include <sys/ubc_internal.h> /* ubc_map() */
114 #include <sys/spawn.h>
115 #include <sys/spawn_internal.h>
116 #include <sys/process_policy.h>
117 #include <sys/codesign.h>
118 #include <sys/random.h>
119 #include <crypto/sha1.h>
120
121 #include <libkern/libkern.h>
122 #include <libkern/amfi/amfi.h>
123 #include <libkern/crypto/sha2.h>
124 #include <security/audit/audit.h>
125
126 #include <ipc/ipc_types.h>
127
128 #include <mach/mach_param.h>
129 #include <mach/mach_types.h>
130 #include <mach/port.h>
131 #include <mach/task.h>
132 #include <mach/task_access.h>
133 #include <mach/thread_act.h>
134 #include <mach/vm_map.h>
135 #include <mach/mach_vm.h>
136 #include <mach/vm_param.h>
137 #include <mach_debug/mach_debug_types.h>
138
139 #include <kern/sched_prim.h> /* thread_wakeup() */
140 #include <kern/affinity.h>
141 #include <kern/assert.h>
142 #include <kern/ipc_kobject.h>
143 #include <kern/task.h>
144 #include <kern/thread.h>
145 #include <kern/coalition.h>
146 #include <kern/policy_internal.h>
147 #include <kern/kalloc.h>
148 #include <kern/zalloc.h> /* zone_userspace_reboot_checks() */
149
150 #include <os/log.h>
151
152 #if CONFIG_MACF
153 #include <security/mac_framework.h>
154 #include <security/mac_mach_internal.h>
155 #endif
156
157 #if CONFIG_AUDIT
158 #include <bsm/audit_kevents.h>
159 #endif
160
161 #if CONFIG_ARCADE
162 #include <kern/arcade.h>
163 #endif
164
165 #include <vm/vm_map_xnu.h>
166 #include <vm/vm_kern_xnu.h>
167 #include <vm/vm_protos.h>
168 #include <vm/vm_fault.h>
169 #include <vm/vm_pageout_xnu.h>
170 #include <vm/pmap.h>
171 #include <vm/vm_reclaim_xnu.h>
172
173 #include <kdp/kdp_dyld.h>
174
175 #include <machine/machine_routines.h>
176 #include <machine/pal_routines.h>
177
178 #include <pexpert/pexpert.h>
179 #include <pexpert/device_tree.h>
180
181 #if CONFIG_MEMORYSTATUS
182 #include <sys/kern_memorystatus.h>
183 #endif
184
185 #include <IOKit/IOBSD.h>
186 #include <IOKit/IOKitKeys.h> /* kIODriverKitEntitlementKey */
187
188 #include "kern_exec_internal.h"
189
190 #include <CodeSignature/Entitlements.h>
191
192 #include <mach/exclaves.h>
193
194 #if HAS_MTE
195 #include <arm64/mte_xnu.h>
196 #endif /* HAS_MTE */
197
198 extern boolean_t vm_darkwake_mode;
199
200 /* enable crash reports on various exec failures */
201 static TUNABLE(bool, bootarg_execfailurereports, "execfailurecrashes", false);
202
203 #if XNU_TARGET_OS_OSX
204 #if __has_feature(ptrauth_calls)
205 static TUNABLE(bool, bootarg_arm64e_preview_abi, "-arm64e_preview_abi", false);
206 #endif /* __has_feature(ptrauth_calls) */
207
208 #if DEBUG || DEVELOPMENT
209 static TUNABLE(bool, unentitled_ios_sim_launch, "unentitled_ios_sim_launch", false);
210 #endif /* DEBUG || DEVELOPMENT */
211 #endif /* XNU_TARGET_OS_OSX */
212
213 #if DEVELOPMENT || DEBUG
214 os_log_t exec_log_handle = NULL;
215 #define EXEC_LOG(fmt, ...) \
216 do { \
217 if (exec_log_handle) { \
218 os_log_with_type(exec_log_handle, OS_LOG_TYPE_INFO, "exec - %s:%d " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \
219 } \
220 } while (0)
221 #else /* DEVELOPMENT || DEBUG */
222 #define EXEC_LOG(fmt, ...) do { } while (0)
223 #endif /* DEVELOPMENT || DEBUG */
224
225 #if CONFIG_DTRACE
226 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
227 extern void dtrace_proc_exec(proc_t);
228 extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);
229
230 /*
231 * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
232 * we will store its value before actually calling it.
233 */
234 static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;
235
236 #include <sys/dtrace_ptss.h>
237 #endif
238
239 #if __has_feature(ptrauth_calls)
240 static TUNABLE_DEV_WRITEABLE(int, vm_shared_region_per_team_id,
241 "vm_shared_region_per_team_id", 1);
242 static TUNABLE_DEV_WRITEABLE(int, vm_shared_region_by_entitlement,
243 "vm_shared_region_by_entitlement", 1);
244
245 /* Upon userland request, reslide the shared cache. */
246 static TUNABLE_DEV_WRITEABLE(int, vm_shared_region_reslide_aslr,
247 "vm_shared_region_reslide_aslr",
248 #if CONFIG_RESLIDE_SHARED_CACHE
249 1
250 #else
251 0
252 #endif /* CONFIG_RESLIDE_SHARED_CACHE */
253 );
254
255 /*
256 * Flag to control what processes should get shared cache randomize resliding
257 * after a fault in the shared cache region:
258 *
259 * 0 - all processes get a new randomized slide
260 * 1 - only platform processes get a new randomized slide
261 */
262 TUNABLE_DEV_WRITEABLE(int, vm_shared_region_reslide_restrict,
263 "vm_shared_region_reslide_restrict", 1);
264
265 #if DEVELOPMENT || DEBUG
266 SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_per_team_id,
267 CTLFLAG_RW, &vm_shared_region_per_team_id, 0, "");
268 SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_by_entitlement,
269 CTLFLAG_RW, &vm_shared_region_by_entitlement, 0, "");
270 SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_reslide_restrict,
271 CTLFLAG_RW, &vm_shared_region_reslide_restrict, 0, "");
272 SYSCTL_INT(_vm, OID_AUTO, vm_shared_region_reslide_aslr,
273 CTLFLAG_RW, &vm_shared_region_reslide_aslr, 0, "");
274 #endif
275 #endif /* __has_feature(ptrauth_calls) */
276
277 #if DEVELOPMENT || DEBUG
278 static TUNABLE(bool, enable_dext_coredumps_on_panic, "dext_panic_coredump", true);
279 #else
280 static TUNABLE(bool, enable_dext_coredumps_on_panic, "dext_panic_coredump", false);
281 #endif
282 extern kern_return_t kern_register_userspace_coredump(task_t task, const char * name);
283 #define USERSPACE_COREDUMP_PANIC_ENTITLEMENT "com.apple.private.enable-coredump-on-panic"
284 #define USERSPACE_COREDUMP_PANIC_SEED_ENTITLEMENT \
285 "com.apple.private.enable-coredump-on-panic-seed-privacy-approved"
286
287 extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
288 extern void task_set_did_exec_flag(task_t task);
289 extern void task_clear_exec_copy_flag(task_t task);
290 proc_t proc_exec_switch_task(proc_t old_proc, proc_t new_proc, task_t old_task,
291 task_t new_task, struct image_params *imgp, void **inherit);
292 boolean_t task_is_active(task_t);
293 boolean_t thread_is_active(thread_t thread);
294 void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread);
295 void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task);
296 extern void ipc_importance_release(void *elem);
297 extern boolean_t task_has_watchports(task_t task);
298 extern void task_set_no_smt(task_t task);
299 #if defined(HAS_APPLE_PAC)
300 char *task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid);
301 #endif
302 task_t convert_port_to_task(ipc_port_t port);
303
304 #if CONFIG_EXCLAVES
305 int task_add_conclave(task_t task, void *vnode, int64_t off, const char *task_conclave_id);
306 kern_return_t task_inherit_conclave(task_t old_task, task_t new_task, void *vnode, int64_t off);
307 #endif /* CONFIG_EXCLAVES */
308
309 /*
310 * Mach things for which prototypes are unavailable from Mach headers
311 */
312 extern void ipc_task_enable(task_t task);
313 extern void ipc_task_reset(task_t task);
314 extern void ipc_thread_reset(thread_t thread);
315
316 #if DEVELOPMENT || DEBUG
317 void task_importance_update_owner_info(task_t);
318 #endif
319
320 extern struct savearea *get_user_regs(thread_t);
321
322 __attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);
323
324 #include <kern/thread.h>
325 #include <kern/task.h>
326 #include <kern/ast.h>
327 #include <kern/mach_loader.h>
328 #include <kern/mach_fat.h>
329 #include <mach-o/fat.h>
330 #include <mach-o/loader.h>
331 #include <machine/vmparam.h>
332 #include <sys/imgact.h>
333
334 #include <sys/sdt.h>
335
336
337 /*
338 * EAI_ITERLIMIT The maximum number of times to iterate an image
339 * activator in exec_activate_image() before treating
340 * it as malformed/corrupt.
341 */
342 #define EAI_ITERLIMIT 3
343
344 /*
345 * For #! interpreter parsing
346 */
347 #define IS_WHITESPACE(ch) ((ch == ' ') || (ch == '\t'))
348 #define IS_EOL(ch) ((ch == '#') || (ch == '\n'))
349
350 extern vm_map_t bsd_pageable_map;
351 extern const struct fileops vnops;
352 extern int nextpidversion;
353
354
355 #define USER_ADDR_ALIGN(addr, val) \
356 ( ( (user_addr_t)(addr) + (val) - 1) \
357 & ~((val) - 1) )
358
359 /*
360 * For subsystem root support
361 */
362 #define SPAWN_SUBSYSTEM_ROOT_ENTITLEMENT "com.apple.private.spawn-subsystem-root"
363
364 /*
365 * Allow setting p_crash_behavior to trigger panic on crash
366 */
367 #define SPAWN_SET_PANIC_CRASH_BEHAVIOR "com.apple.private.spawn-panic-crash-behavior"
368
369 /* Platform Code Exec Logging */
370 static int platform_exec_logging = 0;
371
372 SYSCTL_DECL(_security_mac);
373
374 SYSCTL_INT(_security_mac, OID_AUTO, platform_exec_logging, CTLFLAG_RW, &platform_exec_logging, 0,
375 "log cdhashes for all platform binary executions");
376
377 static os_log_t peLog = OS_LOG_DEFAULT;
378
379
380 struct exception_port_action_t {
381 ipc_port_t port;
382 _ps_port_action_t *port_action;
383 };
384
385 struct exec_port_actions {
386 uint32_t exception_port_count;
387 uint32_t portwatch_count;
388 uint32_t registered_count;
389 struct exception_port_action_t *excport_array;
390 ipc_port_t *portwatch_array;
391 ipc_port_t registered_array[TASK_PORT_REGISTER_MAX];
392 };
393
394 struct image_params; /* Forward */
395 static int exec_activate_image(struct image_params *imgp);
396 static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
397 static int load_return_to_errno(load_return_t lrtn);
398 static int execargs_alloc(struct image_params *imgp);
399 static int execargs_free(struct image_params *imgp);
400 static int exec_check_permissions(struct image_params *imgp);
401 static int exec_extract_strings(struct image_params *imgp);
402 static int exec_add_apple_strings(struct image_params *imgp, const load_result_t *load_result, task_t task);
403 static int exec_handle_sugid(struct image_params *imgp);
404 static int sugid_scripts = 0;
405 SYSCTL_INT(_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
406 static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
407 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
408 static void exec_resettextvp(proc_t, struct image_params *);
409 static int process_signature(proc_t, struct image_params *);
410 static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
411 static errno_t exec_handle_port_actions(struct image_params *imgp,
412 struct exec_port_actions *port_actions);
413 static errno_t exec_handle_exception_port_actions(const struct image_params *imgp,
414 const struct exec_port_actions *port_actions);
415 static errno_t exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp,
416 task_role_t psa_darwin_role, struct exec_port_actions *port_actions);
417 static void exec_port_actions_destroy(struct exec_port_actions *port_actions);
418
419 /*
420 * exec_add_user_string
421 *
422 * Add the requested string to the string space area.
423 *
424 * Parameters; struct image_params * image parameter block
425 * user_addr_t string to add to strings area
426 * int segment from which string comes
427 * boolean_t TRUE if string contributes to NCARGS
428 *
429 * Returns: 0 Success
430 * !0 Failure errno from copyinstr()
431 *
432 * Implicit returns:
433 * (imgp->ip_strendp) updated location of next add, if any
434 * (imgp->ip_strspace) updated byte count of space remaining
435 * (imgp->ip_argspace) updated byte count of space in NCARGS
436 */
437 __attribute__((noinline))
438 static int
exec_add_user_string(struct image_params * imgp,user_addr_t str,int seg,boolean_t is_ncargs)439 exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
440 {
441 int error = 0;
442
443 do {
444 size_t len = 0;
445 int space;
446
447 if (is_ncargs) {
448 space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
449 } else {
450 space = imgp->ip_strspace;
451 }
452
453 if (space <= 0) {
454 error = E2BIG;
455 break;
456 }
457
458 if (!UIO_SEG_IS_USER_SPACE(seg)) {
459 char *kstr = CAST_DOWN(char *, str); /* SAFE */
460 error = copystr(kstr, imgp->ip_strendp, space, &len);
461 } else {
462 error = copyinstr(str, imgp->ip_strendp, space, &len);
463 }
464
465 imgp->ip_strendp += len;
466 imgp->ip_strspace -= len;
467 if (is_ncargs) {
468 imgp->ip_argspace -= len;
469 }
470 } while (error == ENAMETOOLONG);
471
472 return error;
473 }
474
475 /*
476 * dyld is now passed the executable path as a getenv-like variable
477 * in the same fashion as the stack_guard and malloc_entropy keys.
478 */
479 #define EXECUTABLE_KEY "executable_path="
480
481 /*
482 * exec_save_path
483 *
484 * To support new app package launching for Mac OS X, the dyld needs the
485 * first argument to execve() stored on the user stack.
486 *
487 * Save the executable path name at the bottom of the strings area and set
488 * the argument vector pointer to the location following that to indicate
489 * the start of the argument and environment tuples, setting the remaining
490 * string space count to the size of the string area minus the path length.
491 *
492 * Parameters; struct image_params * image parameter block
493 * char * path used to invoke program
494 * int segment from which path comes
495 *
496 * Returns: int 0 Success
497 * EFAULT Bad address
498 * copy[in]str:EFAULT Bad address
499 * copy[in]str:ENAMETOOLONG Filename too long
500 *
501 * Implicit returns:
502 * (imgp->ip_strings) saved path
503 * (imgp->ip_strspace) space remaining in ip_strings
504 * (imgp->ip_strendp) start of remaining copy area
505 * (imgp->ip_argspace) space remaining of NCARGS
506 * (imgp->ip_applec) Initial applev[0]
507 *
508 * Note: We have to do this before the initial namei() since in the
509 * path contains symbolic links, namei() will overwrite the
510 * original path buffer contents. If the last symbolic link
511 * resolved was a relative pathname, we would lose the original
512 * "path", which could be an absolute pathname. This might be
513 * unacceptable for dyld.
514 */
515 static int
exec_save_path(struct image_params * imgp,user_addr_t path,int seg,const char ** excpath)516 exec_save_path(struct image_params *imgp, user_addr_t path, int seg, const char **excpath)
517 {
518 int error;
519 size_t len;
520 char *kpath;
521
522 // imgp->ip_strings can come out of a cache, so we need to obliterate the
523 // old path.
524 memset(imgp->ip_strings, '\0', strlen(EXECUTABLE_KEY) + MAXPATHLEN);
525
526 len = MIN(MAXPATHLEN, imgp->ip_strspace);
527
528 switch (seg) {
529 case UIO_USERSPACE32:
530 case UIO_USERSPACE64: /* Same for copyin()... */
531 error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
532 break;
533 case UIO_SYSSPACE:
534 kpath = CAST_DOWN(char *, path); /* SAFE */
535 error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
536 break;
537 default:
538 error = EFAULT;
539 break;
540 }
541
542 if (!error) {
543 bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
544 len += strlen(EXECUTABLE_KEY);
545
546 imgp->ip_strendp += len;
547 imgp->ip_strspace -= len;
548
549 if (excpath) {
550 *excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
551 }
552 }
553
554 return error;
555 }
556
557 /*
558 * exec_reset_save_path
559 *
560 * If we detect a shell script, we need to reset the string area
561 * state so that the interpreter can be saved onto the stack.
562 *
563 * Parameters; struct image_params * image parameter block
564 *
565 * Returns: int 0 Success
566 *
567 * Implicit returns:
568 * (imgp->ip_strings) saved path
569 * (imgp->ip_strspace) space remaining in ip_strings
570 * (imgp->ip_strendp) start of remaining copy area
571 * (imgp->ip_argspace) space remaining of NCARGS
572 *
573 */
574 static int
exec_reset_save_path(struct image_params * imgp)575 exec_reset_save_path(struct image_params *imgp)
576 {
577 imgp->ip_strendp = imgp->ip_strings;
578 imgp->ip_argspace = NCARGS;
579 imgp->ip_strspace = (NCARGS + PAGE_SIZE);
580
581 return 0;
582 }
583
584 /*
585 * exec_shell_imgact
586 *
587 * Image activator for interpreter scripts. If the image begins with
588 * the characters "#!", then it is an interpreter script. Verify the
589 * length of the script line indicating the interpreter is not in
590 * excess of the maximum allowed size. If this is the case, then
591 * break out the arguments, if any, which are separated by white
592 * space, and copy them into the argument save area as if they were
593 * provided on the command line before all other arguments. The line
594 * ends when we encounter a comment character ('#') or newline.
595 *
596 * Parameters; struct image_params * image parameter block
597 *
598 * Returns: -1 not an interpreter (keep looking)
599 * -3 Success: interpreter: relookup
600 * >0 Failure: interpreter: error number
601 *
602 * A return value other than -1 indicates subsequent image activators should
603 * not be given the opportunity to attempt to activate the image.
604 */
605 static int
exec_shell_imgact(struct image_params * imgp)606 exec_shell_imgact(struct image_params *imgp)
607 {
608 char *vdata = imgp->ip_vdata;
609 char *ihp;
610 char *line_startp, *line_endp;
611 char *interp;
612
613 /*
614 * Make sure it's a shell script. If we've already redirected
615 * from an interpreted file once, don't do it again.
616 */
617 if (vdata[0] != '#' ||
618 vdata[1] != '!' ||
619 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
620 return -1;
621 }
622
623 if (imgp->ip_origcputype != 0) {
624 /* Fat header previously matched, don't allow shell script inside */
625 return -1;
626 }
627
628 imgp->ip_flags |= IMGPF_INTERPRET;
629 imgp->ip_interp_sugid_fd = -1;
630 imgp->ip_interp_buffer[0] = '\0';
631
632 /* Check to see if SUGID scripts are permitted. If they aren't then
633 * clear the SUGID bits.
634 * imgp->ip_vattr is known to be valid.
635 */
636 if (sugid_scripts == 0) {
637 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
638 }
639
640 /* Try to find the first non-whitespace character */
641 for (ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++) {
642 if (IS_EOL(*ihp)) {
643 /* Did not find interpreter, "#!\n" */
644 return ENOEXEC;
645 } else if (IS_WHITESPACE(*ihp)) {
646 /* Whitespace, like "#! /bin/sh\n", keep going. */
647 } else {
648 /* Found start of interpreter */
649 break;
650 }
651 }
652
653 if (ihp == &vdata[IMG_SHSIZE]) {
654 /* All whitespace, like "#! " */
655 return ENOEXEC;
656 }
657
658 line_startp = ihp;
659
660 /* Try to find the end of the interpreter+args string */
661 for (; ihp < &vdata[IMG_SHSIZE]; ihp++) {
662 if (IS_EOL(*ihp)) {
663 /* Got it */
664 break;
665 } else {
666 /* Still part of interpreter or args */
667 }
668 }
669
670 if (ihp == &vdata[IMG_SHSIZE]) {
671 /* A long line, like "#! blah blah blah" without end */
672 return ENOEXEC;
673 }
674
675 /* Backtrack until we find the last non-whitespace */
676 while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
677 ihp--;
678 }
679
680 /* The character after the last non-whitespace is our logical end of line */
681 line_endp = ihp + 1;
682
683 /*
684 * Now we have pointers to the usable part of:
685 *
686 * "#! /usr/bin/int first second third \n"
687 * ^ line_startp ^ line_endp
688 */
689
690 /* copy the interpreter name */
691 interp = imgp->ip_interp_buffer;
692 for (ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++) {
693 *interp++ = *ihp;
694 }
695 *interp = '\0';
696
697 exec_reset_save_path(imgp);
698 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
699 UIO_SYSSPACE, NULL);
700
701 /* Copy the entire interpreter + args for later processing into argv[] */
702 interp = imgp->ip_interp_buffer;
703 for (ihp = line_startp; (ihp < line_endp); ihp++) {
704 *interp++ = *ihp;
705 }
706 *interp = '\0';
707
708 #if CONFIG_SETUID
709 /*
710 * If we have an SUID or SGID script, create a file descriptor
711 * from the vnode and pass /dev/fd/%d instead of the actual
712 * path name so that the script does not get opened twice
713 */
714 if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
715 proc_t p;
716 struct fileproc *fp;
717 int fd;
718 int error;
719
720 p = vfs_context_proc(imgp->ip_vfs_context);
721 error = falloc_exec(p, imgp->ip_vfs_context, &fp, &fd);
722 if (error) {
723 return error;
724 }
725
726 fp->fp_glob->fg_flag = FREAD;
727 fp->fp_glob->fg_ops = &vnops;
728 fp_set_data(fp, imgp->ip_vp);
729
730 proc_fdlock(p);
731 procfdtbl_releasefd(p, fd, NULL);
732 fp_drop(p, fd, fp, 1);
733 proc_fdunlock(p);
734 vnode_ref(imgp->ip_vp);
735
736 imgp->ip_interp_sugid_fd = fd;
737 }
738 #endif /* CONFIG_SETUID */
739
740 return -3;
741 }
742
743
744
745 /*
746 * exec_fat_imgact
747 *
748 * Image activator for fat 1.0 binaries. If the binary is fat, then we
749 * need to select an image from it internally, and make that the image
750 * we are going to attempt to execute. At present, this consists of
751 * reloading the first page for the image with a first page from the
752 * offset location indicated by the fat header.
753 *
754 * Parameters; struct image_params * image parameter block
755 *
756 * Returns: -1 not a fat binary (keep looking)
757 * -2 Success: encapsulated binary: reread
758 * >0 Failure: error number
759 *
760 * Important: This image activator is byte order neutral.
761 *
762 * Note: A return value other than -1 indicates subsequent image
763 * activators should not be given the opportunity to attempt
764 * to activate the image.
765 *
766 * If we find an encapsulated binary, we make no assertions
767 * about its validity; instead, we leave that up to a rescan
768 * for an activator to claim it, and, if it is claimed by one,
769 * that activator is responsible for determining validity.
770 */
771 static int
exec_fat_imgact(struct image_params * imgp)772 exec_fat_imgact(struct image_params *imgp)
773 {
774 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
775 kauth_cred_t cred = kauth_cred_proc_ref(p);
776 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
777 struct _posix_spawnattr *psa = NULL;
778 struct fat_arch fat_arch;
779 int resid, error;
780 load_return_t lret;
781
782 if (imgp->ip_origcputype != 0) {
783 /* Fat header previously matched, don't allow another fat file inside */
784 error = -1; /* not claimed */
785 goto bad;
786 }
787
788 /* Make sure it's a fat binary */
789 if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
790 error = -1; /* not claimed */
791 goto bad;
792 }
793
794 /* imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller */
795 lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE,
796 (off_t)imgp->ip_vattr->va_data_size);
797 if (lret != LOAD_SUCCESS) {
798 error = load_return_to_errno(lret);
799 goto bad;
800 }
801
802 /* If posix_spawn binprefs exist, respect those prefs. */
803 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
804 if (psa != NULL && psa->psa_binprefs[0] != 0) {
805 uint32_t pr = 0;
806
807 /* Check each preference listed against all arches in header */
808 for (pr = 0; pr < NBINPREFS; pr++) {
809 cpu_type_t pref = psa->psa_binprefs[pr];
810 cpu_type_t subpref = psa->psa_subcpuprefs[pr];
811
812 if (pref == 0) {
813 /* No suitable arch in the pref list */
814 error = EBADARCH;
815 goto bad;
816 }
817
818 if (pref == CPU_TYPE_ANY) {
819 /* Fall through to regular grading */
820 goto regular_grading;
821 }
822
823 lret = fatfile_getbestarch_for_cputype(pref,
824 subpref,
825 (vm_offset_t)fat_header,
826 PAGE_SIZE,
827 imgp,
828 &fat_arch);
829 if (lret == LOAD_SUCCESS) {
830 goto use_arch;
831 }
832 }
833
834 /* Requested binary preference was not honored */
835 error = EBADEXEC;
836 goto bad;
837 }
838
839 regular_grading:
840 /* Look up our preferred architecture in the fat file. */
841 lret = fatfile_getbestarch((vm_offset_t)fat_header,
842 PAGE_SIZE,
843 imgp,
844 &fat_arch,
845 (p->p_flag & P_AFFINITY) != 0);
846 if (lret != LOAD_SUCCESS) {
847 error = load_return_to_errno(lret);
848 goto bad;
849 }
850
851 use_arch:
852 /* Read the Mach-O header out of fat_arch */
853 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
854 PAGE_SIZE, fat_arch.offset,
855 UIO_SYSSPACE, (IO_UNIT | IO_NODELOCKED),
856 cred, &resid, p);
857 if (error) {
858 if (error == ERESTART) {
859 error = EINTR;
860 }
861 goto bad;
862 }
863
864 if (resid) {
865 memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
866 }
867
868 /* Success. Indicate we have identified an encapsulated binary */
869 error = -2;
870 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
871 imgp->ip_arch_size = (user_size_t)fat_arch.size;
872 imgp->ip_origcputype = fat_arch.cputype;
873 imgp->ip_origcpusubtype = fat_arch.cpusubtype;
874
875 bad:
876 kauth_cred_unref(&cred);
877 return error;
878 }
879
880 static int
activate_exec_state(task_t task,proc_t p,thread_t thread,load_result_t * result)881 activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result)
882 {
883 int ret;
884
885 (void)task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0, false);
886 task_set_64bit(task, result->is_64bit_addr, result->is_64bit_data);
887 if (result->is_64bit_addr) {
888 OSBitOrAtomic(P_LP64, &p->p_flag);
889 get_bsdthread_info(thread)->uu_flag |= UT_LP64;
890 } else {
891 OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
892 get_bsdthread_info(thread)->uu_flag &= ~UT_LP64;
893 }
894 task_set_mach_header_address(task, result->mach_header);
895
896 ret = thread_state_initialize(thread);
897 if (ret != KERN_SUCCESS) {
898 return ret;
899 }
900
901 if (result->threadstate) {
902 uint32_t *ts = result->threadstate;
903 uint32_t total_size = (uint32_t)result->threadstate_sz;
904
905 while (total_size > 0) {
906 uint32_t flavor = *ts++;
907 uint32_t size = *ts++;
908
909 ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
910 if (ret) {
911 return ret;
912 }
913 ts += size;
914 total_size -= (size + 2) * sizeof(uint32_t);
915 }
916 }
917
918 thread_setentrypoint(thread, result->entry_point);
919
920 return KERN_SUCCESS;
921 }
922
923 #if (DEVELOPMENT || DEBUG)
924 extern char panic_on_proc_crash[];
925 extern int use_panic_on_proc_crash;
926
927 extern char panic_on_proc_exit[];
928 extern int use_panic_on_proc_exit;
929
930 extern char panic_on_proc_spawn_fail[];
931 extern int use_panic_on_proc_spawn_fail;
932
933 static inline void
set_crash_behavior_from_bootarg(proc_t p)934 set_crash_behavior_from_bootarg(proc_t p)
935 {
936 if (use_panic_on_proc_crash && strcmp(p->p_comm, panic_on_proc_crash) == 0) {
937 printf("will panic on proc crash: %s\n", p->p_comm);
938 p->p_crash_behavior |= POSIX_SPAWN_PANIC_ON_CRASH;
939 }
940
941 if (use_panic_on_proc_exit && strcmp(p->p_comm, panic_on_proc_exit) == 0) {
942 printf("will panic on proc exit: %s\n", p->p_comm);
943 p->p_crash_behavior |= POSIX_SPAWN_PANIC_ON_EXIT;
944 }
945
946 if (use_panic_on_proc_spawn_fail && strcmp(p->p_comm, panic_on_proc_spawn_fail) == 0) {
947 printf("will panic on proc spawn fail: %s\n", p->p_comm);
948 p->p_crash_behavior |= POSIX_SPAWN_PANIC_ON_SPAWN_FAIL;
949 }
950 }
951 #endif
952
953 void
set_proc_name(struct image_params * imgp,proc_t p)954 set_proc_name(struct image_params *imgp, proc_t p)
955 {
956 uint64_t buflen = imgp->ip_ndp->ni_cnd.cn_namelen;
957 const int p_name_len = sizeof(p->p_name) - 1;
958 const int p_comm_len = sizeof(p->p_comm) - 1;
959
960 if (buflen > p_name_len) {
961 buflen = p_name_len;
962 }
963
964 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name, buflen);
965 p->p_name[buflen] = '\0';
966
967 if (buflen > p_comm_len) {
968 static_assert(MAXCOMLEN + 1 == sizeof(p->p_comm));
969 buflen = p_comm_len;
970 }
971
972 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm, buflen);
973 p->p_comm[buflen] = '\0';
974
975 #if (DEVELOPMENT || DEBUG)
976 /*
977 * This happens during image activation, so the crash behavior flags from
978 * posix_spawn will have already been set. So we don't have to worry about
979 * this being overridden.
980 */
981 set_crash_behavior_from_bootarg(p);
982 #endif
983 }
984
985 #if __has_feature(ptrauth_calls)
986 /**
987 * Returns a team ID string that may be used to assign a shared region.
988 *
989 * Platform binaries do not have team IDs and will return NULL. Non-platform
990 * binaries without a team ID will be assigned an artificial team ID of ""
991 * (empty string) so that they will not be assigned to the default shared
992 * region.
993 *
994 * @param imgp image parameter block
995 * @return NULL if this is a platform binary, or an appropriate team ID string
996 * otherwise
997 */
998 static inline const char *
get_teamid_for_shared_region(struct image_params * imgp)999 get_teamid_for_shared_region(struct image_params *imgp)
1000 {
1001 assert(imgp->ip_vp != NULL);
1002
1003 const char *ret = csvnode_get_teamid(imgp->ip_vp, imgp->ip_arch_offset);
1004 if (ret) {
1005 return ret;
1006 }
1007
1008 struct cs_blob *blob = csvnode_get_blob(imgp->ip_vp, imgp->ip_arch_offset);
1009 if (csblob_get_platform_binary(blob)) {
1010 return NULL;
1011 } else {
1012 static const char *NO_TEAM_ID = "";
1013 return NO_TEAM_ID;
1014 }
1015 }
1016
1017 /**
1018 * Determines whether ptrauth should be enabled for the provided arm64 CPU subtype.
1019 *
1020 * @param cpusubtype Mach-O style CPU subtype
1021 * @return whether the CPU subtype matches arm64e with the current ptrauth ABI
1022 */
1023 static inline bool
arm64_cpusubtype_uses_ptrauth(cpu_subtype_t cpusubtype)1024 arm64_cpusubtype_uses_ptrauth(cpu_subtype_t cpusubtype)
1025 {
1026 int ptrauth_abi_version = (int)CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(cpusubtype);
1027 return (cpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E &&
1028 (ptrauth_abi_version >= CPU_SUBTYPE_ARM64_PTR_AUTHV0_VERSION &&
1029 ptrauth_abi_version <= CPU_SUBTYPE_ARM64_PTR_AUTH_MAX_PREFERRED_VERSION);
1030 }
1031
1032 #endif /* __has_feature(ptrauth_calls) */
1033
1034 /**
1035 * Returns whether a type/subtype slice matches the requested
1036 * type/subtype.
1037 *
1038 * @param mask Bits to mask from the requested/tested cpu type
1039 * @param req_cpu Requested cpu type
1040 * @param req_subcpu Requested cpu subtype
1041 * @param test_cpu Tested slice cpu type
1042 * @param test_subcpu Tested slice cpu subtype
1043 */
1044 boolean_t
binary_match(cpu_type_t mask,cpu_type_t req_cpu,cpu_subtype_t req_subcpu,cpu_type_t test_cpu,cpu_subtype_t test_subcpu)1045 binary_match(cpu_type_t mask, cpu_type_t req_cpu,
1046 cpu_subtype_t req_subcpu, cpu_type_t test_cpu,
1047 cpu_subtype_t test_subcpu)
1048 {
1049 if ((test_cpu & ~mask) != (req_cpu & ~mask)) {
1050 return FALSE;
1051 }
1052
1053 test_subcpu &= ~CPU_SUBTYPE_MASK;
1054 req_subcpu &= ~CPU_SUBTYPE_MASK;
1055
1056 if (test_subcpu != req_subcpu && req_subcpu != (CPU_SUBTYPE_ANY & ~CPU_SUBTYPE_MASK)) {
1057 return FALSE;
1058 }
1059
1060 return TRUE;
1061 }
1062
1063
1064 /*
1065 * Check entitlements to see if this is a platform restrictions binary.
1066 * Save this in load_result until later for two purposes:
1067 * 1. We can mark the task at a certain security level once it's been created
1068 * 2. We can propagate which entitlements are present to the apple array
1069 */
1070 static inline void
encode_HR_entitlement(const char * entitlement,hardened_browser_flags_t mask,const struct image_params * imgp,load_result_t * load_result)1071 encode_HR_entitlement(const char *entitlement, hardened_browser_flags_t mask,
1072 const struct image_params *imgp, load_result_t *load_result)
1073 {
1074 if (IOVnodeHasEntitlement(imgp->ip_vp, (int64_t)imgp->ip_arch_offset, entitlement)) {
1075 load_result->hardened_browser |= mask;
1076 }
1077 }
1078
1079 /*
1080 * If the passed in executable's vnode should use the RSR
1081 * shared region, then this should return TRUE, otherwise, return FALSE.
1082 */
/* Current RSR generation; read via rsr_get_version(), bumped via rsr_bump_version(). */
static uint32_t rsr_current_version = 0;
/* Optional hook; when non-NULL it is consulted by vnode_is_rsr() below. */
boolean_t (*rsr_check_vnode)(void *vnode) = NULL;
1085
1086 boolean_t
vnode_is_rsr(vnode_t vp)1087 vnode_is_rsr(vnode_t vp)
1088 {
1089 if (!(vnode_isreg(vp) && vnode_tag(vp) == VT_APFS)) {
1090 return FALSE;
1091 }
1092
1093 if (rsr_check_vnode != NULL && rsr_check_vnode((void *)vp)) {
1094 return TRUE;
1095 }
1096 return FALSE;
1097 }
1098
/*
 * Legacy ("com.apple.developer") and current ("com.apple.security")
 * spellings of each security-mitigation entitlement, indexed by
 * exec_security_mitigation_entitlement_t.  A NULL legacy entry means only
 * the current spelling exists for that mitigation.
 */
static struct {
	char *legacy;   /* old spelling, or NULL if none */
	char *security; /* current spelling; never NULL */
} exec_security_mitigation_entitlement[] = {
	/* The following entries must match the enum declaration in kern_exec_internal.h */
	[HARDENED_PROCESS] = {
		"com.apple.developer.hardened-process",
		"com.apple.security.hardened-process"
	},
	[HARDENED_HEAP] = {
		"com.apple.developer.hardened-process.hardened-heap",
		"com.apple.security.hardened-process.hardened-heap"
	},
	[TPRO] = {
		NULL,
		"com.apple.security.hardened-process.dyld-ro",
	},
#if HAS_MTE
	[CHECKED_ALLOCATIONS] = {
		"com.apple.developer.hardened-process.checked-allocations",
		"com.apple.security.hardened-process.checked-allocations"
	},
	[CHECKED_ALLOCATIONS_PURE_DATA] = {
		NULL,
		"com.apple.security.hardened-process.checked-allocations.enable-pure-data"
	},
	[CHECKED_ALLOCATIONS_NO_TAGGED_RECEIVE] = {
		NULL,
		"com.apple.security.hardened-process.checked-allocations.no-tagged-receive"
	},
	[CHECKED_ALLOCATIONS_SOFT_MODE] = {
		NULL,
		"com.apple.security.hardened-process.checked-allocations.soft-mode"
	},
#endif /* HAS_MTE */
};
1135
1136 /*
1137 * Platform Restrictions
1138 *
1139 * This mitigation opts you into the grab bag of various kernel mitigations
1140 * including IPC security restrictions
1141 * The presence of the entitlement opts the binary into the feature.
1142 * The entitlement is an <integer> entitlement containing a version number
1143 * for the platform restrictions you are opting into.
1144 */
1145 #define SPAWN_ENABLE_PLATFORM_RESTRICTIONS "com.apple.security.hardened-process.platform-restrictions"
1146
1147 /*
1148 * Version number for enhanced security
1149 * Currently stored with 3 bits in `hardened_process_version`
1150 */
1151 #define HARDENED_PROCESS_VERSION "com.apple.security.hardened-process.enhanced-security-version"
1152
1153 /* See kern_exec_internal.h for the extensive documentation. */
1154 exec_security_err_t
exec_check_security_entitlement(struct image_params * imgp,exec_security_mitigation_entitlement_t entitlement)1155 exec_check_security_entitlement(struct image_params *imgp,
1156 exec_security_mitigation_entitlement_t entitlement)
1157 {
1158 bool has_legacy_entitlement = false, has_security_entitlement = false;
1159 assert(exec_security_mitigation_entitlement[entitlement].security != NULL);
1160
1161 if (exec_security_mitigation_entitlement[entitlement].legacy != NULL) {
1162 has_legacy_entitlement =
1163 IOVnodeHasEntitlement(imgp->ip_vp, (int64_t)imgp->ip_arch_offset,
1164 exec_security_mitigation_entitlement[entitlement].legacy);
1165 }
1166
1167 has_security_entitlement =
1168 IOVnodeHasEntitlement(imgp->ip_vp, (int64_t)imgp->ip_arch_offset,
1169 exec_security_mitigation_entitlement[entitlement].security);
1170
1171 /* If both entitlements are present, this is an invalid configuration. */
1172 if (has_legacy_entitlement && has_security_entitlement) {
1173 EXEC_LOG("Binary has both legacy (%s) and security (%s) entitlements\n",
1174 exec_security_mitigation_entitlement[entitlement].legacy,
1175 exec_security_mitigation_entitlement[entitlement].security);
1176
1177 return EXEC_SECURITY_INVALID_CONFIG;
1178 }
1179
1180 if (has_legacy_entitlement || has_security_entitlement) {
1181 return EXEC_SECURITY_ENTITLED;
1182 }
1183
1184 return EXEC_SECURITY_NOT_ENTITLED;
1185 }
1186
1187
1188 /*
1189 * Entitled binaries get hardened_heap
1190 */
1191 static inline errno_t
imgact_setup_hardened_heap(struct image_params * imgp,task_t task)1192 imgact_setup_hardened_heap(struct image_params *imgp, task_t task)
1193 {
1194 exec_security_err_t ret = exec_check_security_entitlement(imgp, HARDENED_HEAP);
1195 if (ret == EXEC_SECURITY_ENTITLED) {
1196 task_set_hardened_heap(task);
1197 } else {
1198 task_clear_hardened_heap(task);
1199 }
1200 switch (ret) {
1201 case EXEC_SECURITY_INVALID_CONFIG:
1202 return EINVAL;
1203 case EXEC_SECURITY_ENTITLED:
1204 case EXEC_SECURITY_NOT_ENTITLED:
1205 return 0;
1206 }
1207 }
1208
1209 /*
1210 * Configure the platform restrictions security features on the task
1211 * This must be done before `ipc_task_enable` so that the bits
 * can be propagated to the ipc space.
1213 *
1214 * Requires `exectextresetvp` to be called on `task` previously so
1215 * that we can use the `IOTaskGetEntitlement` API
1216 */
1217 static inline void
exec_setup_platform_restrictions(task_t task)1218 exec_setup_platform_restrictions(task_t task)
1219 {
1220 uint64_t value = 0;
1221 /* Set platform restrictions version */
1222 if (task_get_platform_binary(task)) {
1223 task_set_platform_restrictions_version(task, 2);
1224 } else if (IOTaskGetIntegerEntitlement(task, SPAWN_ENABLE_PLATFORM_RESTRICTIONS, &value) &&
1225 value > 1) {
1226 task_set_platform_restrictions_version(task, value);
1227 }
1228
1229 /* Set hardened process version*/
1230 if (IOTaskGetIntegerEntitlement(task, HARDENED_PROCESS_VERSION, &value)) {
1231 task_set_hardened_process_version(task, value);
1232 }
1233 }
1234
1235 #if HAS_MTE || HAS_MTE_EMULATION_SHIMS
1236
1237 #if DEVELOPMENT || DEBUG
1238 static inline void config_sec_inheritance(task_t, task_t);
1239 #endif /* DEVELOPMENT || DEBUG */
1240 static inline void config_sec_spawnflags(struct _posix_spawnattr *, task_t);
1241
1242 #if HAS_MTE
1243
1244 static inline errno_t config_checked_allocations_entitlements(struct image_params *,
1245 load_result_t *, task_t, struct cs_blob *, proc_t);
1246
static inline exec_security_err_t
imgact_setup_has_checked_allocations_entitlement(struct image_params *imgp, load_result_t *load_result,
    __unused task_t new_task, __unused struct cs_blob *cs_blob)
{
	/*
	 * Decide whether this image is entitled to checked allocations (MTE).
	 * Returns EXEC_SECURITY_ENTITLED, EXEC_SECURITY_NOT_ENTITLED, or
	 * EXEC_SECURITY_INVALID_CONFIG (conflicting entitlement spellings,
	 * which will fail the exec).
	 */

	/* First-party DriverKit always gets MTE regardless of our normal entitlement knobs */
	if (load_result->platform_binary && IOVnodeHasEntitlement(imgp->ip_vp,
	    (int64_t)imgp->ip_arch_offset, kIODriverKitEntitlementKey)) {
		/* In soft mode, to mitigate risks on the build */
		EXEC_LOG("Enabling MTE because we're launching a first-party dext");
		return EXEC_SECURITY_ENTITLED;
	}

	/* If not a hardened-process, bail out. */
	if (!load_result->is_hardened_process) {
		return EXEC_SECURITY_NOT_ENTITLED;
	}

	/* Check the entitlement. */
	exec_security_err_t ret = exec_check_security_entitlement(imgp, CHECKED_ALLOCATIONS);

	/* Bail out early on invalid configuration. These will fail execution. */
	if (ret == EXEC_SECURITY_INVALID_CONFIG) {
		return ret;
	}

	/*
	 * We need a couple of extra checks for first party binaries, mostly around
	 * AMFI and reporting early (forbidden) usage of the entitlement.
	 */
	if (load_result->platform_binary) {
#if KERN_AMFI_SUPPORTS_MTE >= 2
		if (__improbable(amfi->has_mte_opt_out && amfi->has_mte_opt_out(cs_blob))) {
			EXEC_LOG("Binary checked-allocations enablement was denied by AMFI static list\n");
			return EXEC_SECURITY_NOT_ENTITLED;
		}
#endif /* KERN_AMFI_SUPPORTS_MTE */

		/*
		 * At this stage we are a hardened-process and AMFI hasn't said that we should
		 * not enable MTE, therefore we just force enable even if the entitlement is not
		 * present (until we can publicly require checked-allocations to be true).
		 */
		return EXEC_SECURITY_ENTITLED;
	}

	/*
	 * The third-party flow is more linear: whatever the entitlement said the
	 * setting was, we'll run with it.
	 */
	return ret;
}
1298 #endif /* HAS_MTE */
1299
1300 /*
1301 * Checked-allocations is a security feature that leverages MTE (Memory Tagging Extensions)
1302 * inside userspace allocators to protect dynamic memory allocations.
1303 *
1304 * MTE is a hardware security feature available in recent hardware devices. For legacy
1305 * devices, we support an internal-only readiness tool based on Rosetta that aims at
1306 * qualifying binaries for the new hardware, but that is not meant to be used in production.
1307 *
1308 * Checked-allocations enablement (generally referred to as MTE enabled here) and
1309 * configuration is controlled by:
1310 * - inheritance (debugging feature for bringup/readiness/performance evaluation)
1311 * - posix_spawn flags (no downgrade flags supported on RELEASE)
1312 * - entitlements (hardware only, no emulation. Main RELEASE configuration)
1313 *
1314 * The algorithm to decide whether checked-allocations should be enabled on the target
1315 * process is summarized here.
1316 * For Rosetta binaries, only posix_spawn flags are supported.
1317 *
1318 * ┌────────────────┐ ┌───────────────┐
1319 * │ Inheritance │ ┌───────────────┐ │ Configure MTE │
1320 * │ enabled? ├─YES──▶│ Enable MTE │──────▶│ mirroring │
1321 * └──────────┬─────┘ └───────────────┘ │ parent state │
1322 * │ └───────────────┘
1323 * NO
1324 * │
1325 * │ ┌─────────────────────────────┐
1326 * └─────▶│posix_spawn explicit enable? │
1327 * └──┬─────────────┬────────────┘
1328 * │ │
1329 * │ │
1330 * YES NO
1331 * │ │ ┌──────────────────────────────────┐
1332 * ┌───────────────┐ │ └──▶│ hardened-process entitlement or │
1333 * │ Enable MTE │◀────┘ │ (1p && DriverKit entitlement)? │
1334 * └───────┬───────┘ └───────────┬─────────────┬────────┘
1335 * │ YES NO
1336 * ┌───────▼───────┐ │ │
1337 * │ Configure MTE │ ┌──────────▼────┐ ┌────▼─────────┐
1338 * │ through │ │ Enable MTE │ │ Disable MTE │
1339 * │ posix_spawn │ └──────────┬────┘ └──────────────┘
1340 * │ flags │ │
1341 * └───────────────┘ ┌──────────▼────┐
1342 * │ Configure MTE │
1343 * │ through │
1344 * │ entitlements │
1345 * └───────────────┘
1346 *
1347 *
1348 * The above algorithm covers the decision of enabling checked-allocations but doesn't
1349 * cover the configuration options which are described later.
1350 *
 * This function fails (returns a non-zero errno) only on an invalid entitlement
 * configuration, or when POSIX_SPAWN_SECFLAG_EXPLICIT_REQUIRE_ENABLE is passed
 * and the binary fails to satisfy the requirement.
1353 */
static inline errno_t
imgact_setup_sec(struct image_params *imgp, __unused load_result_t *load_result, task_t old_task,
    task_t new_task, __unused vm_map_t new_map, __unused proc_t new_proc)
{
	/*
	 * Decide whether the new task gets the security shim
	 * (checked-allocations/MTE) and apply the chosen configuration, per
	 * the decision diagram above: inheritance first, then posix_spawn
	 * flags, then entitlements.  Returns 0 on success, or an errno
	 * (EINVAL) that is fatal to the exec.
	 */
#if DEVELOPMENT || DEBUG
#if HAS_MTE
	/* Nothing to do if we have disabled MTE system-wide */
	if (!is_mte_enabled) {
		EXEC_LOG("MTE enablement is skipped due to system-wide disablement\n");
		return 0;
	}
#endif /* HAS_MTE */

#if HAS_MTE_EMULATION_SHIMS
	/* Ignore any emulation attempt if we are not running under Rosetta. */
	if ((imgp->ip_flags & (IMGPF_ROSETTA | IMGPF_ALT_ROSETTA)) == 0) {
		return 0;
	}
#endif /* HAS_MTE_EMULATION_SHIMS */
#endif /* DEVELOPMENT || DEBUG */

	/* Reset to a clear view on the target task - we'll decide the configuration here. */
	task_clear_sec(new_task);
	task_clear_sec_policy(new_task);

	/*
	 * If the parent has sec inherit, propagate the security settings.
	 * Inheritance is currently aimed only at debug sessions and will trump
	 * any existing configuration. Inheritance should be seen as the same posix_spawn
	 * flags used to enable it (+ configure the feature) re-applied over and over on
	 * every descendant.
	 */
	if (task_has_sec_inherit(old_task)) {
		EXEC_LOG("Task will be configured based on inheritance\n");
		/* Inheritance propagates to the next task. */
		task_set_sec_inherit(new_task);

		if (task_has_sec(old_task)) {
			task_set_sec(new_task);
#if DEVELOPMENT || DEBUG
			config_sec_inheritance(old_task, new_task);
#endif /* DEVELOPMENT || DEBUG */
		}
		return 0;
	}

	/* Check posix_spawn flags now if any */
	struct _posix_spawnattr *px_sa = imgp->ip_px_sa;

	if (px_sa != NULL) {
#if DEVELOPMENT || DEBUG
		/*
		 * Do we have a request to explicitly disable?
		 */
		if ((px_sa->psa_sec_flags & POSIX_SPAWN_SECFLAG_EXPLICIT_DISABLE) != 0) {
			EXEC_LOG("Task configured to disable the security feature due to posix_spawn\n");
			/* For A/B testing, allow DISABLE to propagate through inheritance. */
			config_sec_spawnflags(px_sa, new_task);
			/* Clear we were, clear we stay. */
			return 0;
		}
#endif /* DEVELOPMENT || DEBUG */

		/* Do we have a request to explicitly enable? */
		if ((px_sa->psa_sec_flags & POSIX_SPAWN_SECFLAG_EXPLICIT_ENABLE) != 0) {
			EXEC_LOG("Task is explicitly enabled via posix_spawn flags\n");
			task_set_sec(new_task);
			config_sec_spawnflags(px_sa, new_task);
			return 0;
		}

#if HAS_MTE
		/* Do we have a request to enforce that the target is properly entitled? */
		if ((px_sa->psa_sec_flags & POSIX_SPAWN_SECFLAG_EXPLICIT_REQUIRE_ENABLE) != 0) {
			if (!load_result->is_hardened_process) {
				EXEC_LOG("Caller requested the explicit presence of the hardened-process entitlement"
				    " which the binary doesn't have\n");
				return EINVAL;
			}
			/* FALLTHROUGH to entitlement evaluation */
		}
#endif /* HAS_MTE */
	}

#if HAS_MTE

#if DEVELOPMENT || DEBUG
	/* Runtime options solely affect entitlement-driven choices. */
	if (!mte_user_enabled()) {
		/* Clear we were, clear we stay. */
		return 0;
	}
#endif /* DEVELOPMENT || DEBUG */

	struct cs_blob* cs_blob = csvnode_get_blob(imgp->ip_vp, imgp->ip_arch_offset);

	switch (imgact_setup_has_checked_allocations_entitlement(imgp, load_result, new_task, cs_blob)) {
	case EXEC_SECURITY_INVALID_CONFIG:
		EXEC_LOG("Invalid configuration detected\n");
		return EINVAL;
	case EXEC_SECURITY_ENTITLED:
		EXEC_LOG("Task is explicitly configured via entitlements\n");
		task_set_sec(new_task);
		return config_checked_allocations_entitlements(imgp, load_result, new_task, cs_blob,
		    new_proc);
	case EXEC_SECURITY_NOT_ENTITLED:
#if DEVELOPMENT || DEBUG
		/* Last chance: everything 1p is force-enabled. */
		if (mte_force_all_enabled() && load_result->platform_binary) {
			EXEC_LOG("Task is explicitly configured via enable-all boot-arg\n");
			task_set_sec(new_task);
		}
#endif /* DEVELOPMENT || DEBUG */
		return 0;
	default:
		panic("Invalid return value from entitlement evaluation");
	}
#endif /* HAS_MTE */

	/* Reached only when HAS_MTE is not built in. */
	return 0;
}
1475
1476 /*
1477 * MTE/Checked-allocation configuration.
1478 *
1479 * There are three configuration vectors: inheritance, posix_spawn flags and entitlements.
1480 * Each of the functions below covers one configuration vector.
1481 *
1482 * Configuration vectors are designed to be exclusive when it comes to define how the
1483 * feature will behave. This means that if configurations happens through inheritance,
1484 * it will trump any posix_spawn flag or entitlement and if it happens through
1485 * posix_spawn flag, it will trump entitlements.
1486 *
1487 * Inheritance is provided as a dev feature and needs to be explicitly "enabled" via posix_spawn.
1488 * Just like posix_spawn, it's not allowed to downgrade MTE state.
1489 *
1490 * While there are several posix_spawn flags, the majority of them is again only for
1491 * DEVELOPMENT || DEBUG. Only flags that do not _decrease_ the security posture of the
1492 * target are supported in production (essentially, only flags that _enable_ features).
1493 * posix_spawn flags are also the only way to control the emulation of MTE via the
1494 * readiness tool based on Rosetta.
1495 *
1496 * Entitlements are the expected and preferred way to configure MTE/checked-allocations
1497 * for the system. They are not supported for the Rosetta based emulation.
1498 */
1499
1500 #if DEVELOPMENT || DEBUG
1501 static inline void
config_sec_inheritance(task_t current,task_t new_task)1502 config_sec_inheritance(task_t current, task_t new_task)
1503 {
1504 /* Configure the target task based on current task */
1505 if (task_has_sec_never_check(current)) {
1506 task_set_sec_never_check(new_task);
1507 vm_map_set_sec_disabled(get_task_map(new_task));
1508 }
1509
1510 if (task_has_sec_user_data(current)) {
1511 task_set_sec_user_data(new_task);
1512 }
1513
1514 /* Allow soft-mode to propagate for internal testing */
1515 if (task_has_sec_soft_mode(current)) {
1516 task_set_sec_soft_mode(new_task);
1517 }
1518 }
1519
1520 #endif /* DEVELOPMENT || DEBUG */
1521
static inline void
config_sec_spawnflags(struct _posix_spawnattr *px_sa, task_t new_task)
{
	/*
	 * Apply the posix_spawn security-shim configuration flags to the new
	 * task.  Only inheritance can be requested on RELEASE builds; the
	 * remaining knobs exist only on DEVELOPMENT || DEBUG.
	 */

	/* We cannot be here if there were no posix_spawn attributes */
	assert(px_sa);

	/*
	 * Most configurations are not available on RELEASE, but we need to
	 * allow inheritance for Xcode debugging workflows.
	 */
	if ((px_sa->psa_sec_flags & POSIX_SPAWN_SECFLAG_EXPLICIT_ENABLE_INHERIT) != 0) {
		EXEC_LOG("Task explicitly enables inheritance via posix_spawn flags\n");
		task_set_sec_inherit(new_task);
	}

#if DEVELOPMENT || DEBUG
	if ((px_sa->psa_sec_flags & POSIX_SPAWN_SECFLAG_EXPLICIT_ENABLE_PURE_DATA) != 0) {
		EXEC_LOG("Task explicitly enables userspace coverage via posix_spawn flags\n");
		task_set_sec_user_data(new_task);
	}

	/* Allow testing of soft-mode via posix_spawn */
	if ((px_sa->psa_sec_flags & POSIX_SPAWN_SECFLAG_EXPLICIT_CHECK_BYPASS) != 0) {
		EXEC_LOG("Task explicitly enables soft-mode via posix_spawn flags\n");
		task_set_sec_soft_mode(new_task);
	}

	/* Never-check: mark both the task and its map so checks are skipped. */
	if ((px_sa->psa_sec_flags & POSIX_SPAWN_SECFLAG_EXPLICIT_NEVER_CHECK_ENABLE) != 0) {
		task_set_sec_never_check(new_task);
		vm_map_set_sec_disabled(get_task_map(new_task));
	}
#endif /* DEVELOPMENT || DEBUG */
}
1555
1556 #if HAS_MTE
1557
1558 static inline errno_t
config_checked_allocations_entitlements(struct image_params * imgp,load_result_t * load_result,task_t new_task,__unused struct cs_blob * cs_blob,__unused proc_t new_proc)1559 config_checked_allocations_entitlements(struct image_params *imgp, load_result_t *load_result,
1560 task_t new_task, __unused struct cs_blob *cs_blob, __unused proc_t new_proc)
1561 {
1562 /* Determine whether we should enable pure-data allocations. */
1563 exec_security_err_t ret = exec_check_security_entitlement(imgp,
1564 CHECKED_ALLOCATIONS_PURE_DATA);
1565 assert(ret == EXEC_SECURITY_ENTITLED || ret == EXEC_SECURITY_NOT_ENTITLED);
1566
1567 if (ret == EXEC_SECURITY_ENTITLED) {
1568 task_set_sec_user_data(new_task);
1569 EXEC_LOG("Enabling user data tagging due to entitlement\n");
1570 }
1571
1572
1573 /*
1574 * Check whether we need to restrict receiving aliases to MTE memory (which are, by policy,
1575 * untagged) from other actors.
1576 */
1577 ret = exec_check_security_entitlement(imgp, CHECKED_ALLOCATIONS_NO_TAGGED_RECEIVE);
1578 assert(ret == EXEC_SECURITY_ENTITLED || ret == EXEC_SECURITY_NOT_ENTITLED);
1579
1580 if (ret == EXEC_SECURITY_ENTITLED) {
1581 EXEC_LOG("Restricting receiving aliases to tagged memory due to entitlement\n");
1582 task_set_sec_restrict_receiving_aliases_to_tagged_memory(new_task);
1583 }
1584
1585
1586 ret = exec_check_security_entitlement(imgp, CHECKED_ALLOCATIONS_SOFT_MODE);
1587 assert(ret == EXEC_SECURITY_ENTITLED || ret == EXEC_SECURITY_NOT_ENTITLED);
1588
1589 /*
1590 * All 1p processes run in hard-mode in lockdown mode, regardless of their
1591 * entitlement configuration.
1592 */
1593 if (load_result->platform_binary && get_lockdown_mode_state() != 0) {
1594 ret = EXEC_SECURITY_NOT_ENTITLED;
1595 }
1596
1597 /*
1598 * Force enable soft-mode for anything that is not a platform binary.
1599 */
1600 if (ret == EXEC_SECURITY_ENTITLED || !load_result->platform_binary) {
1601 EXEC_LOG("Enabling soft-mode from entitlement\n");
1602 task_set_sec_soft_mode(new_task);
1603 }
1604
1605 return EXEC_SECURITY_NOT_ENTITLED;
1606 }
1607
1608 #endif /* HAS_MTE */
1609 #endif /* HAS_MTE || HAS_MTE_EMULATION_SHIMS */
1610
/*
 * This routine configures the various runtime mitigations we can apply to a process
 * during image activation.
 *
 * Returns 0 on success, or an errno on failure. Failure will be fatal in exec_mach_imgact().
 */
1617 static inline errno_t
imgact_setup_runtime_mitigations(struct image_params * imgp,__unused load_result_t * load_result,__unused task_t old_task,task_t new_task,__unused vm_map_t map,__unused proc_t proc)1618 imgact_setup_runtime_mitigations(struct image_params *imgp, __unused load_result_t *load_result,
1619 __unused task_t old_task, task_t new_task, __unused vm_map_t map, __unused proc_t proc)
1620 {
1621 /*
1622 * It's safe to check entitlements anytime after `load_machfile` if you check
1623 * based on the vnode in imgp. We must perform this entitlement check
1624 * before we start using load_result->hardened_browser further down
1625 */
1626 load_result->hardened_browser = 0;
1627 encode_HR_entitlement(kCSWebBrowserHostEntitlement, BrowserHostEntitlementMask, imgp, load_result);
1628 encode_HR_entitlement(kCSWebBrowserGPUEntitlement, BrowserGPUEntitlementMask, imgp, load_result);
1629 encode_HR_entitlement(kCSWebBrowserNetworkEntitlement, BrowserNetworkEntitlementMask, imgp, load_result);
1630 encode_HR_entitlement(kCSWebBrowserWebContentEntitlement, BrowserWebContentEntitlementMask, imgp, load_result);
1631
1632 if (load_result->hardened_browser) {
1633 task_set_platform_restrictions_version(new_task, 1);
1634 }
1635
1636 errno_t retval = 0;
1637
1638 /*
1639 * Hardened-heap enables a set of extra security features in our system memory allocator.
1640 */
1641 if ((retval = imgact_setup_hardened_heap(imgp, new_task)) != 0) {
1642 EXEC_LOG("Invalid configuration detected for hardened-heap");
1643 return retval;
1644 }
1645
1646 #if HAS_MTE || HAS_MTE_EMULATION_SHIMS
1647 /*
1648 * Sec-shims a.k.a. checked-allocations a.k.a. MTE (due to several hoops around secrecy)
1649 * control whether the target process system allocators leverage MTE or not to provide
1650 * further security mitigations.
1651 */
1652 if ((retval = imgact_setup_sec(imgp, load_result, old_task, new_task, map, proc)) != 0) {
1653 EXEC_LOG("Invalid configuration detected for the security shim");
1654 return retval;
1655 }
1656 #endif /* HAS_MTE || HAS_MTE_EMULATION_SHIMS */
1657
1658
1659
1660 return retval;
1661 }
1662
1663 uint32_t
rsr_get_version(void)1664 rsr_get_version(void)
1665 {
1666 return os_atomic_load(&rsr_current_version, relaxed);
1667 }
1668
void
rsr_bump_version(void)
{
	/* Advance the RSR generation counter (relaxed atomic increment). */
	os_atomic_inc(&rsr_current_version, relaxed);
}
1674
1675 #if XNU_TARGET_OS_OSX
1676 static int
1677 rsr_version_sysctl SYSCTL_HANDLER_ARGS
1678 {
1679 #pragma unused(arg1, arg2, oidp)
1680 int value = rsr_get_version();
1681 int error = SYSCTL_OUT(req, &value, sizeof(int));
1682 if (error) {
1683 return error;
1684 }
1685
1686 if (!req->newptr) {
1687 return 0;
1688 }
1689
1690 error = SYSCTL_IN(req, &value, sizeof(int));
1691 if (error) {
1692 return error;
1693 }
1694 if (value != 0) {
1695 rsr_bump_version();
1696 }
1697 return 0;
1698 }
1699
1700
/* vm.shared_region_control — read/bump the RSR version; see rsr_version_sysctl above. */
SYSCTL_PROC(_vm, OID_AUTO, shared_region_control,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, rsr_version_sysctl, "I", "");
1704 #endif /* XNU_TARGET_OS_OSX */
1705
1706 /*
1707 * exec_mach_imgact
1708 *
1709 * Image activator for mach-o 1.0 binaries.
1710 *
1711 * Parameters; struct image_params * image parameter block
1712 *
1713 * Returns: -1 not a fat binary (keep looking)
1714 * -2 Success: encapsulated binary: reread
1715 * >0 Failure: error number
1716 * EBADARCH Mach-o binary, but with an unrecognized
1717 * architecture
1718 * ENOMEM No memory for child process after -
1719 * can only happen after vfork()
1720 *
1721 * Important: This image activator is NOT byte order neutral.
1722 *
1723 * Note: A return value other than -1 indicates subsequent image
1724 * activators should not be given the opportunity to attempt
1725 * to activate the image.
1726 */
1727 static int
exec_mach_imgact(struct image_params * imgp)1728 exec_mach_imgact(struct image_params *imgp)
1729 {
1730 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
1731 proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1732 int error = 0;
1733 task_t task;
1734 task_t new_task = NULL; /* protected by vfexec */
1735 thread_t thread;
1736 struct uthread *uthread;
1737 vm_map_switch_context_t switch_ctx;
1738 vm_map_t old_map = VM_MAP_NULL;
1739 vm_map_t map = VM_MAP_NULL;
1740 load_return_t lret;
1741 load_result_t load_result = {};
1742 struct _posix_spawnattr *psa = NULL;
1743 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
1744 const int vfexec = 0;
1745 int exec = (imgp->ip_flags & IMGPF_EXEC);
1746 os_reason_t exec_failure_reason = OS_REASON_NULL;
1747 boolean_t reslide = FALSE;
1748 char * userspace_coredump_name = NULL;
1749
1750 /*
1751 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
1752 * is a reserved field on the end, so for the most part, we can
1753 * treat them as if they were identical. Reverse-endian Mach-O
1754 * binaries are recognized but not compatible.
1755 */
1756 if ((mach_header->magic == MH_CIGAM) ||
1757 (mach_header->magic == MH_CIGAM_64)) {
1758 error = EBADARCH;
1759 goto bad;
1760 }
1761
1762 if ((mach_header->magic != MH_MAGIC) &&
1763 (mach_header->magic != MH_MAGIC_64)) {
1764 error = -1;
1765 goto bad;
1766 }
1767
1768 if (mach_header->filetype != MH_EXECUTE) {
1769 error = -1;
1770 goto bad;
1771 }
1772
1773 if (imgp->ip_origcputype != 0) {
1774 /* Fat header previously had an idea about this thin file */
1775 if (imgp->ip_origcputype != mach_header->cputype ||
1776 imgp->ip_origcpusubtype != mach_header->cpusubtype) {
1777 error = EBADARCH;
1778 goto bad;
1779 }
1780 } else {
1781 imgp->ip_origcputype = mach_header->cputype;
1782 imgp->ip_origcpusubtype = mach_header->cpusubtype;
1783 }
1784
1785 task = current_task();
1786 thread = current_thread();
1787 uthread = get_bsdthread_info(thread);
1788
1789 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) {
1790 imgp->ip_flags |= IMGPF_IS_64BIT_ADDR | IMGPF_IS_64BIT_DATA;
1791 }
1792
1793
1794 /* If posix_spawn binprefs exist, respect those prefs. */
1795 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1796 if (psa != NULL && psa->psa_binprefs[0] != 0) {
1797 int pr = 0;
1798 for (pr = 0; pr < NBINPREFS; pr++) {
1799 cpu_type_t pref = psa->psa_binprefs[pr];
1800 cpu_subtype_t subpref = psa->psa_subcpuprefs[pr];
1801
1802 if (pref == 0) {
1803 /* No suitable arch in the pref list */
1804 error = EBADARCH;
1805 goto bad;
1806 }
1807
1808 if (pref == CPU_TYPE_ANY) {
1809 /* Jump to regular grading */
1810 goto grade;
1811 }
1812
1813 if (binary_match(CPU_ARCH_MASK, pref, subpref,
1814 imgp->ip_origcputype, imgp->ip_origcpusubtype)) {
1815 goto grade;
1816 }
1817 }
1818 error = EBADARCH;
1819 goto bad;
1820 }
1821 grade:
1822 if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK,
1823 imgp->ip_origcpusubtype & CPU_SUBTYPE_MASK, TRUE)) {
1824 error = EBADARCH;
1825 goto bad;
1826 }
1827
1828 if (validate_potential_simulator_binary(imgp->ip_origcputype, imgp,
1829 imgp->ip_arch_offset, imgp->ip_arch_size) != LOAD_SUCCESS) {
1830 #if __x86_64__
1831 const char *excpath;
1832 error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
1833 os_log_error(OS_LOG_DEFAULT, "Unsupported 32-bit executable: \"%s\"", (error) ? imgp->ip_vp->v_name : excpath);
1834 #endif
1835 error = EBADARCH;
1836 goto bad;
1837 }
1838
1839 #if defined(HAS_APPLE_PAC)
1840 assert(mach_header->cputype == CPU_TYPE_ARM64
1841 );
1842
1843 if ((mach_header->cputype == CPU_TYPE_ARM64 &&
1844 arm64_cpusubtype_uses_ptrauth(mach_header->cpusubtype))
1845 ) {
1846 imgp->ip_flags &= ~IMGPF_NOJOP;
1847 } else {
1848 imgp->ip_flags |= IMGPF_NOJOP;
1849 }
1850 #endif
1851
1852 /* Copy in arguments/environment from the old process */
1853 error = exec_extract_strings(imgp);
1854 if (error) {
1855 goto bad;
1856 }
1857
1858 AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
1859 imgp->ip_endargv - imgp->ip_startargv);
1860 AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
1861 imgp->ip_endenvv - imgp->ip_endargv);
1862
1863 /* reset local idea of thread, uthread, task */
1864 thread = imgp->ip_new_thread;
1865 uthread = get_bsdthread_info(thread);
1866 task = new_task = get_threadtask(thread);
1867
1868 /*
1869 * Load the Mach-O file.
1870 *
1871 * NOTE: An error after this point indicates we have potentially
1872 * destroyed or overwritten some process state while attempting an
1873 * execve() following a vfork(), which is an unrecoverable condition.
1874 * We send the new process an immediate SIGKILL to avoid it executing
1875 * any instructions in the mutated address space. For true spawns,
1876 * this is not the case, and "too late" is still not too late to
1877 * return an error code to the parent process.
1878 */
1879
1880 /*
1881 * Actually load the image file we previously decided to load.
1882 */
1883 lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
1884 if (lret != LOAD_SUCCESS) {
1885 error = load_return_to_errno(lret);
1886
1887 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
1888 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO, 0, 0);
1889 if (lret == LOAD_BADMACHO_UPX) {
1890 set_proc_name(imgp, p);
1891 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_UPX);
1892 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
1893 } else {
1894 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
1895
1896 if (bootarg_execfailurereports) {
1897 set_proc_name(imgp, p);
1898 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
1899 }
1900 }
1901
1902 exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
1903
1904 goto badtoolate;
1905 }
1906
1907 assert(imgp->ip_free_map == NULL);
1908
1909 /*
1910 * ERROR RECOVERY
1911 *
1912 * load_machfile() returned the new VM map ("map") but we haven't
1913 * committed to it yet.
1914 * Any error path between here and the point where we commit to using
1915 * the new "map" (with swap_task_map()) should deallocate "map".
1916 */
1917
1918 #ifndef KASAN
1919 /*
1920 * Security: zone sanity checks on fresh boot or initproc re-exec.
1921 * launchd by design does not tear down its own service port on USR (rdar://72797967),
1922 * which means here is the earliest point we can assert on empty service port label zone,
1923 * after load_machfile() above terminates old launchd's IPC space.
1924 *
1925 * Disable on KASAN builds since zone_size_allocated() accounts for elements
1926 * under quarantine.
1927 */
1928 if (task_pid(task) == 1) {
1929 zone_userspace_reboot_checks();
1930 }
1931 #endif
1932
1933 proc_lock(p);
1934 p->p_cputype = imgp->ip_origcputype;
1935 p->p_cpusubtype = imgp->ip_origcpusubtype;
1936 proc_setplatformdata(p, load_result.ip_platform, load_result.lr_min_sdk, load_result.lr_sdk);
1937
1938 vm_map_set_size_limit(map, proc_limitgetcur(p, RLIMIT_AS));
1939 vm_map_set_data_limit(map, proc_limitgetcur(p, RLIMIT_DATA));
1940 vm_map_set_user_wire_limit(map, (vm_size_t)proc_limitgetcur(p, RLIMIT_MEMLOCK));
1941
1942 #if XNU_TARGET_OS_OSX
1943 if (proc_platform(p) == PLATFORM_IOS) {
1944 assert(vm_map_is_alien(map));
1945 } else {
1946 assert(!vm_map_is_alien(map));
1947 }
1948 #endif /* XNU_TARGET_OS_OSX */
1949 proc_unlock(p);
1950
1951 /*
1952 * Setup runtime mitigations.
1953 */
1954 if ((error = imgact_setup_runtime_mitigations(imgp, &load_result, current_task(), new_task, map, p)) != 0) {
1955 set_proc_name(imgp, p);
1956 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
1957 if (bootarg_execfailurereports) {
1958 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
1959 exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
1960 }
1961 /* release new address space since we won't use it */
1962 imgp->ip_free_map = map;
1963 map = VM_MAP_NULL;
1964 goto badtoolate;
1965 }
1966
1967 /*
1968 * Set code-signing flags if this binary is signed, or if parent has
1969 * requested them on exec.
1970 */
1971 if (load_result.csflags & CS_VALID) {
1972 imgp->ip_csflags |= load_result.csflags &
1973 (CS_VALID | CS_SIGNED | CS_DEV_CODE | CS_LINKER_SIGNED |
1974 CS_HARD | CS_KILL | CS_RESTRICT | CS_ENFORCEMENT | CS_REQUIRE_LV |
1975 CS_FORCED_LV | CS_ENTITLEMENTS_VALIDATED | CS_NO_UNTRUSTED_HELPERS | CS_RUNTIME |
1976 CS_ENTITLEMENT_FLAGS |
1977 CS_EXEC_SET_HARD | CS_EXEC_SET_KILL | CS_EXEC_SET_ENFORCEMENT);
1978 } else {
1979 imgp->ip_csflags &= ~CS_VALID;
1980 }
1981
1982 if (proc_getcsflags(p) & CS_EXEC_SET_HARD) {
1983 imgp->ip_csflags |= CS_HARD;
1984 }
1985 if (proc_getcsflags(p) & CS_EXEC_SET_KILL) {
1986 imgp->ip_csflags |= CS_KILL;
1987 }
1988 if (proc_getcsflags(p) & CS_EXEC_SET_ENFORCEMENT) {
1989 imgp->ip_csflags |= CS_ENFORCEMENT;
1990 }
1991 if (proc_getcsflags(p) & CS_EXEC_INHERIT_SIP) {
1992 if (proc_getcsflags(p) & CS_INSTALLER) {
1993 imgp->ip_csflags |= CS_INSTALLER;
1994 }
1995 if (proc_getcsflags(p) & CS_DATAVAULT_CONTROLLER) {
1996 imgp->ip_csflags |= CS_DATAVAULT_CONTROLLER;
1997 }
1998 if (proc_getcsflags(p) & CS_NVRAM_UNRESTRICTED) {
1999 imgp->ip_csflags |= CS_NVRAM_UNRESTRICTED;
2000 }
2001 }
2002
2003 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
2004 /*
2005 * ptrauth version 0 is a preview ABI. Developers can opt into running
2006 * their own arm64e binaries for local testing, with the understanding
2007 * that future OSes may break ABI.
2008 */
2009 if ((imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E &&
2010 CPU_SUBTYPE_ARM64_PTR_AUTH_VERSION(imgp->ip_origcpusubtype) == 0 &&
2011 !load_result.platform_binary &&
2012 !bootarg_arm64e_preview_abi) {
2013 static bool logged_once = false;
2014 set_proc_name(imgp, p);
2015
2016 printf("%s: not running binary \"%s\" built against preview arm64e ABI\n", __func__, p->p_name);
2017 if (!os_atomic_xchg(&logged_once, true, relaxed)) {
2018 printf("%s: (to allow this, add \"-arm64e_preview_abi\" to boot-args)\n", __func__);
2019 }
2020
2021 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
2022 if (bootarg_execfailurereports) {
2023 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2024 exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
2025 }
2026
2027 /* release new address space since we won't use it */
2028 imgp->ip_free_map = map;
2029 map = VM_MAP_NULL;
2030 goto badtoolate;
2031 }
2032
2033 if ((imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E &&
2034 imgp->ip_origcputype == CPU_TYPE_ARM64 &&
2035 load_result.platform_binary &&
2036 (imgp->ip_flags & IMGPF_DRIVER) != 0) {
2037 set_proc_name(imgp, p);
2038 printf("%s: disallowing arm64 platform driverkit binary \"%s\", should be arm64e\n", __func__, p->p_name);
2039 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
2040 if (bootarg_execfailurereports) {
2041 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2042 exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
2043 }
2044
2045 /* release new address space since we won't use it */
2046 imgp->ip_free_map = map;
2047 map = VM_MAP_NULL;
2048 goto badtoolate;
2049 }
2050 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
2051
2052
2053
2054 /*
2055 * Set up the shared cache region in the new process.
2056 *
2057 * Normally there is a single shared region per architecture.
2058 * However on systems with Pointer Authentication, we can create
2059 * multiple shared caches with the amount of sharing determined
2060 * by team-id or entitlement. Inherited shared region IDs are used
2061 * for system processes that need to match and be able to inspect
2062 * a pre-existing task.
2063 */
2064 int cpu_subtype = 0; /* all cpu_subtypes use the same shared region */
2065 #if __has_feature(ptrauth_calls)
2066 char *shared_region_id = NULL;
2067 size_t len;
2068 char *base;
2069 const char *cbase;
2070 #define HARDENED_RUNTIME_CONTENT_ID "C-"
2071 #define TEAM_ID_PREFIX "T-"
2072 #define ENTITLE_PREFIX "E-"
2073 #define SR_PREFIX_LEN 2
2074 #define SR_ENTITLEMENT "com.apple.pac.shared_region_id"
2075
2076 if (cpu_type() == CPU_TYPE_ARM64 &&
2077 arm64_cpusubtype_uses_ptrauth(p->p_cpusubtype) &&
2078 (imgp->ip_flags & IMGPF_NOJOP) == 0) {
2079 assertf(p->p_cputype == CPU_TYPE_ARM64,
2080 "p %p cpu_type() 0x%x p->p_cputype 0x%x p->p_cpusubtype 0x%x",
2081 p, cpu_type(), p->p_cputype, p->p_cpusubtype);
2082
2083 /*
2084 * arm64e uses pointer authentication, so request a separate
2085 * shared region for this CPU subtype.
2086 */
2087 cpu_subtype = p->p_cpusubtype & ~CPU_SUBTYPE_MASK;
2088
2089 /*
2090 * Determine which shared cache to select based on being told,
2091 * matching a team-id or matching an entitlement.
2092 */
2093 if (load_result.hardened_browser & BrowserWebContentEntitlementMask) {
2094 len = sizeof(HARDENED_RUNTIME_CONTENT_ID);
2095 shared_region_id = kalloc_data(len, Z_WAITOK | Z_NOFAIL);
2096 strlcpy(shared_region_id, HARDENED_RUNTIME_CONTENT_ID, len);
2097 } else if (imgp->ip_inherited_shared_region_id) {
2098 len = strlen(imgp->ip_inherited_shared_region_id);
2099 shared_region_id = kalloc_data(len + 1, Z_WAITOK | Z_NOFAIL);
2100 memcpy(shared_region_id, imgp->ip_inherited_shared_region_id, len + 1);
2101 } else if ((cbase = get_teamid_for_shared_region(imgp)) != NULL) {
2102 len = strlen(cbase);
2103 if (vm_shared_region_per_team_id) {
2104 shared_region_id = kalloc_data(len + SR_PREFIX_LEN + 1,
2105 Z_WAITOK | Z_NOFAIL);
2106 memcpy(shared_region_id, TEAM_ID_PREFIX, SR_PREFIX_LEN);
2107 memcpy(shared_region_id + SR_PREFIX_LEN, cbase, len + 1);
2108 }
2109 } else if ((base = IOVnodeGetEntitlement(imgp->ip_vp,
2110 (int64_t)imgp->ip_arch_offset, SR_ENTITLEMENT)) != NULL) {
2111 len = strlen(base);
2112 if (vm_shared_region_by_entitlement) {
2113 shared_region_id = kalloc_data(len + SR_PREFIX_LEN + 1,
2114 Z_WAITOK | Z_NOFAIL);
2115 memcpy(shared_region_id, ENTITLE_PREFIX, SR_PREFIX_LEN);
2116 memcpy(shared_region_id + SR_PREFIX_LEN, base, len + 1);
2117 }
2118 /* Discard the copy of the entitlement */
2119 kfree_data(base, len + 1);
2120 }
2121 }
2122
2123 if (imgp->ip_flags & IMGPF_RESLIDE) {
2124 reslide = TRUE;
2125 }
2126
2127 /* use "" as the default shared_region_id */
2128 if (shared_region_id == NULL) {
2129 shared_region_id = kalloc_data(1, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2130 }
2131
2132 /* ensure there's a unique pointer signing key for this shared_region_id */
2133 shared_region_key_alloc(shared_region_id,
2134 imgp->ip_inherited_shared_region_id != NULL, imgp->ip_inherited_jop_pid);
2135 task_set_shared_region_id(task, shared_region_id);
2136 shared_region_id = NULL;
2137 #endif /* __has_feature(ptrauth_calls) */
2138
2139 #if CONFIG_ROSETTA
2140 if (imgp->ip_flags & IMGPF_ROSETTA) {
2141 OSBitOrAtomic(P_TRANSLATED, &p->p_flag);
2142 } else if (p->p_flag & P_TRANSLATED) {
2143 OSBitAndAtomic(~P_TRANSLATED, &p->p_flag);
2144 }
2145 #endif
2146
2147 int cputype = cpu_type();
2148
2149 uint32_t rsr_version = 0;
2150 #if XNU_TARGET_OS_OSX
2151 if (vnode_is_rsr(imgp->ip_vp)) {
2152 rsr_version = rsr_get_version();
2153 os_atomic_or(&p->p_ladvflag, P_RSR, relaxed);
2154 os_atomic_or(&p->p_vfs_iopolicy, P_VFS_IOPOLICY_ALTLINK, relaxed);
2155 }
2156 #endif /* XNU_TARGET_OS_OSX */
2157
2158 error = vm_map_exec(map, task, load_result.is_64bit_addr,
2159 (void *)p->p_fd.fd_rdir, cputype, cpu_subtype, reslide,
2160 (imgp->ip_flags & IMGPF_DRIVER) != 0,
2161 rsr_version);
2162
2163 if (error) {
2164 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2165 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_MAP_EXEC_FAILURE, 0, 0);
2166
2167 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_MAP_EXEC_FAILURE);
2168 if (bootarg_execfailurereports) {
2169 set_proc_name(imgp, p);
2170 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2171 exec_failure_reason->osr_flags |= OS_REASON_FLAG_CONSISTENT_FAILURE;
2172 }
2173 /* release new address space since we won't use it */
2174 imgp->ip_free_map = map;
2175 map = VM_MAP_NULL;
2176 goto badtoolate;
2177 }
2178
2179 /*
2180 * Close file descriptors which specify close-on-exec.
2181 */
2182 fdt_exec(p, vfs_context_ucred(imgp->ip_vfs_context),
2183 psa != NULL ? psa->psa_flags : 0, imgp->ip_new_thread, exec);
2184
2185 /*
2186 * deal with set[ug]id.
2187 */
2188 error = exec_handle_sugid(imgp);
2189 if (error) {
2190 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2191 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, 0, 0);
2192
2193 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE);
2194 if (bootarg_execfailurereports) {
2195 set_proc_name(imgp, p);
2196 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2197 }
2198
2199 /* release new address space since we won't use it */
2200 imgp->ip_free_map = map;
2201 map = VM_MAP_NULL;
2202 goto badtoolate;
2203 }
2204
2205 /*
2206 * Commit to new map.
2207 *
2208 * Swap the new map for the old for target task, which consumes
2209 * our new map reference but each leaves us responsible for the
2210 * old_map reference. That lets us get off the pmap associated
2211 * with it, and then we can release it.
2212 *
2213 * The map needs to be set on the target task which is different
2214 * than current task, thus swap_task_map is used instead of
2215 * vm_map_switch.
2216 */
2217 old_map = swap_task_map(task, thread, map);
2218 #if MACH_ASSERT
2219 /*
2220 * Reset the pmap's process info to prevent ledger checks
2221 * which might fail due to the ledgers being shared between
2222 * the old and new pmaps.
2223 */
2224 vm_map_pmap_set_process(old_map, -1, "<old_map>");
2225 #endif /* MACH_ASSERT */
2226 imgp->ip_free_map = old_map;
2227 old_map = NULL;
2228
2229 lret = activate_exec_state(task, p, thread, &load_result);
2230 if (lret != KERN_SUCCESS) {
2231 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2232 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE, 0, 0);
2233
2234 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE);
2235 if (bootarg_execfailurereports) {
2236 set_proc_name(imgp, p);
2237 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2238 }
2239
2240 goto badtoolate;
2241 }
2242
2243 /*
2244 * deal with voucher on exec-calling thread.
2245 */
2246 if (imgp->ip_new_thread == NULL) {
2247 thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
2248 }
2249
2250 /* Make sure we won't interrupt ourself signalling a partial process */
2251 if (!vfexec && !spawn && (p->p_lflag & P_LTRACED)) {
2252 psignal(p, SIGTRAP);
2253 }
2254
2255 if (load_result.unixproc &&
2256 create_unix_stack(get_task_map(task),
2257 &load_result,
2258 p) != KERN_SUCCESS) {
2259 error = load_return_to_errno(LOAD_NOSPACE);
2260
2261 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2262 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC, 0, 0);
2263
2264 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC);
2265 if (bootarg_execfailurereports) {
2266 set_proc_name(imgp, p);
2267 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2268 }
2269
2270 goto badtoolate;
2271 }
2272
2273 /*
2274 * The load result will have already been munged by AMFI to include the
2275 * platform binary flag if boot-args dictated it (AMFI will mark anything
2276 * that doesn't go through the upcall path as a platform binary if its
2277 * enforcement is disabled).
2278 */
2279 if (load_result.platform_binary) {
2280 if (cs_debug) {
2281 printf("setting platform binary on task: pid = %d\n", proc_getpid(p));
2282 }
2283
2284 /*
2285 * We must use 'task' here because the proc's task has not yet been
2286 * switched to the new one.
2287 */
2288 task_set_platform_binary(task, TRUE);
2289 } else {
2290 if (cs_debug) {
2291 printf("clearing platform binary on task: pid = %d\n", proc_getpid(p));
2292 }
2293
2294 task_set_platform_binary(task, FALSE);
2295 }
2296
2297 #if XNU_TARGET_OS_OSX
2298 /* Disable mach hardening for all 1P tasks which load 3P plugins */
2299 if (imgp->ip_flags & IMGPF_3P_PLUGINS) {
2300 if (cs_debug) {
2301 printf("Disabling some mach hardening on task due to 3P plugins: pid = %d\n", proc_getpid(p));
2302 }
2303 task_disable_mach_hardening(task);
2304 }
2305 #if DEVELOPMENT || DEBUG
2306 /* Disable mach hardening for all tasks if amfi_get_out_of_my_way is set.
2307 * Customers will have to turn SIP off to use this boot-arg, and so this is
2308 * only needed internally since we disable this feature when SIP is off. */
2309 if (AMFI_bootarg_disable_mach_hardening) {
2310 if (cs_debug) {
2311 printf("Disabling some mach hardening on task due to AMFI boot-args: pid = %d\n", proc_getpid(p));
2312 }
2313 task_disable_mach_hardening(task);
2314 }
2315 #endif /* DEVELOPMENT || DEBUG */
2316 #endif /* XNU_TARGET_OS_OSX */
2317
2318 error = exec_add_apple_strings(imgp, &load_result, task); /* copies out main thread port */
2319
2320 if (error) {
2321 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2322 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT, 0, 0);
2323
2324 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT);
2325 if (bootarg_execfailurereports) {
2326 set_proc_name(imgp, p);
2327 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2328 }
2329 goto badtoolate;
2330 }
2331
2332 /* Switch to target task's map to copy out strings */
2333 switch_ctx = vm_map_switch_to(get_task_map(task));
2334
2335 if (load_result.unixproc) {
2336 user_addr_t ap;
2337
2338 /*
2339 * Copy the strings area out into the new process address
2340 * space.
2341 */
2342 ap = p->user_stack;
2343 error = exec_copyout_strings(imgp, &ap);
2344 if (error) {
2345 vm_map_switch_back(switch_ctx);
2346
2347 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2348 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, 0, 0);
2349
2350 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS);
2351 if (bootarg_execfailurereports) {
2352 set_proc_name(imgp, p);
2353 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2354 }
2355 goto badtoolate;
2356 }
2357 /* Set the stack */
2358 thread_setuserstack(thread, ap);
2359 }
2360
2361 if (load_result.dynlinker || load_result.is_rosetta) {
2362 user_addr_t ap;
2363 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
2364
2365 /* Adjust the stack */
2366 ap = thread_adjuserstack(thread, -new_ptr_size);
2367 error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
2368
2369 if (error) {
2370 vm_map_switch_back(switch_ctx);
2371
2372 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2373 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, 0, 0);
2374
2375 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER);
2376 if (bootarg_execfailurereports) {
2377 set_proc_name(imgp, p);
2378 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2379 }
2380 goto badtoolate;
2381 }
2382 error = task_set_dyld_info(task, load_result.all_image_info_addr,
2383 load_result.all_image_info_size, false);
2384 if (error) {
2385 vm_map_switch_back(switch_ctx);
2386
2387 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2388 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_SET_DYLD_INFO, 0, 0);
2389
2390 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SET_DYLD_INFO);
2391 if (bootarg_execfailurereports) {
2392 set_proc_name(imgp, p);
2393 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2394 }
2395 error = EINVAL;
2396 goto badtoolate;
2397 }
2398 } else {
2399 /*
2400 * No dyld or rosetta loaded, set the TF_DYLD_ALL_IMAGE_FINAL bit on task.
2401 */
2402 error = task_set_dyld_info(task, MACH_VM_MIN_ADDRESS,
2403 0, true);
2404 if (error) {
2405 vm_map_switch_back(switch_ctx);
2406
2407 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2408 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_SET_DYLD_INFO, 0, 0);
2409
2410 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SET_DYLD_INFO);
2411 if (bootarg_execfailurereports) {
2412 set_proc_name(imgp, p);
2413 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2414 }
2415 error = EINVAL;
2416 goto badtoolate;
2417 }
2418 }
2419
2420 #if CONFIG_ROSETTA
2421 if (load_result.is_rosetta) {
2422 // Add an fd for the executable file for Rosetta's use
2423 int main_binary_fd;
2424 struct fileproc *fp;
2425
2426 error = falloc_exec(p, imgp->ip_vfs_context, &fp, &main_binary_fd);
2427 if (error) {
2428 vm_map_switch_back(switch_ctx);
2429
2430 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2431 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC, 0, 0);
2432
2433 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC);
2434 if (bootarg_execfailurereports) {
2435 set_proc_name(imgp, p);
2436 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2437 }
2438 goto badtoolate;
2439 }
2440
2441 error = VNOP_OPEN(imgp->ip_vp, FREAD, imgp->ip_vfs_context);
2442 if (error) {
2443 vm_map_switch_back(switch_ctx);
2444
2445 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2446 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC, 0, 0);
2447
2448 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_MAIN_FD_ALLOC);
2449 if (bootarg_execfailurereports) {
2450 set_proc_name(imgp, p);
2451 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2452 }
2453 goto cleanup_rosetta_fp;
2454 }
2455
2456 fp->fp_glob->fg_flag = FREAD;
2457 fp->fp_glob->fg_ops = &vnops;
2458 fp_set_data(fp, imgp->ip_vp);
2459
2460 proc_fdlock(p);
2461 procfdtbl_releasefd(p, main_binary_fd, NULL);
2462 fp_drop(p, main_binary_fd, fp, 1);
2463 proc_fdunlock(p);
2464
2465 vnode_ref(imgp->ip_vp);
2466
2467 // Pass the dyld load address, main binary fd, and dyld fd on the stack
2468 uint64_t ap = thread_adjuserstack(thread, -24);
2469
2470 error = copyoutptr((user_addr_t)load_result.dynlinker_fd, ap, 8);
2471 if (error) {
2472 vm_map_switch_back(switch_ctx);
2473
2474 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2475 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA, 0, 0);
2476
2477 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA);
2478 if (bootarg_execfailurereports) {
2479 set_proc_name(imgp, p);
2480 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2481 }
2482 goto cleanup_rosetta_fp;
2483 }
2484
2485 error = copyoutptr(load_result.dynlinker_mach_header, ap + 8, 8);
2486 if (error) {
2487 vm_map_switch_back(switch_ctx);
2488
2489 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2490 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA, 0, 0);
2491
2492 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA);
2493 if (bootarg_execfailurereports) {
2494 set_proc_name(imgp, p);
2495 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2496 }
2497 goto cleanup_rosetta_fp;
2498 }
2499
2500 error = copyoutptr((user_addr_t)main_binary_fd, ap + 16, 8);
2501 if (error) {
2502 vm_map_switch_back(switch_ctx);
2503
2504 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
2505 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA, 0, 0);
2506
2507 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_ROSETTA);
2508 if (bootarg_execfailurereports) {
2509 set_proc_name(imgp, p);
2510 exec_failure_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
2511 }
2512 goto cleanup_rosetta_fp;
2513 }
2514
2515 cleanup_rosetta_fp:
2516 if (error) {
2517 fp_free(p, load_result.dynlinker_fd, load_result.dynlinker_fp);
2518 fp_free(p, main_binary_fd, fp);
2519 goto badtoolate;
2520 }
2521 }
2522
2523 #endif
2524
2525 /* Avoid immediate VM faults back into kernel */
2526 exec_prefault_data(p, imgp, &load_result);
2527
2528 vm_map_switch_back(switch_ctx);
2529
2530 /*
2531 * Reset signal state.
2532 */
2533 execsigs(p, thread);
2534
2535 /*
2536 * need to cancel async IO requests that can be cancelled and wait for those
2537 * already active. MAY BLOCK!
2538 */
2539 _aio_exec( p );
2540
2541 #if SYSV_SHM
2542 /* FIXME: Till vmspace inherit is fixed: */
2543 if (!vfexec && p->vm_shm) {
2544 shmexec(p);
2545 }
2546 #endif
2547 #if SYSV_SEM
2548 /* Clean up the semaphores */
2549 semexit(p);
2550 #endif
2551
2552 /*
2553 * Remember file name for accounting.
2554 */
2555 p->p_acflag &= ~AFORK;
2556
2557 set_proc_name(imgp, p);
2558
2559 #if CONFIG_SECLUDED_MEMORY
2560 if (secluded_for_apps &&
2561 load_result.platform_binary) {
2562 if (strncmp(p->p_name,
2563 "Camera",
2564 sizeof(p->p_name)) == 0) {
2565 task_set_could_use_secluded_mem(task, TRUE);
2566 } else {
2567 task_set_could_use_secluded_mem(task, FALSE);
2568 }
2569 if (strncmp(p->p_name,
2570 "mediaserverd",
2571 sizeof(p->p_name)) == 0) {
2572 task_set_could_also_use_secluded_mem(task, TRUE);
2573 }
2574 if (strncmp(p->p_name,
2575 "cameracaptured",
2576 sizeof(p->p_name)) == 0) {
2577 task_set_could_also_use_secluded_mem(task, TRUE);
2578 }
2579 }
2580 #endif /* CONFIG_SECLUDED_MEMORY */
2581
2582 #if __arm64__
2583 if (load_result.legacy_footprint) {
2584 task_set_legacy_footprint(task);
2585 }
2586 #endif /* __arm64__ */
2587
2588 pal_dbg_set_task_name(task);
2589
2590 #if DEVELOPMENT || DEBUG
2591 /*
 * Update the pid and proc name for the importance base, if any
2593 */
2594 task_importance_update_owner_info(task);
2595 #endif
2596
2597 proc_setexecutableuuid(p, &load_result.uuid[0]);
2598
2599 #if CONFIG_DTRACE
2600 dtrace_proc_exec(p);
2601 #endif
2602
2603 if (kdebug_enable) {
2604 long args[4] = {};
2605
2606 uintptr_t fsid = 0, fileid = 0;
2607 if (imgp->ip_vattr) {
2608 uint64_t fsid64 = vnode_get_va_fsid(imgp->ip_vattr);
2609 fsid = (uintptr_t)fsid64;
2610 fileid = (uintptr_t)imgp->ip_vattr->va_fileid;
2611 // check for (unexpected) overflow and trace zero in that case
2612 if (fsid != fsid64 || fileid != imgp->ip_vattr->va_fileid) {
2613 fsid = fileid = 0;
2614 }
2615 }
2616 KERNEL_DEBUG_CONSTANT_IST1(TRACE_DATA_EXEC, proc_getpid(p), fsid, fileid, 0,
2617 (uintptr_t)thread_tid(thread));
2618
2619 extern void kdebug_proc_name_args(struct proc *proc, long args[static 4]);
2620 kdebug_proc_name_args(p, args);
2621 KERNEL_DEBUG_CONSTANT_IST1(TRACE_STRING_EXEC, args[0], args[1],
2622 args[2], args[3], (uintptr_t)thread_tid(thread));
2623 }
2624
2625
2626 /*
2627 * If posix_spawned with the START_SUSPENDED flag, stop the
2628 * process before it runs.
2629 */
2630 if (imgp->ip_px_sa != NULL) {
2631 psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
2632 if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
2633 proc_lock(p);
2634 p->p_stat = SSTOP;
2635 proc_unlock(p);
2636 (void) task_suspend_internal(task);
2637 }
2638 }
2639
2640 /*
2641 * mark as execed
2642 */
2643 OSBitOrAtomic(P_EXEC, &p->p_flag);
2644 proc_resetregister(p);
2645 if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
2646 proc_lock(p);
2647 p->p_lflag &= ~P_LPPWAIT;
2648 proc_unlock(p);
2649 wakeup((caddr_t)p->p_pptr);
2650 }
2651
2652 /*
2653 * Set up dext coredumps on kernel panic.
2654 * This requires the following:
2655 * - dext_panic_coredump=1 boot-arg (enabled by default on DEVELOPMENT, DEBUG and certain Seed builds)
2656 * - process must be a driver
2657 * - process must have the com.apple.private.enable-coredump-on-panic entitlement, and the
2658 * entitlement has a string value.
2659 * - process must have the com.apple.private.enable-coredump-on-panic-seed-privacy-approved
2660 * entitlement (Seed builds only).
2661 *
2662 * The core dump file name is formatted with the entitlement string value, followed by a hyphen
2663 * and the process PID.
2664 */
2665 if (enable_dext_coredumps_on_panic &&
2666 (imgp->ip_flags & IMGPF_DRIVER) != 0 &&
2667 (userspace_coredump_name = IOVnodeGetEntitlement(imgp->ip_vp,
2668 (int64_t)imgp->ip_arch_offset, USERSPACE_COREDUMP_PANIC_ENTITLEMENT)) != NULL) {
2669 size_t userspace_coredump_name_len = strlen(userspace_coredump_name);
2670
2671 char core_name[MACH_CORE_FILEHEADER_NAMELEN];
2672 /* 16 - NULL char - strlen("-") - maximum of 5 digits for pid */
2673 snprintf(core_name, MACH_CORE_FILEHEADER_NAMELEN, "%.9s-%d", userspace_coredump_name, proc_getpid(p));
2674
2675 kern_register_userspace_coredump(task, core_name);
2676
2677 /* Discard the copy of the entitlement */
2678 kfree_data(userspace_coredump_name, userspace_coredump_name_len + 1);
2679 userspace_coredump_name = NULL;
2680 }
2681
2682 goto done;
2683
2684 badtoolate:
2685 /* Don't allow child process to execute any instructions */
2686 if (!spawn) {
2687 {
2688 assert(exec_failure_reason != OS_REASON_NULL);
2689 if (bootarg_execfailurereports) {
2690 set_proc_name(imgp, current_proc());
2691 }
2692 psignal_with_reason(current_proc(), SIGKILL, exec_failure_reason);
2693 exec_failure_reason = OS_REASON_NULL;
2694
2695 if (exec) {
2696 /* Terminate the exec copy task */
2697 task_terminate_internal(task);
2698 }
2699 }
2700
2701 /* We can't stop this system call at this point, so just pretend we succeeded */
2702 error = 0;
2703 } else {
2704 os_reason_free(exec_failure_reason);
2705 exec_failure_reason = OS_REASON_NULL;
2706 }
2707
2708 done:
2709 if (load_result.threadstate) {
2710 kfree_data(load_result.threadstate, load_result.threadstate_sz);
2711 load_result.threadstate = NULL;
2712 }
2713
2714 bad:
2715 /* If we hit this, we likely would have leaked an exit reason */
2716 assert(exec_failure_reason == OS_REASON_NULL);
2717 return error;
2718 }
2719
2720
2721
2722
2723 /*
2724 * Our image activator table; this is the table of the image types we are
2725 * capable of loading. We list them in order of preference to ensure the
2726 * fastest image load speed.
2727 *
2728 * XXX hardcoded, for now; should use linker sets
2729 */
struct execsw {
	int(*const ex_imgact)(struct image_params *);	/* activator; returns 0, errno, or the -1/-2/-3 codes handled in exec_activate_image() */
	const char *ex_name;				/* human-readable image type name */
}const execsw[] = {
	{ exec_mach_imgact, "Mach-o Binary" },
	{ exec_fat_imgact, "Fat Binary" },
	{ exec_shell_imgact, "Interpreter Script" },
	{ NULL, NULL}	/* sentinel: terminates the activator scan */
};
2739
2740
2741 /*
2742 * exec_activate_image
2743 *
 * Description:	Iterate through the available image activators, and activate
 *		the image associated with the imgp structure.  We start with
 *		the activator for Mach-o binaries, followed by the one for Fat
 *		binaries, and finally the one for Interpreter scripts.
2748 *
2749 * Parameters: struct image_params * Image parameter block
2750 *
2751 * Returns: 0 Success
2752 * ENOEXEC No activator for image.
2753 * EBADEXEC The executable is corrupt/unknown
2754 * execargs_alloc:EINVAL Invalid argument
2755 * execargs_alloc:EACCES Permission denied
2756 * execargs_alloc:EINTR Interrupted function
2757 * execargs_alloc:ENOMEM Not enough space
2758 * exec_save_path:EFAULT Bad address
2759 * exec_save_path:ENAMETOOLONG Filename too long
2760 * exec_check_permissions:EACCES Permission denied
2761 * exec_check_permissions:ENOEXEC Executable file format error
2762 * exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
2763 * exec_check_permissions:???
2764 * namei:???
2765 * vn_rdwr:??? [anything vn_rdwr can return]
2766 * <ex_imgact>:??? [anything an imgact can return]
2767 * EDEADLK Process is being terminated
2768 */
static int
exec_activate_image(struct image_params *imgp)
{
	struct nameidata *ndp = NULL;
	const char *excpath;
	int error;
	int resid;
	int once = 1;	/* save SGUID-ness for interpreted files */
	int i;
	int itercount = 0;	/* bounds the encapsulated-binary retry loop */
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);

	/*
	 * For exec, the translock needs to be taken on old proc and not
	 * on new shadow proc.
	 */
	if (imgp->ip_flags & IMGPF_EXEC) {
		p = current_proc();
	}

	/* Allocate the argument/environment scratch area for this image. */
	error = execargs_alloc(imgp);
	if (error) {
		goto bad_notrans;
	}

	/* Copy in the executable path; excpath points at the saved copy. */
	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
	if (error) {
		goto bad_notrans;
	}

	/* Use excpath, which contains the copyin-ed exec path */
	DTRACE_PROC1(exec, uintptr_t, excpath);

	ndp = kalloc_type(struct nameidata, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);

again:
	/* Resolve the (possibly interpreter-updated) path to a vnode. */
	error = namei(ndp);
	if (error) {
		if (error == ERESTART) {
			error = EINTR;
		}
		goto bad_notrans;
	}
	imgp->ip_ndp = ndp;	/* successful namei(); call nameidone() later */
	imgp->ip_vp = ndp->ni_vp;	/* if set, need to vnode_put() at some point */

	/*
	 * Before we start the transition from binary A to binary B, make
	 * sure another thread hasn't started exiting the process.  We grab
	 * the proc lock to check p_lflag initially, and the transition
	 * mechanism ensures that the value doesn't change after we release
	 * the lock.
	 */
	proc_lock(p);
	if (p->p_lflag & P_LEXIT) {
		error = EDEADLK;
		proc_unlock(p);
		goto bad_notrans;
	}
	error = proc_transstart(p, 1, 0);
	proc_unlock(p);
	if (error) {
		goto bad_notrans;
	}

	error = exec_check_permissions(imgp);
	if (error) {
		goto bad;
	}

	/* Copy; avoid invocation of an interpreter overwriting the original */
	if (once) {
		once = 0;
		*imgp->ip_origvattr = *imgp->ip_vattr;
	}

	/* Read the first page of the image for the activators to inspect. */
	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
	    UIO_SYSSPACE, IO_NODELOCKED,
	    vfs_context_ucred(imgp->ip_vfs_context),
	    &resid, vfs_context_proc(imgp->ip_vfs_context));
	if (error) {
		goto bad;
	}

	/* Short read (file smaller than a page): zero-fill the remainder. */
	if (resid) {
		memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
	}

encapsulated_binary:
	/* Limit the number of iterations we will attempt on each binary */
	if (++itercount > EAI_ITERLIMIT) {
		error = EBADEXEC;
		goto bad;
	}
	/*
	 * Offer the image to each activator in execsw[] order until one
	 * claims it.  Activator return conventions: -1 = not claimed,
	 * -2 = encapsulated binary (retry), -3 = interpreter script,
	 * 0 = success, anything else = errno.
	 */
	error = -1;
	for (i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
		error = (*execsw[i].ex_imgact)(imgp);

		switch (error) {
		/* case -1: not claimed: continue */
		case -2:	/* Encapsulated binary, imgp->ip_XXX set for next iteration */
			goto encapsulated_binary;

		case -3:	/* Interpreter */
#if CONFIG_MACF
			/*
			 * Copy the script label for later use.  Note that
			 * the label can be different when the script is
			 * actually read by the interpreter.
			 */
			if (imgp->ip_scriptlabelp) {
				mac_vnode_label_free(imgp->ip_scriptlabelp);
				imgp->ip_scriptlabelp = NULL;
			}
			imgp->ip_scriptlabelp = mac_vnode_label_alloc(NULL);
			if (imgp->ip_scriptlabelp == NULL) {
				error = ENOMEM;
				break;
			}
			mac_vnode_label_copy(mac_vnode_label(imgp->ip_vp),
			    imgp->ip_scriptlabelp);

			/*
			 * Take a ref of the script vnode for later use.
			 */
			if (imgp->ip_scriptvp) {
				vnode_put(imgp->ip_scriptvp);
				imgp->ip_scriptvp = NULLVP;
			}
			if (vnode_getwithref(imgp->ip_vp) == 0) {
				imgp->ip_scriptvp = imgp->ip_vp;
			}
#endif

			nameidone(ndp);

			vnode_put(imgp->ip_vp);
			imgp->ip_vp = NULL;	/* already put */
			imgp->ip_ndp = NULL;	/* already nameidone */

			/* Use excpath, which exec_shell_imgact reset to the interpreter */
			NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
			    UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);

			/* End this transaction; "again" will start a fresh one. */
			proc_transend(p, 0);
			goto again;

		default:
			break;
		}
	}

	if (error == -1) {
		/* No activator claimed the image. */
		error = ENOEXEC;
	} else if (error == 0) {
		if (imgp->ip_flags & IMGPF_INTERPRET && ndp->ni_vp) {
			AUDIT_ARG(vnpath, ndp->ni_vp, ARG_VNODE2);
		}

		/*
		 * Call out to allow 3rd party notification of exec.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (kauth_authorize_fileop_has_listeners()) {
			kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
			    KAUTH_FILEOP_EXEC,
			    (uintptr_t)ndp->ni_vp, 0);
		}
	}
bad:
	proc_transend(p, 0);

bad_notrans:
	/* Common cleanup: exec args, pending namei state, and the nameidata. */
	if (imgp->ip_strings) {
		execargs_free(imgp);
	}
	if (imgp->ip_ndp) {
		nameidone(imgp->ip_ndp);
	}
	kfree_type(struct nameidata, ndp);

	return error;
}
2955
2956
2957 /*
2958 * exec_validate_spawnattr_policy
2959 *
2960 * Description: Validates the entitlements required to set the apptype.
2961 *
2962 * Parameters: int psa_apptype posix spawn attribute apptype
2963 *
2964 * Returns: 0 Success
2965 * EPERM Failure
2966 */
2967 static errno_t
exec_validate_spawnattr_policy(int psa_apptype)2968 exec_validate_spawnattr_policy(int psa_apptype)
2969 {
2970 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
2971 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
2972 if (proctype == POSIX_SPAWN_PROC_TYPE_DRIVER) {
2973 if (!IOCurrentTaskHasEntitlement(POSIX_SPAWN_ENTITLEMENT_DRIVER)) {
2974 return EPERM;
2975 }
2976 }
2977 }
2978
2979 return 0;
2980 }
2981
2982 /*
2983 * exec_handle_spawnattr_policy
2984 *
2985 * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task.
2986 *
2987 * Parameters: proc_t p process to apply attributes to
2988 * int psa_apptype posix spawn attribute apptype
2989 *
2990 * Returns: 0 Success
2991 */
2992 static errno_t
exec_handle_spawnattr_policy(proc_t p,thread_t thread,int psa_apptype,uint64_t psa_qos_clamp,task_role_t psa_darwin_role,struct exec_port_actions * port_actions)2993 exec_handle_spawnattr_policy(proc_t p, thread_t thread, int psa_apptype, uint64_t psa_qos_clamp,
2994 task_role_t psa_darwin_role, struct exec_port_actions *port_actions)
2995 {
2996 int apptype = TASK_APPTYPE_NONE;
2997 int qos_clamp = THREAD_QOS_UNSPECIFIED;
2998 task_role_t role = TASK_UNSPECIFIED;
2999
3000 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
3001 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
3002
3003 switch (proctype) {
3004 case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
3005 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
3006 break;
3007 case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
3008 apptype = TASK_APPTYPE_DAEMON_STANDARD;
3009 break;
3010 case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
3011 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
3012 break;
3013 case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
3014 apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
3015 break;
3016 case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
3017 apptype = TASK_APPTYPE_APP_DEFAULT;
3018 break;
3019 case POSIX_SPAWN_PROC_TYPE_APP_NONUI:
3020 apptype = TASK_APPTYPE_APP_NONUI;
3021 break;
3022 case POSIX_SPAWN_PROC_TYPE_DRIVER:
3023 apptype = TASK_APPTYPE_DRIVER;
3024 break;
3025 default:
3026 apptype = TASK_APPTYPE_NONE;
3027 /* TODO: Should an invalid value here fail the spawn? */
3028 break;
3029 }
3030 }
3031
3032 if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
3033 switch (psa_qos_clamp) {
3034 case POSIX_SPAWN_PROC_CLAMP_UTILITY:
3035 qos_clamp = THREAD_QOS_UTILITY;
3036 break;
3037 case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
3038 qos_clamp = THREAD_QOS_BACKGROUND;
3039 break;
3040 case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
3041 qos_clamp = THREAD_QOS_MAINTENANCE;
3042 break;
3043 default:
3044 qos_clamp = THREAD_QOS_UNSPECIFIED;
3045 /* TODO: Should an invalid value here fail the spawn? */
3046 break;
3047 }
3048 }
3049
3050 if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
3051 proc_darwin_role_to_task_role(psa_darwin_role, &role);
3052 }
3053
3054 if (apptype != TASK_APPTYPE_NONE ||
3055 qos_clamp != THREAD_QOS_UNSPECIFIED ||
3056 role != TASK_UNSPECIFIED ||
3057 port_actions->portwatch_count) {
3058 proc_set_task_spawnpolicy(proc_task(p), thread, apptype, qos_clamp, role,
3059 port_actions->portwatch_array, port_actions->portwatch_count);
3060 }
3061
3062 if (port_actions->registered_count) {
3063 if (_kernelrpc_mach_ports_register3(proc_task(p),
3064 port_actions->registered_array[0],
3065 port_actions->registered_array[1],
3066 port_actions->registered_array[2])) {
3067 return EINVAL;
3068 }
3069 /* mach_ports_register() consumed the array */
3070 bzero(port_actions->registered_array,
3071 sizeof(port_actions->registered_array));
3072 port_actions->registered_count = 0;
3073 }
3074
3075 return 0;
3076 }
3077
3078 static void
exec_port_actions_destroy(struct exec_port_actions * port_actions)3079 exec_port_actions_destroy(struct exec_port_actions *port_actions)
3080 {
3081 if (port_actions->excport_array) {
3082 for (uint32_t i = 0; i < port_actions->exception_port_count; i++) {
3083 ipc_port_t port = NULL;
3084 if ((port = port_actions->excport_array[i].port) != NULL) {
3085 ipc_port_release_send(port);
3086 }
3087 }
3088 kfree_type(struct exception_port_action_t, port_actions->exception_port_count,
3089 port_actions->excport_array);
3090 }
3091
3092 if (port_actions->portwatch_array) {
3093 for (uint32_t i = 0; i < port_actions->portwatch_count; i++) {
3094 ipc_port_t port = NULL;
3095 if ((port = port_actions->portwatch_array[i]) != NULL) {
3096 ipc_port_release_send(port);
3097 }
3098 }
3099 kfree_type(ipc_port_t, port_actions->portwatch_count,
3100 port_actions->portwatch_array);
3101 }
3102
3103 for (uint32_t i = 0; i < port_actions->registered_count; i++) {
3104 ipc_port_t port = NULL;
3105 if ((port = port_actions->registered_array[i]) != NULL) {
3106 ipc_port_release_send(port);
3107 }
3108 }
3109 }
3110
3111 /*
3112 * exec_handle_port_actions
3113 *
3114 * Description: Go through the _posix_port_actions_t contents,
3115 * calling task_set_special_port, task_set_exception_ports
3116 * and/or audit_session_spawnjoin for the current task.
3117 *
3118 * Parameters: struct image_params * Image parameter block
3119 *
3120 * Returns: 0 Success
3121 * EINVAL Failure
3122 * ENOTSUP Illegal posix_spawn attr flag was set
3123 */
static errno_t
exec_handle_port_actions(struct image_params *imgp,
    struct exec_port_actions *actions)
{
	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
#if CONFIG_AUDIT
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
#endif
	_ps_port_action_t *act = NULL;
	task_t task = get_threadtask(imgp->ip_new_thread);	/* target (new) task */
	ipc_port_t port = NULL;
	errno_t ret = 0;
	int i = 0, portwatch_i = 0, registered_i = 0, excport_i = 0;
	kern_return_t kr;
	boolean_t task_has_watchport_boost = task_has_watchports(current_task());
	boolean_t in_exec = (imgp->ip_flags & IMGPF_EXEC);
	int ptrauth_task_port_count = 0;

	/*
	 * First pass: validate the action list and count how many of each
	 * deferred action kind we will stash, enforcing the per-kind
	 * maximums before any port rights are copied in.
	 */
	for (i = 0; i < pacts->pspa_count; i++) {
		act = &pacts->pspa_actions[i];

		switch (act->port_type) {
		case PSPA_SPECIAL:
#if CONFIG_AUDIT
		case PSPA_AU_SESSION:
#endif
			break;
		case PSPA_EXCEPTION:
			if (++actions->exception_port_count > TASK_MAX_EXCEPTION_PORT_COUNT) {
				ret = EINVAL;
				goto done;
			}
			break;
		case PSPA_IMP_WATCHPORTS:
			if (++actions->portwatch_count > TASK_MAX_WATCHPORT_COUNT) {
				ret = EINVAL;
				goto done;
			}
			break;
		case PSPA_REGISTERED_PORTS:
			if (++actions->registered_count > TASK_PORT_REGISTER_MAX) {
				ret = EINVAL;
				goto done;
			}
			break;
		case PSPA_PTRAUTH_TASK_PORT:
			/* at most one ptrauth task port action is permitted */
			if (++ptrauth_task_port_count > 1) {
				ret = EINVAL;
				goto done;
			}
			break;
		default:
			ret = EINVAL;
			goto done;
		}
	}

	/* Allocate holding arrays for actions applied later in spawn. */
	if (actions->exception_port_count) {
		actions->excport_array = kalloc_type(struct exception_port_action_t,
		    actions->exception_port_count, Z_WAITOK | Z_ZERO);

		if (actions->excport_array == NULL) {
			ret = ENOMEM;
			goto done;
		}
	}
	if (actions->portwatch_count) {
		/* exec may not add watchports when the task already holds some */
		if (in_exec && task_has_watchport_boost) {
			ret = EINVAL;
			goto done;
		}
		actions->portwatch_array = kalloc_type(ipc_port_t,
		    actions->portwatch_count, Z_WAITOK | Z_ZERO);
		if (actions->portwatch_array == NULL) {
			ret = ENOMEM;
			goto done;
		}
	}

	/*
	 * Second pass: copy in each send right from the current task's
	 * IPC space, then either apply it immediately (special port,
	 * audit session) or stash it for later stages of spawn.
	 */
	for (i = 0; i < pacts->pspa_count; i++) {
		act = &pacts->pspa_actions[i];

		if (MACH_PORT_VALID(act->new_port)) {
			kr = ipc_typed_port_copyin_send(get_task_ipcspace(current_task()),
			    act->new_port, IOT_ANY, &port);

			if (kr != KERN_SUCCESS) {
				ret = EINVAL;
				goto done;
			}
		} else {
			/* it's NULL or DEAD */
			port = CAST_MACH_NAME_TO_PORT(act->new_port);
		}

		switch (act->port_type) {
		case PSPA_SPECIAL:
			kr = task_set_special_port(task, act->which, port);

			if (kr != KERN_SUCCESS) {
				ret = EINVAL;
			}
			break;

#if CONFIG_AUDIT
		case PSPA_AU_SESSION:
			ret = audit_session_spawnjoin(p, port);
			if (ret) {
				/* audit_session_spawnjoin() has already dropped the reference in case of error. */
				goto done;
			}

			break;
#endif
		case PSPA_EXCEPTION:
			assert(excport_i < actions->exception_port_count);
			/* hold on to this till end of spawn */
			actions->excport_array[excport_i].port_action = act;
			actions->excport_array[excport_i].port = port;
			excport_i++;
			break;
		case PSPA_IMP_WATCHPORTS:
			assert(portwatch_i < actions->portwatch_count);
			/* hold on to this till end of spawn */
			actions->portwatch_array[portwatch_i++] = port;
			break;
		case PSPA_REGISTERED_PORTS:
			assert(registered_i < actions->registered_count);
			/* hold on to this till end of spawn */
			actions->registered_array[registered_i++] = port;
			break;

		case PSPA_PTRAUTH_TASK_PORT:
#if (DEVELOPMENT || DEBUG)
#if defined(HAS_APPLE_PAC)
			{
				task_t ptr_auth_task = convert_port_to_task(port);

				if (ptr_auth_task == TASK_NULL) {
					ret = EINVAL;
					break;
				}

				imgp->ip_inherited_shared_region_id =
				    task_get_vm_shared_region_id_and_jop_pid(ptr_auth_task,
				    &imgp->ip_inherited_jop_pid);

				/* Deallocate task ref returned by convert_port_to_task */
				task_deallocate(ptr_auth_task);
			}
#endif /* HAS_APPLE_PAC */
#endif /* (DEVELOPMENT || DEBUG) */

			/* consume the port right in case of success */
			ipc_port_release_send(port);
			break;
		default:
			ret = EINVAL;
			break;
		}

		if (ret) {
			/* action failed, so release port resources */
			ipc_port_release_send(port);
			break;
		}
	}

done:
	/* note: ret != 0 implies at least one loop iteration ran, so act is set */
	if (0 != ret) {
		DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
	}
	return ret;
}
3298
3299
3300 /*
3301 * exec_handle_exception_port_actions
3302 *
3303 * Description: Go through the saved exception ports in exec_port_actions,
3304 * calling task_set_exception_ports for the current Task.
3305 * This must happen after image activation, and after exec_resettextvp()
3306 * because task_set_exception_ports checks the `TF_PLATFORM` bit and entitlements.
3307 *
3308 * Parameters: struct image_params * Image parameter block
3309 * struct exec_port_actions * Saved Port Actions
3310 *
3311 * Returns: 0 Success
3312 * EINVAL task_set_exception_ports failed
3313 */
3314 static errno_t
exec_handle_exception_port_actions(const struct image_params * imgp,const struct exec_port_actions * actions)3315 exec_handle_exception_port_actions(const struct image_params *imgp,
3316 const struct exec_port_actions *actions)
3317 {
3318 task_t task = get_threadtask(imgp->ip_new_thread);
3319
3320 for (int i = 0; i < actions->exception_port_count; i++) {
3321 ipc_port_t port = actions->excport_array[i].port;
3322 _ps_port_action_t *act = actions->excport_array[i].port_action;
3323 assert(act != NULL);
3324 kern_return_t kr = task_set_exception_ports(task, act->mask, port,
3325 act->behavior, act->flavor);
3326 if (kr != KERN_SUCCESS) {
3327 DTRACE_PROC1(spawn__exception__port__failure, mach_port_name_t, act->new_port);
3328 return EINVAL;
3329 }
3330 actions->excport_array[i].port = NULL;
3331 }
3332
3333 return 0;
3334 }
3335
3336
3337 /*
3338 * exec_handle_file_actions
3339 *
3340 * Description: Go through the _posix_file_actions_t contents applying the
3341 * open, close, and dup2 operations to the open file table for
3342 * the current process.
3343 *
3344 * Parameters: struct image_params * Image parameter block
3345 *
3346 * Returns: 0 Success
3347 * ???
3348 *
3349 * Note: Actions are applied in the order specified, with the credential
3350 * of the parent process. This is done to permit the parent
3351 * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
3352 * the child following operations the child may in fact not be
3353 * normally permitted to perform.
3354 */
static int
exec_handle_file_actions(struct image_params *imgp, short psa_flags)
{
	int error = 0;
	int action;
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	kauth_cred_t p_cred = vfs_context_ucred(imgp->ip_vfs_context);
	_posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
	int ival[2];            /* dummy retval for system calls */
#if CONFIG_AUDIT
	struct uthread *uthread = current_uthread();
#endif

	/* Apply each file action in order; the first failure aborts the walk. */
	for (action = 0; action < px_sfap->psfa_act_count; action++) {
		_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];

		switch (psfa->psfaa_type) {
		case PSFA_OPEN: {
			/*
			 * Open is different, in that it requires the use of
			 * a path argument, which is normally copied in from
			 * user space; because of this, we have to support an
			 * open from kernel space that passes an address space
			 * context of UIO_SYSSPACE, and casts the address
			 * argument to a user_addr_t.
			 */
			struct vnode_attr *vap;
			struct nameidata *ndp;
			int mode = psfa->psfaa_openargs.psfao_mode;
			int origfd;
			/* heap-allocate the two large lookup structures together */
			struct {
				struct vnode_attr va;
				struct nameidata nd;
			} *__open_data;

			__open_data = kalloc_type(typeof(*__open_data), Z_WAITOK | Z_ZERO);
			if (__open_data == NULL) {
				error = ENOMEM;
				break;
			}

			vap = &__open_data->va;
			ndp = &__open_data->nd;

			VATTR_INIT(vap);
			/* Mask off all but regular access permissions */
			mode = ((mode & ~p->p_fd.fd_cmask) & ALLPERMS) & ~S_ISTXT;
			VATTR_SET(vap, va_mode, mode & ACCESSPERMS);

			AUDIT_SUBCALL_ENTER(OPEN, p, uthread);

			NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
			    CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
			    imgp->ip_vfs_context);

			error = open1(imgp->ip_vfs_context, ndp,
			    psfa->psfaa_openargs.psfao_oflag,
			    vap, NULL, NULL, &origfd, AUTH_OPEN_NOAUTHFD);

			kfree_type(typeof(*__open_data), __open_data);

			AUDIT_SUBCALL_EXIT(uthread, error);

			/*
			 * If there's an error, or we get the right fd by
			 * accident, then drop out here.  This is easier than
			 * reworking all the open code to preallocate fd
			 * slots, and internally taking one as an argument.
			 */
			if (error || origfd == psfa->psfaa_filedes) {
				break;
			}

			/*
			 * If we didn't fall out from an error, we ended up
			 * with the wrong fd; so now we've got to try to dup2
			 * it to the right one.
			 */
			AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
			error = dup2(p, p_cred, origfd, psfa->psfaa_filedes, ival);
			AUDIT_SUBCALL_EXIT(uthread, error);
			if (error) {
				break;
			}

			/*
			 * Finally, close the original fd.
			 */
			AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
			error = close_nocancel(p, p_cred, origfd);
			AUDIT_SUBCALL_EXIT(uthread, error);
		}
		break;

		case PSFA_DUP2: {
			AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
			error = dup2(p, p_cred, psfa->psfaa_filedes,
			    psfa->psfaa_dup2args.psfad_newfiledes, ival);
			AUDIT_SUBCALL_EXIT(uthread, error);
		}
		break;

		case PSFA_FILEPORT_DUP2: {
			/* Turn a fileport send right into an fd, then dup2 it into place. */
			ipc_port_t port;
			kern_return_t kr;
			int origfd;

			if (!MACH_PORT_VALID(psfa->psfaa_fileport)) {
				error = EINVAL;
				break;
			}

			kr = ipc_typed_port_copyin_send(get_task_ipcspace(current_task()),
			    psfa->psfaa_fileport, IKOT_FILEPORT, &port);

			if (kr != KERN_SUCCESS) {
				error = EINVAL;
				break;
			}

			error = fileport_makefd(p, port, 0, &origfd);

			/* the fd (if any) holds its own reference; drop ours */
			if (IPC_PORT_NULL != port) {
				ipc_typed_port_release_send(port, IKOT_FILEPORT);
			}

			if (error || origfd == psfa->psfaa_dup2args.psfad_newfiledes) {
				break;
			}

			AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
			error = dup2(p, p_cred, origfd,
			    psfa->psfaa_dup2args.psfad_newfiledes, ival);
			AUDIT_SUBCALL_EXIT(uthread, error);
			if (error) {
				break;
			}

			AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
			error = close_nocancel(p, p_cred, origfd);
			AUDIT_SUBCALL_EXIT(uthread, error);
		}
		break;

		case PSFA_CLOSE: {
			AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
			error = close_nocancel(p, p_cred, psfa->psfaa_filedes);
			AUDIT_SUBCALL_EXIT(uthread, error);
		}
		break;

		case PSFA_INHERIT: {
			struct fileproc *fp;

			/*
			 * Check to see if the descriptor exists, and
			 * ensure it's -not- marked as close-on-exec.
			 *
			 * Attempting to "inherit" a guarded fd will
			 * result in a error.
			 */

			proc_fdlock(p);
			if ((fp = fp_get_noref_locked(p, psfa->psfaa_filedes)) == NULL) {
				error = EBADF;
			} else if (fp->fp_guard_attrs) {
				error = fp_guard_exception(p, psfa->psfaa_filedes,
				    fp, kGUARD_EXC_NOCLOEXEC);
			} else {
				fp->fp_flags &= ~FP_CLOEXEC;
				error = 0;
			}
			proc_fdunlock(p);
		}
		break;

		case PSFA_CHDIR: {
			/*
			 * Chdir is different, in that it requires the use of
			 * a path argument, which is normally copied in from
			 * user space; because of this, we have to support a
			 * chdir from kernel space that passes an address space
			 * context of UIO_SYSSPACE, and casts the address
			 * argument to a user_addr_t.
			 */
			struct nameidata *nd;
			nd = kalloc_type(struct nameidata,
			    Z_WAITOK | Z_ZERO | Z_NOFAIL);

			AUDIT_SUBCALL_ENTER(CHDIR, p, uthread);
			NDINIT(nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
			    CAST_USER_ADDR_T(psfa->psfaa_chdirargs.psfac_path),
			    imgp->ip_vfs_context);

			error = chdir_internal(p, imgp->ip_vfs_context, nd, 0);
			kfree_type(struct nameidata, nd);
			AUDIT_SUBCALL_EXIT(uthread, error);
		}
		break;

		case PSFA_FCHDIR: {
			AUDIT_SUBCALL_ENTER(FCHDIR, p, uthread);
			error = fchdir(p, imgp->ip_vfs_context,
			    psfa->psfaa_filedes, false);
			AUDIT_SUBCALL_EXIT(uthread, error);
		}
		break;

		default:
			error = EINVAL;
			break;
		}

		/* All file actions failures are considered fatal, per POSIX */

		if (error) {
			if (PSFA_OPEN == psfa->psfaa_type) {
				DTRACE_PROC1(spawn__open__failure, uintptr_t,
				    psfa->psfaa_openargs.psfao_path);
			} else {
				DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
			}
			break;
		}
	}

	if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0) {
		return error;
	}

	/*
	 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
	 * this spawn only) as if "close on exec" is the default
	 * disposition of all pre-existing file descriptors.  In this case,
	 * the list of file descriptors mentioned in the file actions
	 * are the only ones that can be inherited, so mark them now.
	 *
	 * The actual closing part comes later, in fdt_exec().
	 */
	proc_fdlock(p);
	for (action = 0; action < px_sfap->psfa_act_count; action++) {
		_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
		int fd = psfa->psfaa_filedes;

		switch (psfa->psfaa_type) {
		case PSFA_DUP2:
		case PSFA_FILEPORT_DUP2:
			fd = psfa->psfaa_dup2args.psfad_newfiledes;
			OS_FALLTHROUGH;
		case PSFA_OPEN:
		case PSFA_INHERIT:
			*fdflags(p, fd) |= UF_INHERIT;
			break;

		case PSFA_CLOSE:
		case PSFA_CHDIR:
		case PSFA_FCHDIR:
			/*
			 * Although PSFA_FCHDIR does have a file descriptor, it is not
			 * *creating* one, thus we do not automatically mark it for
			 * inheritance under POSIX_SPAWN_CLOEXEC_DEFAULT.  A client that
			 * wishes it to be inherited should use the PSFA_INHERIT action
			 * explicitly.
			 */
			break;
		}
	}
	proc_fdunlock(p);

	return 0;
}
3626
3627 #if CONFIG_MACF
3628 /*
3629 * Check that the extension's data is within the bounds of the
3630 * allocation storing all extensions' data
3631 */
3632 static inline errno_t
exec_spawnattr_validate_policyext_data(const struct ip_px_smpx_s * px_s,const _ps_mac_policy_extension_t * ext)3633 exec_spawnattr_validate_policyext_data(const struct ip_px_smpx_s *px_s,
3634 const _ps_mac_policy_extension_t *ext)
3635 {
3636 uint64_t dataend;
3637
3638 if (__improbable(os_add_overflow(ext->dataoff, ext->datalen, &dataend))) {
3639 return EOVERFLOW;
3640 }
3641 if (__improbable(dataend > px_s->datalen)) {
3642 return EINVAL;
3643 }
3644
3645 return 0;
3646 }
3647
3648 /*
3649 * exec_spawnattr_getmacpolicyinfo
3650 */
3651 void *
exec_spawnattr_getmacpolicyinfo(const void * macextensions,const char * policyname,size_t * lenp)3652 exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
3653 {
3654 const struct ip_px_smpx_s *px_s = macextensions;
3655 const struct _posix_spawn_mac_policy_extensions *psmx = NULL;
3656 int i;
3657
3658 if (px_s == NULL) {
3659 return NULL;
3660 }
3661
3662 psmx = px_s->array;
3663 if (psmx == NULL) {
3664 return NULL;
3665 }
3666
3667 for (i = 0; i < psmx->psmx_count; i++) {
3668 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
3669 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
3670 if (__improbable(exec_spawnattr_validate_policyext_data(px_s, extension))) {
3671 panic("invalid mac policy extension data");
3672 }
3673 if (lenp != NULL) {
3674 *lenp = (size_t)extension->datalen;
3675 }
3676 return (void *)((uintptr_t)px_s->data + extension->dataoff);
3677 }
3678 }
3679
3680 if (lenp != NULL) {
3681 *lenp = 0;
3682 }
3683 return NULL;
3684 }
3685
3686 static int
spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc * px_args,struct ip_px_smpx_s * pxsp)3687 spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args,
3688 struct ip_px_smpx_s *pxsp)
3689 {
3690 _posix_spawn_mac_policy_extensions_t psmx = NULL;
3691 uint8_t *data = NULL;
3692 uint64_t datalen = 0;
3693 uint64_t dataoff = 0;
3694 int error = 0;
3695
3696 bzero(pxsp, sizeof(*pxsp));
3697
3698 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
3699 px_args->mac_extensions_size > PAGE_SIZE) {
3700 error = EINVAL;
3701 goto bad;
3702 }
3703
3704 psmx = kalloc_data(px_args->mac_extensions_size, Z_WAITOK);
3705 if (psmx == NULL) {
3706 error = ENOMEM;
3707 goto bad;
3708 }
3709
3710 error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size);
3711 if (error) {
3712 goto bad;
3713 }
3714
3715 size_t extsize = PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count);
3716 if (extsize == 0 || extsize > px_args->mac_extensions_size) {
3717 error = EINVAL;
3718 goto bad;
3719 }
3720
3721 for (int i = 0; i < psmx->psmx_count; i++) {
3722 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
3723 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
3724 error = EINVAL;
3725 goto bad;
3726 }
3727 if (__improbable(os_add_overflow(datalen, extension->datalen, &datalen))) {
3728 error = ENOMEM;
3729 goto bad;
3730 }
3731 }
3732
3733 data = kalloc_data((vm_size_t)datalen, Z_WAITOK);
3734 if (data == NULL) {
3735 error = ENOMEM;
3736 goto bad;
3737 }
3738
3739 for (int i = 0; i < psmx->psmx_count; i++) {
3740 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
3741
3742 #if !__LP64__
3743 if (extension->data > UINT32_MAX) {
3744 goto bad;
3745 }
3746 #endif
3747 error = copyin((user_addr_t)extension->data, &data[dataoff], (size_t)extension->datalen);
3748 if (error) {
3749 error = ENOMEM;
3750 goto bad;
3751 }
3752 extension->dataoff = dataoff;
3753 dataoff += extension->datalen;
3754 }
3755
3756 pxsp->array = psmx;
3757 pxsp->data = data;
3758 pxsp->datalen = datalen;
3759 return 0;
3760
3761 bad:
3762 kfree_data(psmx, px_args->mac_extensions_size);
3763 kfree_data(data, (vm_size_t)datalen);
3764 return error;
3765 }
3766 #endif /* CONFIG_MACF */
3767
3768 #if CONFIG_COALITIONS
3769 static inline void
spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])3770 spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
3771 {
3772 for (int c = 0; c < COALITION_NUM_TYPES; c++) {
3773 if (coal[c]) {
3774 coalition_remove_active(coal[c]);
3775 coalition_release(coal[c]);
3776 }
3777 }
3778 }
3779 #endif
3780
3781 #if CONFIG_PERSONAS
3782 static int
spawn_validate_persona(struct _posix_spawn_persona_info * px_persona)3783 spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
3784 {
3785 int error = 0;
3786 struct persona *persona = NULL;
3787 kauth_cred_t mycred = kauth_cred_get();
3788
3789 if (!IOCurrentTaskHasEntitlement( PERSONA_MGMT_ENTITLEMENT)) {
3790 return EPERM;
3791 }
3792
3793 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
3794 if (px_persona->pspi_ngroups > NGROUPS_MAX) {
3795 return EINVAL;
3796 }
3797 }
3798
3799 persona = persona_lookup(px_persona->pspi_id);
3800 if (!persona) {
3801 return ESRCH;
3802 }
3803
3804 // non-root process should not be allowed to set persona with uid/gid 0
3805 if (!kauth_cred_issuser(mycred) &&
3806 (px_persona->pspi_uid == 0 || px_persona->pspi_gid == 0)) {
3807 return EPERM;
3808 }
3809
3810 persona_put(persona);
3811 return error;
3812 }
3813
3814 static bool
kauth_cred_model_setpersona(kauth_cred_t model,struct _posix_spawn_persona_info * px_persona)3815 kauth_cred_model_setpersona(
3816 kauth_cred_t model,
3817 struct _posix_spawn_persona_info *px_persona)
3818 {
3819 bool updated = false;
3820
3821 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
3822 updated |= kauth_cred_model_setresuid(model,
3823 px_persona->pspi_uid,
3824 px_persona->pspi_uid,
3825 px_persona->pspi_uid,
3826 KAUTH_UID_NONE);
3827 }
3828
3829 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
3830 updated |= kauth_cred_model_setresgid(model,
3831 px_persona->pspi_gid,
3832 px_persona->pspi_gid,
3833 px_persona->pspi_gid);
3834 }
3835
3836 if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
3837 updated |= kauth_cred_model_setgroups(model,
3838 px_persona->pspi_groups,
3839 px_persona->pspi_ngroups,
3840 px_persona->pspi_gmuid);
3841 }
3842
3843 return updated;
3844 }
3845
3846 static int
spawn_persona_adopt(proc_t p,struct _posix_spawn_persona_info * px_persona)3847 spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
3848 {
3849 struct persona *persona = NULL;
3850
3851 /*
3852 * we want to spawn into the given persona, but we want to override
3853 * the kauth with a different UID/GID combo
3854 */
3855 persona = persona_lookup(px_persona->pspi_id);
3856 if (!persona) {
3857 return ESRCH;
3858 }
3859
3860 return persona_proc_adopt(p, persona,
3861 ^bool (kauth_cred_t parent __unused, kauth_cred_t model) {
3862 return kauth_cred_model_setpersona(model, px_persona);
3863 });
3864 }
3865 #endif
3866
3867 #if __arm64__
3868 #if DEVELOPMENT || DEBUG
3869 TUNABLE(int, legacy_footprint_entitlement_mode, "legacy_footprint_entitlement_mode",
3870 LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE);
3871
3872 __startup_func
3873 static void
legacy_footprint_entitlement_mode_init(void)3874 legacy_footprint_entitlement_mode_init(void)
3875 {
3876 /*
3877 * legacy_footprint_entitlement_mode specifies the behavior we want associated
3878 * with the entitlement. The supported modes are:
3879 *
3880 * LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE:
3881 * Indicates that we want every process to have the memory accounting
3882 * that is available in iOS 12.0 and beyond.
3883 *
3884 * LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT:
3885 * Indicates that for every process that has the 'legacy footprint entitlement',
3886 * we want to give it the old iOS 11.0 accounting behavior which accounted some
3887 * of the process's memory to the kernel.
3888 *
3889 * LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE:
3890 * Indicates that for every process that has the 'legacy footprint entitlement',
3891 * we want it to have a higher memory limit which will help them acclimate to the
3892 * iOS 12.0 (& beyond) accounting behavior that does the right accounting.
3893 * The bonus added to the system-wide task limit to calculate this higher memory limit
3894 * is available in legacy_footprint_bonus_mb.
3895 */
3896
3897 if (legacy_footprint_entitlement_mode < LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE ||
3898 legacy_footprint_entitlement_mode > LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE) {
3899 legacy_footprint_entitlement_mode = LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE;
3900 }
3901 }
3902 STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, legacy_footprint_entitlement_mode_init);
3903 #else
3904 const int legacy_footprint_entitlement_mode = LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE;
3905 #endif
3906
3907 static inline void
proc_legacy_footprint_entitled(proc_t p,task_t task)3908 proc_legacy_footprint_entitled(proc_t p, task_t task)
3909 {
3910 #pragma unused(p)
3911 boolean_t legacy_footprint_entitled;
3912
3913 switch (legacy_footprint_entitlement_mode) {
3914 case LEGACY_FOOTPRINT_ENTITLEMENT_IGNORE:
3915 /* the entitlement is ignored */
3916 break;
3917 case LEGACY_FOOTPRINT_ENTITLEMENT_IOS11_ACCT:
3918 /* the entitlement grants iOS11 legacy accounting */
3919 legacy_footprint_entitled = memorystatus_task_has_legacy_footprint_entitlement(proc_task(p));
3920 if (legacy_footprint_entitled) {
3921 task_set_legacy_footprint(task);
3922 }
3923 break;
3924 case LEGACY_FOOTPRINT_ENTITLEMENT_LIMIT_INCREASE:
3925 /* the entitlement grants a footprint limit increase */
3926 legacy_footprint_entitled = memorystatus_task_has_legacy_footprint_entitlement(proc_task(p));
3927 if (legacy_footprint_entitled) {
3928 task_set_extra_footprint_limit(task);
3929 }
3930 break;
3931 default:
3932 break;
3933 }
3934 }
3935
3936 static inline void
proc_ios13extended_footprint_entitled(proc_t p,task_t task)3937 proc_ios13extended_footprint_entitled(proc_t p, task_t task)
3938 {
3939 #pragma unused(p)
3940 boolean_t ios13extended_footprint_entitled;
3941
3942 /* the entitlement grants a footprint limit increase */
3943 ios13extended_footprint_entitled = memorystatus_task_has_ios13extended_footprint_limit(proc_task(p));
3944 if (ios13extended_footprint_entitled) {
3945 task_set_ios13extended_footprint_limit(task);
3946 }
3947 }
3948
3949 static inline void
proc_increased_memory_limit_entitled(proc_t p,task_t task)3950 proc_increased_memory_limit_entitled(proc_t p, task_t task)
3951 {
3952 if (memorystatus_task_has_increased_debugging_memory_limit_entitlement(task)) {
3953 memorystatus_act_on_entitled_developer_task_limit(p);
3954 } else if (memorystatus_task_has_increased_memory_limit_entitlement(task)) {
3955 memorystatus_act_on_entitled_task_limit(p);
3956 }
3957 }
3958
3959 /*
3960 * Check for any of the various entitlements that permit a higher
3961 * task footprint limit or alternate accounting and apply them.
3962 */
3963 static inline void
proc_footprint_entitlement_hacks(proc_t p,task_t task)3964 proc_footprint_entitlement_hacks(proc_t p, task_t task)
3965 {
3966 proc_legacy_footprint_entitled(p, task);
3967 proc_ios13extended_footprint_entitled(p, task);
3968 proc_increased_memory_limit_entitled(p, task);
3969 }
3970 #endif /* __arm64__ */
3971
3972 /*
3973 * Processes with certain entitlements are granted a jumbo-size VM map.
3974 */
3975 static inline void
proc_apply_jit_and_vm_policies(struct image_params * imgp,proc_t p,task_t task)3976 proc_apply_jit_and_vm_policies(struct image_params *imgp, proc_t p, task_t task)
3977 {
3978 #if CONFIG_MACF
3979 bool jit_entitled = false;
3980 #endif /* CONFIG_MACF */
3981 bool needs_jumbo_va = false;
3982 bool needs_extra_jumbo_va = false;
3983 struct _posix_spawnattr *psa = imgp->ip_px_sa;
3984
3985 #if CONFIG_MACF
3986 jit_entitled = (mac_proc_check_map_anon(p, proc_ucred_unsafe(p),
3987 0, 0, 0, MAP_JIT, NULL) == 0);
3988 needs_jumbo_va = jit_entitled || IOTaskHasEntitlement(task,
3989 "com.apple.developer.kernel.extended-virtual-addressing") ||
3990 memorystatus_task_has_increased_memory_limit_entitlement(task) ||
3991 memorystatus_task_has_increased_debugging_memory_limit_entitlement(task);
3992 #else
3993 #pragma unused(p)
3994 #endif /* CONFIG_MACF */
3995
3996 #if HAS_MTE
3997 /*
3998 * If we are MTE enabled, communicate to the pmap layer that
3999 * we need the right configuration at each context switch.
4000 */
4001 if (task_has_sec(task)) {
4002 vm_map_set_sec_enabled(get_task_map(task));
4003
4004 #if KERN_AMFI_SUPPORTS_MTE
4005 if (get_lockdown_mode_state() == 0 &&
4006 amfi->has_mte_soft_mode &&
4007 amfi->has_mte_soft_mode(p)) {
4008 EXEC_LOG("AMFI says: enable soft-mode\n");
4009 task_set_sec_soft_mode(task);
4010 }
4011 #endif /* KERN_AMFI_SUPPORTS_MTE */
4012 }
4013
4014 /* Pipe through alias restrictions onto our backing map */
4015 if (task_has_sec_restrict_receiving_aliases_to_tagged_memory(task)) {
4016 vm_map_set_restrict_receiving_aliases_to_tagged_memory(get_task_map(task), true);
4017 }
4018
4019 #endif /* HAS_MTE */
4020
4021 #if HAS_MTE_EMULATION_SHIMS && XNU_TARGET_OS_IOS
4022 if (task_has_sec(task)) {
4023 /* Give Rosetta some breathing room for the shadow table. */
4024 needs_jumbo_va = true;
4025 }
4026 #endif /* HAS_MTE_EMULATION_SHIMS && XNU_TARGET_OS_IOS */
4027 if (needs_jumbo_va) {
4028 vm_map_set_jumbo(get_task_map(task));
4029 }
4030
4031 if (psa && psa->psa_max_addr) {
4032 vm_map_set_max_addr(get_task_map(task), psa->psa_max_addr, false);
4033 }
4034
4035 #if CONFIG_MAP_RANGES
4036 if ((task_has_hardened_heap(task) ||
4037 (task_get_platform_restrictions_version(task) == 1) ||
4038 task_get_platform_binary(task)) && !proc_is_simulated(p)) {
4039 /*
4040 * This must be done last as it needs to observe
4041 * any kind of VA space growth that was requested.
4042 * This is used by the secure allocator, so
4043 * must be applied to all platform restrictions binaries
4044 */
4045 #if XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT
4046 needs_extra_jumbo_va = IOTaskHasEntitlement(task,
4047 "com.apple.kernel.large-file-virtual-addressing");
4048 #endif /* XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT */
4049 vm_map_range_configure(get_task_map(task), needs_extra_jumbo_va);
4050 }
4051 #else
4052 #pragma unused(needs_extra_jumbo_va)
4053 #endif /* CONFIG_MAP_RANGES */
4054
4055 #if CONFIG_MACF
4056 if (jit_entitled) {
4057 vm_map_set_jit_entitled(get_task_map(task));
4058
4059 }
4060 #endif /* CONFIG_MACF */
4061
4062 #if XNU_TARGET_OS_OSX
4063 /* TPRO cannot be enforced on binaries that load 3P plugins on macos - rdar://107420220 */
4064 const bool task_loads_3P_plugins = imgp->ip_flags & IMGPF_3P_PLUGINS;
4065 #endif /* XNU_TARGET_OS_OSX */
4066
4067 if (task_has_tpro(task)
4068 #if XNU_TARGET_OS_OSX
4069 && !task_loads_3P_plugins
4070 #endif /* XNU_TARGET_OS_OSX */
4071 ) {
4072 /*
4073 * Pre-emptively disable TPRO remapping for
4074 * platform restrictions binaries (which do not load 3P plugins)
4075 */
4076 vm_map_set_tpro_enforcement(get_task_map(task));
4077 }
4078 }
4079
4080 static int
spawn_posix_cred_adopt(proc_t p,struct _posix_spawn_posix_cred_info * px_pcred_info)4081 spawn_posix_cred_adopt(proc_t p,
4082 struct _posix_spawn_posix_cred_info *px_pcred_info)
4083 {
4084 int error = 0;
4085
4086 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GID) {
4087 struct setgid_args args = {
4088 .gid = px_pcred_info->pspci_gid,
4089 };
4090 error = setgid(p, &args, NULL);
4091 if (error) {
4092 return error;
4093 }
4094 }
4095
4096 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) {
4097 error = setgroups_internal(p,
4098 px_pcred_info->pspci_ngroups,
4099 px_pcred_info->pspci_groups,
4100 px_pcred_info->pspci_gmuid);
4101 if (error) {
4102 return error;
4103 }
4104 }
4105
4106 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_UID) {
4107 struct setuid_args args = {
4108 .uid = px_pcred_info->pspci_uid,
4109 };
4110 error = setuid(p, &args, NULL);
4111 if (error) {
4112 return error;
4113 }
4114 }
4115 return 0;
4116 }
4117
4118 /*
4119 * posix_spawn
4120 *
4121 * Parameters: uap->pid Pointer to pid return area
4122 * uap->fname File name to exec
4123 * uap->argp Argument list
4124 * uap->envp Environment list
4125 *
4126 * Returns: 0 Success
4127 * EINVAL Invalid argument
4128 * ENOTSUP Not supported
4129 * ENOEXEC Executable file format error
4130 * exec_activate_image:EINVAL Invalid argument
4131 * exec_activate_image:EACCES Permission denied
4132 * exec_activate_image:EINTR Interrupted function
4133 * exec_activate_image:ENOMEM Not enough space
4134 * exec_activate_image:EFAULT Bad address
4135 * exec_activate_image:ENAMETOOLONG Filename too long
4136 * exec_activate_image:ENOEXEC Executable file format error
4137 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
4138 * exec_activate_image:EAUTH Image decryption failed
4139 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
4140 * exec_activate_image:???
4141 * mac_execve_enter:???
4142 *
4143 * TODO: Expect to need __mac_posix_spawn() at some point...
4144 * Handle posix_spawnattr_t
4145 * Handle posix_spawn_file_actions_t
4146 */
4147 int
posix_spawn(proc_t ap,struct posix_spawn_args * uap,int32_t * retval)4148 posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
4149 {
4150 proc_t p = ap;
4151 user_addr_t pid = uap->pid;
4152 int ival[2]; /* dummy retval for setpgid() */
4153 char *subsystem_root_path = NULL;
4154 struct image_params *imgp = NULL;
4155 struct vnode_attr *vap = NULL;
4156 struct vnode_attr *origvap = NULL;
4157 struct uthread *uthread = 0; /* compiler complains if not set to 0*/
4158 int error, sig;
4159 int is_64 = IS_64BIT_PROCESS(p);
4160 struct vfs_context context;
4161 struct user__posix_spawn_args_desc px_args = {};
4162 struct _posix_spawnattr px_sa = {};
4163 _posix_spawn_file_actions_t px_sfap = NULL;
4164 _posix_spawn_port_actions_t px_spap = NULL;
4165 struct __kern_sigaction vec;
4166 boolean_t spawn_no_exec = FALSE;
4167 boolean_t proc_transit_set = TRUE;
4168 boolean_t proc_signal_set = TRUE;
4169 boolean_t exec_done = FALSE;
4170 os_reason_t exec_failure_reason = NULL;
4171
4172 struct exec_port_actions port_actions = { };
4173 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
4174 task_t old_task = current_task();
4175 task_t new_task = NULL;
4176 boolean_t should_release_proc_ref = FALSE;
4177 void *inherit = NULL;
4178 uint8_t crash_behavior = 0;
4179 uint64_t crash_behavior_deadline = 0;
4180 #if CONFIG_EXCLAVES
4181 char *task_conclave_id = NULL;
4182 #endif
4183 #if CONFIG_PERSONAS
4184 struct _posix_spawn_persona_info *px_persona = NULL;
4185 #endif
4186 struct _posix_spawn_posix_cred_info *px_pcred_info = NULL;
4187 struct {
4188 struct image_params imgp;
4189 struct vnode_attr va;
4190 struct vnode_attr origva;
4191 } *__spawn_data;
4192
4193 /*
4194 * Allocate a big chunk for locals instead of using stack since these
4195 * structures are pretty big.
4196 */
4197 __spawn_data = kalloc_type(typeof(*__spawn_data), Z_WAITOK | Z_ZERO);
4198 if (__spawn_data == NULL) {
4199 error = ENOMEM;
4200 goto bad;
4201 }
4202 imgp = &__spawn_data->imgp;
4203 vap = &__spawn_data->va;
4204 origvap = &__spawn_data->origva;
4205
4206 /* Initialize the common data in the image_params structure */
4207 imgp->ip_user_fname = uap->path;
4208 imgp->ip_user_argv = uap->argv;
4209 imgp->ip_user_envv = uap->envp;
4210 imgp->ip_vattr = vap;
4211 imgp->ip_origvattr = origvap;
4212 imgp->ip_vfs_context = &context;
4213 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE);
4214 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
4215 imgp->ip_mac_return = 0;
4216 imgp->ip_px_persona = NULL;
4217 imgp->ip_px_pcred_info = NULL;
4218 imgp->ip_cs_error = OS_REASON_NULL;
4219 imgp->ip_simulator_binary = IMGPF_SB_DEFAULT;
4220 imgp->ip_subsystem_root_path = NULL;
4221 imgp->ip_inherited_shared_region_id = NULL;
4222 imgp->ip_inherited_jop_pid = 0;
4223 uthread_set_exec_data(current_uthread(), imgp);
4224
4225 if (uap->adesc != USER_ADDR_NULL) {
4226 if (is_64) {
4227 error = copyin(uap->adesc, &px_args, sizeof(px_args));
4228 } else {
4229 struct user32__posix_spawn_args_desc px_args32;
4230
4231 error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
4232
4233 /*
4234 * Convert arguments descriptor from external 32 bit
4235 * representation to internal 64 bit representation
4236 */
4237 px_args.attr_size = px_args32.attr_size;
4238 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
4239 px_args.file_actions_size = px_args32.file_actions_size;
4240 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
4241 px_args.port_actions_size = px_args32.port_actions_size;
4242 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
4243 px_args.mac_extensions_size = px_args32.mac_extensions_size;
4244 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
4245 px_args.coal_info_size = px_args32.coal_info_size;
4246 px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
4247 px_args.persona_info_size = px_args32.persona_info_size;
4248 px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
4249 px_args.posix_cred_info_size = px_args32.posix_cred_info_size;
4250 px_args.posix_cred_info = CAST_USER_ADDR_T(px_args32.posix_cred_info);
4251 px_args.subsystem_root_path_size = px_args32.subsystem_root_path_size;
4252 px_args.subsystem_root_path = CAST_USER_ADDR_T(px_args32.subsystem_root_path);
4253 px_args.conclave_id_size = px_args32.conclave_id_size;
4254 px_args.conclave_id = CAST_USER_ADDR_T(px_args32.conclave_id);
4255 }
4256 if (error) {
4257 goto bad;
4258 }
4259
4260 if (px_args.attr_size != 0) {
4261 /*
4262 * We are not copying the port_actions pointer,
4263 * because we already have it from px_args.
4264 * This is a bit fragile: <rdar://problem/16427422>
4265 */
4266
4267 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset)) != 0) {
4268 goto bad;
4269 }
4270
4271 imgp->ip_px_sa = &px_sa;
4272 }
4273 if (px_args.file_actions_size != 0) {
4274 /* Limit file_actions to allowed number of open files */
4275 size_t maxfa_size = PSF_ACTIONS_SIZE(proc_limitgetcur_nofile(p));
4276
4277 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
4278 maxfa_size == 0 || px_args.file_actions_size > maxfa_size) {
4279 error = EINVAL;
4280 goto bad;
4281 }
4282
4283 px_sfap = kalloc_data(px_args.file_actions_size, Z_WAITOK);
4284 if (px_sfap == NULL) {
4285 error = ENOMEM;
4286 goto bad;
4287 }
4288 imgp->ip_px_sfa = px_sfap;
4289
4290 if ((error = copyin(px_args.file_actions, px_sfap,
4291 px_args.file_actions_size)) != 0) {
4292 goto bad;
4293 }
4294
4295 /* Verify that the action count matches the struct size */
4296 size_t psfsize = PSF_ACTIONS_SIZE(px_sfap->psfa_act_count);
4297 if (psfsize == 0 || psfsize != px_args.file_actions_size) {
4298 error = EINVAL;
4299 goto bad;
4300 }
4301 }
4302 if (px_args.port_actions_size != 0) {
4303 /* Limit port_actions to one page of data */
4304 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
4305 px_args.port_actions_size > PAGE_SIZE) {
4306 error = EINVAL;
4307 goto bad;
4308 }
4309
4310 px_spap = kalloc_data(px_args.port_actions_size, Z_WAITOK);
4311 if (px_spap == NULL) {
4312 error = ENOMEM;
4313 goto bad;
4314 }
4315 imgp->ip_px_spa = px_spap;
4316
4317 if ((error = copyin(px_args.port_actions, px_spap,
4318 px_args.port_actions_size)) != 0) {
4319 goto bad;
4320 }
4321
4322 /* Verify that the action count matches the struct size */
4323 size_t pasize = PS_PORT_ACTIONS_SIZE(px_spap->pspa_count);
4324 if (pasize == 0 || pasize != px_args.port_actions_size) {
4325 error = EINVAL;
4326 goto bad;
4327 }
4328 }
4329 #if CONFIG_PERSONAS
4330 /* copy in the persona info */
4331 if (px_args.persona_info_size != 0 && px_args.persona_info != 0) {
4332 /* for now, we need the exact same struct in user space */
4333 if (px_args.persona_info_size != sizeof(*px_persona)) {
4334 error = ERANGE;
4335 goto bad;
4336 }
4337
4338 px_persona = kalloc_data(px_args.persona_info_size, Z_WAITOK);
4339 if (px_persona == NULL) {
4340 error = ENOMEM;
4341 goto bad;
4342 }
4343 imgp->ip_px_persona = px_persona;
4344
4345 if ((error = copyin(px_args.persona_info, px_persona,
4346 px_args.persona_info_size)) != 0) {
4347 goto bad;
4348 }
4349 if ((error = spawn_validate_persona(px_persona)) != 0) {
4350 goto bad;
4351 }
4352 }
4353 #endif
4354 /* copy in the posix cred info */
4355 if (px_args.posix_cred_info_size != 0 && px_args.posix_cred_info != 0) {
4356 /* for now, we need the exact same struct in user space */
4357 if (px_args.posix_cred_info_size != sizeof(*px_pcred_info)) {
4358 error = ERANGE;
4359 goto bad;
4360 }
4361
4362 if (!kauth_cred_issuser(kauth_cred_get())) {
4363 error = EPERM;
4364 goto bad;
4365 }
4366
4367 px_pcred_info = kalloc_data(px_args.posix_cred_info_size, Z_WAITOK);
4368 if (px_pcred_info == NULL) {
4369 error = ENOMEM;
4370 goto bad;
4371 }
4372 imgp->ip_px_pcred_info = px_pcred_info;
4373
4374 if ((error = copyin(px_args.posix_cred_info, px_pcred_info,
4375 px_args.posix_cred_info_size)) != 0) {
4376 goto bad;
4377 }
4378
4379 if (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_GROUPS) {
4380 if (px_pcred_info->pspci_ngroups > NGROUPS_MAX) {
4381 error = EINVAL;
4382 goto bad;
4383 }
4384 }
4385 }
4386 #if CONFIG_MACF
4387 if (px_args.mac_extensions_size != 0) {
4388 if ((error = spawn_copyin_macpolicyinfo(&px_args, (struct ip_px_smpx_s *)&imgp->ip_px_smpx)) != 0) {
4389 goto bad;
4390 }
4391 }
4392 #endif /* CONFIG_MACF */
4393 if ((px_args.subsystem_root_path_size > 0) && (px_args.subsystem_root_path_size <= MAXPATHLEN)) {
4394 /*
4395 * If a valid-looking subsystem root has been
4396 * specified...
4397 */
4398 if (IOTaskHasEntitlement(old_task, SPAWN_SUBSYSTEM_ROOT_ENTITLEMENT)) {
4399 /*
4400 * ...AND the parent has the entitlement, copy
4401 * the subsystem root path in.
4402 */
4403 subsystem_root_path = zalloc_flags(ZV_NAMEI,
4404 Z_WAITOK | Z_ZERO | Z_NOFAIL);
4405
4406 if ((error = copyin(px_args.subsystem_root_path, subsystem_root_path, px_args.subsystem_root_path_size))) {
4407 goto bad;
4408 }
4409
4410 /* Paranoia */
4411 subsystem_root_path[px_args.subsystem_root_path_size - 1] = 0;
4412 }
4413 }
4414 #if CONFIG_EXCLAVES
4415
4416 /*
4417 * Calling exclaves_boot_wait() ensures that the conclave name
4418 * id will only be set when exclaves are actually
4419 * supported/enabled. In practice this will never actually block
4420 * as by the time this is called the system will have booted to
4421 * EXCLAVECORE if it's supported/enabled.
4422 */
4423 if ((px_args.conclave_id_size > 0) && (px_args.conclave_id_size <= MAXCONCLAVENAME) &&
4424 (exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVECORE) == KERN_SUCCESS)) {
4425 if (px_args.conclave_id) {
4426 if (imgp->ip_px_sa != NULL && (px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) {
4427 /* Conclave id could be set only for true spawn */
4428 error = EINVAL;
4429 goto bad;
4430 }
4431 task_conclave_id = kalloc_data(MAXCONCLAVENAME,
4432 Z_WAITOK | Z_ZERO | Z_NOFAIL);
4433 if ((error = copyin(px_args.conclave_id, task_conclave_id, MAXCONCLAVENAME))) {
4434 goto bad;
4435 }
4436 task_conclave_id[MAXCONCLAVENAME - 1] = 0;
4437 }
4438 }
4439 #endif
4440 }
4441
4442 if (IOTaskHasEntitlement(old_task, SPAWN_SET_PANIC_CRASH_BEHAVIOR)) {
4443 /* Truncate to uint8_t since we only support 2 flags for now */
4444 crash_behavior = (uint8_t)px_sa.psa_crash_behavior;
4445 crash_behavior_deadline = px_sa.psa_crash_behavior_deadline;
4446 }
4447
4448 /* set uthread to parent */
4449 uthread = current_uthread();
4450
4451 /*
4452 * <rdar://6640530>; this does not result in a behaviour change
4453 * relative to Leopard, so there should not be any existing code
4454 * which depends on it.
4455 */
4456
4457 if (imgp->ip_px_sa != NULL) {
4458 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
4459 if ((psa->psa_options & PSA_OPTION_PLUGIN_HOST_DISABLE_A_KEYS) == PSA_OPTION_PLUGIN_HOST_DISABLE_A_KEYS) {
4460 imgp->ip_flags |= IMGPF_PLUGIN_HOST_DISABLE_A_KEYS;
4461 }
4462
4463 #if (DEVELOPMENT || DEBUG)
4464 if ((psa->psa_options & PSA_OPTION_ALT_ROSETTA) == PSA_OPTION_ALT_ROSETTA) {
4465 imgp->ip_flags |= (IMGPF_ROSETTA | IMGPF_ALT_ROSETTA);
4466 }
4467 #if HAS_MTE_EMULATION_SHIMS
4468 /* If the task has inheritance enabled, carry the emulation setup. */
4469 if (task_has_sec(old_task) && task_has_sec_inherit(old_task)) {
4470 imgp->ip_flags |= (IMGPF_ROSETTA | IMGPF_ALT_ROSETTA);
4471 }
4472 #endif /* HAS_MTE_EMULATION_SHIMS */
4473 #endif /* (DEVELOPMENT || DEBUG) */
4474
4475
4476 if ((error = exec_validate_spawnattr_policy(psa->psa_apptype)) != 0) {
4477 goto bad;
4478 }
4479 }
4480
4481 /*
4482 * If we don't have the extension flag that turns "posix_spawn()"
4483 * into "execve() with options", then we will be creating a new
4484 * process which does not inherit memory from the parent process,
4485 * which is one of the most expensive things about using fork()
4486 * and execve().
4487 */
4488 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) {
4489 /* Set the new task's coalition, if it is requested. */
4490 coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
4491 #if CONFIG_COALITIONS
4492 int i, ncoals;
4493 kern_return_t kr = KERN_SUCCESS;
4494 struct _posix_spawn_coalition_info coal_info;
4495 int coal_role[COALITION_NUM_TYPES];
4496
4497 if (imgp->ip_px_sa == NULL || !px_args.coal_info) {
4498 goto do_fork1;
4499 }
4500
4501 memset(&coal_info, 0, sizeof(coal_info));
4502
4503 if (px_args.coal_info_size > sizeof(coal_info)) {
4504 px_args.coal_info_size = sizeof(coal_info);
4505 }
4506 error = copyin(px_args.coal_info,
4507 &coal_info, px_args.coal_info_size);
4508 if (error != 0) {
4509 goto bad;
4510 }
4511
4512 ncoals = 0;
4513 for (i = 0; i < COALITION_NUM_TYPES; i++) {
4514 uint64_t cid = coal_info.psci_info[i].psci_id;
4515 if (cid != 0) {
4516 /*
4517 * don't allow tasks which are not in a
4518 * privileged coalition to spawn processes
4519 * into coalitions other than their own
4520 */
4521 if (!task_is_in_privileged_coalition(proc_task(p), i) &&
4522 !IOTaskHasEntitlement(proc_task(p), COALITION_SPAWN_ENTITLEMENT)) {
4523 coal_dbg("ERROR: %d not in privilegd "
4524 "coalition of type %d",
4525 proc_getpid(p), i);
4526 spawn_coalitions_release_all(coal);
4527 error = EPERM;
4528 goto bad;
4529 }
4530
4531 coal_dbg("searching for coalition id:%llu", cid);
4532 /*
4533 * take a reference and activation on the
4534 * coalition to guard against free-while-spawn
4535 * races
4536 */
4537 coal[i] = coalition_find_and_activate_by_id(cid);
4538 if (coal[i] == COALITION_NULL) {
4539 coal_dbg("could not find coalition id:%llu "
4540 "(perhaps it has been terminated or reaped)", cid);
4541 /*
4542 * release any other coalition's we
4543 * may have a reference to
4544 */
4545 spawn_coalitions_release_all(coal);
4546 error = ESRCH;
4547 goto bad;
4548 }
4549 if (coalition_type(coal[i]) != i) {
4550 coal_dbg("coalition with id:%lld is not of type:%d"
4551 " (it's type:%d)", cid, i, coalition_type(coal[i]));
4552 spawn_coalitions_release_all(coal);
4553 error = ESRCH;
4554 goto bad;
4555 }
4556 coal_role[i] = coal_info.psci_info[i].psci_role;
4557 ncoals++;
4558 }
4559 }
4560 if (ncoals < COALITION_NUM_TYPES) {
4561 /*
4562 * If the user is attempting to spawn into a subset of
4563 * the known coalition types, then make sure they have
4564 * _at_least_ specified a resource coalition. If not,
4565 * the following fork1() call will implicitly force an
4566 * inheritance from 'p' and won't actually spawn the
4567 * new task into the coalitions the user specified.
4568 * (also the call to coalitions_set_roles will panic)
4569 */
4570 if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
4571 spawn_coalitions_release_all(coal);
4572 error = EINVAL;
4573 goto bad;
4574 }
4575 }
4576 do_fork1:
4577 #endif /* CONFIG_COALITIONS */
4578
4579 /*
4580 * note that this will implicitly inherit the
4581 * caller's persona (if it exists)
4582 */
4583 error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
4584 /* returns a thread and task reference */
4585
4586 if (error == 0) {
4587 new_task = get_threadtask(imgp->ip_new_thread);
4588 }
4589 #if CONFIG_COALITIONS
4590 /* set the roles of this task within each given coalition */
4591 if (error == 0) {
4592 kr = coalitions_set_roles(coal, new_task, coal_role);
4593 if (kr != KERN_SUCCESS) {
4594 error = EINVAL;
4595 }
4596 if (kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_COALITION,
4597 MACH_COALITION_ADOPT))) {
4598 for (i = 0; i < COALITION_NUM_TYPES; i++) {
4599 if (coal[i] != COALITION_NULL) {
4600 /*
4601 * On 32-bit targets, uniqueid
4602 * will get truncated to 32 bits
4603 */
4604 KDBG_RELEASE(MACHDBG_CODE(
4605 DBG_MACH_COALITION,
4606 MACH_COALITION_ADOPT),
4607 coalition_id(coal[i]),
4608 get_task_uniqueid(new_task));
4609 }
4610 }
4611 }
4612 }
4613
4614 /* drop our references and activations - fork1() now holds them */
4615 spawn_coalitions_release_all(coal);
4616 #endif /* CONFIG_COALITIONS */
4617 if (error != 0) {
4618 goto bad;
4619 }
4620 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */
4621 spawn_no_exec = TRUE; /* used in later tests */
4622 } else {
4623 /* Adjust the user proc count */
4624 (void)chgproccnt(kauth_getruid(), 1);
4625 /*
4626 * For execve case, create a new proc, task and thread
4627 * but don't make the proc visible to userland. After
4628 * image activation, the new proc would take place of
4629 * the old proc in pid hash and other lists that make
4630 * the proc visible to the system.
4631 */
4632 imgp->ip_new_thread = cloneproc(old_task, NULL, p, CLONEPROC_EXEC);
4633
4634 /* task and thread ref returned by cloneproc */
4635 if (imgp->ip_new_thread == NULL) {
4636 (void)chgproccnt(kauth_getruid(), -1);
4637 error = ENOMEM;
4638 goto bad;
4639 }
4640
4641 new_task = get_threadtask(imgp->ip_new_thread);
4642 imgp->ip_flags |= IMGPF_EXEC;
4643 }
4644
4645 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
4646
4647 if (spawn_no_exec) {
4648 /*
4649 * We had to wait until this point before firing the
4650 * proc:::create probe, otherwise p would not point to the
4651 * child process.
4652 */
4653 DTRACE_PROC1(create, proc_t, p);
4654 }
4655 assert(p != NULL);
4656
4657 if (subsystem_root_path) {
4658 /* If a subsystem root was specified, swap it in */
4659 char * old_subsystem_root_path = p->p_subsystem_root_path;
4660 p->p_subsystem_root_path = subsystem_root_path;
4661 subsystem_root_path = old_subsystem_root_path;
4662 }
4663
4664 p->p_crash_behavior = crash_behavior;
4665 p->p_crash_behavior_deadline = crash_behavior_deadline;
4666
4667 p->p_crash_count = px_sa.psa_crash_count;
4668 p->p_throttle_timeout = px_sa.psa_throttle_timeout;
4669
4670 /* We'll need the subsystem root for setting up Apple strings */
4671 imgp->ip_subsystem_root_path = p->p_subsystem_root_path;
4672
4673 context.vc_thread = imgp->ip_new_thread;
4674 context.vc_ucred = proc_ucred_unsafe(p); /* in init */
4675
4676 /*
4677 * Post fdt_fork(), pre exec_handle_sugid() - this is where we want
4678 * to handle the file_actions.
4679 */
4680
4681 /* Has spawn file actions? */
4682 if (imgp->ip_px_sfa != NULL) {
4683 /*
4684 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
4685 * is handled in exec_handle_file_actions().
4686 */
4687 #if CONFIG_AUDIT
4688 /*
4689 * The file actions auditing can overwrite the upath of
4690 * AUE_POSIX_SPAWN audit record. Save the audit record.
4691 */
4692 struct kaudit_record *save_uu_ar = uthread->uu_ar;
4693 uthread->uu_ar = NULL;
4694 #endif
4695 error = exec_handle_file_actions(imgp,
4696 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0);
4697 #if CONFIG_AUDIT
4698 /* Restore the AUE_POSIX_SPAWN audit record. */
4699 uthread->uu_ar = save_uu_ar;
4700 #endif
4701 if (error != 0) {
4702 goto bad;
4703 }
4704 }
4705
4706 /* Has spawn port actions? */
4707 if (imgp->ip_px_spa != NULL) {
4708 #if CONFIG_AUDIT
4709 /*
4710 * Do the same for the port actions as we did for the file
4711 * actions. Save the AUE_POSIX_SPAWN audit record.
4712 */
4713 struct kaudit_record *save_uu_ar = uthread->uu_ar;
4714 uthread->uu_ar = NULL;
4715 #endif
4716 error = exec_handle_port_actions(imgp, &port_actions);
4717 #if CONFIG_AUDIT
4718 /* Restore the AUE_POSIX_SPAWN audit record. */
4719 uthread->uu_ar = save_uu_ar;
4720 #endif
4721 if (error != 0) {
4722 goto bad;
4723 }
4724 }
4725
4726 /* Has spawn attr? */
4727 if (imgp->ip_px_sa != NULL) {
4728 /*
4729 * Reset UID/GID to parent's RUID/RGID; This works only
4730 * because the operation occurs before the call
4731 * to exec_handle_sugid() by the image activator called
4732 * from exec_activate_image().
4733 *
4734 * POSIX requires that any setuid/setgid bits on the process
4735 * image will take precedence over the spawn attributes
4736 * (re)setting them.
4737 *
4738 * Modifications to p_ucred must be guarded using the
4739 * proc's ucred lock. This prevents others from accessing
4740 * a garbage credential.
4741 */
4742 if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
4743 kauth_cred_proc_update(p, PROC_SETTOKEN_NONE,
4744 ^bool (kauth_cred_t parent __unused, kauth_cred_t model){
4745 return kauth_cred_model_setuidgid(model,
4746 kauth_cred_getruid(parent),
4747 kauth_cred_getrgid(parent));
4748 });
4749 }
4750
4751 if (imgp->ip_px_pcred_info) {
4752 if (!spawn_no_exec) {
4753 error = ENOTSUP;
4754 goto bad;
4755 }
4756
4757 error = spawn_posix_cred_adopt(p, imgp->ip_px_pcred_info);
4758 if (error != 0) {
4759 goto bad;
4760 }
4761 }
4762
4763 #if CONFIG_PERSONAS
4764 if (imgp->ip_px_persona != NULL) {
4765 if (!spawn_no_exec) {
4766 error = ENOTSUP;
4767 goto bad;
4768 }
4769
4770 /*
4771 * If we were asked to spawn a process into a new persona,
4772 * do the credential switch now (which may override the UID/GID
4773 * inherit done just above). It's important to do this switch
4774 * before image activation both for reasons stated above, and
4775 * to ensure that the new persona has access to the image/file
4776 * being executed.
4777 */
4778 error = spawn_persona_adopt(p, imgp->ip_px_persona);
4779 if (error != 0) {
4780 goto bad;
4781 }
4782 }
4783 #endif /* CONFIG_PERSONAS */
4784 #if !SECURE_KERNEL
4785 /*
4786 * Disable ASLR for the spawned process.
4787 *
4788 * But only do so if we are not embedded + RELEASE.
4789 * While embedded allows for a boot-arg (-disable_aslr)
4790 * to deal with this (which itself is only honored on
4791 * DEVELOPMENT or DEBUG builds of xnu), it is often
4792 * useful or necessary to disable ASLR on a per-process
4793 * basis for unit testing and debugging.
4794 */
4795 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR) {
4796 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
4797 }
4798 #endif /* !SECURE_KERNEL */
4799
4800 /* Randomize high bits of ASLR slide */
4801 if (px_sa.psa_flags & _POSIX_SPAWN_HIGH_BITS_ASLR) {
4802 imgp->ip_flags |= IMGPF_HIGH_BITS_ASLR;
4803 }
4804
4805 #if !SECURE_KERNEL
4806 /*
4807 * Forcibly disallow execution from data pages for the spawned process
4808 * even if it would otherwise be permitted by the architecture default.
4809 */
4810 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC) {
4811 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
4812 }
4813 #endif /* !SECURE_KERNEL */
4814
4815 #if __has_feature(ptrauth_calls)
4816 if (vm_shared_region_reslide_aslr && is_64 && (px_sa.psa_flags & _POSIX_SPAWN_RESLIDE)) {
4817 imgp->ip_flags |= IMGPF_RESLIDE;
4818 }
4819 #endif /* __has_feature(ptrauth_calls) */
4820
4821 if ((px_sa.psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) ==
4822 POSIX_SPAWN_PROC_TYPE_DRIVER) {
4823 imgp->ip_flags |= IMGPF_DRIVER;
4824 }
4825 }
4826
4827 /*
4828 * Disable ASLR during image activation. This occurs either if the
4829 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
4830 * P_DISABLE_ASLR was inherited from the parent process.
4831 */
4832 if (p->p_flag & P_DISABLE_ASLR) {
4833 imgp->ip_flags |= IMGPF_DISABLE_ASLR;
4834 }
4835
4836 /*
4837 * Clear transition flag so we won't hang if exec_activate_image() causes
4838 * an automount (and launchd does a proc sysctl to service it).
4839 *
4840 * <rdar://problem/6848672>, <rdar://problem/5959568>.
4841 */
4842 proc_transend(p, 0);
4843 proc_transit_set = 0;
4844
4845 if (!spawn_no_exec) {
4846 /*
4847 * Clear the signal lock in case of exec, since
4848 * image activation uses psignal on child process.
4849 */
4850 proc_signalend(p, 0);
4851 proc_signal_set = 0;
4852 }
4853
4854 #if MAC_SPAWN /* XXX */
4855 if (uap->mac_p != USER_ADDR_NULL) {
4856 error = mac_execve_enter(uap->mac_p, imgp);
4857 if (error) {
4858 goto bad;
4859 }
4860 }
4861 #endif
4862 /*
4863 * Activate the image.
4864 * Warning: If activation failed after point of no return, it returns error
4865 * as 0 and pretends the call succeeded.
4866 */
4867 error = exec_activate_image(imgp);
4868 #if defined(HAS_APPLE_PAC)
4869 const uint8_t disable_user_jop = imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE;
4870 ml_task_set_jop_pid_from_shared_region(new_task, disable_user_jop);
4871 ml_task_set_disable_user_jop(new_task, disable_user_jop);
4872 ml_thread_set_disable_user_jop(imgp->ip_new_thread, disable_user_jop);
4873 ml_thread_set_jop_pid(imgp->ip_new_thread, new_task);
4874 #endif
4875
4876 /*
4877 * If you've come here to add support for some new HW feature or some per-process or per-vmmap
4878 * or per-pmap flag that needs to be set before the process runs, or are in general lost, here
4879 * is some help. This summary was accurate as of Jul 2022. Use git log as needed. This comment
4880 * is here to prevent a recurrence of rdar://96307913
4881 *
4882 * In posix_spawn, following is what happens:
4883 * 1. Lots of prep and checking work
4884 * 2. Image activation via exec_activate_image(). The new task will get a new pmap here
4885 * 3. More prep work. (YOU ARE HERE)
4886 * 4. exec_resettextvp() is called
4887 * 5. At this point it is safe to check entitlements and code signatures
4888 * 6. task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT);
4889 * The new thread is allowed to run in kernel. It cannot yet get to userland
4890 * 7. More things done here. This is your chance to affect the task before it runs in
4891 * userspace
4892 * 8. task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_FINAL_WAIT);
4893 * The new thread is allowed to run in userland
4894 */
4895
4896 if (error == 0 && !spawn_no_exec) {
4897 p = proc_exec_switch_task(current_proc(), p, old_task, new_task, imgp, &inherit);
4898 /* proc ref returned */
4899 should_release_proc_ref = TRUE;
4900 }
4901
4902 if (error == 0) {
4903 /* process completed the exec, but may have failed after point of no return */
4904 exec_done = TRUE;
4905 }
4906
4907 #if CONFIG_EXCLAVES
4908 if (!error && task_conclave_id != NULL) {
4909 kern_return_t kr;
4910 kr = task_add_conclave(new_task, imgp->ip_vp, (int64_t)imgp->ip_arch_offset,
4911 task_conclave_id);
4912 if (kr != KERN_SUCCESS) {
4913 error = EINVAL;
4914 goto bad;
4915 }
4916 }
4917 #endif
4918
4919 if (!error && imgp->ip_px_sa != NULL) {
4920 thread_t child_thread = imgp->ip_new_thread;
4921 uthread_t child_uthread = get_bsdthread_info(child_thread);
4922
4923 /*
4924 * Because of POSIX_SPAWN_SETEXEC, we need to handle this after image
4925 * activation, else when image activation fails (before the point of no
4926 * return) would leave the parent process in a modified state.
4927 */
4928 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
4929 struct setpgid_args spga;
4930 spga.pid = proc_getpid(p);
4931 spga.pgid = px_sa.psa_pgroup;
4932 /*
4933 * Effectively, call setpgid() system call; works
4934 * because there are no pointer arguments.
4935 */
4936 if ((error = setpgid(p, &spga, ival)) != 0) {
4937 goto bad_px_sa;
4938 }
4939 }
4940
4941 if (px_sa.psa_flags & POSIX_SPAWN_SETSID) {
4942 error = setsid_internal(p);
4943 if (error != 0) {
4944 goto bad_px_sa;
4945 }
4946 }
4947
4948 /*
4949 * If we have a spawn attr, and it contains signal related flags,
 4950 * then we need to process them in the "context" of the new child
4951 * process, so we have to process it following image activation,
4952 * prior to making the thread runnable in user space. This is
4953 * necessitated by some signal information being per-thread rather
4954 * than per-process, and we don't have the new allocation in hand
4955 * until after the image is activated.
4956 */
4957
4958 /*
4959 * Mask a list of signals, instead of them being unmasked, if
4960 * they were unmasked in the parent; note that some signals
4961 * are not maskable.
4962 */
4963 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK) {
4964 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
4965 }
4966 /*
4967 * Default a list of signals instead of ignoring them, if
4968 * they were ignored in the parent. Note that we pass
4969 * spawn_no_exec to setsigvec() to indicate that we called
4970 * fork1() and therefore do not need to call proc_signalstart()
4971 * internally.
4972 */
4973 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
4974 vec.sa_handler = SIG_DFL;
4975 vec.sa_tramp = 0;
4976 vec.sa_mask = 0;
4977 vec.sa_flags = 0;
4978 for (sig = 1; sig < NSIG; sig++) {
4979 if (px_sa.psa_sigdefault & (1 << (sig - 1))) {
4980 error = setsigvec(p, child_thread, sig, &vec, spawn_no_exec);
4981 }
4982 }
4983 }
4984
4985 /*
4986 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
4987 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
4988 * limit.
4989 *
4990 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
4991 */
4992 if ((px_sa.psa_cpumonitor_percent != 0) && (px_sa.psa_cpumonitor_percent < UINT8_MAX)) {
4993 /*
4994 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
4995 * an entitlement to configure the monitor a certain way seems silly, since
 4996 * whoever is turning it on could just as easily choose not to do so.
4997 */
4998 error = proc_set_task_ruse_cpu(proc_task(p),
4999 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
5000 (uint8_t)px_sa.psa_cpumonitor_percent,
5001 px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
5002 0, TRUE);
5003 }
5004
5005
5006 if (px_pcred_info &&
5007 (px_pcred_info->pspci_flags & POSIX_SPAWN_POSIX_CRED_LOGIN)) {
5008 /*
5009 * setlogin() must happen after setsid()
5010 */
5011 setlogin_internal(p, px_pcred_info->pspci_login);
5012 }
5013
5014 bad_px_sa:
5015 if (error != 0) {
5016 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
5017 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_PSATTR, 0, 0);
5018 exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_PSATTR);
5019 }
5020 }
5021
5022 bad:
5023
5024 if (error == 0) {
5025 /* reset delay idle sleep status if set */
5026 #if CONFIG_DELAY_IDLE_SLEEP
5027 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) {
5028 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
5029 }
5030 #endif /* CONFIG_DELAY_IDLE_SLEEP */
5031 /* upon successful spawn, re/set the proc control state */
5032 if (imgp->ip_px_sa != NULL) {
5033 switch (px_sa.psa_pcontrol) {
5034 case POSIX_SPAWN_PCONTROL_THROTTLE:
5035 p->p_pcaction = P_PCTHROTTLE;
5036 break;
5037 case POSIX_SPAWN_PCONTROL_SUSPEND:
5038 p->p_pcaction = P_PCSUSP;
5039 break;
5040 case POSIX_SPAWN_PCONTROL_KILL:
5041 p->p_pcaction = P_PCKILL;
5042 break;
5043 case POSIX_SPAWN_PCONTROL_NONE:
5044 default:
5045 p->p_pcaction = 0;
5046 break;
5047 }
5048 ;
5049 }
5050 exec_resettextvp(p, imgp);
5051
5052 vm_map_setup(get_task_map(new_task), new_task);
5053
5054 exec_setup_platform_restrictions(new_task);
5055
5056 /*
5057 * Set starting EXC_GUARD behavior for task now that platform
5058 * and platform restrictions bits are set.
5059 */
5060 task_set_exc_guard_default(new_task,
5061 proc_best_name(p),
5062 strlen(proc_best_name(p)),
5063 proc_is_simulated(p),
5064 proc_platform(p),
5065 proc_sdk(p));
5066
5067 /*
5068 * Between proc_exec_switch_task and ipc_task_enable, there is a
5069 * window where proc_find will return the new proc, but task_for_pid
5070 * and similar functions will return an error as the task ipc is not
5071 * enabled yet. Configure the task control port during this window
 5072 * before other processes have access to this task port.
5073 *
5074 * Must enable after resettextvp so that task port policies are not evaluated
5075 * until the csblob in the textvp is accurately reflected.
5076 */
5077 task_set_ctrl_port_default(new_task, imgp->ip_new_thread);
5078
5079 /*
5080 * Enable new task IPC access if exec_activate_image() returned an
5081 * active task. (Checks active bit in ipc_task_enable() under lock).
5082 * Similarly, this must happen after resettextvp.
5083 */
5084 ipc_task_enable(new_task);
5085
5086 /* Set task exception ports now that we can check entitlements */
5087 if (imgp->ip_px_spa != NULL) {
5088 error = exec_handle_exception_port_actions(imgp, &port_actions);
5089 }
5090
5091 #if CONFIG_MEMORYSTATUS
5092 /* Set jetsam priority for DriverKit processes */
5093 if (px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DRIVER) {
5094 px_sa.psa_priority = JETSAM_PRIORITY_DRIVER_APPLE;
5095 }
5096
5097 /* Has jetsam attributes? */
5098 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
5099 int32_t memlimit_active = px_sa.psa_memlimit_active;
5100 int32_t memlimit_inactive = px_sa.psa_memlimit_inactive;
5101
5102 memstat_priority_options_t priority_options = MEMSTAT_PRIORITY_OPTIONS_NONE;
5103 if ((px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY)) {
5104 priority_options |= MEMSTAT_PRIORITY_IS_EFFECTIVE;
5105 }
5106 memorystatus_set_priority(p, px_sa.psa_priority, 0,
5107 priority_options);
5108
5109 memlimit_options_t memlimit_options = MEMLIMIT_OPTIONS_NONE;
5110 if ((px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL)) {
5111 memlimit_options |= MEMLIMIT_ACTIVE_FATAL;
5112 }
5113 if ((px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL)) {
5114 memlimit_options |= MEMLIMIT_INACTIVE_FATAL;
5115 }
5116 if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
5117 /*
5118 * With 2-level high-water-mark support,
5119 * POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no longer relevant,
5120 * as background limits are described via the inactive limit
5121 * slots. However, if the
5122 * POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in, we
5123 * attempt to mimic previous behavior by forcing the BG limit
5124 * data into the inactive/non-fatal mode and force the active
5125 * slots to hold system_wide/fatal mode.
5126 */
5127 memlimit_options |= MEMLIMIT_ACTIVE_FATAL;
5128 memlimit_options &= ~MEMLIMIT_INACTIVE_FATAL;
5129 memlimit_active = -1;
5130 }
5131 memorystatus_set_memlimits(p, memlimit_active, memlimit_inactive,
5132 memlimit_options);
5133 }
5134
5135 /* Has jetsam relaunch behavior? */
5136 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK)) {
5137 /*
5138 * Launchd has passed in data indicating the behavior of this process in response to jetsam.
5139 * This data would be used by the jetsam subsystem to determine the position and protection
5140 * offered to this process on dirty -> clean transitions.
5141 */
5142 int relaunch_flags = P_MEMSTAT_RELAUNCH_UNKNOWN;
5143 switch (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MASK) {
5144 case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_LOW:
5145 relaunch_flags = P_MEMSTAT_RELAUNCH_LOW;
5146 break;
5147 case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_MED:
5148 relaunch_flags = P_MEMSTAT_RELAUNCH_MED;
5149 break;
5150 case POSIX_SPAWN_JETSAM_RELAUNCH_BEHAVIOR_HIGH:
5151 relaunch_flags = P_MEMSTAT_RELAUNCH_HIGH;
5152 break;
5153 default:
5154 break;
5155 }
5156 memorystatus_relaunch_flags_update(p, relaunch_flags);
5157 }
5158
5159 #endif /* CONFIG_MEMORYSTATUS */
5160 if (imgp->ip_px_sa != NULL && px_sa.psa_thread_limit > 0) {
5161 task_set_thread_limit(new_task, (uint16_t)px_sa.psa_thread_limit);
5162 }
5163 if (imgp->ip_px_sa != NULL && px_sa.psa_conclave_mem_limit > 0) {
5164 task_set_conclave_mem_limit(new_task, px_sa.psa_conclave_mem_limit);
5165 }
5166
5167 #if CONFIG_PROC_RESOURCE_LIMITS
5168 if (imgp->ip_px_sa != NULL && (px_sa.psa_port_soft_limit > 0 || px_sa.psa_port_hard_limit > 0)) {
5169 task_set_port_space_limits(new_task, (uint32_t)px_sa.psa_port_soft_limit,
5170 (uint32_t)px_sa.psa_port_hard_limit);
5171 }
5172
5173 if (imgp->ip_px_sa != NULL && (px_sa.psa_filedesc_soft_limit > 0 || px_sa.psa_filedesc_hard_limit > 0)) {
5174 proc_set_filedesc_limits(p, (int)px_sa.psa_filedesc_soft_limit,
5175 (int)px_sa.psa_filedesc_hard_limit);
5176 }
5177 if (imgp->ip_px_sa != NULL && (px_sa.psa_kqworkloop_soft_limit > 0 || px_sa.psa_kqworkloop_hard_limit > 0)) {
5178 proc_set_kqworkloop_limits(p, (int)px_sa.psa_kqworkloop_soft_limit,
5179 (int)px_sa.psa_kqworkloop_hard_limit);
5180 }
5181 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
5182
5183 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_REALTIME_AUDIO)) {
5184 task_set_jetsam_realtime_audio(new_task, TRUE);
5185 }
5186 }
5187
5188
5189 /*
5190 * If we successfully called fork1() or cloneproc, we always need
5191 * to do this. This is because we come back from that call with
 5192 * signals blocked in the child, and we have to unblock them; for the exec
 5193 * case they are unblocked before activation, but for the true spawn case
5194 * we want to wait until after we've performed any spawn actions.
5195 * This has to happen before process_signature(), which uses psignal.
5196 */
5197 if (proc_transit_set) {
5198 proc_transend(p, 0);
5199 }
5200
5201 /*
5202 * Drop the signal lock on the child which was taken on our
5203 * behalf by forkproc()/cloneproc() to prevent signals being
5204 * received by the child in a partially constructed state.
5205 */
5206 if (proc_signal_set) {
5207 proc_signalend(p, 0);
5208 }
5209
5210 if (error == 0) {
5211 /*
5212 * We need to initialize the bank context behind the protection of
5213 * the proc_trans lock to prevent a race with exit. We can't do this during
5214 * exec_activate_image because task_bank_init checks entitlements that
5215 * aren't loaded until subsequent calls (including exec_resettextvp).
5216 */
5217 error = proc_transstart(p, 0, 0);
5218
5219 if (error == 0) {
5220 task_bank_init(new_task);
5221 proc_transend(p, 0);
5222 }
5223
5224 #if __arm64__
5225 proc_footprint_entitlement_hacks(p, new_task);
5226 #endif /* __arm64__ */
5227
5228 #if XNU_TARGET_OS_OSX
5229 #define SINGLE_JIT_ENTITLEMENT "com.apple.security.cs.single-jit"
5230 if (IOTaskHasEntitlement(new_task, SINGLE_JIT_ENTITLEMENT)) {
5231 vm_map_single_jit(get_task_map(new_task));
5232 }
5233 #endif /* XNU_TARGET_OS_OSX */
5234
5235 #if __has_feature(ptrauth_calls)
5236 task_set_pac_exception_fatal_flag(new_task);
5237 #endif /* __has_feature(ptrauth_calls) */
5238 task_set_jit_flags(new_task);
5239 }
5240
5241 /* Inherit task role from old task to new task for exec */
5242 if (error == 0 && !spawn_no_exec) {
5243 proc_inherit_task_role(new_task, old_task);
5244 }
5245
5246 #if CONFIG_ARCADE
5247 if (error == 0) {
5248 /*
5249 * Check to see if we need to trigger an arcade upcall AST now
5250 * that the vnode has been reset on the task.
5251 */
5252 arcade_prepare(new_task, imgp->ip_new_thread);
5253 }
5254 #endif /* CONFIG_ARCADE */
5255
5256 if (error == 0) {
5257 proc_apply_jit_and_vm_policies(imgp, p, new_task);
5258 }
5259
5260 /* Clear the initial wait on the thread before handling spawn policy */
5261 if (imgp && imgp->ip_new_thread) {
5262 task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT);
5263 }
5264
5265 /*
5266 * Apply the spawnattr policy, apptype (which primes the task for importance donation),
5267 * and bind any portwatch ports to the new task.
5268 * This must be done after the exec so that the child's thread is ready,
5269 * and after the in transit state has been released, because priority is
5270 * dropped here so we need to be prepared for a potentially long preemption interval
5271 *
5272 * TODO: Consider splitting this up into separate phases
5273 */
5274 if (error == 0 && imgp->ip_px_sa != NULL) {
5275 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
5276
5277 error = exec_handle_spawnattr_policy(p, imgp->ip_new_thread, psa->psa_apptype, psa->psa_qos_clamp,
5278 psa->psa_darwin_role, &port_actions);
5279 }
5280
5281 /* Transfer the turnstile watchport boost to new task if in exec */
5282 if (error == 0 && !spawn_no_exec) {
5283 task_transfer_turnstile_watchports(old_task, new_task, imgp->ip_new_thread);
5284 }
5285
5286 if (error == 0 && imgp->ip_px_sa != NULL) {
5287 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
5288
5289 if (psa->psa_no_smt) {
5290 task_set_no_smt(new_task);
5291 }
5292 if (psa->psa_tecs) {
5293 task_set_tecs(new_task);
5294 }
5295 }
5296
5297 struct _iopol_param_t iop_param = {
5298 .iop_scope = IOPOL_SCOPE_PROCESS,
5299 .iop_iotype = IOPOL_TYPE_VFS_MATERIALIZE_DATALESS_FILES,
5300 };
5301
5302 if (error == 0) {
5303 if (imgp->ip_px_sa != NULL) {
5304 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
5305
5306 if (psa->psa_options & PSA_OPTION_DATALESS_IOPOLICY) {
5307 iop_param.iop_policy = psa->psa_dataless_iopolicy;
5308 }
5309 } else {
5310 error = iopolicysys_vfs_materialize_dataless_files(p, IOPOL_CMD_GET, iop_param.iop_scope,
5311 iop_param.iop_policy, &iop_param);
5312 }
5313 }
5314
5315 if (error == 0 && iop_param.iop_policy != 0) {
5316 error = iopolicysys_vfs_materialize_dataless_files(p, IOPOL_CMD_SET, iop_param.iop_scope,
5317 (iop_param.iop_policy | IOPOL_MATERIALIZE_DATALESS_FILES_ORIG), &iop_param);
5318 }
5319
5320 if (error == 0) {
5321 /* Apply the main thread qos */
5322 thread_t main_thread = imgp->ip_new_thread;
5323 task_set_main_thread_qos(new_task, main_thread);
5324 }
5325
5326 /*
5327 * Release any ports we kept around for binding to the new task
5328 * We need to release the rights even if the posix_spawn has failed.
5329 */
5330 if (imgp->ip_px_spa != NULL) {
5331 exec_port_actions_destroy(&port_actions);
5332 }
5333
5334 /*
5335 * We have to delay operations which might throw a signal until after
5336 * the signals have been unblocked; however, we want that to happen
5337 * after exec_resettextvp() so that the textvp is correct when they
5338 * fire.
5339 */
5340 if (error == 0) {
5341 error = process_signature(p, imgp);
5342
5343 /*
5344 * Pay for our earlier safety; deliver the delayed signals from
5345 * the incomplete spawn process now that it's complete.
5346 */
5347 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
5348 psignal_vfork(p, proc_task(p), imgp->ip_new_thread, SIGTRAP);
5349 }
5350
5351 if (error == 0 && !spawn_no_exec) {
5352 extern uint64_t kdp_task_exec_meta_flags(task_t task);
5353 KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
5354 proc_getpid(p), kdp_task_exec_meta_flags(proc_task(p)));
5355 }
5356 }
5357
5358 if (spawn_no_exec) {
5359 /* flag the 'fork' has occurred */
5360 proc_knote(p->p_pptr, NOTE_FORK | proc_getpid(p));
5361 }
5362
5363 /* flag exec has occurred, notify only if it has not failed due to FP Key error */
5364 if (!error && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) {
5365 proc_knote(p, NOTE_EXEC);
5366 }
5367
5368 if (imgp != NULL) {
5369 uthread_set_exec_data(current_uthread(), NULL);
5370 if (imgp->ip_vp) {
5371 vnode_put(imgp->ip_vp);
5372 }
5373 if (imgp->ip_scriptvp) {
5374 vnode_put(imgp->ip_scriptvp);
5375 }
5376 if (imgp->ip_strings) {
5377 execargs_free(imgp);
5378 }
5379 if (imgp->ip_free_map) {
5380 /* Free the map after dropping iocount on vnode to avoid deadlock */
5381 vm_map_deallocate(imgp->ip_free_map);
5382 }
5383 kfree_data(imgp->ip_px_sfa,
5384 px_args.file_actions_size);
5385 kfree_data(imgp->ip_px_spa,
5386 px_args.port_actions_size);
5387 #if CONFIG_PERSONAS
5388 kfree_data(imgp->ip_px_persona,
5389 px_args.persona_info_size);
5390 #endif
5391 kfree_data(imgp->ip_px_pcred_info,
5392 px_args.posix_cred_info_size);
5393
5394 if (subsystem_root_path != NULL) {
5395 zfree(ZV_NAMEI, subsystem_root_path);
5396 }
5397 #if CONFIG_MACF
5398 struct ip_px_smpx_s *px_s = &imgp->ip_px_smpx;
5399 kfree_data(px_s->array, px_args.mac_extensions_size);
5400 kfree_data(px_s->data, (vm_size_t)px_s->datalen);
5401
5402 if (imgp->ip_execlabelp) {
5403 mac_cred_label_free(imgp->ip_execlabelp);
5404 imgp->ip_execlabelp = NULL;
5405 }
5406 if (imgp->ip_scriptlabelp) {
5407 mac_vnode_label_free(imgp->ip_scriptlabelp);
5408 imgp->ip_scriptlabelp = NULL;
5409 }
5410 if (imgp->ip_cs_error != OS_REASON_NULL) {
5411 os_reason_free(imgp->ip_cs_error);
5412 imgp->ip_cs_error = OS_REASON_NULL;
5413 }
5414 if (imgp->ip_inherited_shared_region_id != NULL) {
5415 kfree_data(imgp->ip_inherited_shared_region_id,
5416 strlen(imgp->ip_inherited_shared_region_id) + 1);
5417 imgp->ip_inherited_shared_region_id = NULL;
5418 }
5419 #endif
5420 }
5421
5422 #if CONFIG_DTRACE
5423 if (spawn_no_exec) {
5424 /*
5425 * In the original DTrace reference implementation,
5426 * posix_spawn() was a libc routine that just
5427 * did vfork(2) then exec(2). Thus the proc::: probes
5428 * are very fork/exec oriented. The details of this
5429 * in-kernel implementation of posix_spawn() is different
5430 * (while producing the same process-observable effects)
5431 * particularly w.r.t. errors, and which thread/process
5432 * is constructing what on behalf of whom.
5433 */
5434 if (error) {
5435 DTRACE_PROC1(spawn__failure, int, error);
5436 } else {
5437 DTRACE_PROC(spawn__success);
5438 /*
5439 * Some DTrace scripts, e.g. newproc.d in
 5440 * /usr/bin, rely on the 'exec-success'
5441 * probe being fired in the child after the
5442 * new process image has been constructed
5443 * in order to determine the associated pid.
5444 *
5445 * So, even though the parent built the image
5446 * here, for compatibility, mark the new thread
5447 * so 'exec-success' fires on it as it leaves
5448 * the kernel.
5449 */
5450 dtrace_thread_didexec(imgp->ip_new_thread);
5451 }
5452 } else {
5453 if (error) {
5454 DTRACE_PROC1(exec__failure, int, error);
5455 } else {
5456 dtrace_thread_didexec(imgp->ip_new_thread);
5457 }
5458 }
5459
5460 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
5461 (*dtrace_proc_waitfor_hook)(p);
5462 }
5463 #endif
5464
5465 #if CONFIG_AUDIT
5466 if (!error && AUDIT_ENABLED() && p) {
5467 /* Add the CDHash of the new process to the audit record */
5468 uint8_t *cdhash = cs_get_cdhash(p);
5469 if (cdhash) {
5470 AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
5471 }
5472 }
5473 #endif
5474
5475 /* terminate the new task if exec failed */
5476 if (new_task != NULL && task_is_exec_copy(new_task)) {
5477 task_terminate_internal(new_task);
5478 }
5479
5480 if (exec_failure_reason && !spawn_no_exec) {
5481 psignal_with_reason(p, SIGKILL, exec_failure_reason);
5482 exec_failure_reason = NULL;
5483 }
5484
5485 /* Return to both the parent and the child? */
5486 if (imgp != NULL && spawn_no_exec) {
5487 /*
5488 * If the parent wants the pid, copy it out
5489 */
5490 if (error == 0 && pid != USER_ADDR_NULL) {
5491 _Static_assert(sizeof(pid_t) == 4, "posix_spawn() assumes a 32-bit pid_t");
5492 bool aligned = (pid & 3) == 0;
5493 if (aligned) {
5494 (void)copyout_atomic32(proc_getpid(p), pid);
5495 } else {
5496 (void)suword(pid, proc_getpid(p));
5497 }
5498 }
5499 retval[0] = error;
5500
5501 /*
 5502 * If we had an error, perform an internal reap; this is
5503 * entirely safe, as we have a real process backing us.
5504 */
5505 if (error) {
5506 proc_list_lock();
5507 p->p_listflag |= P_LIST_DEADPARENT;
5508 proc_list_unlock();
5509 proc_lock(p);
5510 /* make sure no one else has killed it off... */
5511 if (p->p_stat != SZOMB && p->exit_thread == NULL) {
5512 p->exit_thread = current_thread();
5513 p->p_posix_spawn_failed = true;
5514 proc_unlock(p);
5515 exit1(p, 1, (int *)NULL);
5516 } else {
5517 /* someone is doing it for us; just skip it */
5518 proc_unlock(p);
5519 }
5520 }
5521 }
5522
5523 /*
5524 * Do not terminate the current task, if proc_exec_switch_task did not
5525 * switch the tasks, terminating the current task without the switch would
 5526 * result in losing the SIGKILL status.
5527 */
5528 if (task_did_exec(old_task)) {
5529 /* Terminate the current task, since exec will start in new task */
5530 task_terminate_internal(old_task);
5531 }
5532
5533 /* Release the thread ref returned by cloneproc/fork1 */
5534 if (imgp != NULL && imgp->ip_new_thread) {
5535 /* clear the exec complete flag if there is an error before point of no-return */
5536 uint32_t clearwait_flags = TCRW_CLEAR_FINAL_WAIT;
5537 if (!spawn_no_exec && !exec_done && error != 0) {
5538 clearwait_flags |= TCRW_CLEAR_EXEC_COMPLETE;
5539 }
5540 /* wake up the new thread */
5541 task_clear_return_wait(get_threadtask(imgp->ip_new_thread), clearwait_flags);
5542 thread_deallocate(imgp->ip_new_thread);
5543 imgp->ip_new_thread = NULL;
5544 }
5545
5546 /* Release the ref returned by cloneproc/fork1 */
5547 if (new_task) {
5548 task_deallocate(new_task);
5549 new_task = NULL;
5550 }
5551
5552 if (should_release_proc_ref) {
5553 proc_rele(p);
5554 }
5555
5556 kfree_type(typeof(*__spawn_data), __spawn_data);
5557
5558 if (inherit != NULL) {
5559 ipc_importance_release(inherit);
5560 }
5561
5562 #if CONFIG_EXCLAVES
5563 if (task_conclave_id != NULL) {
5564 kfree_data(task_conclave_id, MAXCONCLAVENAME);
5565 }
5566 #endif
5567
5568 assert(spawn_no_exec || exec_failure_reason == NULL);
5569 return error;
5570 }
5571
5572 /*
5573 * proc_exec_switch_task
5574 *
5575 * Parameters: old_proc proc before exec
5576 * new_proc proc after exec
5577 * old_task task before exec
5578 * new_task task after exec
5579 * imgp image params
5580 * inherit resulting importance linkage
5581 *
5582 * Returns: proc.
5583 *
5584 * Note: The function will switch proc in pid hash from old proc to new proc.
5585 * The switch needs to happen after draining all proc refs and inside
5586 * a proc list lock. In the case of failure to switch the proc, which
5587 * might happen if the process received a SIGKILL or jetsam killed it,
 5588 * it will make sure that the new task terminates. User proc ref returned
5589 * to caller.
5590 *
 5591 * This function is called after the point of no return; in the case
 5592 * of failure to switch, it will terminate the new task and swallow the
5593 * error and let the terminated process complete exec and die.
5594 */
proc_t
proc_exec_switch_task(proc_t old_proc, proc_t new_proc, task_t old_task, task_t new_task, struct image_params *imgp, void **inherit)
{
	boolean_t task_active;
	boolean_t proc_active;
	boolean_t thread_active;
	boolean_t reparent_traced_child = FALSE;
	thread_t old_thread = current_thread();
	thread_t new_thread = imgp->ip_new_thread;

	/*
	 * Promote the calling thread for the duration of the switch, then
	 * drain outstanding refs on both procs. proc_refdrain_will_exec()
	 * may return a different proc pointer; use its return value.
	 */
	thread_set_exec_promotion(old_thread);
	old_proc = proc_refdrain_will_exec(old_proc);

	new_proc = proc_refdrain_will_exec(new_proc);
	/* extra proc ref returned to the caller */

	assert(get_threadtask(new_thread) == new_task);
	task_active = task_is_active(new_task);
	proc_active = !(old_proc->p_lflag & P_LEXIT);

	/* Check if the current thread is not aborted due to SIGKILL */
	thread_active = thread_is_active(old_thread);

	/*
	 * Do not switch the proc if the new task or proc is already terminated
	 * as a result of error in exec past point of no return
	 */
	if (proc_active && task_active && thread_active) {
		uthread_t new_uthread = get_bsdthread_info(new_thread);
		uthread_t old_uthread = current_uthread();

		/* Clear dispatchqueue and workloop ast offset */
		new_proc->p_dispatchqueue_offset = 0;
		new_proc->p_dispatchqueue_serialno_offset = 0;
		new_proc->p_dispatchqueue_label_offset = 0;
		new_proc->p_return_to_kernel_offset = 0;
		new_proc->p_pthread_wq_quantum_offset = 0;

		/* If old_proc is session leader, change the leader to new proc */
		session_replace_leader(old_proc, new_proc);

		proc_lock(old_proc);

		/* Copy the signal state, dtrace state and set bsd ast on new thread */
		act_set_astbsd(new_thread);
		new_uthread->uu_siglist |= old_uthread->uu_siglist;
		new_uthread->uu_siglist |= old_proc->p_siglist;
		new_uthread->uu_sigwait = old_uthread->uu_sigwait;
		new_uthread->uu_sigmask = old_uthread->uu_sigmask;
		new_uthread->uu_oldmask = old_uthread->uu_oldmask;
		new_uthread->uu_exit_reason = old_uthread->uu_exit_reason;
#if CONFIG_DTRACE
		/* Move (not copy) per-thread dtrace state: old thread is cleared below */
		new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig;
		new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop;
		new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid;
		assert(new_uthread->t_dtrace_scratch == NULL);
		new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch;

		old_uthread->t_dtrace_sig = 0;
		old_uthread->t_dtrace_stop = 0;
		old_uthread->t_dtrace_resumepid = 0;
		old_uthread->t_dtrace_scratch = NULL;
#endif

#if CONFIG_PROC_UDATA_STORAGE
		new_proc->p_user_data = old_proc->p_user_data;
#endif /* CONFIG_PROC_UDATA_STORAGE */

		/* Copy the resource accounting info (current_thread() == old_thread) */
		thread_copy_resource_info(new_thread, current_thread());

		/* Clear the exit reason and signal state on old thread */
		old_uthread->uu_exit_reason = NULL;
		old_uthread->uu_siglist = 0;

		task_set_did_exec_flag(old_task);
		task_clear_exec_copy_flag(new_task);

		task_copy_fields_for_exec(new_task, old_task);

		/*
		 * Need to transfer pending watch port boosts to the new task
		 * while still making sure that the old task remains in the
		 * importance linkage. Create an importance linkage from old task
		 * to new task, then switch the task importance base of old task
		 * and new task. After the switch the port watch boost will be
		 * boosting the new task and new task will be donating importance
		 * to old task.
		 */
		*inherit = ipc_importance_exec_switch_task(old_task, new_task);

		/* Transfer parent's ptrace state to child */
		new_proc->p_lflag &= ~(P_LTRACED | P_LSIGEXC | P_LNOATTACH);
		new_proc->p_lflag |= (old_proc->p_lflag & (P_LTRACED | P_LSIGEXC | P_LNOATTACH));
		new_proc->p_oppid = old_proc->p_oppid;

		if (old_proc->p_pptr != new_proc->p_pptr) {
			/* A traced child must be re-parented under the tracer; done below */
			reparent_traced_child = TRUE;
			new_proc->p_lflag |= P_LTRACE_WAIT;
		}

		proc_unlock(old_proc);

		/* Update the list of proc knotes */
		proc_transfer_knotes(old_proc, new_proc);

		/* Update the proc interval timers */
		proc_inherit_itimers(old_proc, new_proc);

		proc_list_lock();

		/* Insert the new proc in child list of parent proc */
		p_reparentallchildren(old_proc, new_proc);

		/* Switch proc in pid hash */
		phash_replace_locked(old_proc, new_proc);

		/* Transfer the shadow flag to old proc */
		os_atomic_andnot(&new_proc->p_refcount, P_REF_SHADOW, relaxed);
		os_atomic_or(&old_proc->p_refcount, P_REF_SHADOW, relaxed);

		/* Change init proc if launchd exec */
		if (old_proc == initproc) {
			/* Take the ref on new proc after proc_refwake_did_exec */
			initproc = new_proc;
			/* Drop the proc ref on old proc */
			proc_rele(old_proc);
		}

		proc_list_unlock();
#if CONFIG_EXCLAVES
		if (task_inherit_conclave(old_task, new_task, imgp->ip_vp,
		    (int64_t)imgp->ip_arch_offset) != KERN_SUCCESS) {
			task_terminate_internal(new_task);
		}
#endif
	} else {
		/* Switch did not happen: terminate the exec-copy task instead */
		task_terminate_internal(new_task);
	}

	/* Wake anyone blocked waiting for the will-exec ref drain to finish */
	proc_refwake_did_exec(new_proc);
	proc_refwake_did_exec(old_proc);

	/* Take a ref on initproc if it changed */
	if (new_proc == initproc) {
		initproc = proc_ref(new_proc, false);
		assert(initproc != PROC_NULL);
	}

	thread_clear_exec_promotion(old_thread);
	proc_rele(old_proc);

	if (reparent_traced_child) {
		proc_t pp = proc_parent(old_proc);
		assert(pp != PROC_NULL);

		proc_reparentlocked(new_proc, pp, 1, 0);
		proc_rele(pp);

		proc_lock(new_proc);
		new_proc->p_lflag &= ~P_LTRACE_WAIT;
		proc_unlock(new_proc);
	}

	return new_proc;
}
5761
5762 /*
5763 * execve
5764 *
5765 * Parameters: uap->fname File name to exec
5766 * uap->argp Argument list
5767 * uap->envp Environment list
5768 *
5769 * Returns: 0 Success
5770 * __mac_execve:EINVAL Invalid argument
5771 * __mac_execve:ENOTSUP Invalid argument
5772 * __mac_execve:EACCES Permission denied
5773 * __mac_execve:EINTR Interrupted function
5774 * __mac_execve:ENOMEM Not enough space
5775 * __mac_execve:EFAULT Bad address
5776 * __mac_execve:ENAMETOOLONG Filename too long
5777 * __mac_execve:ENOEXEC Executable file format error
5778 * __mac_execve:ETXTBSY Text file busy [misuse of error code]
5779 * __mac_execve:???
5780 *
5781 * TODO: Dynamic linker header address on stack is copied via suword()
5782 */
5783 /* ARGSUSED */
5784 int
execve(proc_t p,struct execve_args * uap,int32_t * retval)5785 execve(proc_t p, struct execve_args *uap, int32_t *retval)
5786 {
5787 struct __mac_execve_args muap;
5788 int err;
5789
5790 memoryshot(DBG_VM_EXECVE, DBG_FUNC_NONE);
5791
5792 muap.fname = uap->fname;
5793 muap.argp = uap->argp;
5794 muap.envp = uap->envp;
5795 muap.mac_p = USER_ADDR_NULL;
5796 err = __mac_execve(p, &muap, retval);
5797
5798 return err;
5799 }
5800
5801 /*
5802 * __mac_execve
5803 *
5804 * Parameters: uap->fname File name to exec
5805 * uap->argp Argument list
5806 * uap->envp Environment list
5807 * uap->mac_p MAC label supplied by caller
5808 *
5809 * Returns: 0 Success
5810 * EINVAL Invalid argument
5811 * ENOTSUP Not supported
5812 * ENOEXEC Executable file format error
5813 * exec_activate_image:EINVAL Invalid argument
5814 * exec_activate_image:EACCES Permission denied
5815 * exec_activate_image:EINTR Interrupted function
5816 * exec_activate_image:ENOMEM Not enough space
5817 * exec_activate_image:EFAULT Bad address
5818 * exec_activate_image:ENAMETOOLONG Filename too long
5819 * exec_activate_image:ENOEXEC Executable file format error
5820 * exec_activate_image:ETXTBSY Text file busy [misuse of error code]
5821 * exec_activate_image:EBADEXEC The executable is corrupt/unknown
5822 * exec_activate_image:???
5823 * mac_execve_enter:???
5824 *
5825 * TODO: Dynamic linker header address on stack is copied via suword()
5826 */
int
__mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval __unused)
{
	struct image_params *imgp = NULL;
	struct vnode_attr *vap = NULL;
	struct vnode_attr *origvap = NULL;
	int error;
	int is_64 = IS_64BIT_PROCESS(p);
	struct vfs_context context;
	struct uthread *uthread = NULL;
	task_t old_task = current_task();
	task_t new_task = NULL;
	boolean_t should_release_proc_ref = FALSE;
	boolean_t exec_done = FALSE;
	void *inherit = NULL;
	struct {
		struct image_params imgp;
		struct vnode_attr va;
		struct vnode_attr origva;
	} *__execve_data;

	/* Allocate a big chunk for locals instead of using stack since these
	 * structures are pretty big.
	 */
	__execve_data = kalloc_type(typeof(*__execve_data), Z_WAITOK | Z_ZERO);
	if (__execve_data == NULL) {
		error = ENOMEM;
		goto exit_with_error;
	}
	imgp = &__execve_data->imgp;
	vap = &__execve_data->va;
	origvap = &__execve_data->origva;

	/* Initialize the common data in the image_params structure */
	imgp->ip_user_fname = uap->fname;
	imgp->ip_user_argv = uap->argp;
	imgp->ip_user_envv = uap->envp;
	imgp->ip_vattr = vap;
	imgp->ip_origvattr = origvap;
	imgp->ip_vfs_context = &context;
	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
	imgp->ip_mac_return = 0;
	imgp->ip_cs_error = OS_REASON_NULL;
	imgp->ip_simulator_binary = IMGPF_SB_DEFAULT;
	imgp->ip_subsystem_root_path = NULL;
	uthread_set_exec_data(current_uthread(), imgp);

#if CONFIG_MACF
	/* Copy in the caller-supplied MAC label, if any */
	if (uap->mac_p != USER_ADDR_NULL) {
		error = mac_execve_enter(uap->mac_p, imgp);
		if (error) {
			goto exit_with_error;
		}
	}
#endif
	uthread = current_uthread();
	{
		imgp->ip_flags |= IMGPF_EXEC;

		/* Adjust the user proc count */
		(void)chgproccnt(kauth_getruid(), 1);
		/*
		 * For execve case, create a new proc, task and thread
		 * but don't make the proc visible to userland. After
		 * image activation, the new proc would take place of
		 * the old proc in pid hash and other lists that make
		 * the proc visible to the system.
		 */
		imgp->ip_new_thread = cloneproc(old_task, NULL, p, CLONEPROC_EXEC);
		/* task and thread ref returned by cloneproc */
		if (imgp->ip_new_thread == NULL) {
			/* undo the proc count bump taken above */
			(void)chgproccnt(kauth_getruid(), -1);
			error = ENOMEM;
			goto exit_with_error;
		}

		new_task = get_threadtask(imgp->ip_new_thread);
	}

#if HAS_MTE_EMULATION_SHIMS
	/*
	 * ARM2ARM Rosetta doesn't carry over the configuration from the initial posix_spawn,
	 * so we key the enablement of the runtime to whether inheritance is enabled or not
	 * for the task. We will defer any MTE specific configuration to image activation.
	 */
	if (task_has_sec_inherit(old_task) && task_has_sec(old_task)) {
		imgp->ip_flags |= (IMGPF_ROSETTA | IMGPF_ALT_ROSETTA);
	}
#endif /* HAS_MTE_EMULATION_SHIMS */

	/* From here on, p refers to the NEW (not yet visible) proc */
	p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);

	context.vc_thread = imgp->ip_new_thread;
	context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */

	imgp->ip_subsystem_root_path = p->p_subsystem_root_path;

	proc_transend(p, 0);
	proc_signalend(p, 0);

	/*
	 * Activate the image.
	 * Warning: If activation failed after point of no return, it returns error
	 * as 0 and pretends the call succeeded.
	 */
	error = exec_activate_image(imgp);
	/* thread and task ref returned for vfexec case */

	if (imgp->ip_new_thread != NULL) {
		/*
		 * task reference might be returned by exec_activate_image
		 * for vfexec.
		 */
		new_task = get_threadtask(imgp->ip_new_thread);
#if defined(HAS_APPLE_PAC)
		ml_task_set_disable_user_jop(new_task, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
		ml_thread_set_disable_user_jop(imgp->ip_new_thread, imgp->ip_flags & IMGPF_NOJOP ? TRUE : FALSE);
#endif
	}

	if (!error) {
		/* Make the new proc take the old proc's place in the system */
		p = proc_exec_switch_task(current_proc(), p, old_task, new_task, imgp, &inherit);
		/* proc ref returned */
		should_release_proc_ref = TRUE;
	}

	kauth_cred_unref(&context.vc_ucred);

	if (!error) {
		exec_done = TRUE;
		assert(imgp->ip_new_thread != NULL);

		exec_resettextvp(p, imgp);

		vm_map_setup(get_task_map(new_task), new_task);

		exec_setup_platform_restrictions(new_task);

		/*
		 * Set starting EXC_GUARD behavior for task now that platform
		 * and platform restrictions bits are set.
		 */
		task_set_exc_guard_default(new_task,
		    proc_best_name(p),
		    strlen(proc_best_name(p)),
		    proc_is_simulated(p),
		    proc_platform(p),
		    proc_sdk(p));

		/*
		 * Between proc_exec_switch_task and ipc_task_enable, there is a
		 * window where proc_find will return the new proc, but task_for_pid
		 * and similar functions will return an error as the task ipc is not
		 * enabled yet. Configure the task control port during this window
		 * before other process have access to this task port.
		 *
		 * Must enable after resettextvp so that task port policies are not evaluated
		 * until the csblob in the textvp is accurately reflected.
		 */
		task_set_ctrl_port_default(new_task, imgp->ip_new_thread);

		/*
		 * Enable new task IPC access if exec_activate_image() returned an
		 * active task. (Checks active bit in ipc_task_enable() under lock).
		 * Similarly, this must happen after resettextvp.
		 */
		ipc_task_enable(new_task);
		error = process_signature(p, imgp);
	}


#if defined(HAS_APPLE_PAC)
	if (imgp->ip_new_thread && !error) {
		ml_task_set_jop_pid_from_shared_region(new_task, imgp->ip_flags & IMGPF_NOJOP);
		ml_thread_set_jop_pid(imgp->ip_new_thread, new_task);
	}
#endif /* defined(HAS_APPLE_PAC) */

	/* flag exec has occurred, notify only if it has not failed due to FP Key error */
	if (exec_done && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)) {
		proc_knote(p, NOTE_EXEC);
	}

	/* Release vnode iocounts and image-activation scratch state */
	if (imgp->ip_vp != NULLVP) {
		vnode_put(imgp->ip_vp);
	}
	if (imgp->ip_scriptvp != NULLVP) {
		vnode_put(imgp->ip_scriptvp);
	}
	if (imgp->ip_free_map) {
		/* Free the map after dropping iocount on vnode to avoid deadlock */
		vm_map_deallocate(imgp->ip_free_map);
	}
	if (imgp->ip_strings) {
		execargs_free(imgp);
	}
#if CONFIG_MACF
	if (imgp->ip_execlabelp) {
		mac_cred_label_free(imgp->ip_execlabelp);
		imgp->ip_execlabelp = NULL;
	}
	if (imgp->ip_scriptlabelp) {
		mac_vnode_label_free(imgp->ip_scriptlabelp);
		imgp->ip_scriptlabelp = NULL;
	}
#endif
	if (imgp->ip_cs_error != OS_REASON_NULL) {
		os_reason_free(imgp->ip_cs_error);
		imgp->ip_cs_error = OS_REASON_NULL;
	}

	if (!error) {
		/*
		 * We need to initialize the bank context behind the protection of
		 * the proc_trans lock to prevent a race with exit. We can't do this during
		 * exec_activate_image because task_bank_init checks entitlements that
		 * aren't loaded until subsequent calls (including exec_resettextvp).
		 */
		error = proc_transstart(p, 0, 0);
	}

	if (!error) {
		task_bank_init(new_task);
		proc_transend(p, 0);

		// Don't inherit crash behavior across exec, but preserve crash behavior from bootargs
		p->p_crash_behavior = 0;
		p->p_crash_behavior_deadline = 0;
#if (DEVELOPMENT || DEBUG)
		set_crash_behavior_from_bootarg(p);
#endif

#if __arm64__
		proc_footprint_entitlement_hacks(p, new_task);
#endif /* __arm64__ */

#if XNU_TARGET_OS_OSX
		if (IOTaskHasEntitlement(new_task, SINGLE_JIT_ENTITLEMENT)) {
			vm_map_single_jit(get_task_map(new_task));
		}
#endif /* XNU_TARGET_OS_OSX */

		/* Sever any extant thread affinity */
		thread_affinity_exec(current_thread());

		/* Inherit task role from old task to new task for exec */
		proc_inherit_task_role(new_task, old_task);

		thread_t main_thread = imgp->ip_new_thread;

		task_set_main_thread_qos(new_task, main_thread);

#if __has_feature(ptrauth_calls)
		task_set_pac_exception_fatal_flag(new_task);
#endif /* __has_feature(ptrauth_calls) */
		task_set_jit_flags(new_task);

#if CONFIG_ARCADE
		/*
		 * Check to see if we need to trigger an arcade upcall AST now
		 * that the vnode has been reset on the task.
		 */
		arcade_prepare(new_task, imgp->ip_new_thread);
#endif /* CONFIG_ARCADE */

		proc_apply_jit_and_vm_policies(imgp, p, new_task);

		if (vm_darkwake_mode == TRUE) {
			/*
			 * This process is being launched when the system
			 * is in darkwake. So mark it specially. This will
			 * cause all its pages to be entered in the background Q.
			 */
			task_set_darkwake_mode(new_task, vm_darkwake_mode);
		}

#if CONFIG_DTRACE
		dtrace_thread_didexec(imgp->ip_new_thread);

		if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
			(*dtrace_proc_waitfor_hook)(p);
		}
#endif

#if CONFIG_AUDIT
		if (!error && AUDIT_ENABLED() && p) {
			/* Add the CDHash of the new process to the audit record */
			uint8_t *cdhash = cs_get_cdhash(p);
			if (cdhash) {
				AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
			}
		}
#endif
	} else {
		DTRACE_PROC1(exec__failure, int, error);
	}

exit_with_error:

	/* terminate the new task it if exec failed */
	if (new_task != NULL && task_is_exec_copy(new_task)) {
		task_terminate_internal(new_task);
	}

	if (imgp != NULL) {
		/* Clear the initial wait on the thread transferring watchports */
		if (imgp->ip_new_thread) {
			task_clear_return_wait(get_threadtask(imgp->ip_new_thread), TCRW_CLEAR_INITIAL_WAIT);
		}

		/* Transfer the watchport boost to new task */
		if (!error) {
			task_transfer_turnstile_watchports(old_task,
			    new_task, imgp->ip_new_thread);
		}
		/*
		 * Do not terminate the current task, if proc_exec_switch_task did not
		 * switch the tasks, terminating the current task without the switch would
		 * result in losing the SIGKILL status.
		 */
		if (task_did_exec(old_task)) {
			/* Terminate the current task, since exec will start in new task */
			task_terminate_internal(old_task);
		}

		/* Release the thread ref returned by cloneproc */
		if (imgp->ip_new_thread) {
			/* clear the exec complete flag if there is an error before point of no-return */
			uint32_t clearwait_flags = TCRW_CLEAR_FINAL_WAIT;
			if (!exec_done && error != 0) {
				clearwait_flags |= TCRW_CLEAR_EXEC_COMPLETE;
			}
			/* wake up the new exec thread */
			task_clear_return_wait(get_threadtask(imgp->ip_new_thread), clearwait_flags);
			thread_deallocate(imgp->ip_new_thread);
			imgp->ip_new_thread = NULL;
		}
	}

	/* Release the ref returned by fork_create_child */
	if (new_task) {
		task_deallocate(new_task);
		new_task = NULL;
	}

	if (should_release_proc_ref) {
		proc_rele(p);
	}

	uthread_set_exec_data(current_uthread(), NULL);
	kfree_type(typeof(*__execve_data), __execve_data);

	if (inherit != NULL) {
		ipc_importance_release(inherit);
	}

	return error;
}
6186
6187
6188 /*
6189 * copyinptr
6190 *
6191 * Description: Copy a pointer in from user space to a user_addr_t in kernel
6192 * space, based on 32/64 bitness of the user space
6193 *
6194 * Parameters: froma User space address
6195 * toptr Address of kernel space user_addr_t
6196 * ptr_size 4/8, based on 'froma' address space
6197 *
6198 * Returns: 0 Success
6199 * EFAULT Bad 'froma'
6200 *
6201 * Implicit returns:
6202 * *ptr_size Modified
6203 */
6204 static int
copyinptr(user_addr_t froma,user_addr_t * toptr,int ptr_size)6205 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
6206 {
6207 int error;
6208
6209 if (ptr_size == 4) {
6210 /* 64 bit value containing 32 bit address */
6211 unsigned int i = 0;
6212
6213 error = copyin(froma, &i, 4);
6214 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
6215 } else {
6216 error = copyin(froma, toptr, 8);
6217 }
6218 return error;
6219 }
6220
6221
6222 /*
6223 * copyoutptr
6224 *
6225 * Description: Copy a pointer out from a user_addr_t in kernel space to
6226 * user space, based on 32/64 bitness of the user space
6227 *
6228 * Parameters: ua User space address to copy to
6229 * ptr Address of kernel space user_addr_t
6230 * ptr_size 4/8, based on 'ua' address space
6231 *
6232 * Returns: 0 Success
6233 * EFAULT Bad 'ua'
6234 *
6235 */
6236 static int
copyoutptr(user_addr_t ua,user_addr_t ptr,int ptr_size)6237 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
6238 {
6239 int error;
6240
6241 if (ptr_size == 4) {
6242 /* 64 bit value containing 32 bit address */
6243 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int, ua); /* SAFE */
6244
6245 error = copyout(&i, ptr, 4);
6246 } else {
6247 error = copyout(&ua, ptr, 8);
6248 }
6249 return error;
6250 }
6251
6252
6253 /*
6254 * exec_copyout_strings
6255 *
6256 * Copy out the strings segment to user space. The strings segment is put
6257 * on a preinitialized stack frame.
6258 *
6259 * Parameters: struct image_params * the image parameter block
6260 * int * a pointer to the stack offset variable
6261 *
6262 * Returns: 0 Success
 * !0 Failure: errno
6264 *
6265 * Implicit returns:
6266 * (*stackp) The stack offset, modified
6267 *
6268 * Note: The strings segment layout is backward, from the beginning
6269 * of the top of the stack to consume the minimal amount of
6270 * space possible; the returned stack pointer points to the
6271 * end of the area consumed (stacks grow downward).
6272 *
6273 * argc is an int; arg[i] are pointers; env[i] are pointers;
6274 * the 0's are (void *)NULL's
6275 *
6276 * The stack frame layout is:
6277 *
6278 * +-------------+ <- p->user_stack
6279 * | 16b |
6280 * +-------------+
6281 * | STRING AREA |
6282 * | : |
6283 * | : |
6284 * | : |
6285 * +- -- -- -- --+
6286 * | PATH AREA |
6287 * +-------------+
6288 * | 0 |
6289 * +-------------+
6290 * | applev[n] |
6291 * +-------------+
6292 * :
6293 * :
6294 * +-------------+
6295 * | applev[1] |
6296 * +-------------+
6297 * | exec_path / |
6298 * | applev[0] |
6299 * +-------------+
6300 * | 0 |
6301 * +-------------+
6302 * | env[n] |
6303 * +-------------+
6304 * :
6305 * :
6306 * +-------------+
6307 * | env[0] |
6308 * +-------------+
6309 * | 0 |
6310 * +-------------+
6311 * | arg[argc-1] |
6312 * +-------------+
6313 * :
6314 * :
6315 * +-------------+
6316 * | arg[0] |
6317 * +-------------+
6318 * | argc |
6319 * sp-> +-------------+
6320 *
6321 * Although technically a part of the STRING AREA, we treat the PATH AREA as
6322 * a separate entity. This allows us to align the beginning of the PATH AREA
6323 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
6324 * which preceed it on the stack are properly aligned.
6325 */
__attribute__((noinline))
static int
exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
{
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
	int ptr_area_size;
	void *ptr_buffer_start, *ptr_buffer;
	size_t string_size;

	user_addr_t string_area; /* *argv[], *env[] */
	user_addr_t ptr_area;    /* argv[], env[], applev[] */
	user_addr_t argc_area;   /* argc */
	user_addr_t stack;
	int error;

	unsigned i;
	/*
	 * One descriptor per pointer segment (argv, envv, exec_path/applev[0],
	 * applev[1..n]); each names the first string, how many pointers to
	 * emit, and whether the segment is NULL-terminated on the stack.
	 */
	struct copyout_desc {
		char *start_string;
		int count;
#if CONFIG_DTRACE
		user_addr_t *dtrace_cookie;
#endif
		boolean_t null_term;
	} descriptors[] = {
		{
			.start_string = imgp->ip_startargv,
			.count = imgp->ip_argc,
#if CONFIG_DTRACE
			.dtrace_cookie = &p->p_dtrace_argv,
#endif
			.null_term = TRUE
		},
		{
			.start_string = imgp->ip_endargv,
			.count = imgp->ip_envc,
#if CONFIG_DTRACE
			.dtrace_cookie = &p->p_dtrace_envp,
#endif
			.null_term = TRUE
		},
		{
			.start_string = imgp->ip_strings,
			.count = 1,
#if CONFIG_DTRACE
			.dtrace_cookie = NULL,
#endif
			.null_term = FALSE
		},
		{
			.start_string = imgp->ip_endenvv,
			.count = imgp->ip_applec - 1, /* exec_path handled above */
#if CONFIG_DTRACE
			.dtrace_cookie = NULL,
#endif
			.null_term = TRUE
		}
	};

	stack = *stackp;

	/*
	 * All previous contributors to the string area
	 * should have aligned their sub-area
	 */
	if (imgp->ip_strspace % ptr_size != 0) {
		error = EINVAL;
		goto bad;
	}

	/* Grow the stack down for the strings we've been building up */
	string_size = imgp->ip_strendp - imgp->ip_strings;
	stack -= string_size;
	string_area = stack;

	/*
	 * Need room for one pointer for each string, plus
	 * one for the NULLs terminating the argv, envv, and apple areas.
	 */
	ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) * ptr_size;
	stack -= ptr_area_size;
	ptr_area = stack;

	/* We'll construct all the pointer arrays in our string buffer,
	 * which we already know is aligned properly, and ip_argspace
	 * was used to verify we have enough space.
	 */
	ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;

	/*
	 * Need room for pointer-aligned argc slot.
	 */
	stack -= ptr_size;
	argc_area = stack;

	/*
	 * Record the size of the arguments area so that sysctl_procargs()
	 * can return the argument area without having to parse the arguments.
	 */
	proc_lock(p);
	p->p_argc = imgp->ip_argc;
	p->p_argslen = (int)(*stackp - string_area);
	proc_unlock(p);

	/* Return the initial stack address: the location of argc */
	*stackp = stack;

	/*
	 * Copy out the entire strings area.
	 */
	error = copyout(imgp->ip_strings, string_area,
	    string_size);
	if (error) {
		goto bad;
	}

	for (i = 0; i < sizeof(descriptors) / sizeof(descriptors[0]); i++) {
		char *cur_string = descriptors[i].start_string;
		int j;

#if CONFIG_DTRACE
		if (descriptors[i].dtrace_cookie) {
			proc_lock(p);
			*descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
			proc_unlock(p);
		}
#endif /* CONFIG_DTRACE */

		/*
		 * For each segment (argv, envv, applev), copy as many pointers as requested
		 * to our pointer buffer.
		 */
		for (j = 0; j < descriptors[i].count; j++) {
			/* User-space address of this string within the copied-out area */
			user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);

			/* Copy out the pointer to the current string. Alignment has been verified */
			if (ptr_size == 8) {
				*(uint64_t *)ptr_buffer = (uint64_t)cur_address;
			} else {
				*(uint32_t *)ptr_buffer = (uint32_t)cur_address;
			}

			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
			cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
		}

		if (descriptors[i].null_term) {
			/* Terminate this pointer segment with a NULL entry */
			if (ptr_size == 8) {
				*(uint64_t *)ptr_buffer = 0ULL;
			} else {
				*(uint32_t *)ptr_buffer = 0;
			}

			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
		}
	}

	/*
	 * Copy out all our pointer arrays in bulk.
	 */
	error = copyout(ptr_buffer_start, ptr_area,
	    ptr_area_size);
	if (error) {
		goto bad;
	}

	/* argc (int32, stored in a ptr_size area) */
	error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
	if (error) {
		goto bad;
	}

bad:
	return error;
}
6501
6502
6503 /*
6504 * exec_extract_strings
6505 *
6506 * Copy arguments and environment from user space into work area; we may
6507 * have already copied some early arguments into the work area, and if
 * so, any arguments copied in are appended to those already there.
6509 * This function is the primary manipulator of ip_argspace, since
6510 * these are the arguments the client of execve(2) knows about. After
6511 * each argv[]/envv[] string is copied, we charge the string length
6512 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
6513 * full preflight the arg list size.
6514 *
6515 * Parameters: struct image_params * the image parameter block
6516 *
6517 * Returns: 0 Success
6518 * !0 Failure: errno
6519 *
6520 * Implicit returns;
6521 * (imgp->ip_argc) Count of arguments, updated
6522 * (imgp->ip_envc) Count of environment strings, updated
6523 * (imgp->ip_argspace) Count of remaining of NCARGS
6524 * (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
6525 *
6526 *
6527 * Note: The argument and environment vectors are user space pointers
6528 * to arrays of user space pointers.
6529 */
__attribute__((noinline))
static int
exec_extract_strings(struct image_params *imgp)
{
	int error = 0;
	/* ptr_size: pointer width of the CALLING image; new_ptr_size: of the new image */
	int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT_ADDR) ? 8 : 4;
	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
	user_addr_t argv = imgp->ip_user_argv;
	user_addr_t envv = imgp->ip_user_envv;

	/*
	 * Adjust space reserved for the path name by however much padding it
	 * needs. Doing this here since we didn't know if this would be a 32-
	 * or 64-bit process back in exec_save_path.
	 */
	while (imgp->ip_strspace % new_ptr_size != 0) {
		*imgp->ip_strendp++ = '\0';
		imgp->ip_strspace--;
		/* imgp->ip_argspace--; not counted towards exec args total */
	}

	/*
	 * From now on, we start attributing string space to ip_argspace
	 */
	imgp->ip_startargv = imgp->ip_strendp;
	imgp->ip_argc = 0;

	if ((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
		user_addr_t arg;
		char *argstart, *ch;

		/* First, the arguments in the "#!" string are tokenized and extracted. */
		argstart = imgp->ip_interp_buffer;
		while (argstart) {
			/* Scan to the end of the current token */
			ch = argstart;
			while (*ch && !IS_WHITESPACE(*ch)) {
				ch++;
			}

			if (*ch == '\0') {
				/* last argument, no need to NUL-terminate */
				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
				argstart = NULL;
			} else {
				/* NUL-terminate */
				*ch = '\0';
				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);

				/*
				 * Find the next string. We know spaces at the end of the string have already
				 * been stripped.
				 */
				argstart = ch + 1;
				while (IS_WHITESPACE(*argstart)) {
					argstart++;
				}
			}

			/* Error-check, regardless of whether this is the last interpreter arg or not */
			if (error) {
				goto bad;
			}
			if (imgp->ip_argspace < new_ptr_size) {
				error = E2BIG;
				goto bad;
			}
			imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
			imgp->ip_argc++;
		}

		if (argv != 0LL) {
			/*
			 * If we are running an interpreter, replace the av[0] that was
			 * passed to execve() with the path name that was
			 * passed to execve() for interpreters which do not use the PATH
			 * to locate their script arguments.
			 */
			error = copyinptr(argv, &arg, ptr_size);
			if (error) {
				goto bad;
			}
			if (arg != 0LL) {
				argv += ptr_size; /* consume without using */
			}
		}

		if (imgp->ip_interp_sugid_fd != -1) {
			/*
			 * For setuid scripts, pass the already-opened fd rather than
			 * the pathname, to close the re-open race window.
			 */
			char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
			snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
			error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
		} else {
			error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
		}

		if (error) {
			goto bad;
		}
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
		imgp->ip_argc++;
	}

	/* Copy in the remaining argv[] strings from user space */
	while (argv != 0LL) {
		user_addr_t arg;

		error = copyinptr(argv, &arg, ptr_size);
		if (error) {
			goto bad;
		}

		if (arg == 0LL) {
			break;
		}

		argv += ptr_size;

		/*
		 * av[n...] = arg[n]
		 */
		error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
		if (error) {
			goto bad;
		}
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
		imgp->ip_argc++;
	}

	/* Save space for argv[] NULL terminator */
	if (imgp->ip_argspace < new_ptr_size) {
		error = E2BIG;
		goto bad;
	}
	imgp->ip_argspace -= new_ptr_size;

	/* Note where the args ends and env begins. */
	imgp->ip_endargv = imgp->ip_strendp;
	imgp->ip_envc = 0;

	/* Now, get the environment */
	while (envv != 0LL) {
		user_addr_t env;

		error = copyinptr(envv, &env, ptr_size);
		if (error) {
			goto bad;
		}

		envv += ptr_size;
		if (env == 0LL) {
			break;
		}
		/*
		 * av[n...] = env[n]
		 */
		error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
		if (error) {
			goto bad;
		}
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
		imgp->ip_envc++;
	}

	/* Save space for envv[] NULL terminator */
	if (imgp->ip_argspace < new_ptr_size) {
		error = E2BIG;
		goto bad;
	}
	imgp->ip_argspace -= new_ptr_size;

	/* Align the tail of the combined argv+envv area */
	while (imgp->ip_strspace % new_ptr_size != 0) {
		if (imgp->ip_argspace < 1) {
			error = E2BIG;
			goto bad;
		}
		*imgp->ip_strendp++ = '\0';
		imgp->ip_strspace--;
		imgp->ip_argspace--;
	}

	/* Note where the envv ends and applev begins. */
	imgp->ip_endenvv = imgp->ip_strendp;

	/*
	 * From now on, we are no longer charging argument
	 * space to ip_argspace.
	 */

bad:
	return error;
}
6732
6733 /*
6734 * Libc has an 8-element array set up for stack guard values. It only fills
6735 * in one of those entries, and both gcc and llvm seem to use only a single
6736 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
6737 * do the work to construct them.
6738 */
6739 #define GUARD_VALUES 1
6740 #define GUARD_KEY "stack_guard="
6741
6742 /*
6743 * System malloc needs some entropy when it is initialized.
6744 */
6745 #define ENTROPY_VALUES 2
6746 #define ENTROPY_KEY "malloc_entropy="
6747
6748 /*
6749 * libplatform needs a random pointer-obfuscation value when it is initialized.
6750 */
6751 #define PTR_MUNGE_VALUES 1
6752 #define PTR_MUNGE_KEY "ptr_munge="
6753
6754 /*
6755 * System malloc engages nanozone for UIAPP.
6756 */
6757 #define NANO_ENGAGE_KEY "MallocNanoZone=1"
6758
6759 /*
6760 * Used to pass experiment flags up to libmalloc.
6761 */
6762 #define LIBMALLOC_EXPERIMENT_FACTORS_KEY "MallocExperiment="
6763
6764 /*
6765 * Passes information about hardened heap/"hardened runtime" entitlements to libsystem/libmalloc
6766 */
6767 #define HARDENED_RUNTIME_KEY "HardenedRuntime="
6768
6769 #define PFZ_KEY "pfz="
6770 extern user32_addr_t commpage_text32_location;
6771 extern user64_addr_t commpage_text64_location;
6772
6773 extern uuid_string_t bootsessionuuid_string;
6774 static TUNABLE(uint32_t, exe_boothash_salt, "exe_boothash_salt", 0);
6775
6776 __startup_func
6777 static void
exe_boothash_salt_generate(void)6778 exe_boothash_salt_generate(void)
6779 {
6780 if (!PE_parse_boot_argn("exe_boothash_salt", NULL, 0)) {
6781 read_random(&exe_boothash_salt, sizeof(exe_boothash_salt));
6782 }
6783 }
6784 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, exe_boothash_salt_generate);
6785
6786
6787 #define MAIN_STACK_VALUES 4
6788 #define MAIN_STACK_KEY "main_stack="
6789
6790 #define FSID_KEY "executable_file="
6791 #define DYLD_FSID_KEY "dyld_file="
6792 #define CDHASH_KEY "executable_cdhash="
6793 #define DYLD_FLAGS_KEY "dyld_flags="
6794 #define SUBSYSTEM_ROOT_PATH_KEY "subsystem_root_path="
6795 #define APP_BOOT_SESSION_KEY "executable_boothash="
6796 #if __has_feature(ptrauth_calls)
6797 #define PTRAUTH_DISABLED_FLAG "ptrauth_disabled=1"
6798 #define DYLD_ARM64E_ABI_KEY "arm64e_abi="
6799 #endif /* __has_feature(ptrauth_calls) */
6800 #define MAIN_TH_PORT_KEY "th_port="
6801
6802 #define FSID_MAX_STRING "0x1234567890abcdef,0x1234567890abcdef"
6803
6804 #define HEX_STR_LEN 18 // 64-bit hex value "0x0123456701234567"
6805 #define HEX_STR_LEN32 10 // 32-bit hex value "0x01234567"
6806
6807 #if XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES
6808 #define VM_FORCE_4K_PAGES_KEY "vm_force_4k_pages=1"
6809 #endif /* XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES */
6810
6811 static int
exec_add_entropy_key(struct image_params * imgp,const char * key,int values,boolean_t embedNUL)6812 exec_add_entropy_key(struct image_params *imgp,
6813 const char *key,
6814 int values,
6815 boolean_t embedNUL)
6816 {
6817 const int limit = 8;
6818 uint64_t entropy[limit];
6819 char str[strlen(key) + (HEX_STR_LEN + 1) * limit + 1];
6820 if (values > limit) {
6821 values = limit;
6822 }
6823
6824 read_random(entropy, sizeof(entropy[0]) * values);
6825
6826 if (embedNUL) {
6827 entropy[0] &= ~(0xffull << 8);
6828 }
6829
6830 int len = scnprintf(str, sizeof(str), "%s0x%llx", key, entropy[0]);
6831 size_t remaining = sizeof(str) - len;
6832 for (int i = 1; i < values && remaining > 0; ++i) {
6833 size_t start = sizeof(str) - remaining;
6834 len = scnprintf(&str[start], remaining, ",0x%llx", entropy[i]);
6835 remaining -= len;
6836 }
6837
6838 return exec_add_user_string(imgp, CAST_USER_ADDR_T(str), UIO_SYSSPACE, FALSE);
6839 }
6840
6841 /*
6842 * Build up the contents of the apple[] string vector
6843 */
6844 #if (DEVELOPMENT || DEBUG)
6845 extern uint64_t dyld_flags;
6846 #endif
6847
6848 #if __has_feature(ptrauth_calls)
6849 static inline bool
is_arm64e_running_as_arm64(const struct image_params * imgp)6850 is_arm64e_running_as_arm64(const struct image_params *imgp)
6851 {
6852 return (imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E &&
6853 (imgp->ip_flags & IMGPF_NOJOP);
6854 }
6855 #endif /* __has_feature(ptrauth_calls) */
6856
6857 _Atomic uint64_t libmalloc_experiment_factors = 0;
6858
6859 static int
exec_add_apple_strings(struct image_params * imgp,const load_result_t * load_result,task_t task)6860 exec_add_apple_strings(struct image_params *imgp,
6861 const load_result_t *load_result, task_t task)
6862 {
6863 int error;
6864 int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
6865 thread_t new_thread;
6866 ipc_port_t sright;
6867 uint64_t local_experiment_factors = 0;
6868
6869 /* exec_save_path stored the first string */
6870 imgp->ip_applec = 1;
6871
6872 /* adding the pfz string */
6873 {
6874 char pfz_string[strlen(PFZ_KEY) + HEX_STR_LEN + 1];
6875
6876 if (img_ptr_size == 8) {
6877 __assert_only size_t ret = snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%llx", commpage_text64_location);
6878 assert(ret < sizeof(pfz_string));
6879 } else {
6880 snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%x", commpage_text32_location);
6881 }
6882 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), UIO_SYSSPACE, FALSE);
6883 if (error) {
6884 printf("Failed to add the pfz string with error %d\n", error);
6885 goto bad;
6886 }
6887 imgp->ip_applec++;
6888 }
6889
6890 /* adding the NANO_ENGAGE_KEY key */
6891 if (imgp->ip_px_sa) {
6892 struct _posix_spawnattr* psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
6893 int proc_flags = psa->psa_flags;
6894
6895 if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
6896 const char *nano_string = NANO_ENGAGE_KEY;
6897 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(nano_string), UIO_SYSSPACE, FALSE);
6898 if (error) {
6899 goto bad;
6900 }
6901 imgp->ip_applec++;
6902 }
6903 }
6904
6905 /*
6906 * Supply libc with a collection of random values to use when
6907 * implementing -fstack-protector.
6908 *
6909 * (The first random string always contains an embedded NUL so that
6910 * __stack_chk_guard also protects against C string vulnerabilities)
6911 */
6912 error = exec_add_entropy_key(imgp, GUARD_KEY, GUARD_VALUES, TRUE);
6913 if (error) {
6914 goto bad;
6915 }
6916 imgp->ip_applec++;
6917
6918 /*
6919 * Supply libc with entropy for system malloc.
6920 */
6921 error = exec_add_entropy_key(imgp, ENTROPY_KEY, ENTROPY_VALUES, FALSE);
6922 if (error) {
6923 goto bad;
6924 }
6925 imgp->ip_applec++;
6926
6927 /*
6928 * Supply libpthread & libplatform with a random value to use for pointer
6929 * obfuscation.
6930 */
6931 error = exec_add_entropy_key(imgp, PTR_MUNGE_KEY, PTR_MUNGE_VALUES, FALSE);
6932 if (error) {
6933 goto bad;
6934 }
6935 imgp->ip_applec++;
6936
6937 /*
6938 * Add MAIN_STACK_KEY: Supplies the address and size of the main thread's
6939 * stack if it was allocated by the kernel.
6940 *
6941 * The guard page is not included in this stack size as libpthread
6942 * expects to add it back in after receiving this value.
6943 */
6944 if (load_result->unixproc) {
6945 char stack_string[strlen(MAIN_STACK_KEY) + (HEX_STR_LEN + 1) * MAIN_STACK_VALUES + 1];
6946 snprintf(stack_string, sizeof(stack_string),
6947 MAIN_STACK_KEY "0x%llx,0x%llx,0x%llx,0x%llx",
6948 (uint64_t)load_result->user_stack,
6949 (uint64_t)load_result->user_stack_size,
6950 (uint64_t)load_result->user_stack_alloc,
6951 (uint64_t)load_result->user_stack_alloc_size);
6952 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(stack_string), UIO_SYSSPACE, FALSE);
6953 if (error) {
6954 goto bad;
6955 }
6956 imgp->ip_applec++;
6957 }
6958
6959 if (imgp->ip_vattr) {
6960 uint64_t fsid = vnode_get_va_fsid(imgp->ip_vattr);
6961 uint64_t fsobjid = imgp->ip_vattr->va_fileid;
6962
6963 char fsid_string[strlen(FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
6964 snprintf(fsid_string, sizeof(fsid_string),
6965 FSID_KEY "0x%llx,0x%llx", fsid, fsobjid);
6966 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
6967 if (error) {
6968 goto bad;
6969 }
6970 imgp->ip_applec++;
6971 }
6972
6973 if (imgp->ip_dyld_fsid || imgp->ip_dyld_fsobjid) {
6974 char fsid_string[strlen(DYLD_FSID_KEY) + strlen(FSID_MAX_STRING) + 1];
6975 snprintf(fsid_string, sizeof(fsid_string),
6976 DYLD_FSID_KEY "0x%llx,0x%llx", imgp->ip_dyld_fsid, imgp->ip_dyld_fsobjid);
6977 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
6978 if (error) {
6979 goto bad;
6980 }
6981 imgp->ip_applec++;
6982 }
6983
6984 uint8_t cdhash[SHA1_RESULTLEN];
6985 int cdhash_errror = ubc_cs_getcdhash(imgp->ip_vp, imgp->ip_arch_offset, cdhash, NULL);
6986 if (cdhash_errror == 0) {
6987 char hash_string[strlen(CDHASH_KEY) + 2 * SHA1_RESULTLEN + 1];
6988 strncpy(hash_string, CDHASH_KEY, sizeof(hash_string));
6989 char *p = hash_string + sizeof(CDHASH_KEY) - 1;
6990 for (int i = 0; i < SHA1_RESULTLEN; i++) {
6991 snprintf(p, 3, "%02x", (int) cdhash[i]);
6992 p += 2;
6993 }
6994 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hash_string), UIO_SYSSPACE, FALSE);
6995 if (error) {
6996 goto bad;
6997 }
6998 imgp->ip_applec++;
6999
7000 /* hash together cd-hash and boot-session-uuid */
7001 uint8_t sha_digest[SHA256_DIGEST_LENGTH];
7002 SHA256_CTX sha_ctx;
7003 SHA256_Init(&sha_ctx);
7004 SHA256_Update(&sha_ctx, &exe_boothash_salt, sizeof(exe_boothash_salt));
7005 SHA256_Update(&sha_ctx, bootsessionuuid_string, sizeof(bootsessionuuid_string));
7006 SHA256_Update(&sha_ctx, cdhash, sizeof(cdhash));
7007 SHA256_Final(sha_digest, &sha_ctx);
7008 char app_boot_string[strlen(APP_BOOT_SESSION_KEY) + 2 * SHA1_RESULTLEN + 1];
7009 strncpy(app_boot_string, APP_BOOT_SESSION_KEY, sizeof(app_boot_string));
7010 char *s = app_boot_string + sizeof(APP_BOOT_SESSION_KEY) - 1;
7011 for (int i = 0; i < SHA1_RESULTLEN; i++) {
7012 snprintf(s, 3, "%02x", (int) sha_digest[i]);
7013 s += 2;
7014 }
7015 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(app_boot_string), UIO_SYSSPACE, FALSE);
7016 if (error) {
7017 goto bad;
7018 }
7019 imgp->ip_applec++;
7020 }
7021 #if (DEVELOPMENT || DEBUG)
7022 if (dyld_flags) {
7023 char dyld_flags_string[strlen(DYLD_FLAGS_KEY) + HEX_STR_LEN + 1];
7024 snprintf(dyld_flags_string, sizeof(dyld_flags_string), DYLD_FLAGS_KEY "0x%llx", dyld_flags);
7025 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_flags_string), UIO_SYSSPACE, FALSE);
7026 if (error) {
7027 goto bad;
7028 }
7029 imgp->ip_applec++;
7030 }
7031 #endif
7032 if (imgp->ip_subsystem_root_path) {
7033 size_t buffer_len = MAXPATHLEN + strlen(SUBSYSTEM_ROOT_PATH_KEY);
7034 char subsystem_root_path_string[buffer_len];
7035 int required_len = snprintf(subsystem_root_path_string, buffer_len, SUBSYSTEM_ROOT_PATH_KEY "%s", imgp->ip_subsystem_root_path);
7036
7037 if (((size_t)required_len >= buffer_len) || (required_len < 0)) {
7038 error = ENAMETOOLONG;
7039 goto bad;
7040 }
7041
7042 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(subsystem_root_path_string), UIO_SYSSPACE, FALSE);
7043 if (error) {
7044 goto bad;
7045 }
7046
7047 imgp->ip_applec++;
7048 }
7049 #if __has_feature(ptrauth_calls)
7050 if (is_arm64e_running_as_arm64(imgp)) {
7051 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(PTRAUTH_DISABLED_FLAG), UIO_SYSSPACE, FALSE);
7052 if (error) {
7053 goto bad;
7054 }
7055
7056 imgp->ip_applec++;
7057 }
7058 #endif /* __has_feature(ptrauth_calls) */
7059
7060
7061 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
7062 {
7063 char dyld_abi_string[strlen(DYLD_ARM64E_ABI_KEY) + 8];
7064 strlcpy(dyld_abi_string, DYLD_ARM64E_ABI_KEY, sizeof(dyld_abi_string));
7065 bool allowAll = bootarg_arm64e_preview_abi;
7066 strlcat(dyld_abi_string, (allowAll ? "all" : "os"), sizeof(dyld_abi_string));
7067 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(dyld_abi_string), UIO_SYSSPACE, FALSE);
7068 if (error) {
7069 goto bad;
7070 }
7071
7072 imgp->ip_applec++;
7073 }
7074 #endif
7075 /*
7076 * Add main thread mach port name
7077 * +1 uref on main thread port, this ref will be extracted by libpthread in __pthread_init
7078 * and consumed in _bsdthread_terminate. Leaking the main thread port name if not linked
7079 * against libpthread.
7080 */
7081 if ((new_thread = imgp->ip_new_thread) != THREAD_NULL) {
7082 thread_reference(new_thread);
7083 sright = convert_thread_to_port_immovable(new_thread);
7084 task_t new_task = get_threadtask(new_thread);
7085 mach_port_name_t name = ipc_port_copyout_send_pinned(sright, get_task_ipcspace(new_task));
7086 char port_name_hex_str[strlen(MAIN_TH_PORT_KEY) + HEX_STR_LEN32 + 1];
7087 snprintf(port_name_hex_str, sizeof(port_name_hex_str), MAIN_TH_PORT_KEY "0x%x", name);
7088
7089 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(port_name_hex_str), UIO_SYSSPACE, FALSE);
7090 if (error) {
7091 goto bad;
7092 }
7093 imgp->ip_applec++;
7094 }
7095
7096 #if XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES
7097 if (imgp->ip_px_sa != NULL) {
7098 struct _posix_spawnattr* psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
7099 if (psa->psa_flags & _POSIX_SPAWN_FORCE_4K_PAGES) {
7100 const char *vm_force_4k_string = VM_FORCE_4K_PAGES_KEY;
7101 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(vm_force_4k_string), UIO_SYSSPACE, FALSE);
7102 if (error) {
7103 goto bad;
7104 }
7105 imgp->ip_applec++;
7106 }
7107 }
7108 #endif /* XNU_TARGET_OS_OSX && _POSIX_SPAWN_FORCE_4K_PAGES && PMAP_CREATE_FORCE_4K_PAGES */
7109
7110 /* adding the libmalloc experiment string */
7111 local_experiment_factors = os_atomic_load_wide(&libmalloc_experiment_factors, relaxed);
7112 if (__improbable(local_experiment_factors != 0)) {
7113 char libmalloc_experiment_factors_string[strlen(LIBMALLOC_EXPERIMENT_FACTORS_KEY) + HEX_STR_LEN + 1];
7114
7115 snprintf(
7116 libmalloc_experiment_factors_string,
7117 sizeof(libmalloc_experiment_factors_string),
7118 LIBMALLOC_EXPERIMENT_FACTORS_KEY "0x%llx",
7119 local_experiment_factors);
7120 error = exec_add_user_string(
7121 imgp,
7122 CAST_USER_ADDR_T(libmalloc_experiment_factors_string),
7123 UIO_SYSSPACE,
7124 FALSE);
7125 if (error) {
7126 printf("Failed to add the libmalloc experiment factors string with error %d\n", error);
7127 goto bad;
7128 }
7129 imgp->ip_applec++;
7130 }
7131
7132 /*
7133 * Push down the task security configuration. To reduce confusion when userland parses the information
7134 * still push an empty security configuration if nothing is active.
7135 */
7136 {
7137 #define SECURITY_CONFIG_KEY "security_config="
7138 char security_config_str[strlen(SECURITY_CONFIG_KEY) + HEX_STR_LEN + 1];
7139
7140 snprintf(security_config_str, sizeof(security_config_str),
7141 SECURITY_CONFIG_KEY "0x%x", task_get_security_config(task));
7142
7143 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(security_config_str), UIO_SYSSPACE, FALSE);
7144 if (error) {
7145 printf("Failed to add the security config string with error %d\n", error);
7146 goto bad;
7147 }
7148 imgp->ip_applec++;
7149 }
7150
7151
7152 #if HAS_MTE || HAS_MTE_EMULATION_SHIMS
7153 if (task_has_sec(task)) {
7154 const char *sec_transition_shims = "has_sec_transition=1";
7155 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(sec_transition_shims), UIO_SYSSPACE, FALSE);
7156 if (error) {
7157 printf("Failed to add security translation shims notification\n");
7158 goto bad;
7159 }
7160
7161 imgp->ip_applec++;
7162
7163 /* Push down MTE-specific configuration options that allocators may be interested into. */
7164 #define SEC_TRANSITION_POLICY_KEY "sec_transition_policy="
7165
7166 char sec_transition_policy[strlen(SEC_TRANSITION_POLICY_KEY) + HEX_STR_LEN + 1];
7167
7168 snprintf(sec_transition_policy, sizeof(sec_transition_policy),
7169 SEC_TRANSITION_POLICY_KEY "0x%x", task_get_sec_policy(task));
7170
7171 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(sec_transition_policy), UIO_SYSSPACE, FALSE);
7172 if (error) {
7173 printf("Failed to add the security transition policy string with error %d\n", error);
7174 goto bad;
7175 }
7176 imgp->ip_applec++;
7177 }
7178 #endif /* HAS_MTE || HAS_MTE_EMULATION_SHIMS */
7179
7180
7181 if (load_result->hardened_browser) {
7182 const size_t HR_STRING_SIZE = sizeof(HARDENED_RUNTIME_KEY) + HR_FLAGS_NUM_NIBBLES + 2 + 1;
7183 char hardened_runtime[HR_STRING_SIZE];
7184 snprintf(hardened_runtime, HR_STRING_SIZE, HARDENED_RUNTIME_KEY"0x%x", load_result->hardened_browser);
7185 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hardened_runtime), UIO_SYSSPACE, FALSE);
7186 if (error) {
7187 printf("Failed to add hardened runtime flag with error %d\n", error);
7188 goto bad;
7189 }
7190 imgp->ip_applec++;
7191 }
7192 /* Align the tail of the combined applev area */
7193 while (imgp->ip_strspace % img_ptr_size != 0) {
7194 *imgp->ip_strendp++ = '\0';
7195 imgp->ip_strspace--;
7196 }
7197
7198 bad:
7199 return error;
7200 }
7201
7202 /*
7203 * exec_check_permissions
7204 *
7205 * Description: Verify that the file that is being attempted to be executed
 * is in fact allowed to be executed based on its POSIX file
7207 * permissions and other access control criteria
7208 *
7209 * Parameters: struct image_params * the image parameter block
7210 *
7211 * Returns: 0 Success
7212 * EACCES Permission denied
7213 * ENOEXEC Executable file format error
7214 * ETXTBSY Text file busy [misuse of error code]
7215 * vnode_getattr:???
7216 * vnode_authorize:???
7217 */
static int
exec_check_permissions(struct image_params *imgp)
{
	struct vnode *vp = imgp->ip_vp;
	struct vnode_attr *vap = imgp->ip_vattr;
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	int error;
	kauth_action_t action;

	/* Only allow execution of regular files */
	if (!vnode_isreg(vp)) {
		return EACCES;
	}

	/*
	 * Get the file attributes that we will be using here and elsewhere;
	 * the fetched attrs land in imgp->ip_vattr for later consumers.
	 */
	VATTR_INIT(vap);
	VATTR_WANTED(vap, va_uid);
	VATTR_WANTED(vap, va_gid);
	VATTR_WANTED(vap, va_mode);
	VATTR_WANTED(vap, va_fsid);
	VATTR_WANTED(vap, va_fsid64);
	VATTR_WANTED(vap, va_fileid);
	VATTR_WANTED(vap, va_data_size);
	if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0) {
		return error;
	}

	/*
	 * Ensure that at least one execute bit is on - otherwise root
	 * will always succeed, and we don't want that to happen unless
	 * the file really is executable.  Skipped when the mount has
	 * opaque (filesystem-managed) authorization.
	 */
	if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
		return EACCES;
	}

	/* Disallow zero length files */
	if (vap->va_data_size == 0) {
		return ENOEXEC;
	}

	imgp->ip_arch_offset = (user_size_t)0;
#if __LP64__
	imgp->ip_arch_size = vap->va_data_size;
#else
	/* On ILP32 kernels user_size_t cannot represent files larger than 4GB */
	if (vap->va_data_size > UINT32_MAX) {
		return ENOEXEC;
	}
	imgp->ip_arch_size = (user_size_t)vap->va_data_size;
#endif

	/* Disable setuid-ness for traced programs or if MNT_NOSUID */
	if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) {
		vap->va_mode &= ~(VSUID | VSGID);
	}

	/*
	 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
	 * flags for setuid/setgid binaries.
	 */
	if (vap->va_mode & (VSUID | VSGID)) {
		imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
	}

#if CONFIG_MACF
	/* Give MAC policies a chance to veto the exec before authorization. */
	error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
	if (error) {
		return error;
	}
#endif

	/* Check for execute permission */
	action = KAUTH_VNODE_EXECUTE;
	/* Traced images must also be readable */
	if (p->p_lflag & P_LTRACED) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0) {
		return error;
	}

#if 0
	/* Don't let it run if anyone had it open for writing */
	vnode_lock(vp);
	if (vp->v_writecount) {
		panic("going to return ETXTBSY %x", vp);
		vnode_unlock(vp);
		return ETXTBSY;
	}
	vnode_unlock(vp);
#endif

	/* XXX May want to indicate to underlying FS that vnode is open */

	return error;
}
7314
7315
7316 /*
7317 * exec_handle_sugid
7318 *
7319 * Initially clear the P_SUGID in the process flags; if an SUGID process is
7320 * exec'ing a non-SUGID image, then this is the point of no return.
7321 *
7322 * If the image being activated is SUGID, then replace the credential with a
7323 * copy, disable tracing (unless the tracing process is root), reset the
7324 * mach task port to revoke it, set the P_SUGID bit,
7325 *
7326 * If the saved user and group ID will be changing, then make sure it happens
7327 * to a new credential, rather than a shared one.
7328 *
7329 * Set the security token (this is probably obsolete, given that the token
7330 * should not technically be separate from the credential itself).
7331 *
7332 * Parameters: struct image_params * the image parameter block
7333 *
 * Returns:	int		Typically 0; may return an error encountered
 *				while repopulating stdin/stdout/stderr for a
 *				setuid image (best-effort)
7335 *
7336 * Implicit returns:
7337 * <process credential> Potentially modified/replaced
7338 * <task port> Potentially revoked
7339 * <process flags> P_SUGID bit potentially modified
7340 * <security token> Potentially modified
7341 */
__attribute__((noinline))
static int
exec_handle_sugid(struct image_params *imgp)
{
	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
	kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
	int i;
	int leave_sugid_clear = 0;	/* set by kauth_cred_ismember_gid below */
	int mac_reset_ipc = 0;
	int error = 0;
#if CONFIG_MACF
	int mac_transition, disjoint_cred = 0;
	int label_update_return = 0;

	/*
	 * Determine whether a call to update the MAC label will result in the
	 * credential changing.
	 *
	 * Note:	MAC policies which do not actually end up modifying
	 *		the label subsequently are strongly encouraged to
	 *		return 0 for this check, since a non-zero answer will
	 *		slow down the exec fast path for normal binaries.
	 */
	mac_transition = mac_cred_check_label_update_execve(
		imgp->ip_vfs_context,
		imgp->ip_vp,
		imgp->ip_arch_offset,
		imgp->ip_scriptvp,
		imgp->ip_scriptlabelp,
		imgp->ip_execlabelp,
		p,
		&imgp->ip_px_smpx);
#endif

	/* Initially clear P_SUGID; re-set below only when required. */
	OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);

	/*
	 * Order of the following is important; group checks must go last,
	 * as we use the success of the 'ismember' check combined with the
	 * failure of the explicit match to indicate that we will be setting
	 * the egid of the process even though the new process did not
	 * require VSUID/VSGID bits in order for it to set the new group as
	 * its egid.
	 *
	 * Note:	Technically, by this we are implying a call to
	 *		setegid() in the new process, rather than implying
	 *		it used its VSGID bit to set the effective group,
	 *		even though there is no code in that process to make
	 *		such a call.
	 */
	if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
	    kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
	    ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
	    ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
	    (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
#if CONFIG_MACF
		/* label for MAC transition and neither VSUID nor VSGID */
handle_mac_transition:
#endif

#if CONFIG_SETUID
		/*
		 * Replace the credential with a copy of itself if euid or
		 * egid change.
		 *
		 * Note:	setuid binaries will automatically opt out of
		 *		group resolver participation as a side effect
		 *		of this operation.  This is an intentional
		 *		part of the security model, which requires a
		 *		participating credential be established by
		 *		escalating privilege, setting up all other
		 *		aspects of the credential including whether
		 *		or not to participate in external group
		 *		membership resolution, then dropping their
		 *		effective privilege to that of the desired
		 *		final credential state.
		 *
		 * Modifications to p_ucred must be guarded using the
		 * proc's ucred lock. This prevents others from accessing
		 * a garbage credential.
		 */

		if (imgp->ip_origvattr->va_mode & VSUID) {
			kauth_cred_proc_update(p, PROC_SETTOKEN_NONE,
			    ^bool (kauth_cred_t parent __unused, kauth_cred_t model) {
				return kauth_cred_model_setresuid(model,
				KAUTH_UID_NONE,
				imgp->ip_origvattr->va_uid,
				imgp->ip_origvattr->va_uid,
				KAUTH_UID_NONE);
			});
		}

		if (imgp->ip_origvattr->va_mode & VSGID) {
			kauth_cred_proc_update(p, PROC_SETTOKEN_NONE,
			    ^bool (kauth_cred_t parent __unused, kauth_cred_t model) {
				return kauth_cred_model_setresgid(model,
				KAUTH_GID_NONE,
				imgp->ip_origvattr->va_gid,
				imgp->ip_origvattr->va_gid);
			});
		}
#endif /* CONFIG_SETUID */

#if CONFIG_MACF
		/*
		 * If a policy has indicated that it will transition the label,
		 * before making the call into the MAC policies, get a new
		 * duplicate credential, so they can modify it without
		 * modifying any others sharing it.
		 */
		if (mac_transition) {
			/*
			 * This hook may generate upcalls that require
			 * importance donation from the kernel.
			 * (23925818)
			 */
			thread_t thread = current_thread();
			thread_enable_send_importance(thread, TRUE);
			kauth_proc_label_update_execve(p,
			    imgp->ip_vfs_context,
			    imgp->ip_vp,
			    imgp->ip_arch_offset,
			    imgp->ip_scriptvp,
			    imgp->ip_scriptlabelp,
			    imgp->ip_execlabelp,
			    &imgp->ip_csflags,
			    &imgp->ip_px_smpx,
			    &disjoint_cred,            /* will be non zero if disjoint */
			    &label_update_return);
			thread_enable_send_importance(thread, FALSE);

			if (disjoint_cred) {
				/*
				 * If updating the MAC label resulted in a
				 * disjoint credential, flag that we need to
				 * set the P_SUGID bit.  This protects
				 * against debuggers being attached by an
				 * insufficiently privileged process onto the
				 * result of a transition to a more privileged
				 * credential.
				 */
				leave_sugid_clear = 0;
			}

			imgp->ip_mac_return = label_update_return;
		}

		mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);

#endif /* CONFIG_MACF */

		/*
		 * If 'leave_sugid_clear' is non-zero, then we passed the
		 * VSUID and MACF checks, and successfully determined that
		 * the previous cred was a member of the VSGID group, but
		 * that it was not the default at the time of the execve,
		 * and that the post-labelling credential was not disjoint.
		 * So we don't set the P_SUGID or reset mach ports and fds
		 * on the basis of simply running this code.
		 */
		if (mac_reset_ipc || !leave_sugid_clear) {
			/*
			 * Have mach reset the task and thread ports.
			 * We don't want anyone who had the ports before
			 * a setuid exec to be able to access/control the
			 * task/thread after.
			 */
			ipc_task_reset((imgp->ip_new_thread != NULL) ?
			    get_threadtask(imgp->ip_new_thread) : proc_task(p));
			ipc_thread_reset((imgp->ip_new_thread != NULL) ?
			    imgp->ip_new_thread : current_thread());
		}

		if (!leave_sugid_clear) {
			/*
			 * Flag the process as setuid.
			 */
			OSBitOrAtomic(P_SUGID, &p->p_flag);

			/*
			 * Radar 2261856; setuid security hole fix
			 * XXX For setuid processes, attempt to ensure that
			 * stdin, stdout, and stderr are already allocated.
			 * We do not want userland to accidentally allocate
			 * descriptors in this range which has implied meaning
			 * to libc.
			 */
			for (i = 0; i < 3; i++) {
				/* Skip fds that are already populated. */
				if (fp_get_noref_locked(p, i) != NULL) {
					continue;
				}

				/*
				 * Do the kernel equivalent of
				 *
				 *	if i == 0
				 *		(void) open("/dev/null", O_RDONLY);
				 *	else
				 *		(void) open("/dev/null", O_WRONLY);
				 */

				struct fileproc *fp;
				int indx;
				int flag;
				struct nameidata *ndp = NULL;

				if (i == 0) {
					flag = FREAD;
				} else {
					flag = FWRITE;
				}

				/*
				 * Best effort: on allocation failure keep trying
				 * the remaining descriptors (continue, not break).
				 */
				if ((error = falloc_exec(p, imgp->ip_vfs_context,
				    &fp, &indx)) != 0) {
					continue;
				}

				ndp = kalloc_type(struct nameidata,
				    Z_WAITOK | Z_ZERO | Z_NOFAIL);

				NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
				    CAST_USER_ADDR_T("/dev/null"),
				    imgp->ip_vfs_context);

				if ((error = vn_open(ndp, flag, 0)) != 0) {
					fp_free(p, indx, fp);
					kfree_type(struct nameidata, ndp);
					break;
				}

				struct fileglob *fg = fp->fp_glob;

				fg->fg_flag = flag;
				fg->fg_ops = &vnops;
				fp_set_data(fp, ndp->ni_vp);

				vnode_put(ndp->ni_vp);

				proc_fdlock(p);
				procfdtbl_releasefd(p, indx, NULL);
				fp_drop(p, indx, fp, 1);
				proc_fdunlock(p);

				kfree_type(struct nameidata, ndp);
			}
		}
	}
#if CONFIG_MACF
	else {
		/*
		 * We are here because we were told that the MAC label will
		 * be transitioned, and the binary is not VSUID or VSGID; to
		 * deal with this case, we could either duplicate a lot of
		 * code, or we can indicate we want to default the P_SUGID
		 * bit clear and jump back up.
		 */
		if (mac_transition) {
			leave_sugid_clear = 1;
			goto handle_mac_transition;
		}
	}

#endif /* CONFIG_MACF */

	/* Update the process' identity version and set the security token.
	 * Also, ensure we always see a modified identity version (rdar://129775819).
	 */
	int previous_pid_version = proc_get_ro(p)->p_idversion;
	int new_pid_version;
	do {
		/* Loop guards against the atomic increment wrapping to the old value. */
		new_pid_version = OSIncrementAtomic(&nextpidversion);
	} while (new_pid_version == previous_pid_version);
	proc_setpidversion(p, new_pid_version);
	task_set_uniqueid(proc_task(p));

	/*
	 * Implement the semantic where the effective user and group become
	 * the saved user and group in exec'ed programs.
	 */
	kauth_cred_proc_update(p, PROC_SETTOKEN_ALWAYS,
	    ^bool (kauth_cred_t parent __unused, kauth_cred_t model) {
		posix_cred_t pcred = posix_cred_get(model);

		if (pcred->cr_svuid == pcred->cr_uid &&
		pcred->cr_svgid == pcred->cr_gid) {
			return false;
		}

		pcred->cr_svuid = pcred->cr_uid;
		pcred->cr_svgid = pcred->cr_gid;
		return true;
	});

	return error;
}
7638
7639
7640 /*
7641 * create_unix_stack
7642 *
7643 * Description: Set the user stack address for the process to the provided
7644 * address. If a custom stack was not set as a result of the
7645 * load process (i.e. as specified by the image file for the
7646 * executable), then allocate the stack in the provided map and
7647 * set up appropriate guard pages for enforcing administrative
7648 * limits on stack growth, if they end up being needed.
7649 *
7650 * Parameters: p Process to set stack on
7651 * load_result Information from mach-o load commands
7652 * map Address map in which to allocate the new stack
7653 *
7654 * Returns: KERN_SUCCESS Stack successfully created
7655 * !KERN_SUCCESS Mach failure code
7656 */
7657 __attribute__((noinline))
7658 static kern_return_t
create_unix_stack(vm_map_t map,load_result_t * load_result,proc_t p)7659 create_unix_stack(vm_map_t map, load_result_t* load_result,
7660 proc_t p)
7661 {
7662 mach_vm_size_t size, prot_size;
7663 mach_vm_offset_t addr, prot_addr;
7664 kern_return_t kr;
7665
7666 mach_vm_address_t user_stack = load_result->user_stack;
7667
7668 proc_lock(p);
7669 p->user_stack = (uintptr_t)user_stack;
7670 if (load_result->custom_stack) {
7671 p->p_lflag |= P_LCUSTOM_STACK;
7672 }
7673 proc_unlock(p);
7674 if (vm_map_page_shift(map) < (int)PAGE_SHIFT) {
7675 DEBUG4K_LOAD("map %p user_stack 0x%llx custom %d user_stack_alloc_size 0x%llx\n", map, user_stack, load_result->custom_stack, load_result->user_stack_alloc_size);
7676 }
7677
7678 if (load_result->user_stack_alloc_size > 0) {
7679 /*
7680 * Allocate enough space for the maximum stack size we
7681 * will ever authorize and an extra page to act as
7682 * a guard page for stack overflows. For default stacks,
7683 * vm_initial_limit_stack takes care of the extra guard page.
7684 * Otherwise we must allocate it ourselves.
7685 */
7686 if (mach_vm_round_page_overflow(load_result->user_stack_alloc_size, &size)) {
7687 return KERN_INVALID_ARGUMENT;
7688 }
7689 addr = vm_map_trunc_page(load_result->user_stack - size,
7690 vm_map_page_mask(map));
7691 kr = mach_vm_allocate_kernel(map, &addr, size,
7692 VM_MAP_KERNEL_FLAGS_FIXED(.vm_tag = VM_MEMORY_STACK));
7693 if (kr != KERN_SUCCESS) {
7694 // Can't allocate at default location, try anywhere
7695 addr = 0;
7696 kr = mach_vm_allocate_kernel(map, &addr, size,
7697 VM_MAP_KERNEL_FLAGS_ANYWHERE(.vm_tag = VM_MEMORY_STACK));
7698 if (kr != KERN_SUCCESS) {
7699 return kr;
7700 }
7701
7702 user_stack = addr + size;
7703 load_result->user_stack = (user_addr_t)user_stack;
7704
7705 proc_lock(p);
7706 p->user_stack = (uintptr_t)user_stack;
7707 proc_unlock(p);
7708 }
7709
7710 load_result->user_stack_alloc = (user_addr_t)addr;
7711
7712 /*
7713 * And prevent access to what's above the current stack
7714 * size limit for this process.
7715 */
7716 if (load_result->user_stack_size == 0) {
7717 load_result->user_stack_size = proc_limitgetcur(p, RLIMIT_STACK);
7718 prot_size = vm_map_trunc_page(size - load_result->user_stack_size, vm_map_page_mask(map));
7719 } else {
7720 prot_size = PAGE_SIZE;
7721 }
7722
7723 prot_addr = addr;
7724 kr = mach_vm_protect(map,
7725 prot_addr,
7726 prot_size,
7727 FALSE,
7728 VM_PROT_NONE);
7729 if (kr != KERN_SUCCESS) {
7730 (void)mach_vm_deallocate(map, addr, size);
7731 return kr;
7732 }
7733 }
7734
7735 return KERN_SUCCESS;
7736 }
7737
7738 #include <sys/reboot.h>
7739
7740 /*
7741 * load_init_program_at_path
7742 *
7743 * Description: Load the "init" program; in most cases, this will be "launchd"
7744 *
7745 * Parameters: p Process to call execve() to create
7746 * the "init" program
7747 * scratch_addr Page in p, scratch space
7748 * path NULL terminated path
7749 *
7750 * Returns: KERN_SUCCESS Success
7751 * !KERN_SUCCESS See execve/mac_execve for error codes
7752 *
7753 * Notes: The process that is passed in is the first manufactured
7754 * process on the system, and gets here via bsd_ast() firing
7755 * for the first time. This is done to ensure that bsd_init()
7756 * has run to completion.
7757 *
7758 * The address map of the first manufactured process matches the
7759 * word width of the kernel. Once the self-exec completes, the
7760 * initproc might be different.
7761 */
7762 static int
load_init_program_at_path(proc_t p,user_addr_t scratch_addr,const char * path)7763 load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
7764 {
7765 int retval[2];
7766 int error;
7767 struct execve_args init_exec_args;
7768 user_addr_t argv0 = USER_ADDR_NULL, argv1 = USER_ADDR_NULL;
7769
7770 /*
7771 * Validate inputs and pre-conditions
7772 */
7773 assert(p);
7774 assert(scratch_addr);
7775 assert(path);
7776
7777 /*
7778 * Copy out program name.
7779 */
7780 size_t path_length = strlen(path) + 1;
7781 argv0 = scratch_addr;
7782 error = copyout(path, argv0, path_length);
7783 if (error) {
7784 return error;
7785 }
7786
7787 scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, sizeof(user_addr_t));
7788
7789 /*
7790 * Put out first (and only) argument, similarly.
7791 * Assumes everything fits in a page as allocated above.
7792 */
7793 if (boothowto & RB_SINGLE) {
7794 const char *init_args = "-s";
7795 size_t init_args_length = strlen(init_args) + 1;
7796
7797 argv1 = scratch_addr;
7798 error = copyout(init_args, argv1, init_args_length);
7799 if (error) {
7800 return error;
7801 }
7802
7803 scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, sizeof(user_addr_t));
7804 }
7805
7806 if (proc_is64bit(p)) {
7807 user64_addr_t argv64bit[3] = {};
7808
7809 argv64bit[0] = argv0;
7810 argv64bit[1] = argv1;
7811 argv64bit[2] = USER_ADDR_NULL;
7812
7813 error = copyout(argv64bit, scratch_addr, sizeof(argv64bit));
7814 if (error) {
7815 return error;
7816 }
7817 } else {
7818 user32_addr_t argv32bit[3] = {};
7819
7820 argv32bit[0] = (user32_addr_t)argv0;
7821 argv32bit[1] = (user32_addr_t)argv1;
7822 argv32bit[2] = USER_ADDR_NULL;
7823
7824 error = copyout(argv32bit, scratch_addr, sizeof(argv32bit));
7825 if (error) {
7826 return error;
7827 }
7828 }
7829
7830 /*
7831 * Set up argument block for fake call to execve.
7832 */
7833 init_exec_args.fname = argv0;
7834 init_exec_args.argp = scratch_addr;
7835 init_exec_args.envp = USER_ADDR_NULL;
7836
7837 /*
7838 * So that init task is set with uid,gid 0 token
7839 *
7840 * The access to the cred is safe:
7841 * the proc isn't running yet, it's stable.
7842 */
7843 set_security_token(p, proc_ucred_unsafe(p));
7844
7845 return execve(p, &init_exec_args, retval);
7846 }
7847
/* Candidate "init" (launchd) executables, tried in order by
 * load_init_program() until one successfully execs. Internal-only
 * variants come first on DEBUG/DEVELOPMENT builds. */
static const char * init_programs[] = {
#if DEBUG
	"/usr/appleinternal/sbin/launchd.debug",
#endif
#if DEVELOPMENT || DEBUG
	"/usr/appleinternal/sbin/launchd.development",
#endif
	"/sbin/launchd",
};
7857
7858 /*
7859 * load_init_program
7860 *
7861 * Description: Load the "init" program; in most cases, this will be "launchd"
7862 *
7863 * Parameters: p Process to call execve() to create
7864 * the "init" program
7865 *
7866 * Returns: (void)
7867 *
7868 * Notes: The process that is passed in is the first manufactured
7869 * process on the system, and gets here via bsd_ast() firing
7870 * for the first time. This is done to ensure that bsd_init()
7871 * has run to completion.
7872 *
7873 * In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
7874 * may be used to select a specific launchd executable. As with
7875 * the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
7876 * will force /sbin/launchd to be selected.
7877 *
7878 * Search order by build:
7879 *
7880 * DEBUG DEVELOPMENT RELEASE PATH
7881 * ----------------------------------------------------------------------------------
7882 * 1 1 NA /usr/appleinternal/sbin/launchd.$LAUNCHDSUFFIX
7883 * 2 NA NA /usr/appleinternal/sbin/launchd.debug
7884 * 3 2 NA /usr/appleinternal/sbin/launchd.development
7885 * 4 3 1 /sbin/launchd
7886 */
7887 void
load_init_program(proc_t p)7888 load_init_program(proc_t p)
7889 {
7890 uint32_t i;
7891 int error;
7892 vm_map_t map = current_map();
7893 mach_vm_offset_t scratch_addr = 0;
7894 mach_vm_size_t map_page_size = vm_map_page_size(map);
7895
7896 #if DEVELOPMENT || DEBUG
7897 /* Use the opportunity to initialize exec's debug log stream */
7898 exec_log_handle = os_log_create("com.apple.xnu.bsd", "exec");
7899 #endif /* DEVELOPMENT || DEBUG */
7900
7901 (void) mach_vm_allocate_kernel(map, &scratch_addr, map_page_size,
7902 VM_MAP_KERNEL_FLAGS_ANYWHERE());
7903 #if CONFIG_MEMORYSTATUS
7904 (void) memorystatus_init_at_boot_snapshot();
7905 #endif /* CONFIG_MEMORYSTATUS */
7906
7907 #if DEBUG || DEVELOPMENT
7908 /* Check for boot-arg suffix first */
7909 char launchd_suffix[64];
7910 if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
7911 char launchd_path[128];
7912 boolean_t is_release_suffix = ((launchd_suffix[0] == 0) ||
7913 (strcmp(launchd_suffix, "release") == 0));
7914
7915 if (is_release_suffix) {
7916 printf("load_init_program: attempting to load /sbin/launchd\n");
7917 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, "/sbin/launchd");
7918 if (!error) {
7919 return;
7920 }
7921
7922 panic("Process 1 exec of launchd.release failed, errno %d", error);
7923 } else {
7924 strlcpy(launchd_path, "/usr/appleinternal/sbin/launchd.", sizeof(launchd_path));
7925 strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
7926
7927 printf("load_init_program: attempting to load %s\n", launchd_path);
7928 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, launchd_path);
7929 if (!error) {
7930 return;
7931 } else if (error != ENOENT) {
7932 printf("load_init_program: failed loading %s: errno %d\n", launchd_path, error);
7933 }
7934 }
7935 }
7936 #endif
7937
7938 error = ENOENT;
7939 for (i = 0; i < sizeof(init_programs) / sizeof(init_programs[0]); i++) {
7940 printf("load_init_program: attempting to load %s\n", init_programs[i]);
7941 error = load_init_program_at_path(p, (user_addr_t)scratch_addr, init_programs[i]);
7942 if (!error) {
7943 return;
7944 } else if (error != ENOENT) {
7945 printf("load_init_program: failed loading %s: errno %d\n", init_programs[i], error);
7946 }
7947 }
7948
7949 panic("Process 1 exec of %s failed, errno %d", ((i == 0) ? "<null>" : init_programs[i - 1]), error);
7950 }
7951
7952 /*
7953 * load_return_to_errno
7954 *
7955 * Description: Convert a load_return_t (Mach error) to an errno (BSD error)
7956 *
7957 * Parameters: lrtn Mach error number
7958 *
7959 * Returns: (int) BSD error number
7960 * 0 Success
7961 * EBADARCH Bad architecture
7962 * EBADMACHO Bad Mach object file
7963 * ESHLIBVERS Bad shared library version
7964 * ENOMEM Out of memory/resource shortage
7965 * EACCES Access denied
 *		ENOENT			Entry not found (usually "file does
 *					not exist")
7968 * EIO An I/O error occurred
7969 * EBADEXEC The executable is corrupt/unknown
7970 */
7971 static int
load_return_to_errno(load_return_t lrtn)7972 load_return_to_errno(load_return_t lrtn)
7973 {
7974 switch (lrtn) {
7975 case LOAD_SUCCESS:
7976 return 0;
7977 case LOAD_BADARCH:
7978 return EBADARCH;
7979 case LOAD_BADMACHO:
7980 case LOAD_BADMACHO_UPX:
7981 return EBADMACHO;
7982 case LOAD_SHLIB:
7983 return ESHLIBVERS;
7984 case LOAD_NOSPACE:
7985 case LOAD_RESOURCE:
7986 return ENOMEM;
7987 case LOAD_PROTECT:
7988 return EACCES;
7989 case LOAD_ENOENT:
7990 return ENOENT;
7991 case LOAD_IOERROR:
7992 return EIO;
7993 case LOAD_DECRYPTFAIL:
7994 return EAUTH;
7995 case LOAD_FAILURE:
7996 default:
7997 return EBADEXEC;
7998 }
7999 }
8000
8001 #include <mach/mach_types.h>
8002 #include <mach/vm_prot.h>
8003 #include <mach/semaphore.h>
8004 #include <mach/sync_policy.h>
8005 #include <kern/clock.h>
8006 #include <mach/kern_return.h>
8007
8008 /*
8009 * execargs_alloc
8010 *
8011 * Description: Allocate the block of memory used by the execve arguments.
8012 * At the same time, we allocate a page so that we can read in
8013 * the first page of the image.
8014 *
8015 * Parameters: struct image_params * the image parameter block
8016 *
8017 * Returns: 0 Success
8018 * EINVAL Invalid argument
8019 * EACCES Permission denied
8020 * EINTR Interrupted function
8021 * ENOMEM Not enough space
8022 *
8023 * Notes: This is a temporary allocation into the kernel address space
8024 * to enable us to copy arguments in from user space. This is
8025 * necessitated by not mapping the process calling execve() into
8026 * the kernel address space during the execve() system call.
8027 *
8028 * We assemble the argument and environment, etc., into this
8029 * region before copying it as a single block into the child
8030 * process address space (at the top or bottom of the stack,
8031 * depending on which way the stack grows; see the function
8032 * exec_copyout_strings() for details).
8033 *
8034 * This ends up with a second (possibly unnecessary) copy compared
 * with assembling the data directly into the child address space,
8036 * instead, but since we cannot be guaranteed that the parent has
8037 * not modified its environment, we can't really know that it's
8038 * really a block there as well.
8039 */
8040
8041
/* Number of threads currently sleeping for an execargs slot to free up. */
static int execargs_waiters = 0;
/* Guards the execargs cache state (free count, cache slots, waiter count). */
static LCK_MTX_DECLARE_ATTR(execargs_cache_lock, &proc_lck_grp, &proc_lck_attr);
8044
8045 static void
execargs_lock_lock(void)8046 execargs_lock_lock(void)
8047 {
8048 lck_mtx_lock_spin(&execargs_cache_lock);
8049 }
8050
8051 static void
execargs_lock_unlock(void)8052 execargs_lock_unlock(void)
8053 {
8054 lck_mtx_unlock(&execargs_cache_lock);
8055 }
8056
8057 static wait_result_t
execargs_lock_sleep(void)8058 execargs_lock_sleep(void)
8059 {
8060 return lck_mtx_sleep(&execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE);
8061 }
8062
8063 static kern_return_t
execargs_purgeable_allocate(char ** execarg_address)8064 execargs_purgeable_allocate(char **execarg_address)
8065 {
8066 mach_vm_offset_t addr = 0;
8067 kern_return_t kr = mach_vm_allocate_kernel(bsd_pageable_map, &addr,
8068 BSD_PAGEABLE_SIZE_PER_EXEC,
8069 VM_MAP_KERNEL_FLAGS_ANYWHERE(.vmf_purgeable = true));
8070 *execarg_address = (char *)addr;
8071 assert(kr == KERN_SUCCESS);
8072 return kr;
8073 }
8074
8075 static kern_return_t
execargs_purgeable_reference(void * execarg_address)8076 execargs_purgeable_reference(void *execarg_address)
8077 {
8078 int state = VM_PURGABLE_NONVOLATILE;
8079 kern_return_t kr = vm_map_purgable_control(bsd_pageable_map,
8080 (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
8081
8082 assert(kr == KERN_SUCCESS);
8083 return kr;
8084 }
8085
8086 static kern_return_t
execargs_purgeable_volatilize(void * execarg_address)8087 execargs_purgeable_volatilize(void *execarg_address)
8088 {
8089 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
8090 kern_return_t kr;
8091 kr = vm_map_purgable_control(bsd_pageable_map,
8092 (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
8093
8094 assert(kr == KERN_SUCCESS);
8095
8096 return kr;
8097 }
8098
8099 static void
execargs_wakeup_waiters(void)8100 execargs_wakeup_waiters(void)
8101 {
8102 thread_wakeup(&execargs_free_count);
8103 }
8104
8105 static int
execargs_alloc(struct image_params * imgp)8106 execargs_alloc(struct image_params *imgp)
8107 {
8108 kern_return_t kret;
8109 wait_result_t res;
8110 int i, cache_index = -1;
8111
8112 execargs_lock_lock();
8113
8114 while (execargs_free_count == 0) {
8115 execargs_waiters++;
8116 res = execargs_lock_sleep();
8117 execargs_waiters--;
8118 if (res != THREAD_AWAKENED) {
8119 execargs_lock_unlock();
8120 return EINTR;
8121 }
8122 }
8123
8124 execargs_free_count--;
8125
8126 for (i = 0; i < execargs_cache_size; i++) {
8127 vm_offset_t element = execargs_cache[i];
8128 if (element) {
8129 cache_index = i;
8130 imgp->ip_strings = (char *)(execargs_cache[i]);
8131 execargs_cache[i] = 0;
8132 break;
8133 }
8134 }
8135
8136 assert(execargs_free_count >= 0);
8137
8138 execargs_lock_unlock();
8139
8140 if (cache_index == -1) {
8141 kret = execargs_purgeable_allocate(&imgp->ip_strings);
8142 } else {
8143 kret = execargs_purgeable_reference(imgp->ip_strings);
8144 }
8145
8146 assert(kret == KERN_SUCCESS);
8147 if (kret != KERN_SUCCESS) {
8148 return ENOMEM;
8149 }
8150
8151 /* last page used to read in file headers */
8152 imgp->ip_vdata = imgp->ip_strings + (NCARGS + PAGE_SIZE);
8153 imgp->ip_strendp = imgp->ip_strings;
8154 imgp->ip_argspace = NCARGS;
8155 imgp->ip_strspace = (NCARGS + PAGE_SIZE);
8156
8157 return 0;
8158 }
8159
8160 /*
8161 * execargs_free
8162 *
8163 * Description: Free the block of memory used by the execve arguments and the
8164 * first page of the executable by a previous call to the function
8165 * execargs_alloc().
8166 *
8167 * Parameters: struct image_params * the image parameter block
8168 *
8169 * Returns: 0 Success
8170 * EINVAL Invalid argument
 *		EINTR			Operation interrupted
8172 */
8173 static int
execargs_free(struct image_params * imgp)8174 execargs_free(struct image_params *imgp)
8175 {
8176 kern_return_t kret;
8177 int i;
8178 boolean_t needs_wakeup = FALSE;
8179
8180 kret = execargs_purgeable_volatilize(imgp->ip_strings);
8181
8182 execargs_lock_lock();
8183 execargs_free_count++;
8184
8185 for (i = 0; i < execargs_cache_size; i++) {
8186 vm_offset_t element = execargs_cache[i];
8187 if (element == 0) {
8188 execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
8189 imgp->ip_strings = NULL;
8190 break;
8191 }
8192 }
8193
8194 assert(imgp->ip_strings == NULL);
8195
8196 if (execargs_waiters > 0) {
8197 needs_wakeup = TRUE;
8198 }
8199
8200 execargs_lock_unlock();
8201
8202 if (needs_wakeup == TRUE) {
8203 execargs_wakeup_waiters();
8204 }
8205
8206 return kret == KERN_SUCCESS ? 0 : EINVAL;
8207 }
8208
8209 void
uthread_set_exec_data(struct uthread * uth,struct image_params * imgp)8210 uthread_set_exec_data(struct uthread *uth, struct image_params *imgp)
8211 {
8212 uth->uu_save.uus_exec_data.imgp = imgp;
8213 }
8214
8215 size_t
thread_get_current_exec_path(char * path,size_t size)8216 thread_get_current_exec_path(char *path, size_t size)
8217 {
8218 struct uthread *uth = current_uthread();
8219 struct image_params *imgp = uth->uu_save.uus_exec_data.imgp;
8220 size_t string_size = 0;
8221 char *exec_path;
8222
8223 if (path == NULL || imgp == NULL || imgp->ip_strings == NULL) {
8224 return 0;
8225 }
8226
8227 exec_path = imgp->ip_strings + strlen(EXECUTABLE_KEY);
8228 string_size = imgp->ip_strendp - exec_path;
8229 string_size = MIN(MAXPATHLEN, string_size);
8230 string_size = MIN(size, string_size);
8231
8232 string_size = strlcpy(path, exec_path, string_size);
8233 return string_size;
8234 }
8235 static void
exec_resettextvp(proc_t p,struct image_params * imgp)8236 exec_resettextvp(proc_t p, struct image_params *imgp)
8237 {
8238 vnode_t vp;
8239 off_t offset;
8240 vnode_t tvp = p->p_textvp;
8241 int ret;
8242
8243 vp = imgp->ip_vp;
8244 offset = imgp->ip_arch_offset;
8245
8246 if (vp == NULLVP) {
8247 panic("exec_resettextvp: expected valid vp");
8248 }
8249
8250 ret = vnode_ref(vp);
8251 proc_lock(p);
8252 if (ret == 0) {
8253 p->p_textvp = vp;
8254 p->p_textoff = offset;
8255 } else {
8256 p->p_textvp = NULLVP; /* this is paranoia */
8257 p->p_textoff = 0;
8258 }
8259 proc_unlock(p);
8260
8261 if (tvp != NULLVP) {
8262 if (vnode_getwithref(tvp) == 0) {
8263 vnode_rele(tvp);
8264 vnode_put(tvp);
8265 }
8266 }
8267 }
8268
8269 // Includes the 0-byte (therefore "SIZE" instead of "LEN").
8270 static const size_t CS_CDHASH_STRING_SIZE = CS_CDHASH_LEN * 2 + 1;
8271
8272 static void
cdhash_to_string(char str[CS_CDHASH_STRING_SIZE],uint8_t const * const cdhash)8273 cdhash_to_string(char str[CS_CDHASH_STRING_SIZE], uint8_t const * const cdhash)
8274 {
8275 static char const nibble[] = "0123456789abcdef";
8276
8277 /* Apparently still the safest way to get a hex representation
8278 * of binary data.
8279 * xnu's printf routines have %*D/%20D in theory, but "not really", see:
8280 * <rdar://problem/33328859> confusion around %*D/%nD in printf
8281 */
8282 for (int i = 0; i < CS_CDHASH_LEN; ++i) {
8283 str[i * 2] = nibble[(cdhash[i] & 0xf0) >> 4];
8284 str[i * 2 + 1] = nibble[cdhash[i] & 0x0f];
8285 }
8286 str[CS_CDHASH_STRING_SIZE - 1] = 0;
8287 }
8288
8289 /*
8290 * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
8291 *
8292 * Description: Waits for the userspace daemon to respond to the request
8293 * we made. Function declared non inline to be visible in
8294 * stackshots and spindumps as well as debugging.
8295 */
8296 __attribute__((noinline)) int
__EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port,int32_t new_pid)8297 __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
8298 {
8299 return find_code_signature(task_access_port, new_pid);
8300 }
8301
8302 /*
8303 * Update signature dependent process state, called by
8304 * process_signature.
8305 */
8306 static int
proc_process_signature(proc_t p,os_reason_t * signature_failure_reason)8307 proc_process_signature(proc_t p, os_reason_t *signature_failure_reason)
8308 {
8309 int error = 0;
8310 char const *error_msg = NULL;
8311
8312 kern_return_t kr = machine_task_process_signature(proc_get_task_raw(p), proc_platform(p), proc_sdk(p), &error_msg);
8313
8314 if (kr != KERN_SUCCESS) {
8315 error = EINVAL;
8316
8317 if (error_msg != NULL) {
8318 uint32_t error_msg_len = (uint32_t)strlen(error_msg) + 1;
8319 mach_vm_address_t data_addr = 0;
8320 int reason_error = 0;
8321 int kcdata_error = 0;
8322
8323 os_reason_t reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
8324 reason->osr_flags = OS_REASON_FLAG_GENERATE_CRASH_REPORT | OS_REASON_FLAG_CONSISTENT_FAILURE;
8325
8326 if ((reason_error = os_reason_alloc_buffer_noblock(reason,
8327 kcdata_estimate_required_buffer_size(1, error_msg_len))) == 0 &&
8328 (kcdata_error = kcdata_get_memory_addr(&reason->osr_kcd_descriptor,
8329 EXIT_REASON_USER_DESC, error_msg_len,
8330 &data_addr)) == KERN_SUCCESS) {
8331 kern_return_t mc_error = kcdata_memcpy(&reason->osr_kcd_descriptor, (mach_vm_address_t)data_addr,
8332 error_msg, error_msg_len);
8333
8334 if (mc_error != KERN_SUCCESS) {
8335 printf("process_signature: failed to copy reason string (kcdata_memcpy error: %d)\n",
8336 mc_error);
8337 }
8338 } else {
8339 printf("failed to allocate space for reason string (os_reason_alloc_buffer error: %d, kcdata error: %d, length: %u)\n",
8340 reason_error, kcdata_error, error_msg_len);
8341 }
8342
8343 assert(*signature_failure_reason == NULL); // shouldn't have gotten so far
8344 *signature_failure_reason = reason;
8345 }
8346 }
8347 return error;
8348 }
8349
8350
8351 #define DT_UNRESTRICTED_SUBSYSTEM_ROOT "unrestricted-subsystem-root"
8352
8353 static bool
allow_unrestricted_subsystem_root(void)8354 allow_unrestricted_subsystem_root(void)
8355 {
8356 #if !(DEVELOPMENT || DEBUG)
8357 static bool allow_unrestricted_subsystem_root = false;
8358 static bool has_been_set = false;
8359
8360 if (!has_been_set) {
8361 DTEntry chosen;
8362 const uint32_t *value;
8363 unsigned size;
8364
8365 has_been_set = true;
8366 if (SecureDTLookupEntry(0, "/chosen", &chosen) == kSuccess &&
8367 SecureDTGetProperty(chosen, DT_UNRESTRICTED_SUBSYSTEM_ROOT, (const void**)&value, &size) == kSuccess &&
8368 value != NULL &&
8369 size == sizeof(uint32_t)) {
8370 allow_unrestricted_subsystem_root = (bool)*value;
8371 }
8372 }
8373
8374 return allow_unrestricted_subsystem_root;
8375 #else
8376 return true;
8377 #endif
8378 }
8379
8380 static int
process_signature(proc_t p,struct image_params * imgp)8381 process_signature(proc_t p, struct image_params *imgp)
8382 {
8383 mach_port_t port = IPC_PORT_NULL;
8384 kern_return_t kr = KERN_FAILURE;
8385 int error = EACCES;
8386 boolean_t unexpected_failure = FALSE;
8387 struct cs_blob *csb;
8388 boolean_t require_success = FALSE;
8389 int spawn = (imgp->ip_flags & IMGPF_SPAWN);
8390 const int vfexec = 0;
8391 os_reason_t signature_failure_reason = OS_REASON_NULL;
8392
8393 /*
8394 * Override inherited code signing flags with the
8395 * ones for the process that is being successfully
8396 * loaded
8397 */
8398 proc_lock(p);
8399 proc_csflags_update(p, imgp->ip_csflags);
8400 proc_unlock(p);
8401
8402 /* Set the switch_protect flag on the map */
8403 if (proc_getcsflags(p) & (CS_HARD | CS_KILL)) {
8404 vm_map_switch_protect(get_task_map(proc_task(p)), TRUE);
8405 }
8406 /* set the cs_enforced flags in the map */
8407 if (proc_getcsflags(p) & CS_ENFORCEMENT) {
8408 vm_map_cs_enforcement_set(get_task_map(proc_task(p)), TRUE);
8409 } else {
8410 vm_map_cs_enforcement_set(get_task_map(proc_task(p)), FALSE);
8411 }
8412
8413 /*
8414 * image activation may be failed due to policy
8415 * which is unexpected but security framework does not
8416 * approve of exec, kill and return immediately.
8417 */
8418 if (imgp->ip_mac_return != 0) {
8419 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
8420 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY, 0, 0);
8421 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
8422 error = imgp->ip_mac_return;
8423 unexpected_failure = TRUE;
8424 goto done;
8425 }
8426
8427 if (imgp->ip_cs_error != OS_REASON_NULL) {
8428 signature_failure_reason = imgp->ip_cs_error;
8429 imgp->ip_cs_error = OS_REASON_NULL;
8430 error = EACCES;
8431 goto done;
8432 }
8433
8434 /* call the launch constraints hook */
8435 os_reason_t launch_constraint_reason;
8436 if ((error = mac_proc_check_launch_constraints(p, imgp, &launch_constraint_reason)) != 0) {
8437 signature_failure_reason = launch_constraint_reason;
8438 goto done;
8439 }
8440
8441 /*
8442 * Reject when there's subsystem root path set, but the image is restricted, and doesn't require
8443 * library validation. This is to avoid subsystem root being used to inject unsigned code
8444 */
8445 if (!allow_unrestricted_subsystem_root()) {
8446 if ((imgp->ip_csflags & CS_RESTRICT || proc_issetugid(p)) &&
8447 !(imgp->ip_csflags & CS_REQUIRE_LV) &&
8448 (imgp->ip_subsystem_root_path != NULL)) {
8449 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
8450 error = EACCES;
8451 goto done;
8452 }
8453 }
8454
8455 #if XNU_TARGET_OS_OSX
8456 /* Check for platform passed in spawn attr if iOS binary is being spawned */
8457 if (proc_platform(p) == PLATFORM_IOS) {
8458 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
8459 if (psa == NULL || psa->psa_platform == 0) {
8460 boolean_t no_sandbox_entitled = FALSE;
8461 #if DEBUG || DEVELOPMENT
8462 /*
8463 * Allow iOS binaries to spawn on internal systems
			 * if no-sandbox entitlement is present or unentitled_ios_sim_launch
8465 * boot-arg set to true
8466 */
8467 if (unentitled_ios_sim_launch) {
8468 no_sandbox_entitled = TRUE;
8469 } else {
8470 no_sandbox_entitled = IOVnodeHasEntitlement(imgp->ip_vp,
8471 (int64_t)imgp->ip_arch_offset, "com.apple.private.security.no-sandbox");
8472 }
8473 #endif /* DEBUG || DEVELOPMENT */
8474 if (!no_sandbox_entitled) {
8475 signature_failure_reason = os_reason_create(OS_REASON_EXEC,
8476 EXEC_EXIT_REASON_WRONG_PLATFORM);
8477 error = EACCES;
8478 goto done;
8479 }
8480 printf("Allowing spawn of iOS binary %s since it has "
8481 "com.apple.private.security.no-sandbox entitlement or unentitled_ios_sim_launch "
8482 "boot-arg set to true\n", p->p_name);
8483 } else if (psa->psa_platform != PLATFORM_IOS) {
8484 /* Simulator binary spawned with wrong platform */
8485 signature_failure_reason = os_reason_create(OS_REASON_EXEC,
8486 EXEC_EXIT_REASON_WRONG_PLATFORM);
8487 error = EACCES;
8488 goto done;
8489 } else {
8490 printf("Allowing spawn of iOS binary %s since correct platform was passed in spawn\n",
8491 p->p_name);
8492 }
8493 }
8494 #endif /* XNU_TARGET_OS_OSX */
8495
8496 /* If the code signature came through the image activation path, we skip the
8497 * taskgated / externally attached path. */
8498 if (imgp->ip_csflags & CS_SIGNED) {
8499 error = 0;
8500 goto done;
8501 }
8502
8503 /* The rest of the code is for signatures that either already have been externally
8504 * attached (likely, but not necessarily by a previous run through the taskgated
8505 * path), or that will now be attached by taskgated. */
8506
8507 kr = task_get_task_access_port(proc_task(p), &port);
8508 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
8509 error = 0;
8510 if (require_success) {
8511 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
8512 proc_getpid(p), OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT, 0, 0);
8513 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT);
8514 error = EACCES;
8515 }
8516 goto done;
8517 }
8518
8519 /*
8520 * taskgated returns KERN_SUCCESS if it has completed its work
8521 * and the exec should continue, KERN_FAILURE if the exec should
8522 * fail, or it may error out with different error code in an
8523 * event of mig failure (e.g. process was signalled during the
8524 * rpc call, taskgated died, mig server died etc.).
8525 */
8526
8527 kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, proc_getpid(p));
8528 switch (kr) {
8529 case KERN_SUCCESS:
8530 error = 0;
8531 break;
8532 case KERN_FAILURE:
8533 error = EACCES;
8534
8535 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
8536 proc_getpid(p), OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG, 0, 0);
8537 signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG);
8538 goto done;
8539 default:
8540 error = EACCES;
8541
8542 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,
8543 proc_getpid(p), OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER, 0, 0);
8544 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER);
8545 unexpected_failure = TRUE;
8546 goto done;
8547 }
8548
8549 /* Only do this if exec_resettextvp() did not fail */
8550 if (p->p_textvp != NULLVP) {
8551 csb = ubc_cs_blob_get(p->p_textvp, -1, -1, p->p_textoff);
8552
8553 if (csb != NULL) {
8554 /* As the enforcement we can do here is very limited, we only allow things that
8555 * are the only reason why this code path still exists:
8556 * Adhoc signed non-platform binaries without special cs_flags and without any
8557 * entitlements (unrestricted ones still pass AMFI). */
8558 if (
8559 /* Revalidate the blob if necessary through bumped generation count. */
8560 (ubc_cs_generation_check(p->p_textvp) == 0 ||
8561 ubc_cs_blob_revalidate(p->p_textvp, csb, imgp, 0, proc_platform(p)) == 0) &&
8562 /* Only CS_ADHOC, no CS_KILL, CS_HARD etc. */
8563 (csb->csb_flags & CS_ALLOWED_MACHO) == CS_ADHOC &&
8564 /* If it has a CMS blob, it's not adhoc. The CS_ADHOC flag can lie. */
8565 csblob_find_blob_bytes((const uint8_t *)csb->csb_mem_kaddr, csb->csb_mem_size,
8566 CSSLOT_SIGNATURESLOT,
8567 CSMAGIC_BLOBWRAPPER) == NULL &&
8568 /* It could still be in a trust cache (unlikely with CS_ADHOC), or a magic path. */
8569 csb->csb_platform_binary == 0 &&
8570 /* No entitlements, not even unrestricted ones. */
8571 csb->csb_entitlements_blob == NULL &&
8572 csb->csb_der_entitlements_blob == NULL) {
8573 proc_lock(p);
8574 proc_csflags_set(p, CS_SIGNED | CS_VALID);
8575 proc_unlock(p);
8576 } else {
8577 uint8_t cdhash[CS_CDHASH_LEN];
8578 char cdhash_string[CS_CDHASH_STRING_SIZE];
8579 proc_getcdhash(p, cdhash);
8580 cdhash_to_string(cdhash_string, cdhash);
8581 printf("ignoring detached code signature on '%s' with cdhash '%s' "
8582 "because it is invalid, or not a simple adhoc signature.\n",
8583 p->p_name, cdhash_string);
8584 }
8585 }
8586 }
8587
8588 done:
8589 if (0 == error) {
8590 /*
8591 * Update the new process's signature-dependent process state.
8592 * state.
8593 */
8594
8595 error = proc_process_signature(p, &signature_failure_reason);
8596 }
8597
8598 if (0 == error) {
8599 /*
8600 * Update the new main thread's signature-dependent thread
8601 * state. This was also called when the thread was created,
8602 * but for the main thread the signature was not yet attached
8603 * at that time.
8604 */
8605 kr = thread_process_signature(imgp->ip_new_thread, proc_get_task_raw(p));
8606
8607 if (kr != KERN_SUCCESS) {
8608 error = EINVAL;
8609 signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_MACHINE_THREAD);
8610 }
8611 }
8612
8613 if (0 == error) {
8614 /* The process's code signature related properties are
8615 * fully set up, so this is an opportune moment to log
8616 * platform binary execution, if desired. */
8617 if (platform_exec_logging != 0 && csproc_get_platform_binary(p)) {
8618 uint8_t cdhash[CS_CDHASH_LEN];
8619 char cdhash_string[CS_CDHASH_STRING_SIZE];
8620 proc_getcdhash(p, cdhash);
8621 cdhash_to_string(cdhash_string, cdhash);
8622
8623 os_log(peLog, "CS Platform Exec Logging: Executing platform signed binary "
8624 "'%s' with cdhash %s\n", p->p_name, cdhash_string);
8625 }
8626 } else {
8627 if (!unexpected_failure) {
8628 proc_csflags_set(p, CS_KILLED);
8629 }
8630 /* make very sure execution fails */
8631 if (vfexec || spawn) {
8632 assert(signature_failure_reason != OS_REASON_NULL);
8633 psignal_vfork_with_reason(p, proc_task(p), imgp->ip_new_thread,
8634 SIGKILL, signature_failure_reason);
8635 signature_failure_reason = OS_REASON_NULL;
8636 error = 0;
8637 } else {
8638 assert(signature_failure_reason != OS_REASON_NULL);
8639 psignal_with_reason(p, SIGKILL, signature_failure_reason);
8640 signature_failure_reason = OS_REASON_NULL;
8641 }
8642 }
8643
8644 if (port != IPC_PORT_NULL) {
8645 ipc_port_release_send(port);
8646 }
8647
8648 /* If we hit this, we likely would have leaked an exit reason */
8649 assert(signature_failure_reason == OS_REASON_NULL);
8650 return error;
8651 }
8652
8653 /*
8654 * Typically as soon as we start executing this process, the
8655 * first instruction will trigger a VM fault to bring the text
8656 * pages (as executable) into the address space, followed soon
8657 * thereafter by dyld data structures (for dynamic executable).
8658 * To optimize this, as well as improve support for hardware
8659 * debuggers that can only access resident pages present
8660 * in the process' page tables, we prefault some pages if
8661 * possible. Errors are non-fatal.
8662 */
8663 #ifndef PREVENT_CALLER_STACK_USE
8664 #define PREVENT_CALLER_STACK_USE __attribute__((noinline))
8665 #endif
8666
8667 /*
8668 * Prefaulting dyld data does not work (rdar://76621401)
8669 */
8670 #define FIXED_76621401 0
8671 static void PREVENT_CALLER_STACK_USE
exec_prefault_data(__unused proc_t p,__unused struct image_params * imgp,__unused load_result_t * load_result)8672 exec_prefault_data(
8673 __unused proc_t p,
8674 __unused struct image_params *imgp,
8675 __unused load_result_t *load_result)
8676 {
8677 #if FIXED_76621401
8678 int ret;
8679 size_t expected_all_image_infos_size;
8680 #endif /* FIXED_76621401 */
8681 kern_return_t kr;
8682
8683 /*
8684 * Prefault executable or dyld entry point.
8685 */
8686 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
8687 DEBUG4K_LOAD("entry_point 0x%llx\n", (uint64_t)load_result->entry_point);
8688 }
8689 kr = vm_fault(current_map(),
8690 vm_map_trunc_page(load_result->entry_point,
8691 vm_map_page_mask(current_map())),
8692 VM_PROT_READ | VM_PROT_EXECUTE,
8693 FALSE, VM_KERN_MEMORY_NONE,
8694 THREAD_UNINT, NULL, 0);
8695 if (kr != KERN_SUCCESS) {
8696 DEBUG4K_ERROR("map %p va 0x%llx -> 0x%x\n", current_map(), (uint64_t)vm_map_trunc_page(load_result->entry_point, vm_map_page_mask(current_map())), kr);
8697 }
8698
8699 #if FIXED_76621401
8700 if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
8701 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
8702 } else {
8703 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
8704 }
8705
8706 /* Decode dyld anchor structure from <mach-o/dyld_images.h> */
8707 if (load_result->dynlinker &&
8708 load_result->all_image_info_addr &&
8709 load_result->all_image_info_size >= expected_all_image_infos_size) {
8710 union {
8711 struct user64_dyld_all_image_infos infos64;
8712 struct user32_dyld_all_image_infos infos32;
8713 } all_image_infos;
8714
8715 /*
8716 * Pre-fault to avoid copyin() going through the trap handler
8717 * and recovery path.
8718 */
8719 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
8720 DEBUG4K_LOAD("all_image_info_addr 0x%llx\n", load_result->all_image_info_addr);
8721 }
8722 kr = vm_fault(current_map(),
8723 vm_map_trunc_page(load_result->all_image_info_addr,
8724 vm_map_page_mask(current_map())),
8725 VM_PROT_READ | VM_PROT_WRITE,
8726 FALSE, VM_KERN_MEMORY_NONE,
8727 THREAD_UNINT, NULL, 0);
8728 if (kr != KERN_SUCCESS) {
8729 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(load_result->all_image_info_addr, vm_map_page_mask(current_map())), kr);
8730 }
8731 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
8732 /* all_image_infos straddles a page */
8733 kr = vm_fault(current_map(),
8734 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
8735 vm_map_page_mask(current_map())),
8736 VM_PROT_READ | VM_PROT_WRITE,
8737 FALSE, VM_KERN_MEMORY_NONE,
8738 THREAD_UNINT, NULL, 0);
8739 if (kr != KERN_SUCCESS) {
8740 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size -1, vm_map_page_mask(current_map())), kr);
8741 }
8742 }
8743
8744 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
8745 DEBUG4K_LOAD("copyin(0x%llx, 0x%lx)\n", load_result->all_image_info_addr, expected_all_image_infos_size);
8746 }
8747 ret = copyin((user_addr_t)load_result->all_image_info_addr,
8748 &all_image_infos,
8749 expected_all_image_infos_size);
8750 if (ret == 0 && all_image_infos.infos32.version >= DYLD_ALL_IMAGE_INFOS_ADDRESS_MINIMUM_VERSION) {
8751 user_addr_t notification_address;
8752 user_addr_t dyld_image_address;
8753 user_addr_t dyld_version_address;
8754 user_addr_t dyld_all_image_infos_address;
8755 user_addr_t dyld_slide_amount;
8756
8757 if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
8758 notification_address = (user_addr_t)all_image_infos.infos64.notification;
8759 dyld_image_address = (user_addr_t)all_image_infos.infos64.dyldImageLoadAddress;
8760 dyld_version_address = (user_addr_t)all_image_infos.infos64.dyldVersion;
8761 dyld_all_image_infos_address = (user_addr_t)all_image_infos.infos64.dyldAllImageInfosAddress;
8762 } else {
8763 notification_address = all_image_infos.infos32.notification;
8764 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
8765 dyld_version_address = all_image_infos.infos32.dyldVersion;
8766 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
8767 }
8768
8769 /*
8770 * dyld statically sets up the all_image_infos in its Mach-O
8771 * binary at static link time, with pointers relative to its default
8772 * load address. Since ASLR might slide dyld before its first
8773 * instruction is executed, "dyld_slide_amount" tells us how far
8774 * dyld was loaded compared to its default expected load address.
8775 * All other pointers into dyld's image should be adjusted by this
8776 * amount. At some point later, dyld will fix up pointers to take
8777 * into account the slide, at which point the all_image_infos_address
8778 * field in the structure will match the runtime load address, and
8779 * "dyld_slide_amount" will be 0, if we were to consult it again.
8780 */
8781
8782 dyld_slide_amount = (user_addr_t)load_result->all_image_info_addr - dyld_all_image_infos_address;
8783
8784 #if 0
8785 kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
8786 (uint64_t)load_result->all_image_info_addr,
8787 all_image_infos.infos32.version,
8788 (uint64_t)notification_address,
8789 (uint64_t)dyld_image_address,
8790 (uint64_t)dyld_version_address,
8791 (uint64_t)dyld_all_image_infos_address);
8792 #endif
8793
8794 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
8795 DEBUG4K_LOAD("notification_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)notification_address, (uint64_t)dyld_slide_amount);
8796 }
8797 kr = vm_fault(current_map(),
8798 vm_map_trunc_page(notification_address + dyld_slide_amount,
8799 vm_map_page_mask(current_map())),
8800 VM_PROT_READ | VM_PROT_EXECUTE,
8801 FALSE, VM_KERN_MEMORY_NONE,
8802 THREAD_UNINT, NULL, 0);
8803 if (kr != KERN_SUCCESS) {
8804 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(notification_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
8805 }
8806 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
8807 DEBUG4K_LOAD("dyld_image_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)dyld_image_address, (uint64_t)dyld_slide_amount);
8808 }
8809 kr = vm_fault(current_map(),
8810 vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
8811 vm_map_page_mask(current_map())),
8812 VM_PROT_READ | VM_PROT_EXECUTE,
8813 FALSE, VM_KERN_MEMORY_NONE,
8814 THREAD_UNINT, NULL, 0);
8815 if (kr != KERN_SUCCESS) {
8816 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_image_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
8817 }
8818 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
8819 DEBUG4K_LOAD("dyld_version_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)dyld_version_address, (uint64_t)dyld_slide_amount);
8820 }
8821 kr = vm_fault(current_map(),
8822 vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
8823 vm_map_page_mask(current_map())),
8824 VM_PROT_READ,
8825 FALSE, VM_KERN_MEMORY_NONE,
8826 THREAD_UNINT, NULL, 0);
8827 if (kr != KERN_SUCCESS) {
8828 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_version_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
8829 }
8830 if (vm_map_page_shift(current_map()) < (int)PAGE_SHIFT) {
8831 DEBUG4K_LOAD("dyld_all_image_infos_address 0x%llx dyld_slide_amount 0x%llx\n", (uint64_t)dyld_version_address, (uint64_t)dyld_slide_amount);
8832 }
8833 kr = vm_fault(current_map(),
8834 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
8835 vm_map_page_mask(current_map())),
8836 VM_PROT_READ | VM_PROT_WRITE,
8837 FALSE, VM_KERN_MEMORY_NONE,
8838 THREAD_UNINT, NULL, 0);
8839 if (kr != KERN_SUCCESS) {
8840 // printf("%s:%d map %p va 0x%llx -> 0x%x\n", __FUNCTION__, __LINE__, current_map(), vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount, vm_map_page_mask(current_map())), kr);
8841 }
8842 }
8843 }
8844 #endif /* FIXED_76621401 */
8845 }
8846
8847 static int
8848 sysctl_libmalloc_experiments SYSCTL_HANDLER_ARGS
8849 {
8850 #pragma unused(oidp, arg2, req)
8851 int changed;
8852 errno_t error;
8853 uint64_t value = os_atomic_load_wide(&libmalloc_experiment_factors, relaxed);
8854
8855 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
8856 if (error) {
8857 return error;
8858 }
8859
8860 if (changed) {
8861 os_atomic_store_wide(&libmalloc_experiment_factors, value, relaxed);
8862 }
8863
8864 return 0;
8865 }
8866
/* Register kern.libmalloc_experiments as a read/write 64-bit sysctl backed by
 * sysctl_libmalloc_experiments(). */
EXPERIMENT_FACTOR_LEGACY_PROC(_kern, libmalloc_experiments, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_libmalloc_experiments, "A", "");

/* Read-only sysctl node kern.sec_transition; presumably a parent for entries
 * registered elsewhere — none are visible in this file section. */
SYSCTL_NODE(_kern, OID_AUTO, sec_transition,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, "sec_transition");
8871
8872 #if DEBUG || DEVELOPMENT
8873 static int
sysctl_setup_ensure_pidversion_changes_on_exec(__unused int64_t in,int64_t * out)8874 sysctl_setup_ensure_pidversion_changes_on_exec(__unused int64_t in, int64_t *out)
8875 {
8876 // Tweak nextpidversion to try to trigger a reuse (unless the exec code is doing the right thing)
8877 int current_pid_version = proc_get_ro(current_proc())->p_idversion;
8878 nextpidversion = current_pid_version;
8879 *out = 0;
8880 return KERN_SUCCESS;
8881 }
8882
8883 SYSCTL_TEST_REGISTER(setup_ensure_pidversion_changes_on_exec, sysctl_setup_ensure_pidversion_changes_on_exec);
8884 #endif /* DEBUG || DEVELOPMENT */
8885