xref: /xnu-11215.81.4/osfmk/vm/vm_shared_region.c (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1 /*
2  * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
10  * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 
24 /*
25  * Shared region (... and comm page)
26  *
27  * This file handles the VM shared region and comm page.
28  *
29  */
30 /*
31  * SHARED REGIONS
32  * --------------
33  *
34  * A shared region is a submap that contains the most common system shared
35  * libraries for a given environment, which is defined by:
36  * - cpu-type
37  * - 64-bitness
38  * - root directory
39  * - team ID (when pointer authentication is in use)
40  *
41  * The point of a shared region is to reduce the setup overhead when exec'ing
42  * a new process. A shared region uses a shared VM submap that gets mapped
43  * automatically at exec() time, see vm_map_exec().  The first process of a given
44  * environment sets up the shared region and all further processes in that
45  * environment can re-use that shared region without having to re-create
46  * the same mappings in their VM map.  All they need is contained in the shared
47  * region.
48  *
49  * The region can also share a pmap (mostly for read-only parts but also for the
50  * initial version of some writable parts), which gets "nested" into the
51  * process's pmap.  This reduces the number of soft faults:  once one process
52  * brings in a page in the shared region, all the other processes can access
53  * it without having to enter it in their own pmap.
54  *
55  * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56  * to map the appropriate shared region in the process's address space.
57  * We look up the appropriate shared region for the process's environment.
58  * If we can't find one, we create a new (empty) one and add it to the list.
59  * Otherwise, we just take an extra reference on the shared region we found.
60  *
61  * The "dyld" runtime, mapped into the process's address space at exec() time,
62  * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
63  * system calls to validate and/or populate the shared region with the
64  * appropriate dyld_shared_cache file.
65  *
66  * The shared region is inherited on fork() and the child simply takes an
67  * extra reference on its parent's shared region.
68  *
69  * When the task terminates, we release the reference on its shared region.
70  * When the last reference is released, we destroy the shared region.
71  *
72  * After a chroot(), the calling process keeps using its original shared region,
73  * since that's what was mapped when it was started.  But its children
74  * will use a different shared region, because they need to use the shared
75  * cache that's relative to the new root directory.
76  */
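/*
 * Illustrative sketch (not part of the original source): the dyld-side view
 * of the handshake described above, assuming the historical userspace
 * prototype "int shared_region_check_np(uint64_t *start_address)".  dyld
 * first asks whether the region for this environment is already populated;
 * only the first process in the environment needs to map and slide the
 * dyld_shared_cache file.
 *
 *	uint64_t cache_start = 0;
 *	if (shared_region_check_np(&cache_start) == 0) {
 *		// region already populated: reuse it starting at cache_start
 *	} else {
 *		// empty region: call shared_region_map_and_slide_2_np() with
 *		// the mappings of the appropriate dyld_shared_cache file
 *	}
 */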
77 
78 /*
79  * COMM PAGE
80  *
81  * A "comm page" is an area of memory that is populated by the kernel with
82  * the appropriate platform-specific version of some commonly used code.
83  * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84  * for the native cpu-type.  No need to overly optimize translated code
85  * for hardware that is not really there !
86  *
87  * The comm pages are created and populated at boot time.
88  *
89  * The appropriate comm page is mapped into a process's address space
90  * at exec() time, in vm_map_exec(). It is then inherited on fork().
91  *
92  * The comm page is shared between the kernel and all applications of
93  * a given platform. Only the kernel can modify it.
94  *
95  * Applications just branch to fixed addresses in the comm page and find
96  * the right version of the code for the platform.  There is also some
97  * data provided and updated by the kernel for processes to retrieve easily
98  * without having to do a system call.
99  */
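/*
 * Illustrative sketch (not part of the original source): how userspace
 * consumes the comm page without a system call.  The symbol is assumed to
 * come from <machine/cpu_capabilities.h>; the exact set of fields and
 * addresses varies by architecture and release.
 *
 *	#include <machine/cpu_capabilities.h>
 *
 *	// read a kernel-maintained value at a fixed, per-platform address
 *	uint64_t caps = *(volatile uint64_t *)_COMM_PAGE_CPU_CAPABILITIES64;
 */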
100 
101 #include <debug.h>
102 
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106 
107 #include <mach/mach_vm.h>
108 #include <mach/machine.h>
109 
110 #include <vm/vm_map_internal.h>
111 #include <vm/vm_memory_entry_xnu.h>
112 #include <vm/vm_shared_region_internal.h>
113 #include <vm/vm_kern_xnu.h>
114 #include <vm/memory_object_internal.h>
115 #include <vm/vm_protos_internal.h>
116 #include <vm/vm_object_internal.h>
117 
118 #include <machine/commpage.h>
119 #include <machine/cpu_capabilities.h>
120 #include <sys/random.h>
121 #include <sys/errno.h>
122 
123 #if defined(__arm64__)
124 #include <arm/cpu_data_internal.h>
125 #include <arm/misc_protos.h>
126 #endif
127 
128 /*
129  * The following codes are used in the subclass
130  * of the DBG_MACH_SHAREDREGION class
131  */
132 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
133 
134 #if __has_feature(ptrauth_calls)
135 #include <ptrauth.h>
136 #endif /* __has_feature(ptrauth_calls) */
137 
138 /* "dyld" uses this to figure out what the kernel supports */
139 int shared_region_version = 3;
140 
141 /* trace level, output is sent to the system log file */
142 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
143 
144 /* should local (non-chroot) shared regions persist when no task uses them ? */
145 int shared_region_persistence = 0;      /* no by default */
146 
147 
148 /* delay in seconds before reclaiming an unused shared region */
149 TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
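/*
 * For illustration: since this is a writable tunable registered under the
 * "vm_shared_region_destroy_delay" boot-arg, the delay can be overridden at
 * boot, e.g. "vm_shared_region_destroy_delay=0" (hypothetical value) to
 * reclaim unused shared regions immediately.
 */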
150 
151 #if DEVELOPMENT || DEBUG
152 #define PANIC_ON_DYLD_ISSUE_DEFAULT 1
153 #else /* DEVELOPMENT || DEBUG */
154 #define PANIC_ON_DYLD_ISSUE_DEFAULT 0
155 #endif /* DEVELOPMENT || DEBUG */
156 TUNABLE_WRITEABLE(int, panic_on_dyld_issue, "panic_on_dyld_issue", PANIC_ON_DYLD_ISSUE_DEFAULT);
157 
158 /*
159  * Cached pointer to the most recently mapped shared region from PID 1, which should
160  * be the most commonly mapped shared region in the system.  There are many processes
161  * which do not use this, for a variety of reasons.
162  *
163  * The main consumer of this is stackshot.
164  */
165 struct vm_shared_region *primary_system_shared_region = NULL;
166 
167 #if XNU_TARGET_OS_OSX
168 /*
169  * Only one cache gets to slide on Desktop, since we can't
170  * tear down slide info properly today and the desktop actually
171  * produces lots of shared caches.
172  */
173 boolean_t shared_region_completed_slide = FALSE;
174 #endif /* XNU_TARGET_OS_OSX */
175 
176 /* this lock protects all the shared region data structures */
177 static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
178 static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
179 
180 #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
181 #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
182 #define vm_shared_region_sleep(event, interruptible)                    \
183 	lck_mtx_sleep_with_inheritor(&vm_shared_region_lock,            \
184 	              LCK_SLEEP_DEFAULT,                                \
185 	              (event_t) (event),                                \
186 	              *(event),                                         \
187 	              (interruptible) | THREAD_WAIT_NOREPORT,           \
188 	              TIMEOUT_WAIT_FOREVER)
189 #define vm_shared_region_wakeup(event)                                  \
190 	wakeup_all_with_inheritor((event), THREAD_AWAKENED)
191 
192 /* the list of currently available shared regions (one per environment) */
193 queue_head_t    vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
194 int             vm_shared_region_count = 0;
195 int             vm_shared_region_peak = 0;
196 static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */
197 
198 /*
199  * the number of times an event has forced the recalculation of the reslide
200  * shared region slide.
201  */
202 #if __has_feature(ptrauth_calls)
203 int                             vm_shared_region_reslide_count = 0;
204 #endif /* __has_feature(ptrauth_calls) */
205 
206 static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
207 static vm_shared_region_t vm_shared_region_create(
208 	void          *root_dir,
209 	cpu_type_t    cputype,
210 	cpu_subtype_t cpu_subtype,
211 	boolean_t     is_64bit,
212 	int           target_page_shift,
213 	boolean_t     reslide,
214 	boolean_t     is_driverkit,
215 	uint32_t      rsr_version);
216 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
217 
218 static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
219 static void vm_shared_region_timeout(thread_call_param_t param0,
220     thread_call_param_t param1);
221 static kern_return_t vm_shared_region_slide_mapping(
222 	vm_shared_region_t sr,
223 	user_addr_t        slide_info_addr,
224 	mach_vm_size_t     slide_info_size,
225 	mach_vm_offset_t   start,
226 	mach_vm_size_t     size,
227 	mach_vm_offset_t   slid_mapping,
228 	uint32_t           slide,
229 	memory_object_control_t,
230 	vm_prot_t          prot); /* forward */
231 
232 static int __commpage_setup = 0;
233 #if XNU_TARGET_OS_OSX
234 static int __system_power_source = 1;   /* init to external power source */
235 static void post_sys_powersource_internal(int i, int internal);
236 #endif /* XNU_TARGET_OS_OSX */
237 
238 extern u_int32_t random(void);
239 
240 /*
241  * Retrieve a task's shared region and grab an extra reference to
242  * make sure it doesn't disappear while the caller is using it.
243  * The caller is responsible for consuming that extra reference if
244  * necessary.
245  */
246 vm_shared_region_t
247 vm_shared_region_get(
248 	task_t          task)
249 {
250 	vm_shared_region_t      shared_region;
251 
252 	SHARED_REGION_TRACE_DEBUG(
253 		("shared_region: -> get(%p)\n",
254 		(void *)VM_KERNEL_ADDRPERM(task)));
255 
256 	task_lock(task);
257 	vm_shared_region_lock();
258 	shared_region = task->shared_region;
259 	if (shared_region) {
260 		assert(shared_region->sr_ref_count > 0);
261 		vm_shared_region_reference_locked(shared_region);
262 	}
263 	vm_shared_region_unlock();
264 	task_unlock(task);
265 
266 	SHARED_REGION_TRACE_DEBUG(
267 		("shared_region: get(%p) <- %p\n",
268 		(void *)VM_KERNEL_ADDRPERM(task),
269 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
270 
271 	return shared_region;
272 }
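/*
 * Illustrative sketch (not part of the original source), using only the
 * functions defined in this file: the reference returned by
 * vm_shared_region_get() must eventually be dropped with
 * vm_shared_region_deallocate().
 *
 *	vm_shared_region_t sr = vm_shared_region_get(task);
 *	if (sr != NULL) {
 *		vm_map_t sr_map = vm_shared_region_vm_map(sr);
 *		// ... inspect sr / sr_map while holding the reference ...
 *		vm_shared_region_deallocate(sr);
 *	}
 */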
273 
274 vm_map_t
275 vm_shared_region_vm_map(
276 	vm_shared_region_t      shared_region)
277 {
278 	ipc_port_t              sr_handle;
279 	vm_named_entry_t        sr_mem_entry;
280 	vm_map_t                sr_map;
281 
282 	SHARED_REGION_TRACE_DEBUG(
283 		("shared_region: -> vm_map(%p)\n",
284 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
285 	assert(shared_region->sr_ref_count > 0);
286 
287 	sr_handle = shared_region->sr_mem_entry;
288 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
289 	sr_map = sr_mem_entry->backing.map;
290 	assert(sr_mem_entry->is_sub_map);
291 
292 	SHARED_REGION_TRACE_DEBUG(
293 		("shared_region: vm_map(%p) <- %p\n",
294 		(void *)VM_KERNEL_ADDRPERM(shared_region),
295 		(void *)VM_KERNEL_ADDRPERM(sr_map)));
296 	return sr_map;
297 }
298 
299 /*
300  * Set the shared region the process should use.
301  * A NULL new shared region means that we just want to release the old
302  * shared region.
303  * The caller should already have an extra reference on the new shared region
304  * (if any).  We release a reference on the old shared region (if any).
305  */
306 void
307 vm_shared_region_set(
308 	task_t                  task,
309 	vm_shared_region_t      new_shared_region)
310 {
311 	vm_shared_region_t      old_shared_region;
312 
313 	SHARED_REGION_TRACE_DEBUG(
314 		("shared_region: -> set(%p, %p)\n",
315 		(void *)VM_KERNEL_ADDRPERM(task),
316 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
317 
318 	task_lock(task);
319 	vm_shared_region_lock();
320 
321 	old_shared_region = task->shared_region;
322 	if (new_shared_region) {
323 		assert(new_shared_region->sr_ref_count > 0);
324 	}
325 
326 	task->shared_region = new_shared_region;
327 
328 	vm_shared_region_unlock();
329 	task_unlock(task);
330 
331 	if (old_shared_region) {
332 		assert(old_shared_region->sr_ref_count > 0);
333 		vm_shared_region_deallocate(old_shared_region);
334 	}
335 
336 	SHARED_REGION_TRACE_DEBUG(
337 		("shared_region: set(%p) <- old=%p new=%p\n",
338 		(void *)VM_KERNEL_ADDRPERM(task),
339 		(void *)VM_KERNEL_ADDRPERM(old_shared_region),
340 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
341 }
342 
343 /*
344  * New arm64 shared regions match with an existing arm64e region.
345  * They just get a private non-authenticating pager.
346  */
347 static inline bool
348 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
349 {
350 	if (exist == new) {
351 		return true;
352 	}
353 	if (cputype == CPU_TYPE_ARM64 &&
354 	    exist == CPU_SUBTYPE_ARM64E &&
355 	    new == CPU_SUBTYPE_ARM64_ALL) {
356 		return true;
357 	}
358 	return false;
359 }
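/*
 * For illustration, how the rule above plays out (CPU_SUBTYPE_ prefixes omitted):
 *	match_subtype(CPU_TYPE_ARM64, ARM64E,    ARM64E)    -> true   exact match
 *	match_subtype(CPU_TYPE_ARM64, ARM64E,    ARM64_ALL) -> true   arm64 reuses an arm64e region
 *	match_subtype(CPU_TYPE_ARM64, ARM64_ALL, ARM64E)    -> false  no promotion the other way
 */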
360 
361 
362 /*
363  * Look up the shared region for the desired environment.
364  * If none is found, create a new (empty) one.
365  * Grab an extra reference on the returned shared region, to make sure
366  * it doesn't get destroyed before the caller is done with it.  The caller
367  * is responsible for consuming that extra reference if necessary.
368  */
369 vm_shared_region_t
370 vm_shared_region_lookup(
371 	void            *root_dir,
372 	cpu_type_t      cputype,
373 	cpu_subtype_t   cpu_subtype,
374 	boolean_t       is_64bit,
375 	int             target_page_shift,
376 	boolean_t       reslide,
377 	boolean_t       is_driverkit,
378 	uint32_t        rsr_version)
379 {
380 	vm_shared_region_t      shared_region;
381 	vm_shared_region_t      new_shared_region;
382 
383 	SHARED_REGION_TRACE_DEBUG(
384 		("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
385 		(void *)VM_KERNEL_ADDRPERM(root_dir),
386 		cputype, cpu_subtype, is_64bit, target_page_shift,
387 		reslide, is_driverkit));
388 
389 	shared_region = NULL;
390 	new_shared_region = NULL;
391 
392 	vm_shared_region_lock();
393 	for (;;) {
394 		queue_iterate(&vm_shared_region_queue,
395 		    shared_region,
396 		    vm_shared_region_t,
397 		    sr_q) {
398 			assert(shared_region->sr_ref_count > 0);
399 			if (shared_region->sr_cpu_type == cputype &&
400 			    match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
401 			    shared_region->sr_root_dir == root_dir &&
402 			    shared_region->sr_64bit == is_64bit &&
403 #if __ARM_MIXED_PAGE_SIZE__
404 			    shared_region->sr_page_shift == target_page_shift &&
405 #endif /* __ARM_MIXED_PAGE_SIZE__ */
406 #if __has_feature(ptrauth_calls)
407 			    shared_region->sr_reslide == reslide &&
408 #endif /* __has_feature(ptrauth_calls) */
409 			    shared_region->sr_driverkit == is_driverkit &&
410 			    shared_region->sr_rsr_version == rsr_version &&
411 			    !shared_region->sr_stale) {
412 				/* found a match ! */
413 				vm_shared_region_reference_locked(shared_region);
414 				goto done;
415 			}
416 		}
417 		if (new_shared_region == NULL) {
418 			/* no match: create a new one */
419 			vm_shared_region_unlock();
420 			new_shared_region = vm_shared_region_create(root_dir,
421 			    cputype,
422 			    cpu_subtype,
423 			    is_64bit,
424 			    target_page_shift,
425 			    reslide,
426 			    is_driverkit,
427 			    rsr_version);
428 			/* do the lookup again, in case we lost a race */
429 			vm_shared_region_lock();
430 			continue;
431 		}
432 		/* still no match: use our new one */
433 		shared_region = new_shared_region;
434 		new_shared_region = NULL;
435 		uint32_t newid = ++vm_shared_region_lastid;
436 		if (newid == 0) {
437 			panic("shared_region: vm_shared_region_lastid wrapped");
438 		}
439 		shared_region->sr_id = newid;
440 		shared_region->sr_install_time = mach_absolute_time();
441 		queue_enter(&vm_shared_region_queue,
442 		    shared_region,
443 		    vm_shared_region_t,
444 		    sr_q);
445 		vm_shared_region_count++;
446 		if (vm_shared_region_count > vm_shared_region_peak) {
447 			vm_shared_region_peak = vm_shared_region_count;
448 		}
449 		break;
450 	}
451 
452 done:
453 	vm_shared_region_unlock();
454 
455 	if (new_shared_region) {
456 		/*
457 		 * We lost a race with someone else to create a new shared
458 		 * region for that environment. Get rid of our unused one.
459 		 */
460 		assert(new_shared_region->sr_ref_count == 1);
461 		new_shared_region->sr_ref_count--;
462 		vm_shared_region_destroy(new_shared_region);
463 		new_shared_region = NULL;
464 	}
465 
466 	SHARED_REGION_TRACE_DEBUG(
467 		("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
468 		(void *)VM_KERNEL_ADDRPERM(root_dir),
469 		cputype, cpu_subtype, is_64bit, target_page_shift,
470 		reslide, is_driverkit,
471 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
472 
473 	assert(shared_region->sr_ref_count > 0);
474 	return shared_region;
475 }
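/*
 * Illustrative sketch (not part of the original source): the exec-time flow
 * described at the top of this file, expressed with the functions defined
 * here.  The reference taken by vm_shared_region_lookup() is handed over to
 * the task by vm_shared_region_set(); vm_map_exec() then maps the region
 * into the new address space via vm_shared_region_enter().
 *
 *	sr = vm_shared_region_lookup(root_dir, cputype, cpu_subtype, is_64bit,
 *	    target_page_shift, reslide, is_driverkit, rsr_version);
 *	vm_shared_region_set(task, sr);	// consumes the lookup reference
 */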
476 
477 /*
478  * Take an extra reference on a shared region.
479  * The vm_shared_region_lock should already be held by the caller.
480  */
481 static void
482 vm_shared_region_reference_locked(
483 	vm_shared_region_t      shared_region)
484 {
485 	LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
486 
487 	SHARED_REGION_TRACE_DEBUG(
488 		("shared_region: -> reference_locked(%p)\n",
489 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
490 	assert(shared_region->sr_ref_count > 0);
491 	shared_region->sr_ref_count++;
492 	assert(shared_region->sr_ref_count != 0);
493 
494 	if (shared_region->sr_timer_call != NULL) {
495 		boolean_t cancelled;
496 
497 		/* cancel and free any pending timeout */
498 		cancelled = thread_call_cancel(shared_region->sr_timer_call);
499 		if (cancelled) {
500 			thread_call_free(shared_region->sr_timer_call);
501 			shared_region->sr_timer_call = NULL;
502 			/* release the reference held by the cancelled timer */
503 			shared_region->sr_ref_count--;
504 		} else {
505 			/* the timer will drop the reference and free itself */
506 		}
507 	}
508 
509 	SHARED_REGION_TRACE_DEBUG(
510 		("shared_region: reference_locked(%p) <- %d\n",
511 		(void *)VM_KERNEL_ADDRPERM(shared_region),
512 		shared_region->sr_ref_count));
513 }
514 
515 /*
516  * Take a reference on a shared region.
517  */
518 void
519 vm_shared_region_reference(vm_shared_region_t shared_region)
520 {
521 	SHARED_REGION_TRACE_DEBUG(
522 		("shared_region: -> reference(%p)\n",
523 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
524 
525 	vm_shared_region_lock();
526 	vm_shared_region_reference_locked(shared_region);
527 	vm_shared_region_unlock();
528 
529 	SHARED_REGION_TRACE_DEBUG(
530 		("shared_region: reference(%p) <- %d\n",
531 		(void *)VM_KERNEL_ADDRPERM(shared_region),
532 		shared_region->sr_ref_count));
533 }
534 
535 /*
536  * Release a reference on the shared region.
537  * Destroy it if there are no references left.
538  */
539 void
540 vm_shared_region_deallocate(
541 	vm_shared_region_t      shared_region)
542 {
543 	SHARED_REGION_TRACE_DEBUG(
544 		("shared_region: -> deallocate(%p)\n",
545 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
546 
547 	vm_shared_region_lock();
548 
549 	assert(shared_region->sr_ref_count > 0);
550 
551 	if (shared_region->sr_root_dir == NULL) {
552 		/*
553 		 * Local (i.e. based on the boot volume) shared regions
554 		 * can persist or not based on the "shared_region_persistence"
555 		 * sysctl.
556 		 * Make sure that this one complies.
557 		 *
558 		 * See comments in vm_shared_region_slide() for notes about
559 		 * shared regions we have slid (which are not torn down currently).
560 		 */
561 		if (shared_region_persistence &&
562 		    !shared_region->sr_persists) {
563 			/* make this one persistent */
564 			shared_region->sr_ref_count++;
565 			shared_region->sr_persists = TRUE;
566 		} else if (!shared_region_persistence &&
567 		    shared_region->sr_persists) {
568 			/* make this one no longer persistent */
569 			assert(shared_region->sr_ref_count > 1);
570 			shared_region->sr_ref_count--;
571 			shared_region->sr_persists = FALSE;
572 		}
573 	}
574 
575 	assert(shared_region->sr_ref_count > 0);
576 	shared_region->sr_ref_count--;
577 	SHARED_REGION_TRACE_DEBUG(
578 		("shared_region: deallocate(%p): ref now %d\n",
579 		(void *)VM_KERNEL_ADDRPERM(shared_region),
580 		shared_region->sr_ref_count));
581 
582 	if (shared_region->sr_ref_count == 0) {
583 		uint64_t deadline;
584 
585 		/*
586 		 * Even though a shared region is unused, delay a while before
587 		 * tearing it down, in case a new app launch can use it.
588 		 * We don't keep around stale shared regions, nor older RSR ones.
589 		 */
590 		if (shared_region->sr_timer_call == NULL &&
591 		    shared_region_destroy_delay != 0 &&
592 		    !shared_region->sr_stale &&
593 		    !(shared_region->sr_rsr_version != 0 &&
594 		    shared_region->sr_rsr_version != rsr_get_version())) {
595 			/* hold one reference for the timer */
596 			assert(!shared_region->sr_mapping_in_progress);
597 			shared_region->sr_ref_count++;
598 
599 			/* set up the timer */
600 			shared_region->sr_timer_call = thread_call_allocate(
601 				(thread_call_func_t) vm_shared_region_timeout,
602 				(thread_call_param_t) shared_region);
603 
604 			/* schedule the timer */
605 			clock_interval_to_deadline(shared_region_destroy_delay,
606 			    NSEC_PER_SEC,
607 			    &deadline);
608 			thread_call_enter_delayed(shared_region->sr_timer_call,
609 			    deadline);
610 
611 			SHARED_REGION_TRACE_DEBUG(
612 				("shared_region: deallocate(%p): armed timer\n",
613 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
614 
615 			vm_shared_region_unlock();
616 		} else {
617 			/* timer expired: let go of this shared region */
618 
619 			/* Make sure there's no cached pointer to the region. */
620 			if (primary_system_shared_region == shared_region) {
621 				primary_system_shared_region = NULL;
622 			}
623 
624 			/*
625 			 * Remove it from the queue first, so no one can find
626 			 * it...
627 			 */
628 			queue_remove(&vm_shared_region_queue,
629 			    shared_region,
630 			    vm_shared_region_t,
631 			    sr_q);
632 			vm_shared_region_count--;
633 			vm_shared_region_unlock();
634 
635 			/* ... and destroy it */
636 			vm_shared_region_destroy(shared_region);
637 			shared_region = NULL;
638 		}
639 	} else {
640 		vm_shared_region_unlock();
641 	}
642 
643 	SHARED_REGION_TRACE_DEBUG(
644 		("shared_region: deallocate(%p) <-\n",
645 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
646 }
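/*
 * For illustration, the lifetime of an idle shared region under the delayed
 * teardown above (assuming shared_region_destroy_delay != 0 and the region
 * is neither stale nor an outdated RSR one):
 *
 *	last user drops its reference      -> ref_count 0, timer allocated and
 *	                                      armed, ref_count back to 1
 *	timer fires after the delay        -> vm_shared_region_timeout() calls
 *	                                      vm_shared_region_deallocate();
 *	                                      sr_timer_call is non-NULL now, so
 *	                                      the region is dequeued and destroyed
 *	new user arrives before it fires   -> vm_shared_region_reference_locked()
 *	                                      cancels the timer, drops its
 *	                                      reference, and the region is reused
 */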
647 
648 void
649 vm_shared_region_timeout(
650 	thread_call_param_t     param0,
651 	__unused thread_call_param_t    param1)
652 {
653 	vm_shared_region_t      shared_region;
654 
655 	shared_region = (vm_shared_region_t) param0;
656 
657 	vm_shared_region_deallocate(shared_region);
658 }
659 
660 
661 /*
662  * Create a new (empty) shared region for a new environment.
663  */
664 static vm_shared_region_t
665 vm_shared_region_create(
666 	void                    *root_dir,
667 	cpu_type_t              cputype,
668 	cpu_subtype_t           cpu_subtype,
669 	boolean_t               is_64bit,
670 	int                     target_page_shift,
671 #if !__has_feature(ptrauth_calls)
672 	__unused
673 #endif /* __has_feature(ptrauth_calls) */
674 	boolean_t               reslide,
675 	boolean_t               is_driverkit,
676 	uint32_t                rsr_version)
677 {
678 	vm_named_entry_t        mem_entry;
679 	ipc_port_t              mem_entry_port;
680 	vm_shared_region_t      shared_region;
681 	vm_map_t                sub_map;
682 	mach_vm_offset_t        base_address, pmap_nesting_start;
683 	mach_vm_size_t          size, pmap_nesting_size;
684 
685 	SHARED_REGION_TRACE_INFO(
686 		("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
687 		(void *)VM_KERNEL_ADDRPERM(root_dir),
688 		cputype, cpu_subtype, is_64bit, target_page_shift,
689 		reslide, is_driverkit));
690 
691 	base_address = 0;
692 	size = 0;
693 	mem_entry = NULL;
694 	mem_entry_port = IPC_PORT_NULL;
695 	sub_map = VM_MAP_NULL;
696 
697 	/* create a new shared region structure... */
698 	shared_region = kalloc_type(struct vm_shared_region,
699 	    Z_WAITOK | Z_NOFAIL);
700 
701 	/* figure out the correct settings for the desired environment */
702 	if (is_64bit) {
703 		switch (cputype) {
704 #if defined(__arm64__)
705 		case CPU_TYPE_ARM64:
706 			base_address = SHARED_REGION_BASE_ARM64;
707 			size = SHARED_REGION_SIZE_ARM64;
708 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
709 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
710 			break;
711 #else
712 		case CPU_TYPE_I386:
713 			base_address = SHARED_REGION_BASE_X86_64;
714 			size = SHARED_REGION_SIZE_X86_64;
715 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
716 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
717 			break;
718 		case CPU_TYPE_POWERPC:
719 			base_address = SHARED_REGION_BASE_PPC64;
720 			size = SHARED_REGION_SIZE_PPC64;
721 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
722 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
723 			break;
724 #endif
725 		default:
726 			SHARED_REGION_TRACE_ERROR(
727 				("shared_region: create: unknown cpu type %d\n",
728 				cputype));
729 			kfree_type(struct vm_shared_region, shared_region);
730 			shared_region = NULL;
731 			goto done;
732 		}
733 	} else {
734 		switch (cputype) {
735 #if defined(__arm64__)
736 		case CPU_TYPE_ARM:
737 			base_address = SHARED_REGION_BASE_ARM;
738 			size = SHARED_REGION_SIZE_ARM;
739 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
740 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
741 			break;
742 #else
743 		case CPU_TYPE_I386:
744 			base_address = SHARED_REGION_BASE_I386;
745 			size = SHARED_REGION_SIZE_I386;
746 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
747 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
748 			break;
749 		case CPU_TYPE_POWERPC:
750 			base_address = SHARED_REGION_BASE_PPC;
751 			size = SHARED_REGION_SIZE_PPC;
752 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
753 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
754 			break;
755 #endif
756 		default:
757 			SHARED_REGION_TRACE_ERROR(
758 				("shared_region: create: unknown cpu type %d\n",
759 				cputype));
760 			kfree_type(struct vm_shared_region, shared_region);
761 			shared_region = NULL;
762 			goto done;
763 		}
764 	}
765 
766 	/* create a memory entry structure and a Mach port handle */
767 	mem_entry = mach_memory_entry_allocate(&mem_entry_port);
768 
769 #if defined(__arm64__)
770 	{
771 		struct pmap *pmap_nested;
772 		int pmap_flags = 0;
773 		pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
774 
775 
776 #if __ARM_MIXED_PAGE_SIZE__
777 		if (cputype == CPU_TYPE_ARM64 &&
778 		    target_page_shift == FOURK_PAGE_SHIFT) {
779 			/* arm64/4k address space */
780 			pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
781 		}
782 #endif /* __ARM_MIXED_PAGE_SIZE__ */
783 
784 		pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
785 		if (pmap_nested != PMAP_NULL) {
786 			pmap_set_nested(pmap_nested);
787 			sub_map = vm_map_create_options(pmap_nested, 0,
788 			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
789 
790 			if (is_64bit ||
791 			    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
792 				/* enforce 16KB alignment of VM map entries */
793 				vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
794 			}
795 #if __ARM_MIXED_PAGE_SIZE__
796 			if (cputype == CPU_TYPE_ARM64 &&
797 			    target_page_shift == FOURK_PAGE_SHIFT) {
798 				/* arm64/4k address space */
799 				vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
800 			}
801 #endif /* __ARM_MIXED_PAGE_SIZE__ */
802 		} else {
803 			sub_map = VM_MAP_NULL;
804 		}
805 	}
806 #else /* defined(__arm64__) */
807 	{
808 		/* create a VM sub map and its pmap */
809 		pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
810 		if (pmap != NULL) {
811 			sub_map = vm_map_create_options(pmap, 0,
812 			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
813 		} else {
814 			sub_map = VM_MAP_NULL;
815 		}
816 	}
817 #endif /* defined(__arm64__) */
818 	if (sub_map == VM_MAP_NULL) {
819 		ipc_port_release_send(mem_entry_port);
820 		kfree_type(struct vm_shared_region, shared_region);
821 		shared_region = NULL;
822 		SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
823 		goto done;
824 	}
825 
826 	/* shared regions should always enforce code-signing */
827 	vm_map_cs_enforcement_set(sub_map, true);
828 	assert(vm_map_cs_enforcement(sub_map));
829 	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
830 
831 	assert(!sub_map->disable_vmentry_reuse);
832 	sub_map->is_nested_map = TRUE;
833 
834 	/* make the memory entry point to the VM sub map */
835 	mem_entry->is_sub_map = TRUE;
836 	mem_entry->backing.map = sub_map;
837 	mem_entry->size = size;
838 	mem_entry->protection = VM_PROT_ALL;
839 
840 	/* make the shared region point at the memory entry */
841 	shared_region->sr_mem_entry = mem_entry_port;
842 
843 	/* fill in the shared region's environment and settings */
844 	shared_region->sr_base_address = base_address;
845 	shared_region->sr_size = size;
846 	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
847 	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
848 	shared_region->sr_cpu_type = cputype;
849 	shared_region->sr_cpu_subtype = cpu_subtype;
850 	shared_region->sr_64bit = (uint8_t)is_64bit;
851 #if __ARM_MIXED_PAGE_SIZE__
852 	shared_region->sr_page_shift = (uint8_t)target_page_shift;
853 #endif /* __ARM_MIXED_PAGE_SIZE__ */
854 	shared_region->sr_driverkit = (uint8_t)is_driverkit;
855 	shared_region->sr_rsr_version = rsr_version;
856 	shared_region->sr_root_dir = root_dir;
857 
858 	queue_init(&shared_region->sr_q);
859 	shared_region->sr_mapping_in_progress = THREAD_NULL;
860 	shared_region->sr_slide_in_progress = THREAD_NULL;
861 	shared_region->sr_persists = FALSE;
862 	shared_region->sr_stale = FALSE;
863 	shared_region->sr_timer_call = NULL;
864 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
865 
866 	/* grab a reference for the caller */
867 	shared_region->sr_ref_count = 1;
868 
869 	shared_region->sr_slide = 0; /* not slid yet */
870 
871 	/* Initialize UUID and other metadata */
872 	memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
873 	shared_region->sr_uuid_copied = FALSE;
874 	shared_region->sr_images_count = 0;
875 	shared_region->sr_images = NULL;
876 #if __has_feature(ptrauth_calls)
877 	shared_region->sr_reslide = reslide;
878 	shared_region->sr_num_auth_section = 0;
879 	shared_region->sr_next_auth_section = 0;
880 	shared_region->sr_auth_section = NULL;
881 #endif /* __has_feature(ptrauth_calls) */
882 
883 done:
884 	if (shared_region) {
885 		SHARED_REGION_TRACE_INFO(
886 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
887 			"base=0x%llx,size=0x%llx) <- "
888 			"%p mem=(%p,%p) map=%p pmap=%p\n",
889 			(void *)VM_KERNEL_ADDRPERM(root_dir),
890 			cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
891 			(long long)base_address,
892 			(long long)size,
893 			(void *)VM_KERNEL_ADDRPERM(shared_region),
894 			(void *)VM_KERNEL_ADDRPERM(mem_entry_port),
895 			(void *)VM_KERNEL_ADDRPERM(mem_entry),
896 			(void *)VM_KERNEL_ADDRPERM(sub_map),
897 			(void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
898 	} else {
899 		SHARED_REGION_TRACE_INFO(
900 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
901 			"base=0x%llx,size=0x%llx) <- NULL",
902 			(void *)VM_KERNEL_ADDRPERM(root_dir),
903 			cputype, cpu_subtype, is_64bit, is_driverkit,
904 			(long long)base_address,
905 			(long long)size));
906 	}
907 	return shared_region;
908 }
909 
910 /*
911  * Destroy a now-unused shared region.
912  * The shared region is no longer in the queue and can not be looked up.
913  */
914 static void
915 vm_shared_region_destroy(
916 	vm_shared_region_t      shared_region)
917 {
918 	vm_named_entry_t        mem_entry;
919 	vm_map_t                map;
920 
921 	SHARED_REGION_TRACE_INFO(
922 		("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
923 		(void *)VM_KERNEL_ADDRPERM(shared_region),
924 		(void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
925 		shared_region->sr_cpu_type,
926 		shared_region->sr_cpu_subtype,
927 		shared_region->sr_64bit,
928 		shared_region->sr_driverkit));
929 
930 	assert(shared_region->sr_ref_count == 0);
931 	assert(!shared_region->sr_persists);
932 
933 	mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
934 	assert(mem_entry->is_sub_map);
935 	assert(!mem_entry->internal);
936 	assert(!mem_entry->is_copy);
937 	map = mem_entry->backing.map;
938 
939 	/*
940 	 * Clean up the pmap first.  The virtual addresses that were
941 	 * entered in this possibly "nested" pmap may have different values
942 	 * than the VM map's min and max offsets, if the VM sub map was
943 	 * mapped at a non-zero offset in the processes' main VM maps, which
944 	 * is usually the case, so the clean-up we do in vm_map_destroy() would
945 	 * not be enough.
946 	 */
947 	if (map->pmap) {
948 		pmap_remove(map->pmap,
949 		    (vm_map_offset_t)shared_region->sr_base_address,
950 		    (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
951 	}
952 
953 	/*
954 	 * Release our (one and only) handle on the memory entry.
955 	 * This will generate a no-senders notification, which will be processed
956 	 * by ipc_kobject_notify_no_senders(), which will release the one and only
957 	 * reference on the memory entry and cause it to be destroyed, along
958 	 * with the VM sub map and its pmap.
959 	 */
960 	mach_memory_entry_port_release(shared_region->sr_mem_entry);
961 	mem_entry = NULL;
962 	shared_region->sr_mem_entry = IPC_PORT_NULL;
963 
964 	if (shared_region->sr_timer_call) {
965 		thread_call_free(shared_region->sr_timer_call);
966 	}
967 
968 #if __has_feature(ptrauth_calls)
969 	/*
970 	 * Free the cached copies of slide_info for the AUTH regions.
971 	 */
972 	for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
973 		vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
974 		if (si != NULL) {
975 			vm_object_deallocate(si->si_slide_object);
976 			kfree_data(si->si_slide_info_entry,
977 			    si->si_slide_info_size);
978 			kfree_type(struct vm_shared_region_slide_info, si);
979 			shared_region->sr_auth_section[i] = NULL;
980 		}
981 	}
982 	if (shared_region->sr_auth_section != NULL) {
983 		assert(shared_region->sr_num_auth_section > 0);
984 		kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
985 		shared_region->sr_auth_section = NULL;
986 		shared_region->sr_num_auth_section = 0;
987 	}
988 #endif /* __has_feature(ptrauth_calls) */
989 
990 	/* release the shared region structure... */
991 	kfree_type(struct vm_shared_region, shared_region);
992 
993 	SHARED_REGION_TRACE_DEBUG(
994 		("shared_region: destroy(%p) <-\n",
995 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
996 	shared_region = NULL;
997 }
998 
999 /*
1000  * Gets the address of the first (in time) mapping in the shared region.
1001  * If used during initial task setup by dyld, task should be non-NULL.
1002  */
1003 kern_return_t
1004 vm_shared_region_start_address(
1005 	vm_shared_region_t      shared_region,
1006 	mach_vm_offset_t        *start_address,
1007 	task_t                  task)
1008 {
1009 	kern_return_t           kr;
1010 	mach_vm_offset_t        sr_base_address;
1011 	mach_vm_offset_t        sr_first_mapping;
1012 
1013 	SHARED_REGION_TRACE_DEBUG(
1014 		("shared_region: -> start_address(%p)\n",
1015 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1016 
1017 	vm_shared_region_lock();
1018 
1019 	/*
1020 	 * Wait if there's another thread establishing a mapping
1021 	 * in this shared region right when we're looking at it.
1022 	 * We want a consistent view of the map...
1023 	 */
1024 	while (shared_region->sr_mapping_in_progress) {
1025 		/* wait for our turn... */
1026 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1027 		    THREAD_UNINT);
1028 	}
1029 	assert(!shared_region->sr_mapping_in_progress);
1030 	assert(shared_region->sr_ref_count > 0);
1031 
1032 	sr_base_address = shared_region->sr_base_address;
1033 	sr_first_mapping = shared_region->sr_first_mapping;
1034 
1035 	if (sr_first_mapping == (mach_vm_offset_t) -1) {
1036 		/* shared region is empty */
1037 		kr = KERN_INVALID_ADDRESS;
1038 	} else {
1039 		kr = KERN_SUCCESS;
1040 		*start_address = sr_base_address + sr_first_mapping;
1041 	}
1042 
1043 
1044 	uint32_t slide = shared_region->sr_slide;
1045 
1046 	vm_shared_region_unlock();
1047 
1048 	/*
1049 	 * Cache shared region info in the task for telemetry gathering, if we're
1050 	 * passed in the task. No task lock here as we're still in initial task setup.
1051 	 */
1052 	if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
1053 		uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1054 		if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
1055 		    (char *)&task->task_shared_region_uuid,
1056 		    sizeof(task->task_shared_region_uuid)) == 0) {
1057 			task->task_shared_region_slide = slide;
1058 		}
1059 	}
1060 
1061 	SHARED_REGION_TRACE_DEBUG(
1062 		("shared_region: start_address(%p) <- 0x%llx\n",
1063 		(void *)VM_KERNEL_ADDRPERM(shared_region),
1064 		(long long)shared_region->sr_base_address));
1065 
1066 	return kr;
1067 }
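/*
 * For illustration (hypothetical numbers): with sr_base_address =
 * 0x180000000 and sr_first_mapping = 0x4000, dyld would be handed
 * 0x180000000 + 0x4000 = 0x180004000 as the start address of the
 * shared cache in its address space.
 */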
1068 
1069 /*
1070  * Look up a pre-existing mapping in shared region, for replacement.
1071  * Takes an extra object reference if found.
1072  */
1073 static kern_return_t
1074 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1075 {
1076 	vm_map_entry_t found;
1077 
1078 	/* find the shared region's map entry to slide */
1079 	vm_map_lock_read(map);
1080 	if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
1081 		/* no mapping there */
1082 		vm_map_unlock(map);
1083 		return KERN_INVALID_ARGUMENT;
1084 	}
1085 
1086 	*entry = *found;
1087 	/* extra ref to keep object alive while map is unlocked */
1088 	vm_object_reference(VME_OBJECT(found));
1089 	vm_map_unlock_read(map);
1090 	return KERN_SUCCESS;
1091 }
1092 
1093 static bool
1094 shared_region_make_permanent(
1095 	vm_shared_region_t sr,
1096 	vm_prot_t max_prot)
1097 {
1098 	if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1099 		return false;
1100 	}
1101 	if (max_prot & VM_PROT_WRITE) {
1102 		/*
1103 		 * Potentially writable mapping: no major issue with allowing
1104 		 * it to be replaced since its contents could be modified
1105 		 * anyway.
1106 		 */
1107 		return false;
1108 	}
1109 	if (max_prot & VM_PROT_EXECUTE) {
1110 		/*
1111 		 * Potentially executable mapping: some software might want
1112 		 * to try and replace it to interpose their own code when a
1113 		 * given routine is called or returns, for example.
1114 		 * So let's not make it "permanent".
1115 		 */
1116 		return false;
1117 	}
1118 	/*
1119 	 * Make this mapping "permanent" to prevent it from being deleted
1120 	 * and/or replaced with another mapping.
1121 	 */
1122 	return true;
1123 }
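/*
 * For illustration, the decision above reduces to:
 *	x86_64 shared region                      -> never permanent
 *	max protection includes VM_PROT_WRITE     -> not permanent (contents mutable anyway)
 *	max protection includes VM_PROT_EXECUTE   -> not permanent (allow interposing)
 *	read-only, non-executable, non-x86_64     -> permanent (mapping cannot be replaced)
 */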
1124 
1125 static bool
1126 shared_region_tpro_protect(
1127 	vm_shared_region_t sr,
1128 	vm_prot_t max_prot __unused)
1129 {
1130 	if (sr->sr_cpu_type != CPU_TYPE_ARM64) {
1131 		return false;
1132 	}
1133 
1134 
1135 	/*
1136 	 * Unless otherwise explicitly requested all other mappings do not get
1137 	 * TPRO protection.
1138 	 */
1139 	return false;
1140 }
1141 
1142 #if __has_feature(ptrauth_calls)
1143 
1144 /*
1145  * Determine if this task is actually using pointer signing.
1146  */
1147 static boolean_t
1148 task_sign_pointers(task_t task)
1149 {
1150 	if (task->map &&
1151 	    task->map->pmap &&
1152 	    !task->map->pmap->disable_jop) {
1153 		return TRUE;
1154 	}
1155 	return FALSE;
1156 }
1157 
1158 /*
1159  * If the shared region contains mappings that are authenticated, then
1160  * remap them into the task private map.
1161  *
1162  * Failures are possible in this routine when jetsam kills a process
1163  * just as dyld is trying to set it up. The vm_map and task shared region
1164  * info get torn down w/o waiting for this thread to finish up.
1165  */
1166 __attribute__((noinline))
1167 kern_return_t
1168 vm_shared_region_auth_remap(vm_shared_region_t sr)
1169 {
1170 	memory_object_t               sr_pager = MEMORY_OBJECT_NULL;
1171 	task_t                        task = current_task();
1172 	vm_shared_region_slide_info_t si;
1173 	uint_t                        i;
1174 	vm_object_t                   object;
1175 	vm_map_t                      sr_map;
1176 	struct vm_map_entry           tmp_entry_store = {0};
1177 	vm_map_entry_t                tmp_entry = NULL;
1178 	vm_map_kernel_flags_t         vmk_flags;
1179 	vm_map_offset_t               map_addr;
1180 	kern_return_t                 kr = KERN_SUCCESS;
1181 	boolean_t                     use_ptr_auth = task_sign_pointers(task);
1182 
1183 	/*
1184 	 * Don't do this more than once and avoid any race conditions in finishing it.
1185 	 */
1186 	vm_shared_region_lock();
1187 	while (sr->sr_mapping_in_progress) {
1188 		/* wait for our turn... */
1189 		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
1190 	}
1191 	assert(!sr->sr_mapping_in_progress);
1192 	assert(sr->sr_ref_count > 0);
1193 
1194 	/* Just return if already done. */
1195 	if (task->shared_region_auth_remapped) {
1196 		vm_shared_region_unlock();
1197 		return KERN_SUCCESS;
1198 	}
1199 
1200 	/* let others know to wait while we're working in this shared region */
1201 	sr->sr_mapping_in_progress = current_thread();
1202 	vm_shared_region_unlock();
1203 
1204 	/*
1205 	 * Remap any sections with pointer authentications into the private map.
1206 	 */
1207 	for (i = 0; i < sr->sr_num_auth_section; ++i) {
1208 		si = sr->sr_auth_section[i];
1209 		assert(si != NULL);
1210 		assert(si->si_ptrauth);
1211 
1212 		/*
1213 		 * We have a mapping that needs to be private.
1214 		 * Look for an existing slid mapping's pager with matching
1215 		 * object, offset, slide info and shared_region_id to reuse.
1216 		 */
1217 		object = si->si_slide_object;
1218 		sr_pager = shared_region_pager_match(object, si->si_start, si,
1219 		    use_ptr_auth ? task->jop_pid : 0);
1220 		if (sr_pager == MEMORY_OBJECT_NULL) {
1221 			printf("%s(): shared_region_pager_match() failed\n", __func__);
1222 			kr = KERN_FAILURE;
1223 			goto done;
1224 		}
1225 
1226 		/*
1227 		 * verify matching jop_pid for this task and this pager
1228 		 */
1229 		if (use_ptr_auth) {
1230 			shared_region_pager_match_task_key(sr_pager, task);
1231 		}
1232 
1233 		sr_map = vm_shared_region_vm_map(sr);
1234 		tmp_entry = NULL;
1235 
1236 		kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1237 		if (kr != KERN_SUCCESS) {
1238 			printf("%s(): find_mapping_to_slide() failed\n", __func__);
1239 			goto done;
1240 		}
1241 		tmp_entry = &tmp_entry_store;
1242 
1243 		/*
1244 		 * Check that the object exactly covers the region to slide.
1245 		 */
1246 		if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1247 			printf("%s(): doesn't fully cover\n", __func__);
1248 			kr = KERN_FAILURE;
1249 			goto done;
1250 		}
1251 
1252 		/*
1253 		 * map the pager over the portion of the mapping that needs sliding
1254 		 */
1255 		vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
1256 		vmk_flags.vmkf_overwrite_immutable = true;
1257 		vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
1258 		    tmp_entry->max_protection);
1259 
1260 		/* Preserve the TPRO flag if task has TPRO enabled */
1261 		vmk_flags.vmf_tpro = (vm_map_tpro(task->map) &&
1262 		    tmp_entry->used_for_tpro &&
1263 		    task_is_hardened_binary(task));
1264 
1265 		map_addr = si->si_slid_address;
1266 		kr = mach_vm_map_kernel(task->map,
1267 		    vm_sanitize_wrap_addr_ref(&map_addr),
1268 		    si->si_end - si->si_start,
1269 		    0,
1270 		    vmk_flags,
1271 		    (ipc_port_t)(uintptr_t) sr_pager,
1272 		    0,
1273 		    TRUE,
1274 		    tmp_entry->protection,
1275 		    tmp_entry->max_protection,
1276 		    tmp_entry->inheritance);
1277 		memory_object_deallocate(sr_pager);
1278 		sr_pager = MEMORY_OBJECT_NULL;
1279 		if (kr != KERN_SUCCESS) {
1280 			printf("%s(): mach_vm_map_kernel() failed\n", __func__);
1281 			goto done;
1282 		}
1283 		assertf(map_addr == si->si_slid_address,
1284 		    "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1285 		    (uint64_t)map_addr,
1286 		    (uint64_t)si->si_slid_address,
1287 		    tmp_entry);
1288 
1289 		/* Drop the ref count grabbed by find_mapping_to_slide */
1290 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1291 		tmp_entry = NULL;
1292 	}
1293 
1294 done:
1295 	if (tmp_entry) {
1296 		/* Drop the ref count grabbed by find_mapping_to_slide */
1297 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1298 		tmp_entry = NULL;
1299 	}
1300 
1301 	/*
1302 	 * Drop any extra reference to the pager in case we're quitting due to an error above.
1303 	 */
1304 	if (sr_pager != MEMORY_OBJECT_NULL) {
1305 		memory_object_deallocate(sr_pager);
1306 	}
1307 
1308 	/*
1309 	 * Mark the region as having its auth sections remapped.
1310 	 */
1311 	vm_shared_region_lock();
1312 	task->shared_region_auth_remapped = TRUE;
1313 	assert(sr->sr_mapping_in_progress == current_thread());
1314 	sr->sr_mapping_in_progress = THREAD_NULL;
1315 	vm_shared_region_wakeup((event_t)&sr->sr_mapping_in_progress);
1316 	vm_shared_region_unlock();
1317 	return kr;
1318 }
1319 #endif /* __has_feature(ptrauth_calls) */
1320 
1321 void
1322 vm_shared_region_undo_mappings(
1323 	vm_map_t                 sr_map,
1324 	mach_vm_offset_t         sr_base_address,
1325 	struct _sr_file_mappings *srf_mappings,
1326 	struct _sr_file_mappings *srf_mappings_current,
1327 	unsigned int             srf_current_mappings_count)
1328 {
1329 	unsigned int             j = 0;
1330 	vm_shared_region_t       shared_region = NULL;
1331 	boolean_t                reset_shared_region_state = FALSE;
1332 	struct _sr_file_mappings *srfmp;
1333 	unsigned int             mappings_count;
1334 	struct shared_file_mapping_slide_np *mappings;
1335 
1336 	shared_region = vm_shared_region_get(current_task());
1337 	if (shared_region == NULL) {
1338 		printf("Failed to undo mappings because of NULL shared region.\n");
1339 		return;
1340 	}
1341 
1342 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
1343 
1344 	if (sr_map == NULL) {
1345 		ipc_port_t              sr_handle;
1346 		vm_named_entry_t        sr_mem_entry;
1347 
1348 		vm_shared_region_lock();
1349 		assert(shared_region->sr_ref_count > 0);
1350 
1351 		while (shared_region->sr_mapping_in_progress) {
1352 			/* wait for our turn... */
1353 			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1354 			    THREAD_UNINT);
1355 		}
1356 		assert(!shared_region->sr_mapping_in_progress);
1357 		assert(shared_region->sr_ref_count > 0);
1358 		/* let others know we're working in this shared region */
1359 		shared_region->sr_mapping_in_progress = current_thread();
1360 
1361 		vm_shared_region_unlock();
1362 
1363 		reset_shared_region_state = TRUE;
1364 
1365 		/* no need to lock because this data is never modified... */
1366 		sr_handle = shared_region->sr_mem_entry;
1367 		sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1368 		sr_map = sr_mem_entry->backing.map;
1369 		sr_base_address = shared_region->sr_base_address;
1370 	}
1371 	/*
1372 	 * Undo the mappings we've established so far.
1373 	 */
1374 	for (srfmp = &srf_mappings[0];
1375 	    srfmp <= srf_mappings_current;
1376 	    srfmp++) {
1377 		mappings = srfmp->mappings;
1378 		mappings_count = srfmp->mappings_count;
1379 		if (srfmp == srf_mappings_current) {
1380 			mappings_count = srf_current_mappings_count;
1381 		}
1382 
1383 		for (j = 0; j < mappings_count; j++) {
1384 			kern_return_t kr2;
1385 			mach_vm_offset_t start, end;
1386 
1387 			if (mappings[j].sms_size == 0) {
1388 				/*
1389 				 * We didn't establish this
1390 				 * mapping, so nothing to undo.
1391 				 */
1392 				continue;
1393 			}
1394 			SHARED_REGION_TRACE_INFO(
1395 				("shared_region: mapping[%d]: "
1396 				"address:0x%016llx "
1397 				"size:0x%016llx "
1398 				"offset:0x%016llx "
1399 				"maxprot:0x%x prot:0x%x: "
1400 				"undoing...\n",
1401 				j,
1402 				(long long)mappings[j].sms_address,
1403 				(long long)mappings[j].sms_size,
1404 				(long long)mappings[j].sms_file_offset,
1405 				mappings[j].sms_max_prot,
1406 				mappings[j].sms_init_prot));
1407 			start = (mappings[j].sms_address - sr_base_address);
1408 			end = start + mappings[j].sms_size;
1409 			start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
1410 			end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
1411 			kr2 = vm_map_remove_guard(sr_map,
1412 			    start,
1413 			    end,
1414 			    VM_MAP_REMOVE_IMMUTABLE,
1415 			    KMEM_GUARD_NONE).kmr_return;
1416 			assert(kr2 == KERN_SUCCESS);
1417 		}
1418 	}
1419 
1420 	if (reset_shared_region_state) {
1421 		vm_shared_region_lock();
1422 		assert(shared_region->sr_ref_count > 0);
1423 		assert(shared_region->sr_mapping_in_progress == current_thread());
1424 		/* we're done working on that shared region */
1425 		shared_region->sr_mapping_in_progress = THREAD_NULL;
1426 		vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1427 		vm_shared_region_unlock();
1428 		reset_shared_region_state = FALSE;
1429 	}
1430 
1431 	vm_shared_region_deallocate(shared_region);
1432 }
1433 
1434 /*
1435  * First part of vm_shared_region_map_file(). Split out to
1436  * avoid kernel stack overflow.
1437  */
1438 __attribute__((noinline))
1439 static kern_return_t
1440 vm_shared_region_map_file_setup(
1441 	vm_shared_region_t              shared_region,
1442 	int                             sr_file_mappings_count,
1443 	struct _sr_file_mappings        *sr_file_mappings,
1444 	unsigned int                    *mappings_to_slide_cnt,
1445 	struct shared_file_mapping_slide_np **mappings_to_slide,
1446 	mach_vm_offset_t                *slid_mappings,
1447 	memory_object_control_t         *slid_file_controls,
1448 	mach_vm_offset_t                *sfm_min_address,
1449 	mach_vm_offset_t                *sfm_max_address,
1450 	vm_map_t                        *sr_map_ptr,
1451 	vm_map_offset_t                 *lowest_unnestable_addr_ptr,
1452 	unsigned int                    vmsr_num_slides)
1453 {
1454 	kern_return_t           kr = KERN_SUCCESS;
1455 	memory_object_control_t file_control;
1456 	vm_object_t             file_object;
1457 	ipc_port_t              sr_handle;
1458 	vm_named_entry_t        sr_mem_entry;
1459 	vm_map_t                sr_map;
1460 	mach_vm_offset_t        sr_base_address;
1461 	unsigned int            i = 0;
1462 	mach_port_t             map_port;
1463 	vm_map_offset_t         target_address;
1464 	vm_object_t             object;
1465 	vm_object_size_t        obj_size;
1466 	vm_map_offset_t         lowest_unnestable_addr = 0;
1467 	vm_map_kernel_flags_t   vmk_flags;
1468 	mach_vm_offset_t        sfm_end;
1469 	uint32_t                mappings_count;
1470 	struct shared_file_mapping_slide_np *mappings;
1471 	struct _sr_file_mappings *srfmp;
1472 
1473 	vm_shared_region_lock();
1474 	assert(shared_region->sr_ref_count > 0);
1475 
1476 	/*
1477 	 * Make sure we handle only one mapping at a time in a given
1478 	 * shared region, to avoid race conditions.  This should not
1479 	 * happen frequently...
1480 	 */
1481 	while (shared_region->sr_mapping_in_progress) {
1482 		/* wait for our turn... */
1483 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1484 		    THREAD_UNINT);
1485 	}
1486 	assert(!shared_region->sr_mapping_in_progress);
1487 	assert(shared_region->sr_ref_count > 0);
1488 
1489 
1490 	/* let others know we're working in this shared region */
1491 	shared_region->sr_mapping_in_progress = current_thread();
1492 
1493 	/*
1494 	 * Did someone race in and map this shared region already?
1495 	 */
1496 	if (shared_region->sr_first_mapping != -1) {
1497 		vm_shared_region_unlock();
1498 #if DEVELOPMENT || DEBUG
1499 		printf("shared_region: caught race in map and slide\n");
1500 #endif /* DEVELOPMENT || DEBUG */
1501 		return KERN_FAILURE;
1502 	}
1503 
1504 	vm_shared_region_unlock();
1505 
1506 	/* no need to lock because this data is never modified... */
1507 	sr_handle = shared_region->sr_mem_entry;
1508 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1509 	sr_map = sr_mem_entry->backing.map;
1510 	sr_base_address = shared_region->sr_base_address;
1511 
1512 	SHARED_REGION_TRACE_DEBUG(
1513 		("shared_region: -> map(%p)\n",
1514 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1515 
1516 	mappings_count = 0;
1517 	mappings = NULL;
1518 	srfmp = NULL;
1519 
1520 	/* process all the files to be mapped */
1521 	for (srfmp = &sr_file_mappings[0];
1522 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
1523 	    srfmp++) {
1524 		mappings_count = srfmp->mappings_count;
1525 		mappings = srfmp->mappings;
1526 		file_control = srfmp->file_control;
1527 
1528 		if (mappings_count == 0) {
1529 			/* no mappings here... */
1530 			continue;
1531 		}
1532 
1533 		/*
1534 		 * The code below can only correctly "slide" (perform relocations) for one
1535 		 * value of the slide amount. So if a file has a non-zero slide, it has to
1536 		 * match any previous value. A zero slide value is ok for things that are
1537 		 * just directly mapped.
1538 		 */
1539 		if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1540 			shared_region->sr_slide = srfmp->slide;
1541 		} else if (shared_region->sr_slide != 0 &&
1542 		    srfmp->slide != 0 &&
1543 		    shared_region->sr_slide != srfmp->slide) {
1544 			SHARED_REGION_TRACE_ERROR(
1545 				("shared_region: more than one non-zero slide value: "
1546 				"slide 1:0x%x slide 2:0x%x\n",
1547 				shared_region->sr_slide, srfmp->slide));
1548 			kr = KERN_INVALID_ARGUMENT;
1549 			break;
1550 		}
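		/*
		 * For example, if the first file in this call was mapped with a
		 * slide of 0x4000, a later file requesting 0x8000 is rejected
		 * above with KERN_INVALID_ARGUMENT, while a file with a slide of
		 * 0 (directly mapped, no relocations) is still accepted.
		 */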
1551 
1552 		/*
1553 		 * An FD of -1 means we need to copyin the data to an anonymous object.
1554 		 */
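		/*
		 * Roughly, the steps below are: allocate an anonymous VM object,
		 * map it into the kernel map, zero it, copyin() the caller's data
		 * (sms_file_offset is reused as the user-space source address),
		 * unmap it from the kernel, and finally map the object into the
		 * shared region at sms_address.
		 */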
1555 		if (srfmp->fd == -1) {
1556 			assert(mappings_count == 1);
1557 			SHARED_REGION_TRACE_INFO(
1558 				("shared_region: mapping[0]: "
1559 				"address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
1560 				"maxprot:0x%x prot:0x%x fd==-1\n",
1561 				(long long)mappings[0].sms_address,
1562 				(long long)mappings[0].sms_size,
1563 				(long long)mappings[0].sms_file_offset,
1564 				mappings[0].sms_max_prot,
1565 				mappings[0].sms_init_prot));
1566 
1567 			/*
1568 			 * We need an anon object to hold the data in the shared region.
1569 			 * The size needs to be suitable for mapping into the kernel.
1570 			 */
1571 			obj_size = vm_object_round_page(mappings->sms_size);
1572 			object = vm_object_allocate(obj_size);
1573 			if (object == VM_OBJECT_NULL) {
1574 				printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
1575 				kr = KERN_RESOURCE_SHORTAGE;
1576 				break;
1577 			}
1578 
1579 			/*
1580 			 * map the object into the kernel
1581 			 */
1582 			vm_map_offset_t kaddr = 0;
1583 			vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
1584 			vmk_flags.vmkf_no_copy_on_read = 1;
1585 			vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1586 
1587 			kr = vm_map_enter(kernel_map,
1588 			    &kaddr,
1589 			    obj_size,
1590 			    0,
1591 			    vmk_flags,
1592 			    object,
1593 			    0,
1594 			    FALSE,
1595 			    (VM_PROT_READ | VM_PROT_WRITE),
1596 			    (VM_PROT_READ | VM_PROT_WRITE),
1597 			    VM_INHERIT_NONE);
1598 			if (kr != KERN_SUCCESS) {
1599 				printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
1600 				vm_object_deallocate(object);
1601 				object = VM_OBJECT_NULL;
1602 				break;
1603 			}
1604 
1605 			/*
1606 			 * We'll need another reference to keep the object alive after
1607 			 * we vm_map_remove() it from the kernel.
1608 			 */
1609 			vm_object_reference(object);
1610 
1611 			/*
1612 			 * Zero out the object's pages, so we can't leak data.
1613 			 */
1614 			bzero((void *)kaddr, obj_size);
1615 
1616 			/*
1617 			 * Copyin the data from dyld to the new object.
1618 			 * Then remove the kernel mapping.
1619 			 */
1620 			int copyin_err =
1621 			    copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
1622 			vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
1623 			if (copyin_err) {
1624 				printf("%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
1625 				switch (copyin_err) {
1626 				case EPERM:
1627 				case EACCES:
1628 					kr = KERN_PROTECTION_FAILURE;
1629 					break;
1630 				case EFAULT:
1631 					kr = KERN_INVALID_ADDRESS;
1632 					break;
1633 				default:
1634 					kr = KERN_FAILURE;
1635 					break;
1636 				}
1637 				vm_object_deallocate(object);
1638 				object = VM_OBJECT_NULL;
1639 				break;
1640 			}
1641 
1642 			/*
1643 			 * Finally map the object into the shared region.
1644 			 */
1645 			target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
1646 			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1647 			vmk_flags.vmkf_already = TRUE;
1648 			vmk_flags.vmkf_no_copy_on_read = 1;
1649 			vmk_flags.vmf_permanent = shared_region_make_permanent(shared_region,
1650 			    mappings[0].sms_max_prot);
1651 
1652 			kr = vm_map_enter(
1653 				sr_map,
1654 				&target_address,
1655 				vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1656 				0,
1657 				vmk_flags,
1658 				object,
1659 				0,
1660 				TRUE,
1661 				mappings[0].sms_init_prot & VM_PROT_ALL,
1662 				mappings[0].sms_max_prot & VM_PROT_ALL,
1663 				VM_INHERIT_DEFAULT);
1664 			if (kr != KERN_SUCCESS) {
1665 				printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
1666 				vm_object_deallocate(object);
1667 				break;
1668 			}
1669 
1670 			if (mappings[0].sms_address < *sfm_min_address) {
1671 				*sfm_min_address = mappings[0].sms_address;
1672 			}
1673 
1674 			if (os_add_overflow(mappings[0].sms_address,
1675 			    mappings[0].sms_size,
1676 			    &sfm_end) ||
1677 			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1678 			    mappings[0].sms_address)) {
1679 				/* overflow */
1680 				kr = KERN_INVALID_ARGUMENT;
1681 				break;
1682 			}
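			/*
			 * The test above catches both a wrapping 64-bit addition
			 * and the case where rounding sfm_end up to a page boundary
			 * wraps past the top of the address space (e.g. an end of
			 * 0xFFFFFFFFFFFFF800 rounds up to 0, which then compares
			 * below sms_address).
			 */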
1683 
1684 			if (sfm_end > *sfm_max_address) {
1685 				*sfm_max_address = sfm_end;
1686 			}
1687 
1688 			continue;
1689 		}
1690 
1691 		/* get the VM object associated with the file to be mapped */
1692 		file_object = memory_object_control_to_vm_object(file_control);
1693 		assert(file_object);
1694 
1695 		if (!file_object->object_is_shared_cache) {
1696 			vm_object_lock(file_object);
1697 			file_object->object_is_shared_cache = true;
1698 			vm_object_unlock(file_object);
1699 		}
1700 
1701 #if CONFIG_SECLUDED_MEMORY
1702 		/*
1703 		 * Camera will need the (primary system) shared cache, so keep its
1704 		 * pages off the secluded queue; if the primary region is not known
1705 		 * yet, assume this one is it.  Also keep DEXT pages off secluded.
1706 		 */
1707 		if (primary_system_shared_region == NULL ||
1708 		    primary_system_shared_region == shared_region ||
1709 		    shared_region->sr_driverkit) {
1710 			memory_object_mark_eligible_for_secluded(file_control, FALSE);
1711 		}
1712 #endif /* CONFIG_SECLUDED_MEMORY */
1713 
1714 		/* establish the mappings for that file */
1715 		for (i = 0; i < mappings_count; i++) {
1716 			SHARED_REGION_TRACE_INFO(
1717 				("shared_region: mapping[%d]: "
1718 				"address:0x%016llx size:0x%016llx offset:0x%016llx "
1719 				"maxprot:0x%x prot:0x%x\n",
1720 				i,
1721 				(long long)mappings[i].sms_address,
1722 				(long long)mappings[i].sms_size,
1723 				(long long)mappings[i].sms_file_offset,
1724 				mappings[i].sms_max_prot,
1725 				mappings[i].sms_init_prot));
1726 
1727 			if (mappings[i].sms_address < *sfm_min_address) {
1728 				*sfm_min_address = mappings[i].sms_address;
1729 			}
1730 
1731 			if (os_add_overflow(mappings[i].sms_address,
1732 			    mappings[i].sms_size,
1733 			    &sfm_end) ||
1734 			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1735 			    mappings[i].sms_address)) {
1736 				/* overflow */
1737 				kr = KERN_INVALID_ARGUMENT;
1738 				break;
1739 			}
1740 
1741 			if (sfm_end > *sfm_max_address) {
1742 				*sfm_max_address = sfm_end;
1743 			}
1744 
1745 			if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1746 				/* zero-filled memory */
1747 				map_port = MACH_PORT_NULL;
1748 			} else {
1749 				/* file-backed memory */
1750 				__IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1751 			}
1752 
1753 			/*
1754 			 * Remember which mappings need sliding.
1755 			 */
1756 			if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1757 				if (*mappings_to_slide_cnt == vmsr_num_slides) {
1758 					SHARED_REGION_TRACE_INFO(
1759 						("shared_region: mapping[%d]: "
1760 						"address:0x%016llx size:0x%016llx "
1761 						"offset:0x%016llx "
1762 						"maxprot:0x%x prot:0x%x "
1763 						"too many mappings to slide...\n",
1764 						i,
1765 						(long long)mappings[i].sms_address,
1766 						(long long)mappings[i].sms_size,
1767 						(long long)mappings[i].sms_file_offset,
1768 						mappings[i].sms_max_prot,
1769 						mappings[i].sms_init_prot));
1770 				} else {
1771 					mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1772 					*mappings_to_slide_cnt += 1;
1773 				}
1774 			}
1775 
1776 			/* mapping's address is relative to the shared region base */
1777 			if (__improbable(
1778 				    os_sub_overflow(
1779 					    mappings[i].sms_address,
1780 					    sr_base_address,
1781 					    &target_address))) {
1782 				kr = KERN_INVALID_ARGUMENT;
1783 				break;
1784 			}
1785 
1786 			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1787 			vmk_flags.vmkf_already = TRUE;
1788 			/* no copy-on-read for mapped binaries */
1789 			vmk_flags.vmkf_no_copy_on_read = 1;
1790 			vmk_flags.vmf_permanent = shared_region_make_permanent(
1791 				shared_region,
1792 				mappings[i].sms_max_prot);
1793 			vmk_flags.vmf_tpro = shared_region_tpro_protect(
1794 				shared_region,
1795 				mappings[i].sms_max_prot);
1796 
1797 			/* establish that mapping, OK if it's "already" there */
1798 			if (map_port == MACH_PORT_NULL) {
1799 				/*
1800 				 * We want to map some anonymous memory in a shared region.
1801 				 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1802 				 */
1803 				obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1804 				object = vm_object_allocate(obj_size);
1805 				if (object == VM_OBJECT_NULL) {
1806 					kr = KERN_RESOURCE_SHORTAGE;
1807 				} else {
1808 					kr = vm_map_enter(
1809 						sr_map,
1810 						&target_address,
1811 						vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1812 						0,
1813 						vmk_flags,
1814 						object,
1815 						0,
1816 						TRUE,
1817 						mappings[i].sms_init_prot & VM_PROT_ALL,
1818 						mappings[i].sms_max_prot & VM_PROT_ALL,
1819 						VM_INHERIT_DEFAULT);
1820 				}
1821 			} else {
1822 				object = VM_OBJECT_NULL; /* no anonymous memory here */
1823 				kr = mach_vm_map_kernel(
1824 					sr_map,
1825 					vm_sanitize_wrap_addr_ref(&target_address),
1826 					vm_map_round_page(
1827 						mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1828 					0,
1829 					vmk_flags,
1830 					map_port,
1831 					mappings[i].sms_file_offset,
1832 					TRUE,
1833 					mappings[i].sms_init_prot & VM_PROT_ALL,
1834 					mappings[i].sms_max_prot & VM_PROT_ALL,
1835 					VM_INHERIT_DEFAULT);
1836 			}
1837 
1838 			if (kr == KERN_SUCCESS) {
1839 				/*
1840 				 * Record the first successful mapping(s) in the shared
1841 				 * region by file. We're protected by "sr_mapping_in_progress"
1842 				 * here, so no need to lock "shared_region".
1843 				 *
1844 				 * Note that if we have an AOT shared cache (ARM) for a
1845 				 * translated task, then it's always the first file.
1846 				 * The original "native" (i.e. x86) shared cache is the
1847 				 * second file.
1848 				 */
1849 
1850 				if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1851 					shared_region->sr_first_mapping = target_address;
1852 				}
1853 
1854 				if (*mappings_to_slide_cnt > 0 &&
1855 				    mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1856 					slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1857 					slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1858 				}
1859 
1860 				/*
1861 				 * Record the lowest writable address in this
1862 				 * sub map, to log any unexpected unnesting below
1863 				 * that address (see log_unnest_badness()).
1864 				 */
1865 				if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1866 				    sr_map->is_nested_map &&
1867 				    (lowest_unnestable_addr == 0 ||
1868 				    (target_address < lowest_unnestable_addr))) {
1869 					lowest_unnestable_addr = target_address;
1870 				}
1871 			} else {
1872 				if (map_port == MACH_PORT_NULL) {
1873 					/*
1874 					 * Get rid of the VM object we just created
1875 					 * but failed to map.
1876 					 */
1877 					vm_object_deallocate(object);
1878 					object = VM_OBJECT_NULL;
1879 				}
1880 				if (kr == KERN_MEMORY_PRESENT) {
1881 					/*
1882 					 * This exact mapping was already there:
1883 					 * that's fine.
1884 					 */
1885 					SHARED_REGION_TRACE_INFO(
1886 						("shared_region: mapping[%d]: "
1887 						"address:0x%016llx size:0x%016llx "
1888 						"offset:0x%016llx "
1889 						"maxprot:0x%x prot:0x%x "
1890 						"already mapped...\n",
1891 						i,
1892 						(long long)mappings[i].sms_address,
1893 						(long long)mappings[i].sms_size,
1894 						(long long)mappings[i].sms_file_offset,
1895 						mappings[i].sms_max_prot,
1896 						mappings[i].sms_init_prot));
1897 					/*
1898 					 * We didn't establish this mapping ourselves;
1899 					 * let's reset its size, so that we do not
1900 					 * attempt to undo it if an error occurs later.
1901 					 */
1902 					mappings[i].sms_size = 0;
1903 					kr = KERN_SUCCESS;
1904 				} else {
1905 					break;
1906 				}
1907 			}
1908 		}
1909 
1910 		if (kr != KERN_SUCCESS) {
1911 			break;
1912 		}
1913 	}
1914 
1915 	if (kr != KERN_SUCCESS) {
1916 		/* the last mapping we tried (mappings[i]) failed! */
1917 		assert(i < mappings_count);
1918 		SHARED_REGION_TRACE_ERROR(
1919 			("shared_region: mapping[%d]: "
1920 			"address:0x%016llx size:0x%016llx "
1921 			"offset:0x%016llx "
1922 			"maxprot:0x%x prot:0x%x failed 0x%x\n",
1923 			i,
1924 			(long long)mappings[i].sms_address,
1925 			(long long)mappings[i].sms_size,
1926 			(long long)mappings[i].sms_file_offset,
1927 			mappings[i].sms_max_prot,
1928 			mappings[i].sms_init_prot,
1929 			kr));
1930 
1931 		/*
1932 		 * Respect the design of vm_shared_region_undo_mappings(): we are
1933 		 * holding sr_mapping_in_progress here, so sr_map must not be NULL,
1934 		 * otherwise vm_shared_region_undo_mappings() would block waiting
1935 		 * for sr_mapping_in_progress to become NULL.
1936 		 */
1937 		assert(sr_map != NULL);
1938 		/* undo all the previous mappings */
1939 		vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
1940 		return kr;
1941 	}
1942 
1943 	*lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1944 	*sr_map_ptr = sr_map;
1945 	return KERN_SUCCESS;
1946 }
1947 
1948 /* forward declaration */
1949 __attribute__((noinline))
1950 static void
1951 vm_shared_region_map_file_final(
1952 	vm_shared_region_t shared_region,
1953 	vm_map_t           sr_map,
1954 	mach_vm_offset_t   sfm_min_address,
1955 	mach_vm_offset_t   sfm_max_address);
1956 
1957 /*
1958  * Establish some mappings of a file in the shared region.
1959  * This is used by "dyld" via the shared_region_map_np() system call
1960  * to populate the shared region with the appropriate shared cache.
1961  *
1962  * One could also call it several times to incrementally load several
1963  * libraries, as long as they do not overlap.
1964  * It will return KERN_SUCCESS if the mappings were successfully established
1965  * or if they were already established identically by another process.
1966  */
1967 __attribute__((noinline))
1968 kern_return_t
1969 vm_shared_region_map_file(
1970 	vm_shared_region_t       shared_region,
1971 	int                      sr_file_mappings_count,
1972 	struct _sr_file_mappings *sr_file_mappings)
1973 {
1974 	kern_return_t           kr = KERN_SUCCESS;
1975 	unsigned int            i;
1976 	unsigned int            mappings_to_slide_cnt = 0;
1977 	mach_vm_offset_t        sfm_min_address = (mach_vm_offset_t)-1;
1978 	mach_vm_offset_t        sfm_max_address = 0;
1979 	vm_map_t                sr_map = NULL;
1980 	vm_map_offset_t         lowest_unnestable_addr = 0;
1981 	unsigned int            vmsr_num_slides = 0;
1982 	typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
1983 	slid_mappings_t         *slid_mappings = NULL;                  /* [0..vmsr_num_slides] */
1984 	memory_object_control_t *slid_file_controls = NULL;             /* [0..vmsr_num_slides] */
1985 	struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
1986 	struct _sr_file_mappings *srfmp;
1987 
1988 	/*
1989 	 * Figure out how many of the mappings have slides.
1990 	 */
1991 	for (srfmp = &sr_file_mappings[0];
1992 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
1993 	    srfmp++) {
1994 		for (i = 0; i < srfmp->mappings_count; ++i) {
1995 			if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1996 				++vmsr_num_slides;
1997 			}
1998 		}
1999 	}
2000 
2001 	/* Allocate per slide data structures */
2002 	if (vmsr_num_slides > 0) {
2003 		slid_mappings =
2004 		    kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
2005 		slid_file_controls =
2006 		    kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
2007 		mappings_to_slide =
2008 		    kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
2009 	}
2010 
2011 	kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
2012 	    &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
2013 	    &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
2014 	if (kr != KERN_SUCCESS) {
2015 		vm_shared_region_lock();
2016 		goto done;
2017 	}
2018 	assert(vmsr_num_slides == mappings_to_slide_cnt);
2019 
2020 	/*
2021 	 * The call above installed direct mappings to the shared cache file.
2022 	 * Now we go back and overwrite the mappings that need relocation
2023 	 * with a special shared region pager.
2024 	 *
2025 	 * Note that this does copyin() of data, needed by the pager, which
2026 	 * the previous code just established mappings for. This is why we
2027 	 * do it in a separate pass.
2028 	 */
2029 #if __has_feature(ptrauth_calls)
2030 	/*
2031 	 * Allocate the storage needed for any sr_auth_section entries.
2032 	 */
2033 	for (i = 0; i < mappings_to_slide_cnt; ++i) {
2034 		if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
2035 		    shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2036 		    !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
2037 			++shared_region->sr_num_auth_section;
2038 		}
2039 	}
2040 	if (shared_region->sr_num_auth_section > 0) {
2041 		shared_region->sr_auth_section =
2042 		    kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
2043 		    Z_WAITOK | Z_ZERO);
2044 	}
2045 #endif /* __has_feature(ptrauth_calls) */
2046 	for (i = 0; i < mappings_to_slide_cnt; ++i) {
2047 		kr = vm_shared_region_slide(shared_region->sr_slide,
2048 		    mappings_to_slide[i]->sms_file_offset,
2049 		    mappings_to_slide[i]->sms_size,
2050 		    mappings_to_slide[i]->sms_slide_start,
2051 		    mappings_to_slide[i]->sms_slide_size,
2052 		    slid_mappings[i],
2053 		    slid_file_controls[i],
2054 		    mappings_to_slide[i]->sms_max_prot);
2055 		if (kr != KERN_SUCCESS) {
2056 			SHARED_REGION_TRACE_ERROR(
2057 				("shared_region: region_slide("
2058 				"slide:0x%x start:0x%016llx "
2059 				"size:0x%016llx) failed 0x%x\n",
2060 				shared_region->sr_slide,
2061 				(long long)mappings_to_slide[i]->sms_slide_start,
2062 				(long long)mappings_to_slide[i]->sms_slide_size,
2063 				kr));
2064 			vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
2065 			    &sr_file_mappings[0],
2066 			    &sr_file_mappings[sr_file_mappings_count - 1],
2067 			    sr_file_mappings_count);
2068 			vm_shared_region_lock();
2069 			goto done;
2070 		}
2071 	}
2072 
2073 	assert(kr == KERN_SUCCESS);
2074 
2075 	/* adjust the map's "lowest_unnestable_start" */
2076 	lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
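	/*
	 * For example, assuming a minimum nesting size of 32MB (0x2000000),
	 * a lowest writable mapping at 0x1C4F0000 would be rounded down to
	 * 0x1C000000 here; the actual granule comes from the pmap layer.
	 */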
2077 	if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
2078 		vm_map_lock(sr_map);
2079 		sr_map->lowest_unnestable_start = lowest_unnestable_addr;
2080 		vm_map_unlock(sr_map);
2081 	}
2082 
2083 	vm_shared_region_lock();
2084 	assert(shared_region->sr_ref_count > 0);
2085 	assert(shared_region->sr_mapping_in_progress == current_thread());
2086 
2087 	vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
2088 
2089 done:
2090 	/*
2091 	 * We're done working on that shared region.
2092 	 * Wake up any waiting threads.
2093 	 */
2094 	assert(shared_region->sr_mapping_in_progress == current_thread());
2095 	shared_region->sr_mapping_in_progress = THREAD_NULL;
2096 	vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
2097 	vm_shared_region_unlock();
2098 
2099 #if __has_feature(ptrauth_calls)
2100 	if (kr == KERN_SUCCESS) {
2101 		/*
2102 		 * Since authenticated mappings were just added to the shared region,
2103 		 * go back and remap them into private mappings for this task.
2104 		 */
2105 		kr = vm_shared_region_auth_remap(shared_region);
2106 	}
2107 #endif /* __has_feature(ptrauth_calls) */
2108 
2109 	/* Cache shared region info needed for telemetry in the task */
2110 	task_t task;
2111 	if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
2112 		mach_vm_offset_t start_address;
2113 		(void)vm_shared_region_start_address(shared_region, &start_address, task);
2114 	}
2115 
2116 	SHARED_REGION_TRACE_DEBUG(
2117 		("shared_region: map(%p) <- 0x%x \n",
2118 		(void *)VM_KERNEL_ADDRPERM(shared_region), kr));
2119 	if (vmsr_num_slides > 0) {
2120 		kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
2121 		kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
2122 		kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
2123 		    mappings_to_slide);
2124 	}
2125 	return kr;
2126 }
2127 
2128 /*
2129  * Final part of vm_shared_region_map_file().
2130  * Kept in a separate function to avoid blowing out the stack.
2131  */
2132 __attribute__((noinline))
2133 static void
2134 vm_shared_region_map_file_final(
2135 	vm_shared_region_t        shared_region,
2136 	vm_map_t                  sr_map __unused,
2137 	mach_vm_offset_t          sfm_min_address __unused,
2138 	mach_vm_offset_t          sfm_max_address __unused)
2139 {
2140 	struct _dyld_cache_header sr_cache_header;
2141 	int                       error;
2142 	size_t                    image_array_length;
2143 	struct _dyld_cache_image_text_info *sr_image_layout;
2144 	boolean_t                 locally_built = FALSE;
2145 
2146 
2147 	/*
2148 	 * Copy the shared cache UUID into the shared region structure.
2149 	 * We do this indirectly by first copying in the shared cache header
2150 	 * and then copying the UUID from there, because we'll also need to
2151 	 * look at other content of the shared cache header.
2152 	 */
2153 	if (!shared_region->sr_uuid_copied) {
2154 		error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
2155 		    (char *)&sr_cache_header,
2156 		    sizeof(sr_cache_header));
2157 		if (error == 0) {
2158 			memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
2159 			shared_region->sr_uuid_copied = TRUE;
2160 			locally_built = sr_cache_header.locallyBuiltCache;
2161 		} else {
2162 #if DEVELOPMENT || DEBUG
2163 			panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2164 			    "offset:0 size:0x%016llx) failed with %d\n",
2165 			    (long long)shared_region->sr_base_address,
2166 			    (long long)shared_region->sr_first_mapping,
2167 			    (long long)sizeof(sr_cache_header),
2168 			    error);
2169 #endif /* DEVELOPMENT || DEBUG */
2170 			shared_region->sr_uuid_copied = FALSE;
2171 		}
2172 	}
2173 
2174 	/*
2175 	 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd.  This is used by
2176 	 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
2177 	 * One gotcha is that a "userspace reboot" can cause a new shared region to become the primary
2178 	 * region.  In that case, launchd re-exec's itself, so we may go through this path multiple times.  We
2179 	 * let the most recent one win.
2180 	 *
2181 	 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
2182 	 */
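	/*
	 * The layout copy below converts each _dyld_cache_image_text_info
	 * entry (UUID + load address) into a dyld_uuid_info_64 record in
	 * sr_images, which is the form the stackshot code consumes.
	 */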
2183 	bool is_init_task = (task_pid(current_task()) == 1);
2184 	if (shared_region->sr_uuid_copied && is_init_task) {
2185 		/* Copy in the shared cache layout if we're running with a locally built shared cache */
2186 		if (locally_built) {
2187 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
2188 			image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
2189 			sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
2190 			error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
2191 			    sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
2192 			if (error == 0) {
2193 				if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
2194 					panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
2195 				}
2196 				shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
2197 				for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
2198 					memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
2199 					    sizeof(shared_region->sr_images[index].imageUUID));
2200 					shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
2201 				}
2202 
2203 				shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
2204 			} else {
2205 #if DEVELOPMENT || DEBUG
2206 				panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2207 				    "offset:0x%016llx size:0x%016llx) failed with %d\n",
2208 				    (long long)shared_region->sr_base_address,
2209 				    (long long)shared_region->sr_first_mapping,
2210 				    (long long)sr_cache_header.imagesTextOffset,
2211 				    (long long)image_array_length,
2212 				    error);
2213 #endif /* DEVELOPMENT || DEBUG */
2214 			}
2215 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
2216 			kfree_data(sr_image_layout, image_array_length);
2217 			sr_image_layout = NULL;
2218 		}
2219 		primary_system_shared_region = shared_region;
2220 	}
2221 
2222 #ifndef NO_NESTED_PMAP
2223 	/*
2224 	 * If we succeeded, we know the bounds of the shared region.
2225 	 * Trim our pmaps to only cover this range (if applicable to
2226 	 * this platform).
2227 	 */
2228 	if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
2229 		pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
2230 	}
2231 #endif
2232 }
2233 
2234 /*
2235  * Retrieve a task's shared region and grab an extra reference to
2236  * make sure it doesn't disappear while the caller is using it.
2237  * The caller is responsible for consuming that extra reference if
2238  * necessary.
2239  *
2240  * This also tries to trim the pmap for the shared region.
2241  */
2242 vm_shared_region_t
2243 vm_shared_region_trim_and_get(task_t task)
2244 {
2245 	vm_shared_region_t shared_region;
2246 	ipc_port_t sr_handle;
2247 	vm_named_entry_t sr_mem_entry;
2248 	vm_map_t sr_map;
2249 
2250 	/* Get the shared region and the map. */
2251 	shared_region = vm_shared_region_get(task);
2252 	if (shared_region == NULL) {
2253 		return NULL;
2254 	}
2255 
2256 	sr_handle = shared_region->sr_mem_entry;
2257 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
2258 	sr_map = sr_mem_entry->backing.map;
2259 
2260 #ifndef NO_NESTED_PMAP
2261 	/* Trim the pmap if possible. */
2262 	if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
2263 		pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
2264 	}
2265 #endif
2266 
2267 	return shared_region;
2268 }
2269 
2270 /*
2271  * Enter the appropriate shared region into "map" for "task".
2272  * This involves looking up the shared region (and possibly creating a new
2273  * one) for the desired environment, then mapping the VM sub map into the
2274  * task's VM "map", with the appropriate level of pmap-nesting.
2275  */
2276 kern_return_t
2277 vm_shared_region_enter(
2278 	struct _vm_map          *map,
2279 	struct task             *task,
2280 	boolean_t               is_64bit,
2281 	void                    *fsroot,
2282 	cpu_type_t              cpu,
2283 	cpu_subtype_t           cpu_subtype,
2284 	boolean_t               reslide,
2285 	boolean_t               is_driverkit,
2286 	uint32_t                rsr_version)
2287 {
2288 	kern_return_t           kr;
2289 	vm_shared_region_t      shared_region;
2290 	vm_map_offset_t         sr_address, sr_offset, target_address;
2291 	vm_map_size_t           sr_size, mapping_size;
2292 	vm_map_offset_t         sr_pmap_nesting_start;
2293 	vm_map_size_t           sr_pmap_nesting_size;
2294 	ipc_port_t              sr_handle;
2295 	vm_prot_t               cur_prot, max_prot;
2296 	vm_map_kernel_flags_t   vmk_flags;
2297 
2298 	SHARED_REGION_TRACE_DEBUG(
2299 		("shared_region: -> "
2300 		"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2301 		(void *)VM_KERNEL_ADDRPERM(map),
2302 		(void *)VM_KERNEL_ADDRPERM(task),
2303 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2304 		cpu, cpu_subtype, is_64bit, is_driverkit));
2305 
2306 	/* lookup (create if needed) the shared region for this environment */
2307 	shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
2308 	if (shared_region == NULL) {
2309 		/* this should not happen! */
2310 		SHARED_REGION_TRACE_ERROR(
2311 			("shared_region: -> "
2312 			"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2313 			"lookup failed !\n",
2314 			(void *)VM_KERNEL_ADDRPERM(map),
2315 			(void *)VM_KERNEL_ADDRPERM(task),
2316 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2317 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2318 		//panic("shared_region_enter: lookup failed");
2319 		return KERN_FAILURE;
2320 	}
2321 
2322 	kr = KERN_SUCCESS;
2323 	/* no need to lock since this data is never modified */
2324 	sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2325 	sr_size = (vm_map_size_t)shared_region->sr_size;
2326 	sr_handle = shared_region->sr_mem_entry;
2327 	sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2328 	sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2329 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
2330 
2331 	cur_prot = VM_PROT_READ;
2332 	if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2333 		/*
2334 		 * XXX BINARY COMPATIBILITY
2335 		 * java6 apparently needs to modify some code in the
2336 		 * dyld shared cache and needs to be allowed to add
2337 		 * write access...
2338 		 */
2339 		max_prot = VM_PROT_ALL;
2340 	} else {
2341 		max_prot = VM_PROT_READ;
2342 		/* make it "permanent" to protect against re-mappings */
2343 		vmk_flags.vmf_permanent = true;
2344 	}
2345 
2346 	/*
2347 	 * Start mapping the shared region's VM sub map into the task's VM map.
2348 	 */
2349 	sr_offset = 0;
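	/*
	 * The sub map is entered in up to three pieces: any range below
	 * sr_pmap_nesting_start without pmap-nesting, then the pmap-nested
	 * range itself, then any remainder above it without pmap-nesting.
	 */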
2350 
2351 	if (sr_pmap_nesting_start > sr_address) {
2352 		/* we need to map a range without pmap-nesting first */
2353 		target_address = sr_address;
2354 		mapping_size = sr_pmap_nesting_start - sr_address;
2355 		kr = mach_vm_map_kernel(
2356 			map,
2357 			vm_sanitize_wrap_addr_ref(&target_address),
2358 			mapping_size,
2359 			0,
2360 			vmk_flags,
2361 			sr_handle,
2362 			sr_offset,
2363 			TRUE,
2364 			cur_prot,
2365 			max_prot,
2366 			VM_INHERIT_SHARE);
2367 		if (kr != KERN_SUCCESS) {
2368 			SHARED_REGION_TRACE_ERROR(
2369 				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2370 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2371 				(void *)VM_KERNEL_ADDRPERM(map),
2372 				(void *)VM_KERNEL_ADDRPERM(task),
2373 				(void *)VM_KERNEL_ADDRPERM(fsroot),
2374 				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2375 				(long long)target_address,
2376 				(long long)mapping_size,
2377 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2378 			goto done;
2379 		}
2380 		SHARED_REGION_TRACE_DEBUG(
2381 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2382 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2383 			(void *)VM_KERNEL_ADDRPERM(map),
2384 			(void *)VM_KERNEL_ADDRPERM(task),
2385 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2386 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2387 			(long long)target_address, (long long)mapping_size,
2388 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2389 		sr_offset += mapping_size;
2390 		sr_size -= mapping_size;
2391 	}
2392 
2393 	/* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2394 	vmk_flags.vmkf_nested_pmap = true;
2395 	vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
2396 
2397 	/*
2398 	 * Use pmap-nesting to map the majority of the shared region into the task's
2399 	 * VM space. Very rarely will architectures have a shared region that isn't
2400 	 * the same size as the pmap-nesting region, or start at a different address
2401 	 * than the pmap-nesting region, so this code will map the entirety of the
2402 	 * shared region for most architectures.
2403 	 */
2404 	assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2405 	target_address = sr_pmap_nesting_start;
2406 	kr = mach_vm_map_kernel(
2407 		map,
2408 		vm_sanitize_wrap_addr_ref(&target_address),
2409 		sr_pmap_nesting_size,
2410 		0,
2411 		vmk_flags,
2412 		sr_handle,
2413 		sr_offset,
2414 		TRUE,
2415 		cur_prot,
2416 		max_prot,
2417 		VM_INHERIT_SHARE);
2418 	if (kr != KERN_SUCCESS) {
2419 		SHARED_REGION_TRACE_ERROR(
2420 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2421 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2422 			(void *)VM_KERNEL_ADDRPERM(map),
2423 			(void *)VM_KERNEL_ADDRPERM(task),
2424 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2425 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2426 			(long long)target_address,
2427 			(long long)sr_pmap_nesting_size,
2428 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2429 		goto done;
2430 	}
2431 	SHARED_REGION_TRACE_DEBUG(
2432 		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2433 		"nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2434 		(void *)VM_KERNEL_ADDRPERM(map),
2435 		(void *)VM_KERNEL_ADDRPERM(task),
2436 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2437 		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2438 		(long long)target_address, (long long)sr_pmap_nesting_size,
2439 		(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2440 
2441 	sr_offset += sr_pmap_nesting_size;
2442 	sr_size -= sr_pmap_nesting_size;
2443 
2444 	if (sr_size > 0) {
2445 		/* and there's some left to be mapped without pmap-nesting */
2446 		vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
2447 		target_address = sr_address + sr_offset;
2448 		mapping_size = sr_size;
2449 		kr = mach_vm_map_kernel(
2450 			map,
2451 			vm_sanitize_wrap_addr_ref(&target_address),
2452 			mapping_size,
2453 			0,
2454 			VM_MAP_KERNEL_FLAGS_FIXED(),
2455 			sr_handle,
2456 			sr_offset,
2457 			TRUE,
2458 			cur_prot,
2459 			max_prot,
2460 			VM_INHERIT_SHARE);
2461 		if (kr != KERN_SUCCESS) {
2462 			SHARED_REGION_TRACE_ERROR(
2463 				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2464 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2465 				(void *)VM_KERNEL_ADDRPERM(map),
2466 				(void *)VM_KERNEL_ADDRPERM(task),
2467 				(void *)VM_KERNEL_ADDRPERM(fsroot),
2468 				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2469 				(long long)target_address,
2470 				(long long)mapping_size,
2471 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2472 			goto done;
2473 		}
2474 		SHARED_REGION_TRACE_DEBUG(
2475 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2476 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2477 			(void *)VM_KERNEL_ADDRPERM(map),
2478 			(void *)VM_KERNEL_ADDRPERM(task),
2479 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2480 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2481 			(long long)target_address, (long long)mapping_size,
2482 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2483 		sr_offset += mapping_size;
2484 		sr_size -= mapping_size;
2485 	}
2486 	assert(sr_size == 0);
2487 
2488 done:
2489 	if (kr == KERN_SUCCESS) {
2490 		/* let the task use that shared region */
2491 		vm_shared_region_set(task, shared_region);
2492 	} else {
2493 		/* drop our reference since we're not using it */
2494 		vm_shared_region_deallocate(shared_region);
2495 		vm_shared_region_set(task, NULL);
2496 	}
2497 
2498 	SHARED_REGION_TRACE_DEBUG(
2499 		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2500 		(void *)VM_KERNEL_ADDRPERM(map),
2501 		(void *)VM_KERNEL_ADDRPERM(task),
2502 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2503 		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2504 		kr));
2505 	return kr;
2506 }
2507 
2508 void
2509 vm_shared_region_remove(
2510 	task_t task,
2511 	vm_shared_region_t sr)
2512 {
2513 	vm_map_t map;
2514 	mach_vm_offset_t start;
2515 	mach_vm_size_t size;
2516 	vm_map_kernel_flags_t vmk_flags;
2517 	kern_return_t kr;
2518 
2519 	if (sr == NULL) {
2520 		return;
2521 	}
2522 	map = get_task_map(task);
2523 	start = sr->sr_base_address;
2524 	size = sr->sr_size;
2525 
2526 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2527 	vmk_flags.vmkf_overwrite_immutable = true;
2528 	vmk_flags.vm_tag = VM_MEMORY_DYLD;
2529 
2530 	/* range_id is set by mach_vm_map_kernel */
2531 	kr = mach_vm_map_kernel(map,
2532 	    vm_sanitize_wrap_addr_ref(&start),
2533 	    size,
2534 	    0,                     /* mask */
2535 	    vmk_flags,
2536 	    MACH_PORT_NULL,
2537 	    0,
2538 	    FALSE,                     /* copy */
2539 	    VM_PROT_NONE,
2540 	    VM_PROT_NONE,
2541 	    VM_INHERIT_DEFAULT);
2542 	if (kr != KERN_SUCCESS) {
2543 		printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2544 	}
2545 }
2546 
2547 #define SANE_SLIDE_INFO_SIZE            (2560*1024) /* 2.5 MiB; can be changed if needed */
2548 
2549 kern_return_t
2550 vm_shared_region_sliding_valid(uint32_t slide)
2551 {
2552 	kern_return_t kr = KERN_SUCCESS;
2553 	vm_shared_region_t sr = vm_shared_region_get(current_task());
2554 
2555 	/* No region yet? we're fine. */
2556 	if (sr == NULL) {
2557 		return kr;
2558 	}
2559 
2560 	if (sr->sr_slide != 0 && slide != 0) {
2561 		if (slide == sr->sr_slide) {
2562 			/*
2563 			 * Request for sliding when we've
2564 			 * already done it with exactly the
2565 			 * same slide value before.
2566 			 * This isn't technically wrong, but
2567 			 * we don't want to slide again, so
2568 			 * we return KERN_INVALID_ARGUMENT.
2569 			 */
2570 			kr = KERN_INVALID_ARGUMENT;
2571 		} else {
2572 			printf("Mismatched shared region slide\n");
2573 			kr = KERN_FAILURE;
2574 		}
2575 	}
2576 	vm_shared_region_deallocate(sr);
2577 	return kr;
2578 }
2579 
2580 /*
2581  * Actually create (really overwrite) the mapping to part of the shared cache which
2582  * undergoes relocation.  This routine reads in the relocation info from dyld and
2583  * verifies it. It then creates (or finds a matching) shared region pager which
2584  * handles the actual modification of the page contents and installs the mapping
2585  * using that pager.
2586  */
2587 kern_return_t
2588 vm_shared_region_slide_mapping(
2589 	vm_shared_region_t      sr,
2590 	user_addr_t             slide_info_addr,
2591 	mach_vm_size_t          slide_info_size,
2592 	mach_vm_offset_t        start,
2593 	mach_vm_size_t          size,
2594 	mach_vm_offset_t        slid_mapping,
2595 	uint32_t                slide,
2596 	memory_object_control_t sr_file_control,
2597 	vm_prot_t               prot)
2598 {
2599 	kern_return_t           kr;
2600 	vm_object_t             object = VM_OBJECT_NULL;
2601 	vm_shared_region_slide_info_t si = NULL;
2602 	vm_map_entry_t          tmp_entry = VM_MAP_ENTRY_NULL;
2603 	struct vm_map_entry     tmp_entry_store;
2604 	memory_object_t         sr_pager = MEMORY_OBJECT_NULL;
2605 	vm_map_t                sr_map;
2606 	vm_map_kernel_flags_t   vmk_flags;
2607 	vm_map_offset_t         map_addr;
2608 	void                    *slide_info_entry = NULL;
2609 	int                     error;
2610 
2611 	assert(sr->sr_slide_in_progress);
2612 
2613 	if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2614 		return KERN_INVALID_ARGUMENT;
2615 	}
2616 
2617 	/*
2618 	 * Copy in and verify the relocation information.
2619 	 */
2620 	if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2621 		printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2622 		return KERN_FAILURE;
2623 	}
2624 	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2625 		printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2626 		return KERN_FAILURE;
2627 	}
2628 
2629 	slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2630 	if (slide_info_entry == NULL) {
2631 		return KERN_RESOURCE_SHORTAGE;
2632 	}
2633 	error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2634 	if (error) {
2635 		printf("copyin of slide_info failed\n");
2636 		kr = KERN_INVALID_ADDRESS;
2637 		goto done;
2638 	}
2639 
2640 	if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2641 		printf("Sanity Check failed for slide_info\n");
2642 		goto done;
2643 	}
2644 
2645 	/*
2646 	 * Allocate and fill in a vm_shared_region_slide_info.
2647 	 * This will either be used by a new pager, or used to find
2648 	 * a pre-existing matching pager.
2649 	 */
2650 	object = memory_object_control_to_vm_object(sr_file_control);
2651 	if (object == VM_OBJECT_NULL || object->internal) {
2652 		object = VM_OBJECT_NULL;
2653 		kr = KERN_INVALID_ADDRESS;
2654 		goto done;
2655 	}
2656 
2657 	si = kalloc_type(struct vm_shared_region_slide_info,
2658 	    Z_WAITOK | Z_NOFAIL);
2659 	vm_object_lock(object);
2660 
2661 	vm_object_reference_locked(object);     /* for si->slide_object */
2662 	object->object_is_shared_cache = TRUE;
2663 	vm_object_unlock(object);
2664 
2665 	si->si_slide_info_entry = slide_info_entry;
2666 	si->si_slide_info_size = slide_info_size;
2667 
2668 	assert(slid_mapping != (mach_vm_offset_t) -1);
2669 	si->si_slid_address = slid_mapping + sr->sr_base_address;
2670 	si->si_slide_object = object;
2671 	si->si_start = start;
2672 	si->si_end = si->si_start + size;
2673 	si->si_slide = slide;
2674 #if __has_feature(ptrauth_calls)
2675 	/*
2676 	 * If there is authenticated pointer data in this slid mapping,
2677 	 * then just add the information needed to create new pagers for
2678 	 * different shared_region_id's later.
2679 	 */
2680 	if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2681 	    sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2682 	    !(prot & VM_PROT_NOAUTH)) {
2683 		if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
2684 			printf("Too many auth/private sections for shared region!!\n");
2685 			kr = KERN_INVALID_ARGUMENT;
2686 			goto done;
2687 		}
2688 		si->si_ptrauth = TRUE;
2689 		sr->sr_auth_section[sr->sr_next_auth_section++] = si;
2690 		/*
2691 		 * Remember the shared region, since that's where we'll
2692 		 * stash this info for all auth pagers to share. Each pager
2693 		 * will need to take a reference to it.
2694 		 */
2695 		si->si_shared_region = sr;
2696 		kr = KERN_SUCCESS;
2697 		goto done;
2698 	}
2699 	si->si_shared_region = NULL;
2700 	si->si_ptrauth = FALSE;
2701 #endif /* __has_feature(ptrauth_calls) */
2702 
2703 	/*
2704 	 * find the pre-existing shared region's map entry to slide
2705 	 */
2706 	sr_map = vm_shared_region_vm_map(sr);
2707 	kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2708 	if (kr != KERN_SUCCESS) {
2709 		goto done;
2710 	}
2711 	tmp_entry = &tmp_entry_store;
2712 
2713 	/*
2714 	 * The object must exactly cover the region to slide.
2715 	 */
2716 	assert(VME_OFFSET(tmp_entry) == start);
2717 	assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2718 
2719 	/* create a "shared_region" sliding pager */
2720 	sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2721 	if (sr_pager == MEMORY_OBJECT_NULL) {
2722 		kr = KERN_RESOURCE_SHORTAGE;
2723 		goto done;
2724 	}
2725 
2726 #if CONFIG_SECLUDED_MEMORY
2727 	/*
2728 	 * Shared region pagers for the primary system shared region (which
2729 	 * camera needs) or for DEXTs shouldn't put their pages on the secluded queue.
2730 	 */
2731 	if (primary_system_shared_region == NULL ||
2732 	    primary_system_shared_region == sr ||
2733 	    sr->sr_driverkit) {
2734 		memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2735 	}
2736 #endif /* CONFIG_SECLUDED_MEMORY */
2737 
2738 	/* map that pager over the portion of the mapping that needs sliding */
2739 	map_addr = tmp_entry->vme_start;
2740 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2741 	vmk_flags.vmkf_overwrite_immutable = true;
2742 	vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
2743 	    tmp_entry->max_protection);
2744 	vmk_flags.vmf_tpro = shared_region_tpro_protect(sr,
2745 	    prot);
2746 	kr = mach_vm_map_kernel(sr_map,
2747 	    vm_sanitize_wrap_addr_ref(&map_addr),
2748 	    tmp_entry->vme_end - tmp_entry->vme_start,
2749 	    0,
2750 	    vmk_flags,
2751 	    (ipc_port_t)(uintptr_t) sr_pager,
2752 	    0,
2753 	    TRUE,
2754 	    tmp_entry->protection,
2755 	    tmp_entry->max_protection,
2756 	    tmp_entry->inheritance);
2757 	assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2758 	assertf(map_addr == tmp_entry->vme_start,
2759 	    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2760 	    (uint64_t)map_addr,
2761 	    (uint64_t) tmp_entry->vme_start,
2762 	    tmp_entry);
2763 
2764 	/* success! */
2765 	kr = KERN_SUCCESS;
2766 
2767 done:
2768 	if (sr_pager != NULL) {
2769 		/*
2770 		 * Release the sr_pager reference obtained by shared_region_pager_setup().
2771 		 * The mapping, if it succeeded, is now holding a reference on the memory object.
2772 		 */
2773 		memory_object_deallocate(sr_pager);
2774 		sr_pager = MEMORY_OBJECT_NULL;
2775 	}
2776 	if (tmp_entry != NULL) {
2777 		/* release extra ref on tmp_entry's VM object */
2778 		vm_object_deallocate(VME_OBJECT(tmp_entry));
2779 		tmp_entry = VM_MAP_ENTRY_NULL;
2780 	}
2781 
2782 	if (kr != KERN_SUCCESS) {
2783 		/* cleanup */
2784 		if (si != NULL) {
2785 			if (si->si_slide_object) {
2786 				vm_object_deallocate(si->si_slide_object);
2787 				si->si_slide_object = VM_OBJECT_NULL;
2788 			}
2789 			kfree_type(struct vm_shared_region_slide_info, si);
2790 			si = NULL;
2791 		}
2792 		if (slide_info_entry != NULL) {
2793 			kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2794 			slide_info_entry = NULL;
2795 		}
2796 	}
2797 	return kr;
2798 }
2799 
2800 static kern_return_t
2801 vm_shared_region_slide_sanity_check_v1(
2802 	vm_shared_region_slide_info_entry_v1_t s_info)
2803 {
2804 	uint32_t pageIndex = 0;
2805 	uint16_t entryIndex = 0;
2806 	uint16_t *toc = NULL;
2807 
2808 	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2809 	for (; pageIndex < s_info->toc_count; pageIndex++) {
2810 		entryIndex =  (uint16_t)(toc[pageIndex]);
2811 
2812 		if (entryIndex >= s_info->entry_count) {
2813 			printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2814 			return KERN_FAILURE;
2815 		}
2816 	}
2817 	return KERN_SUCCESS;
2818 }
2819 
2820 static kern_return_t
2821 vm_shared_region_slide_sanity_check_v2(
2822 	vm_shared_region_slide_info_entry_v2_t s_info,
2823 	mach_vm_size_t slide_info_size)
2824 {
2825 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2826 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2827 		return KERN_FAILURE;
2828 	}
2829 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2830 		return KERN_FAILURE;
2831 	}
2832 
2833 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
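	/*
	 * Each check below guards one arithmetic step against wraparound:
	 * the 32-bit addition of the two counts, the doubling to a byte size
	 * (each trailing entry is a uint16_t), and the addition of the header
	 * size.  The final comparison then guarantees that every entry lies
	 * within the slide_info buffer that was copied in.
	 */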
2834 
2835 	uint32_t page_starts_count = s_info->page_starts_count;
2836 	uint32_t page_extras_count = s_info->page_extras_count;
2837 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2838 	if (num_trailing_entries < page_starts_count) {
2839 		return KERN_FAILURE;
2840 	}
2841 
2842 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2843 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2844 	if (trailing_size >> 1 != num_trailing_entries) {
2845 		return KERN_FAILURE;
2846 	}
2847 
2848 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2849 	if (required_size < sizeof(*s_info)) {
2850 		return KERN_FAILURE;
2851 	}
2852 
2853 	if (required_size > slide_info_size) {
2854 		return KERN_FAILURE;
2855 	}
2856 
2857 	return KERN_SUCCESS;
2858 }
2859 
2860 static kern_return_t
2861 vm_shared_region_slide_sanity_check_v3(
2862 	vm_shared_region_slide_info_entry_v3_t s_info,
2863 	mach_vm_size_t slide_info_size)
2864 {
2865 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2866 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2867 		return KERN_FAILURE;
2868 	}
2869 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2870 		printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2871 		return KERN_FAILURE;
2872 	}
2873 
2874 	uint32_t page_starts_count = s_info->page_starts_count;
2875 	mach_vm_size_t num_trailing_entries = page_starts_count;
2876 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2877 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2878 	if (required_size < sizeof(*s_info)) {
2879 		printf("vm_shared_region_slide_sanity_check_v3: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2880 		return KERN_FAILURE;
2881 	}
2882 
2883 	if (required_size > slide_info_size) {
2884 		printf("vm_shared_region_slide_sanity_check_v3: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2885 		return KERN_FAILURE;
2886 	}
2887 
2888 	return KERN_SUCCESS;
2889 }
2890 
2891 static kern_return_t
2892 vm_shared_region_slide_sanity_check_v4(
2893 	vm_shared_region_slide_info_entry_v4_t s_info,
2894 	mach_vm_size_t slide_info_size)
2895 {
2896 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2897 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2898 		return KERN_FAILURE;
2899 	}
2900 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2901 		return KERN_FAILURE;
2902 	}
2903 
2904 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
2905 
2906 	uint32_t page_starts_count = s_info->page_starts_count;
2907 	uint32_t page_extras_count = s_info->page_extras_count;
2908 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2909 	if (num_trailing_entries < page_starts_count) {
2910 		return KERN_FAILURE;
2911 	}
2912 
2913 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2914 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2915 	if (trailing_size >> 1 != num_trailing_entries) {
2916 		return KERN_FAILURE;
2917 	}
2918 
2919 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2920 	if (required_size < sizeof(*s_info)) {
2921 		return KERN_FAILURE;
2922 	}
2923 
2924 	if (required_size > slide_info_size) {
2925 		return KERN_FAILURE;
2926 	}
2927 
2928 	return KERN_SUCCESS;
2929 }
2930 
2931 static kern_return_t
2932 vm_shared_region_slide_sanity_check_v5(
2933 	vm_shared_region_slide_info_entry_v5_t s_info,
2934 	mach_vm_size_t slide_info_size)
2935 {
2936 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v5)) {
2937 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2938 		return KERN_FAILURE;
2939 	}
2940 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB) {
2941 		printf("vm_shared_region_slide_sanity_check_v5: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE_16KB);
2942 		return KERN_FAILURE;
2943 	}
2944 
2945 	uint32_t page_starts_count = s_info->page_starts_count;
2946 	mach_vm_size_t num_trailing_entries = page_starts_count;
2947 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2948 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2949 	if (required_size < sizeof(*s_info)) {
2950 		printf("vm_shared_region_slide_sanity_check_v5: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2951 		return KERN_FAILURE;
2952 	}
2953 
2954 	if (required_size > slide_info_size) {
2955 		printf("vm_shared_region_slide_sanity_check_v5: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2956 		return KERN_FAILURE;
2957 	}
2958 
2959 	return KERN_SUCCESS;
2960 }
2961 
2962 
2963 static kern_return_t
2964 vm_shared_region_slide_sanity_check(
2965 	vm_shared_region_slide_info_entry_t s_info,
2966 	mach_vm_size_t s_info_size)
2967 {
2968 	kern_return_t kr;
2969 
2970 	switch (s_info->version) {
2971 	case 1:
2972 		kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
2973 		break;
2974 	case 2:
2975 		kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2976 		break;
2977 	case 3:
2978 		kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2979 		break;
2980 	case 4:
2981 		kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2982 		break;
2983 	case 5:
2984 		kr = vm_shared_region_slide_sanity_check_v5(&s_info->v5, s_info_size);
2985 		break;
2986 	default:
2987 		kr = KERN_FAILURE;
2988 	}
2989 	return kr;
2990 }
2991 
2992 static kern_return_t
2993 vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2994 {
2995 	uint16_t *toc = NULL;
2996 	slide_info_entry_toc_t bitmap = NULL;
2997 	uint32_t i = 0, j = 0;
2998 	uint8_t b = 0;
2999 	uint32_t slide = si->si_slide;
3000 	int is_64 = task_has_64Bit_addr(current_task());
3001 
3002 	vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
3003 	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
3004 
3005 	if (pageIndex >= s_info->toc_count) {
3006 		printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
3007 	} else {
3008 		uint16_t entryIndex =  (uint16_t)(toc[pageIndex]);
3009 		slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
3010 
3011 		if (entryIndex >= s_info->entry_count) {
3012 			printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
3013 		} else {
3014 			bitmap = &slide_info_entries[entryIndex];
3015 
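			/*
			 * Each byte of the bitmap covers eight consecutive 32-bit
			 * words of the page: bit j of byte i marks the word at byte
			 * offset 4 * (i * 8 + j) as needing the slide added (e.g.
			 * i == 2, j == 5 is word 21, at byte offset 84).
			 */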
3016 			for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
3017 				b = bitmap->entry[i];
3018 				if (b != 0) {
3019 					for (j = 0; j < 8; ++j) {
3020 						if (b & (1 << j)) {
3021 							uint32_t *ptr_to_slide;
3022 							uint32_t old_value;
3023 
3024 							ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
3025 							old_value = *ptr_to_slide;
3026 							*ptr_to_slide += slide;
3027 							if (is_64 && *ptr_to_slide < old_value) {
3028 								/*
3029 								 * We just slid the low 32 bits of a 64-bit pointer
3030 								 * and it looks like there should have been a carry-over
3031 								 * to the upper 32 bits.
3032 								 * The sliding failed...
3033 								 */
3034 								printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
3035 								    i, j, b, slide, old_value, *ptr_to_slide);
3036 								return KERN_FAILURE;
3037 							}
3038 						}
3039 					}
3040 				}
3041 			}
3042 		}
3043 	}
3044 
3045 	return KERN_SUCCESS;
3046 }
3047 
3048 static kern_return_t
3049 rebase_chain_32(
3050 	uint8_t *page_content,
3051 	uint16_t start_offset,
3052 	uint32_t slide_amount,
3053 	vm_shared_region_slide_info_entry_v2_t s_info)
3054 {
3055 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3056 
3057 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3058 	const uint32_t value_mask = ~delta_mask;
3059 	const uint32_t value_add = (uint32_t)(s_info->value_add);
3060 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3061 
3062 	uint32_t page_offset = start_offset;
3063 	uint32_t delta = 1;
3064 
3065 	while (delta != 0 && page_offset <= last_page_offset) {
3066 		uint8_t *loc;
3067 		uint32_t value;
3068 
3069 		loc = page_content + page_offset;
3070 		memcpy(&value, loc, sizeof(value));
3071 		delta = (value & delta_mask) >> delta_shift;
3072 		value &= value_mask;
3073 
3074 		if (value != 0) {
3075 			value += value_add;
3076 			value += slide_amount;
3077 		}
3078 		memcpy(loc, &value, sizeof(value));
3079 		page_offset += delta;
3080 	}
3081 
3082 	/* If the offset went past the end of the page, then the slide data is invalid. */
3083 	if (page_offset > last_page_offset) {
3084 		return KERN_FAILURE;
3085 	}
3086 	return KERN_SUCCESS;
3087 }
3088 
3089 static kern_return_t
3090 rebase_chain_64(
3091 	uint8_t *page_content,
3092 	uint16_t start_offset,
3093 	uint32_t slide_amount,
3094 	vm_shared_region_slide_info_entry_v2_t s_info)
3095 {
3096 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
3097 
3098 	const uint64_t delta_mask = s_info->delta_mask;
3099 	const uint64_t value_mask = ~delta_mask;
3100 	const uint64_t value_add = s_info->value_add;
3101 	const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3102 
3103 	uint32_t page_offset = start_offset;
3104 	uint32_t delta = 1;
3105 
3106 	while (delta != 0 && page_offset <= last_page_offset) {
3107 		uint8_t *loc;
3108 		uint64_t value;
3109 
3110 		loc = page_content + page_offset;
3111 		memcpy(&value, loc, sizeof(value));
3112 		delta = (uint32_t)((value & delta_mask) >> delta_shift);
3113 		value &= value_mask;
3114 
3115 		if (value != 0) {
3116 			value += value_add;
3117 			value += slide_amount;
3118 		}
3119 		memcpy(loc, &value, sizeof(value));
3120 		page_offset += delta;
3121 	}
3122 
3123 	if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
3124 		/* If a pointer straddling the page boundary needs to be adjusted, then
3125 		 * add the slide to the lower half. The encoding guarantees that the upper
3126 		 * half on the next page will need no masking.
3127 		 *
3128 		 * This assumes a little-endian machine and that the region being slid
3129 		 * never crosses a 4 GB boundary. */
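		/*
		 * Worked example (illustrative, assuming PAGE_SIZE_FOR_SR_SLIDE is
		 * 4KB): a chained pointer starting at page_offset 4092 spans offsets
		 * 4092..4099, so only its low 32 bits live on this page.  We add the
		 * slide to those low 32 bits below; because the slid region never
		 * crosses a 4 GB boundary, no carry into the high 32 bits on the
		 * next page is ever needed.
		 */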
3130 
3131 		uint8_t *loc = page_content + page_offset;
3132 		uint32_t value;
3133 
3134 		memcpy(&value, loc, sizeof(value));
3135 		value += slide_amount;
3136 		memcpy(loc, &value, sizeof(value));
3137 	} else if (page_offset > last_page_offset) {
3138 		return KERN_FAILURE;
3139 	}
3140 
3141 	return KERN_SUCCESS;
3142 }
3143 
3144 static kern_return_t
3145 rebase_chain(
3146 	boolean_t is_64,
3147 	uint32_t pageIndex,
3148 	uint8_t *page_content,
3149 	uint16_t start_offset,
3150 	uint32_t slide_amount,
3151 	vm_shared_region_slide_info_entry_v2_t s_info)
3152 {
3153 	kern_return_t kr;
3154 	if (is_64) {
3155 		kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3156 	} else {
3157 		kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3158 	}
3159 
3160 	if (kr != KERN_SUCCESS) {
3161 		printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3162 		    pageIndex, start_offset, slide_amount);
3163 	}
3164 	return kr;
3165 }
3166 
3167 static kern_return_t
3168 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3169 {
3170 	vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
3171 	const uint32_t slide_amount = si->si_slide;
3172 
3173 	/* The high bits of the delta_mask field are nonzero precisely when the shared
3174 	 * cache is 64-bit. */
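	/* For example (illustrative values only, not read from any particular
	 * cache): a 64-bit cache might carry delta_mask 0x00FFFF0000000000,
	 * whose upper 32 bits are nonzero, while a 32-bit cache might carry
	 * delta_mask 0xC0000000, whose upper 32 bits are zero. */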
3175 	const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
3176 
3177 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3178 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3179 
3180 	uint8_t *page_content = (uint8_t *)vaddr;
3181 	uint16_t page_entry;
3182 
3183 	if (pageIndex >= s_info->page_starts_count) {
3184 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3185 		    pageIndex, s_info->page_starts_count);
3186 		return KERN_FAILURE;
3187 	}
3188 	page_entry = page_starts[pageIndex];
3189 
3190 	if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
3191 		return KERN_SUCCESS;
3192 	}
3193 
3194 	if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
3195 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
3196 		uint16_t info;
3197 
3198 		do {
3199 			uint16_t page_start_offset;
3200 			kern_return_t kr;
3201 
3202 			if (chain_index >= s_info->page_extras_count) {
3203 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3204 				    chain_index, s_info->page_extras_count);
3205 				return KERN_FAILURE;
3206 			}
3207 			info = page_extras[chain_index];
3208 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3209 
3210 			kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3211 			if (kr != KERN_SUCCESS) {
3212 				return KERN_FAILURE;
3213 			}
3214 
3215 			chain_index++;
3216 		} while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
3217 	} else {
3218 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3219 		kern_return_t kr;
3220 
3221 		kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3222 		if (kr != KERN_SUCCESS) {
3223 			return KERN_FAILURE;
3224 		}
3225 	}
3226 
3227 	return KERN_SUCCESS;
3228 }
3229 
3230 
3231 static kern_return_t
3232 vm_shared_region_slide_page_v3(
3233 	vm_shared_region_slide_info_t si,
3234 	vm_offset_t vaddr,
3235 	__unused mach_vm_offset_t uservaddr,
3236 	uint32_t pageIndex,
3237 #if !__has_feature(ptrauth_calls)
3238 	__unused
3239 #endif /* !__has_feature(ptrauth_calls) */
3240 	uint64_t jop_key)
3241 {
3242 	vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
3243 	const uint32_t slide_amount = si->si_slide;
3244 
3245 	uint8_t *page_content = (uint8_t *)vaddr;
3246 	uint16_t page_entry;
3247 
3248 	if (pageIndex >= s_info->page_starts_count) {
3249 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3250 		    pageIndex, s_info->page_starts_count);
3251 		return KERN_FAILURE;
3252 	}
3253 	page_entry = s_info->page_starts[pageIndex];
3254 
3255 	if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
3256 		return KERN_SUCCESS;
3257 	}
3258 
3259 	uint8_t* rebaseLocation = page_content;
3260 	uint64_t delta = page_entry;
3261 	do {
3262 		rebaseLocation += delta;
3263 		uint64_t value;
3264 		memcpy(&value, rebaseLocation, sizeof(value));
3265 		delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
3266 
3267 		// A pointer is one of:
3268 		// {
3269 		//	 uint64_t pointerValue : 51;
3270 		//	 uint64_t offsetToNextPointer : 11;
3271 		//	 uint64_t isBind : 1 = 0;
3272 		//	 uint64_t authenticated : 1 = 0;
3273 		// }
3274 		// {
3275 		//	 uint32_t offsetFromSharedCacheBase;
3276 		//	 uint16_t diversityData;
3277 		//	 uint16_t hasAddressDiversity : 1;
3278 		//	 uint16_t hasDKey : 1;
3279 		//	 uint16_t hasBKey : 1;
3280 		//	 uint16_t offsetToNextPointer : 11;
3281 		//	 uint16_t isBind : 1;
3282 		//	 uint16_t authenticated : 1 = 1;
3283 		// }
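		// Illustrative sketch only -- a hypothetical bitfield view of the two
		// layouts above (dyld's own headers may use different names):
		//
		//   union slide_v3_ptr {                        // hypothetical name
		//       uint64_t raw;
		//       struct {
		//           uint64_t pointerValue        : 51,
		//                    offsetToNextPointer : 11,
		//                    isBind              :  1,  // == 0
		//                    authenticated       :  1;  // == 0
		//       } plain;
		//       struct {
		//           uint64_t offsetFromSharedCacheBase : 32,
		//                    diversityData              : 16,
		//                    hasAddressDiversity        :  1,
		//                    hasDKey                    :  1,
		//                    hasBKey                    :  1,
		//                    offsetToNextPointer        : 11,
		//                    isBind                     :  1,
		//                    authenticated              :  1;  // == 1
		//       } auth;
		//   };
		//
		// The bit tests below (bit 62 for isBind, bit 63 for authenticated,
		// bit 48 for address diversity, bits 49-50 for the key) follow this
		// layout.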
3284 
3285 		bool isBind = (value & (1ULL << 62)) != 0;
3286 		if (isBind) {
3287 			return KERN_FAILURE;
3288 		}
3289 
3290 #if __has_feature(ptrauth_calls)
3291 		uint16_t diversity_data = (uint16_t)(value >> 32);
3292 		bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
3293 		ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
3294 #endif /* __has_feature(ptrauth_calls) */
3295 		bool isAuthenticated = (value & (1ULL << 63)) != 0;
3296 
3297 		if (isAuthenticated) {
3298 			// The new value for a rebase is the low 32-bits of the threaded value plus the slide.
3299 			value = (value & 0xFFFFFFFF) + slide_amount;
3300 			// Add in the offset from the mach_header
3301 			const uint64_t value_add = s_info->value_add;
3302 			value += value_add;
3303 
3304 #if __has_feature(ptrauth_calls)
3305 			uint64_t discriminator = diversity_data;
3306 			if (hasAddressDiversity) {
3307 				// First calculate a new discriminator using the address of where we are trying to store the value
3308 				uintptr_t pageOffset = rebaseLocation - page_content;
3309 				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3310 			}
3311 
3312 			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3313 				/*
3314 			 * These pointers are used in user mode, so disable the kernel key
3315 			 * diversification and sign them with the user's JOP key.
3316 				 */
3317 				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3318 			}
3319 #endif /* __has_feature(ptrauth_calls) */
3320 		} else {
3321 			// The new value for a rebase is the low 51-bits of the threaded value plus the slide.
3322 			// Regular pointer which needs to fit in 51-bits of value.
3323 			// C++ RTTI uses the top bit, so we allow the whole top byte
3324 			// and the bottom 43 bits to be packed into the 51-bit value.
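			// Worked example (illustrative): for a stored value whose
			// top8Bits field holds 0x80 and whose bottom43Bits are 0x1:
			//   top8Bits    = value & 0x0007F80000000000 = 0x0004000000000000
			//   targetValue = (top8Bits << 13) | 0x1     = 0x8000000000000001
			// i.e. the original top byte 0x80 is restored to bits 56..63
			// before the slide is added.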
3325 			uint64_t top8Bits = value & 0x0007F80000000000ULL;
3326 			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
3327 			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
3328 			value = targetValue + slide_amount;
3329 		}
3330 
3331 		memcpy(rebaseLocation, &value, sizeof(value));
3332 	} while (delta != 0);
3333 
3334 	return KERN_SUCCESS;
3335 }
3336 
3337 static kern_return_t
3338 rebase_chainv4(
3339 	uint8_t *page_content,
3340 	uint16_t start_offset,
3341 	uint32_t slide_amount,
3342 	vm_shared_region_slide_info_entry_v4_t s_info)
3343 {
3344 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3345 
3346 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3347 	const uint32_t value_mask = ~delta_mask;
3348 	const uint32_t value_add = (uint32_t)(s_info->value_add);
3349 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3350 
3351 	uint32_t page_offset = start_offset;
3352 	uint32_t delta = 1;
3353 
3354 	while (delta != 0 && page_offset <= last_page_offset) {
3355 		uint8_t *loc;
3356 		uint32_t value;
3357 
3358 		loc = page_content + page_offset;
3359 		memcpy(&value, loc, sizeof(value));
3360 		delta = (value & delta_mask) >> delta_shift;
3361 		value &= value_mask;
3362 
3363 		if ((value & 0xFFFF8000) == 0) {
3364 			// small positive non-pointer, use as-is
3365 		} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
3366 			// small negative non-pointer
3367 			value |= 0xC0000000;
3368 		} else {
3369 			// pointer that needs rebasing
3370 			value += value_add;
3371 			value += slide_amount;
3372 		}
3373 		memcpy(loc, &value, sizeof(value));
3374 		page_offset += delta;
3375 	}
3376 
3377 	/* If the offset went past the end of the page, then the slide data is invalid. */
3378 	if (page_offset > last_page_offset) {
3379 		return KERN_FAILURE;
3380 	}
3381 	return KERN_SUCCESS;
3382 }
3383 
3384 static kern_return_t
3385 vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3386 {
3387 	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
3388 	const uint32_t slide_amount = si->si_slide;
3389 
3390 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3391 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3392 
3393 	uint8_t *page_content = (uint8_t *)vaddr;
3394 	uint16_t page_entry;
3395 
3396 	if (pageIndex >= s_info->page_starts_count) {
3397 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3398 		    pageIndex, s_info->page_starts_count);
3399 		return KERN_FAILURE;
3400 	}
3401 	page_entry = page_starts[pageIndex];
3402 
3403 	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
3404 		return KERN_SUCCESS;
3405 	}
3406 
3407 	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
3408 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
3409 		uint16_t info;
3410 
3411 		do {
3412 			uint16_t page_start_offset;
3413 			kern_return_t kr;
3414 
3415 			if (chain_index >= s_info->page_extras_count) {
3416 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3417 				    chain_index, s_info->page_extras_count);
3418 				return KERN_FAILURE;
3419 			}
3420 			info = page_extras[chain_index];
3421 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3422 
3423 			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3424 			if (kr != KERN_SUCCESS) {
3425 				return KERN_FAILURE;
3426 			}
3427 
3428 			chain_index++;
3429 		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
3430 	} else {
3431 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3432 		kern_return_t kr;
3433 
3434 		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3435 		if (kr != KERN_SUCCESS) {
3436 			return KERN_FAILURE;
3437 		}
3438 	}
3439 
3440 	return KERN_SUCCESS;
3441 }
3442 
3443 
3444 static kern_return_t
3445 vm_shared_region_slide_page_v5(
3446 	vm_shared_region_slide_info_t si,
3447 	vm_offset_t vaddr,
3448 	__unused mach_vm_offset_t uservaddr,
3449 	uint32_t pageIndex,
3450 #if !__has_feature(ptrauth_calls)
3451 	__unused
3452 #endif /* !__has_feature(ptrauth_calls) */
3453 	uint64_t jop_key)
3454 {
3455 	vm_shared_region_slide_info_entry_v5_t s_info = &si->si_slide_info_entry->v5;
3456 	const uint32_t slide_amount = si->si_slide;
3457 	const uint64_t value_add = s_info->value_add;
3458 
3459 	uint8_t *page_content = (uint8_t *)vaddr;
3460 	uint16_t page_entry;
3461 
3462 	if (pageIndex >= s_info->page_starts_count) {
3463 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3464 		    pageIndex, s_info->page_starts_count);
3465 		return KERN_FAILURE;
3466 	}
3467 	page_entry = s_info->page_starts[pageIndex];
3468 
3469 	if (page_entry == DYLD_CACHE_SLIDE_V5_PAGE_ATTR_NO_REBASE) {
3470 		return KERN_SUCCESS;
3471 	}
3472 
3473 	uint8_t* rebaseLocation = page_content;
3474 	uint64_t delta = page_entry;
3475 	do {
3476 		rebaseLocation += delta;
3477 		uint64_t value;
3478 		memcpy(&value, rebaseLocation, sizeof(value));
3479 		delta = ((value & 0x7FF0000000000000ULL) >> 52) * sizeof(uint64_t);
3480 
3481 		// A pointer is one of:
3482 		// {
3483 		//   uint64_t    runtimeOffset   : 34,   // offset from the start of the shared cache
3484 		//               high8           :  8,
3485 		//               unused          : 10,
3486 		//               next            : 11,   // 8-byte stride
3487 		//               auth            :  1;   // == 0
3488 		// }
3489 		// {
3490 		//   uint64_t    runtimeOffset   : 34,   // offset from the start of the shared cache
3491 		//               diversity       : 16,
3492 		//               addrDiv         :  1,
3493 		//               keyIsData       :  1,   // implicitly always the 'A' key.  0 -> IA.  1 -> DA
3494 		//               next            : 11,   // 8-byte stride
3495 		//               auth            :  1;   // == 1
3496 		// }
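		// Illustrative sketch only -- a hypothetical bitfield view of the two
		// layouts above (dyld's own headers may use different names):
		//
		//   union slide_v5_ptr {               // hypothetical name
		//       uint64_t raw;
		//       struct {
		//           uint64_t runtimeOffset : 34,
		//                    high8         :  8,
		//                    unused        : 10,
		//                    next          : 11,
		//                    auth          :  1;  // == 0
		//       } plain;
		//       struct {
		//           uint64_t runtimeOffset : 34,
		//                    diversity     : 16,
		//                    addrDiv       :  1,
		//                    keyIsData     :  1,
		//                    next          : 11,
		//                    auth          :  1;  // == 1
		//       } authed;
		//   };
		//
		// The extractions below (bit 50 for addrDiv, bit 51 for keyIsData,
		// bits 34..49 for diversity, the high8 shift, and the low 34-bit
		// runtimeOffset mask) follow this layout.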
3497 
3498 #if __has_feature(ptrauth_calls)
3499 		bool        addrDiv = ((value & (1ULL << 50)) != 0);
3500 		bool        keyIsData = ((value & (1ULL << 51)) != 0);
3501 		// the key is always the A key; the bit tells us whether it's IA or DA
3502 		ptrauth_key key = keyIsData ? ptrauth_key_asda : ptrauth_key_asia;
3503 		uint16_t    diversity = (uint16_t)((value >> 34) & 0xFFFF);
3504 #endif /* __has_feature(ptrauth_calls) */
3505 		uint64_t    high8 = (value << 22) & 0xFF00000000000000ULL;
3506 		bool        isAuthenticated = (value & (1ULL << 63)) != 0;
3507 
3508 		// The new value for a rebase is the low 34-bits of the threaded value plus the base plus slide.
3509 		value = (value & 0x3FFFFFFFFULL) + value_add + slide_amount;
3510 		if (isAuthenticated) {
3511 #if __has_feature(ptrauth_calls)
3512 			uint64_t discriminator = diversity;
3513 			if (addrDiv) {
3514 				// First calculate a new discriminator using the address of where we are trying to store the value
3515 				uintptr_t pageOffset = rebaseLocation - page_content;
3516 				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3517 			}
3518 
3519 			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3520 				/*
3521 				 * These pointers are used in user mode, so disable the kernel key
3522 				 * diversification and sign them with the user's JOP key.
3523 				 */
3524 				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3525 			}
3526 #endif /* __has_feature(ptrauth_calls) */
3527 		} else {
3528 			// the value already has the correct low bits, so just add in the high8 if it exists
3529 			value += high8;
3530 		}
3531 
3532 		memcpy(rebaseLocation, &value, sizeof(value));
3533 	} while (delta != 0);
3534 
3535 	return KERN_SUCCESS;
3536 }
3537 
3538 
3539 
3540 kern_return_t
3541 vm_shared_region_slide_page(
3542 	vm_shared_region_slide_info_t si,
3543 	vm_offset_t vaddr,
3544 	mach_vm_offset_t uservaddr,
3545 	uint32_t pageIndex,
3546 	uint64_t jop_key)
3547 {
3548 	switch (si->si_slide_info_entry->version) {
3549 	case 1:
3550 		return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
3551 	case 2:
3552 		return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3553 	case 3:
3554 		return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3555 	case 4:
3556 		return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3557 	case 5:
3558 		return vm_shared_region_slide_page_v5(si, vaddr, uservaddr, pageIndex, jop_key);
3559 	default:
3560 		return KERN_FAILURE;
3561 	}
3562 }
3563 
3564 /******************************************************************************/
3565 /* Comm page support                                                          */
3566 /******************************************************************************/
3567 
3568 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
3569 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
3570 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
3571 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
3572 SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
3573 SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;
3574 
3575 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
3576 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
3577 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
3578 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
3579 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
3580 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;
3581 
3582 SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
3583 SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3584 
3585 #if defined(__i386__) || defined(__x86_64__)
3586 /*
3587  * Create a memory entry, VM submap and pmap for one commpage.
3588  */
3589 static void
3590 _vm_commpage_init(
3591 	ipc_port_t      *handlep,
3592 	vm_map_size_t   size)
3593 {
3594 	vm_named_entry_t        mem_entry;
3595 	vm_map_t                new_map;
3596 
3597 	SHARED_REGION_TRACE_DEBUG(
3598 		("commpage: -> _init(0x%llx)\n",
3599 		(long long)size));
3600 
3601 	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
3602 	if (new_pmap == NULL) {
3603 		panic("_vm_commpage_init: could not allocate pmap");
3604 	}
3605 	new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);
3606 
3607 	mem_entry = mach_memory_entry_allocate(handlep);
3608 	mem_entry->backing.map = new_map;
3609 	mem_entry->internal = TRUE;
3610 	mem_entry->is_sub_map = TRUE;
3611 	mem_entry->offset = 0;
3612 	mem_entry->protection = VM_PROT_ALL;
3613 	mem_entry->size = size;
3614 
3615 	SHARED_REGION_TRACE_DEBUG(
3616 		("commpage: _init(0x%llx) <- %p\n",
3617 		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
3618 }
3619 #endif
3620 
3621 
3622 /*
3623  * Initialize the comm text pages at boot time
3624  */
3625 void
3626 vm_commpage_text_init(void)
3627 {
3628 	SHARED_REGION_TRACE_DEBUG(
3629 		("commpage text: ->init()\n"));
3630 #if defined(__i386__) || defined(__x86_64__)
3631 	/* create the 32-bit comm text page */
3632 	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32-bit MAX minus 2 pages */
3633 	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3634 	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
3635 	commpage_text32_map = commpage_text32_entry->backing.map;
3636 	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
3637 	/* XXX if (cpu_is_64bit_capable()) ? */
3638 	/* create the 64-bit comm text page */
3639 	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding to within a 2MB range */
3640 	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3641 	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
3642 	commpage_text64_map = commpage_text64_entry->backing.map;
3643 	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
3644 #endif
3645 
3646 	/* populate the routines in here */
3647 	commpage_text_populate();
3648 
3649 	SHARED_REGION_TRACE_DEBUG(
3650 		("commpage text: init() <-\n"));
3651 }
3652 
3653 /*
3654  * Initialize the comm pages at boot time.
3655  */
3656 void
3657 vm_commpage_init(void)
3658 {
3659 	SHARED_REGION_TRACE_DEBUG(
3660 		("commpage: -> init()\n"));
3661 
3662 #if defined(__i386__) || defined(__x86_64__)
3663 	/* create the 32-bit comm page */
3664 	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
3665 	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
3666 	commpage32_map = commpage32_entry->backing.map;
3667 
3668 	/* XXX if (cpu_is_64bit_capable()) ? */
3669 	/* create the 64-bit comm page */
3670 	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
3671 	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
3672 	commpage64_map = commpage64_entry->backing.map;
3673 
3674 #endif /* __i386__ || __x86_64__ */
3675 
3676 	/* populate them according to this specific platform */
3677 	commpage_populate();
3678 	__commpage_setup = 1;
3679 #if XNU_TARGET_OS_OSX
3680 	if (__system_power_source == 0) {
3681 		post_sys_powersource_internal(0, 1);
3682 	}
3683 #endif /* XNU_TARGET_OS_OSX */
3684 
3685 	SHARED_REGION_TRACE_DEBUG(
3686 		("commpage: init() <-\n"));
3687 }
3688 
3689 /*
3690  * Enter the appropriate comm page into the task's address space.
3691  * This is called at exec() time via vm_map_exec().
3692  */
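/*
 * Illustrative call site (hypothetical names; the real exec-time logic lives
 * in vm_map_exec()):
 *
 *	kr = vm_commpage_enter(new_map, task, vm_map_is_64bit(new_map));
 */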
3693 kern_return_t
3694 vm_commpage_enter(
3695 	vm_map_t        map,
3696 	task_t          task,
3697 	boolean_t       is64bit)
3698 {
3699 #if   defined(__arm64__)
3700 #pragma unused(is64bit)
3701 	(void)task;
3702 	(void)map;
3703 	pmap_insert_commpage(vm_map_pmap(map));
3704 	return KERN_SUCCESS;
3705 #else
3706 	ipc_port_t              commpage_handle, commpage_text_handle;
3707 	vm_map_offset_t         commpage_address, objc_address, commpage_text_address;
3708 	vm_map_size_t           commpage_size, objc_size, commpage_text_size;
3709 	vm_map_kernel_flags_t   vmk_flags;
3710 	kern_return_t           kr;
3711 
3712 	SHARED_REGION_TRACE_DEBUG(
3713 		("commpage: -> enter(%p,%p)\n",
3714 		(void *)VM_KERNEL_ADDRPERM(map),
3715 		(void *)VM_KERNEL_ADDRPERM(task)));
3716 
3717 	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
3718 	/* the comm page is likely to be beyond the actual end of the VM map */
3719 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
3720 	vmk_flags.vmkf_beyond_max = TRUE;
3721 
3722 	/* select the appropriate comm page for this task */
3723 	assert(!(is64bit ^ vm_map_is_64bit(map)));
3724 	if (is64bit) {
3725 		commpage_handle = commpage64_handle;
3726 		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
3727 		commpage_size = _COMM_PAGE64_AREA_LENGTH;
3728 		objc_size = _COMM_PAGE64_OBJC_SIZE;
3729 		objc_address = _COMM_PAGE64_OBJC_BASE;
3730 		commpage_text_handle = commpage_text64_handle;
3731 		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
3732 	} else {
3733 		commpage_handle = commpage32_handle;
3734 		commpage_address =
3735 		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
3736 		commpage_size = _COMM_PAGE32_AREA_LENGTH;
3737 		objc_size = _COMM_PAGE32_OBJC_SIZE;
3738 		objc_address = _COMM_PAGE32_OBJC_BASE;
3739 		commpage_text_handle = commpage_text32_handle;
3740 		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
3741 	}
3742 
3743 	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
3744 	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
3745 		/* the commpage is properly aligned and sized for pmap-nesting */
3746 		vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
3747 		vmk_flags.vmkf_nested_pmap = TRUE;
3748 	}
3749 
3750 	/* map the comm page in the task's address space */
3751 	assert(commpage_handle != IPC_PORT_NULL);
3752 	kr = mach_vm_map_kernel(
3753 		map,
3754 		vm_sanitize_wrap_addr_ref(&commpage_address),
3755 		commpage_size,
3756 		0,
3757 		vmk_flags,
3758 		commpage_handle,
3759 		0,
3760 		FALSE,
3761 		VM_PROT_READ,
3762 		VM_PROT_READ,
3763 		VM_INHERIT_SHARE);
3764 	if (kr != KERN_SUCCESS) {
3765 		SHARED_REGION_TRACE_ERROR(
3766 			("commpage: enter(%p,0x%llx,0x%llx) "
3767 			"commpage %p mapping failed 0x%x\n",
3768 			(void *)VM_KERNEL_ADDRPERM(map),
3769 			(long long)commpage_address,
3770 			(long long)commpage_size,
3771 			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
3772 	}
3773 
3774 	/* map the comm text page in the task's address space */
3775 	assert(commpage_text_handle != IPC_PORT_NULL);
3776 	kr = mach_vm_map_kernel(
3777 		map,
3778 		vm_sanitize_wrap_addr_ref(&commpage_text_address),
3779 		commpage_text_size,
3780 		0,
3781 		vmk_flags,
3782 		commpage_text_handle,
3783 		0,
3784 		FALSE,
3785 		VM_PROT_READ | VM_PROT_EXECUTE,
3786 		VM_PROT_READ | VM_PROT_EXECUTE,
3787 		VM_INHERIT_SHARE);
3788 	if (kr != KERN_SUCCESS) {
3789 		SHARED_REGION_TRACE_ERROR(
3790 			("commpage text: enter(%p,0x%llx,0x%llx) "
3791 			"commpage text %p mapping failed 0x%x\n",
3792 			(void *)VM_KERNEL_ADDRPERM(map),
3793 			(long long)commpage_text_address,
3794 			(long long)commpage_text_size,
3795 			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
3796 	}
3797 
3798 	/*
3799 	 * Since we're here, we also pre-allocate some virtual space for the
3800 	 * Objective-C run-time, if needed...
3801 	 */
3802 	if (objc_size != 0) {
3803 		kr = mach_vm_map_kernel(
3804 			map,
3805 			vm_sanitize_wrap_addr_ref(&objc_address),
3806 			objc_size,
3807 			0,
3808 			vmk_flags,
3809 			IPC_PORT_NULL,
3810 			0,
3811 			FALSE,
3812 			VM_PROT_ALL,
3813 			VM_PROT_ALL,
3814 			VM_INHERIT_DEFAULT);
3815 		if (kr != KERN_SUCCESS) {
3816 			SHARED_REGION_TRACE_ERROR(
3817 				("commpage: enter(%p,0x%llx,0x%llx) "
3818 				"objc mapping failed 0x%x\n",
3819 				(void *)VM_KERNEL_ADDRPERM(map),
3820 				(long long)objc_address,
3821 				(long long)objc_size, kr));
3822 		}
3823 	}
3824 
3825 	SHARED_REGION_TRACE_DEBUG(
3826 		("commpage: enter(%p,%p) <- 0x%x\n",
3827 		(void *)VM_KERNEL_ADDRPERM(map),
3828 		(void *)VM_KERNEL_ADDRPERM(task), kr));
3829 	return kr;
3830 #endif
3831 }
3832 
3833 int
3834 vm_shared_region_slide(
3835 	uint32_t slide,
3836 	mach_vm_offset_t        entry_start_address,
3837 	mach_vm_size_t          entry_size,
3838 	mach_vm_offset_t        slide_start,
3839 	mach_vm_size_t          slide_size,
3840 	mach_vm_offset_t        slid_mapping,
3841 	memory_object_control_t sr_file_control,
3842 	vm_prot_t               prot)
3843 {
3844 	vm_shared_region_t      sr;
3845 	kern_return_t           error;
3846 
3847 	SHARED_REGION_TRACE_DEBUG(
3848 		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
3849 		slide, entry_start_address, entry_size, slide_start, slide_size));
3850 
3851 	sr = vm_shared_region_get(current_task());
3852 	if (sr == NULL) {
3853 		printf("%s: no shared region?\n", __FUNCTION__);
3854 		SHARED_REGION_TRACE_DEBUG(
3855 			("vm_shared_region_slide: <- %d (no shared region)\n",
3856 			KERN_FAILURE));
3857 		return KERN_FAILURE;
3858 	}
3859 
3860 	/*
3861 	 * Protect from concurrent access.
3862 	 */
3863 	vm_shared_region_lock();
3864 	while (sr->sr_slide_in_progress) {
3865 		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
3866 	}
3867 
3868 	sr->sr_slide_in_progress = current_thread();
3869 	vm_shared_region_unlock();
3870 
3871 	error = vm_shared_region_slide_mapping(sr,
3872 	    (user_addr_t)slide_start,
3873 	    slide_size,
3874 	    entry_start_address,
3875 	    entry_size,
3876 	    slid_mapping,
3877 	    slide,
3878 	    sr_file_control,
3879 	    prot);
3880 	if (error) {
3881 		printf("slide_info initialization failed with kr=%d\n", error);
3882 	}
3883 
3884 	vm_shared_region_lock();
3885 
3886 	assert(sr->sr_slide_in_progress == current_thread());
3887 	sr->sr_slide_in_progress = THREAD_NULL;
3888 	vm_shared_region_wakeup(&sr->sr_slide_in_progress);
3889 
3890 #if XNU_TARGET_OS_OSX
3891 	if (error == KERN_SUCCESS) {
3892 		shared_region_completed_slide = TRUE;
3893 	}
3894 #endif /* XNU_TARGET_OS_OSX */
3895 	vm_shared_region_unlock();
3896 
3897 	vm_shared_region_deallocate(sr);
3898 
3899 	SHARED_REGION_TRACE_DEBUG(
3900 		("vm_shared_region_slide: <- %d\n",
3901 		error));
3902 
3903 	return error;
3904 }
3905 
3906 /*
3907  * Used during Authenticated Root Volume macOS boot.
3908  * Launchd re-execs itself and wants the new launchd to use
3909  * the shared cache from the new root volume. This call
3910  * makes all the existing shared caches stale to allow
3911  * that to happen.
3912  */
3913 void
3914 vm_shared_region_pivot(void)
3915 {
3916 	vm_shared_region_t      shared_region = NULL;
3917 
3918 	vm_shared_region_lock();
3919 
3920 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3921 		assert(shared_region->sr_ref_count > 0);
3922 		shared_region->sr_stale = TRUE;
3923 		if (shared_region->sr_timer_call) {
3924 			/*
3925 			 * We have a shared region ready to be destroyed
3926 			 * and just waiting for a delayed timer to fire.
3927 			 * Marking it stale cements its ineligibility to
3928 			 * be used ever again. So let's shorten the timer
3929 			 * aggressively down to 10 milliseconds and get rid of it.
3930 			 * This is a single quantum and we don't need to go
3931 			 * shorter than this duration. We want it to be short
3932 			 * enough, however, because we could have an unmount
3933 			 * of the volume hosting this shared region just behind
3934 			 * us.
3935 			 */
3936 			uint64_t deadline;
3937 			assert(shared_region->sr_ref_count == 1);
3938 
3939 			/*
3940 			 * Free the old timer call. Returns with a reference held.
3941 			 * If the old timer has fired and is waiting for the vm_shared_region_lock
3942 			 * lock, we will just return with an additional ref_count i.e. 2.
3943 			 * The old timer will then fire and just drop the ref count down to 1
3944 			 * with no other modifications.
3945 			 */
3946 			vm_shared_region_reference_locked(shared_region);
3947 
3948 			/* set up the timer. Keep the reference from above for this timer.*/
3949 			shared_region->sr_timer_call = thread_call_allocate(
3950 				(thread_call_func_t) vm_shared_region_timeout,
3951 				(thread_call_param_t) shared_region);
3952 
3953 			/* schedule the timer */
3954 			clock_interval_to_deadline(10, /* 10 milliseconds */
3955 			    NSEC_PER_MSEC,
3956 			    &deadline);
3957 			thread_call_enter_delayed(shared_region->sr_timer_call,
3958 			    deadline);
3959 
3960 			SHARED_REGION_TRACE_DEBUG(
3961 				("shared_region: pivot(%p): armed timer\n",
3962 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
3963 		}
3964 	}
3965 
3966 	vm_shared_region_unlock();
3967 }
3968 
3969 /*
3970  * Routine to mark any non-standard slide shared cache region as stale.
3971  * This causes the next "reslide" spawn to create a new shared region.
3972  */
3973 void
3974 vm_shared_region_reslide_stale(boolean_t driverkit)
3975 {
3976 #if __has_feature(ptrauth_calls)
3977 	vm_shared_region_t      shared_region = NULL;
3978 
3979 	vm_shared_region_lock();
3980 
3981 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3982 		assert(shared_region->sr_ref_count > 0);
3983 		if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
3984 			shared_region->sr_stale = TRUE;
3985 			vm_shared_region_reslide_count++;
3986 		}
3987 	}
3988 
3989 	vm_shared_region_unlock();
3990 #else
3991 	(void)driverkit;
3992 #endif /* __has_feature(ptrauth_calls) */
3993 }
3994 
3995 /*
3996  * report if the task is using a reslide shared cache region.
3997  */
3998 bool
3999 vm_shared_region_is_reslide(__unused struct task *task)
4000 {
4001 	bool is_reslide = FALSE;
4002 #if __has_feature(ptrauth_calls)
4003 	vm_shared_region_t sr = vm_shared_region_get(task);
4004 
4005 	if (sr != NULL) {
4006 		is_reslide = sr->sr_reslide;
4007 		vm_shared_region_deallocate(sr);
4008 	}
4009 #endif /* __has_feature(ptrauth_calls) */
4010 	return is_reslide;
4011 }
4012 
4013 /*
4014  * This is called from power management code to let the kernel know the current source of power:
4015  * 0 if it is an external source (connected to power),
4016  * 1 if it is an internal power source, i.e. the battery.
4017  */
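/*
 * Illustrative usage (hypothetical call sites): the power management code
 * would call post_sys_powersource(1) when the machine switches to battery
 * and post_sys_powersource(0) when external power is restored.
 */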
4018 void
4019 #if XNU_TARGET_OS_OSX
4020 post_sys_powersource(int i)
4021 #else /* XNU_TARGET_OS_OSX */
4022 post_sys_powersource(__unused int i)
4023 #endif /* XNU_TARGET_OS_OSX */
4024 {
4025 #if XNU_TARGET_OS_OSX
4026 	post_sys_powersource_internal(i, 0);
4027 #endif /* XNU_TARGET_OS_OSX */
4028 }
4029 
4030 
4031 #if XNU_TARGET_OS_OSX
4032 static void
4033 post_sys_powersource_internal(int i, int internal)
4034 {
4035 	if (internal == 0) {
4036 		__system_power_source = i;
4037 	}
4038 }
4039 #endif /* XNU_TARGET_OS_OSX */
4040 
4041 void *
4042 vm_shared_region_root_dir(
4043 	struct vm_shared_region *sr)
4044 {
4045 	void *vnode;
4046 
4047 	vm_shared_region_lock();
4048 	vnode = sr->sr_root_dir;
4049 	vm_shared_region_unlock();
4050 	return vnode;
4051 }
4052