xref: /xnu-8019.80.24/osfmk/vm/vm_shared_region.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
10  * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 
24 /*
25  * Shared region (... and comm page)
26  *
27  * This file handles the VM shared region and comm page.
28  *
29  */
30 /*
31  * SHARED REGIONS
32  * --------------
33  *
34  * A shared region is a submap that contains the most common system shared
35  * libraries for a given environment which is defined by:
36  * - cpu-type
37  * - 64-bitness
38  * - root directory
39  * - Team ID - when we have pointer authentication.
40  *
41  * The point of a shared region is to reduce the setup overhead when exec'ing
42  * a new process. A shared region uses a shared VM submap that gets mapped
43  * automatically at exec() time, see vm_map_exec().  The first process of a given
44  * environment sets up the shared region and all further processes in that
45  * environment can re-use that shared region without having to re-create
46  * the same mappings in their VM map.  All they need is contained in the shared
47  * region.
48  *
49  * The region can also share a pmap (mostly for read-only parts but also for the
50  * initial version of some writable parts), which gets "nested" into the
51  * process's pmap.  This reduces the number of soft faults:  once one process
52  * brings in a page in the shared region, all the other processes can access
53  * it without having to enter it in their own pmap.
54  *
55  * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56  * to map the appropriate shared region in the process's address space.
57  * We look up the appropriate shared region for the process's environment.
58  * If we can't find one, we create a new (empty) one and add it to the list.
59  * Otherwise, we just take an extra reference on the shared region we found.
60  *
61  * The "dyld" runtime, mapped into the process's address space at exec() time,
62  * will then use the shared_region_check_np() and shared_region_map_and_slide_np()
63  * system calls to validate and/or populate the shared region with the
64  * appropriate dyld_shared_cache file.
65  *
66  * The shared region is inherited on fork() and the child simply takes an
67  * extra reference on its parent's shared region.
68  *
69  * When the task terminates, we release the reference on its shared region.
70  * When the last reference is released, we destroy the shared region.
71  *
72  * After a chroot(), the calling process keeps using its original shared region,
73  * since that's what was mapped when it was started.  But its children
74  * will use a different shared region, because they need to use the shared
75  * cache that's relative to the new root directory.
76  */
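
/*
 * A minimal user-space sketch of the dyld-side check described above,
 * assuming SYS_shared_region_check_np is available in <sys/syscall.h>
 * (syscall() is deprecated on recent macOS but still illustrates the point).
 * The kernel side of this call ends up in vm_shared_region_start_address().
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t sr_start = 0;
 *
 *		// on success, sr_start holds the address of the first mapping
 *		// of the dyld shared cache in this process
 *		if (syscall(SYS_shared_region_check_np, &sr_start) == 0) {
 *			printf("shared cache mapped at 0x%llx\n",
 *			    (unsigned long long)sr_start);
 *		} else {
 *			printf("no shared region mapped\n");
 *		}
 *		return 0;
 *	}
 */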
77 
78 /*
79  * COMM PAGE
80  *
81  * A "comm page" is an area of memory that is populated by the kernel with
82  * the appropriate platform-specific version of some commonly used code.
83  * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84  * for the native cpu-type.  No need to overly optimize translated code
85  * for hardware that is not really there !
86  *
87  * The comm pages are created and populated at boot time.
88  *
89  * The appropriate comm page is mapped into a process's address space
90  * at exec() time, in vm_map_exec(). It is then inherited on fork().
91  *
92  * The comm page is shared between the kernel and all applications of
93  * a given platform. Only the kernel can modify it.
94  *
95  * Applications just branch to fixed addresses in the comm page and find
96  * the right version of the code for the platform.  There is also some
97  * data provided and updated by the kernel for processes to retrieve easily
98  * without having to do a system call.
99  */
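
/*
 * A minimal user-space sketch of reading comm page data directly, with no
 * system call.  The base address and field offsets below are assumptions
 * taken from the arm64 cpu_capabilities.h; they differ on other architectures
 * and may change across releases, so real code should use the _COMM_PAGE_*
 * macros instead of hard-coded values.
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	// assumed arm64 values (see osfmk/arm/cpu_capabilities.h):
 *	#define COMMPAGE_BASE    0x0000000FFFFFC000ULL    // _COMM_PAGE64_BASE_ADDRESS
 *	#define COMMPAGE_VERSION (COMMPAGE_BASE + 0x01E)  // uint16_t, _COMM_PAGE_VERSION
 *	#define COMMPAGE_NCPUS   (COMMPAGE_BASE + 0x022)  // uint8_t, _COMM_PAGE_NCPUS
 *
 *	int
 *	main(void)
 *	{
 *		uint16_t version = *(volatile uint16_t *)(uintptr_t)COMMPAGE_VERSION;
 *		uint8_t  ncpus   = *(volatile uint8_t *)(uintptr_t)COMMPAGE_NCPUS;
 *
 *		printf("comm page version %u, %u configured cpus\n", version, ncpus);
 *		return 0;
 *	}
 */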
100 
101 #include <debug.h>
102 
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106 
107 #include <mach/mach_vm.h>
108 #include <mach/machine.h>
109 
110 #include <vm/vm_map.h>
111 #include <vm/vm_shared_region.h>
112 
113 #include <vm/vm_protos.h>
114 
115 #include <machine/commpage.h>
116 #include <machine/cpu_capabilities.h>
117 #include <sys/random.h>
118 
119 #if defined (__arm__) || defined(__arm64__)
120 #include <arm/cpu_data_internal.h>
121 #include <arm/misc_protos.h>
122 #endif
123 
124 /*
125  * the following codes are used in the  subclass
126  * of the DBG_MACH_SHAREDREGION class
127  */
128 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
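
/*
 * Sketch of how a trace point under this subclass would be emitted, assuming
 * the usual MACHDBG_CODE/KDBG_RELEASE macros from <sys/kdebug.h>; the
 * arguments here are placeholders, not the actual payload used by the VM code.
 *
 *	uint32_t dbgid = MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT);
 *	KDBG_RELEASE(dbgid, arg1, arg2, arg3, arg4);
 */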
129 
130 #if __has_feature(ptrauth_calls)
131 #include <ptrauth.h>
132 #endif /* __has_feature(ptrauth_calls) */
133 
134 /* "dyld" uses this to figure out what the kernel supports */
135 int shared_region_version = 3;
136 
137 /* trace level, output is sent to the system log file */
138 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
139 
140 /* should local (non-chroot) shared regions persist when no task uses them ? */
141 int shared_region_persistence = 0;      /* no by default */
142 
143 
144 /* delay in seconds before reclaiming an unused shared region */
145 TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
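
/*
 * The TUNABLE_WRITEABLE() above ties the variable to the
 * "vm_shared_region_destroy_delay" boot-arg, so the reclaim delay can be
 * overridden at boot, e.g. (illustrative only; changing boot-args typically
 * requires SIP to be disabled):
 *
 *	nvram boot-args="vm_shared_region_destroy_delay=600"
 */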
146 
147 /*
148  * Cached pointer to the most recently mapped shared region from PID 1, which should
149  * be the most commonly mapped shared region in the system.  There are many processes
150  * which do not use this, for a variety of reasons.
151  *
152  * The main consumer of this is stackshot.
153  */
154 struct vm_shared_region *primary_system_shared_region = NULL;
155 
156 #if XNU_TARGET_OS_OSX
157 /*
158  * Only one cache gets to slide on Desktop, since we can't
159  * tear down slide info properly today and the desktop actually
160  * produces lots of shared caches.
161  */
162 boolean_t shared_region_completed_slide = FALSE;
163 #endif /* XNU_TARGET_OS_OSX */
164 
165 /* this lock protects all the shared region data structures */
166 static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
167 static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
168 
169 #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
170 #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
171 #define vm_shared_region_sleep(event, interruptible)                    \
172 	lck_mtx_sleep(&vm_shared_region_lock,                           \
173 	              LCK_SLEEP_DEFAULT,                                \
174 	              (event_t) (event),                                \
175 	              (interruptible))
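
/*
 * These macros pair with thread_wakeup() in the wait protocol used around
 * sr_mapping_in_progress and sr_slide_in_progress; a condensed sketch of the
 * pattern used by vm_shared_region_map_file_setup() and
 * vm_shared_region_auth_remap() below:
 *
 *	vm_shared_region_lock();
 *	while (sr->sr_mapping_in_progress) {
 *		// drops the lock while asleep, re-acquires it before returning
 *		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
 *	}
 *	sr->sr_mapping_in_progress = TRUE;	// claim the work
 *	vm_shared_region_unlock();
 *
 *	// ... do the mapping/slide work without holding the lock ...
 *
 *	vm_shared_region_lock();
 *	sr->sr_mapping_in_progress = FALSE;
 *	thread_wakeup((event_t)&sr->sr_mapping_in_progress);
 *	vm_shared_region_unlock();
 */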
176 
177 /* the list of currently available shared regions (one per environment) */
178 queue_head_t    vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
179 int             vm_shared_region_count = 0;
180 int             vm_shared_region_peak = 0;
181 
182 /*
183  * the number of times an event has forced the recalculation of the reslide
184  * shared region slide.
185  */
186 #if __has_feature(ptrauth_calls)
187 int                             vm_shared_region_reslide_count = 0;
188 #endif /* __has_feature(ptrauth_calls) */
189 
190 static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
191 static vm_shared_region_t vm_shared_region_create(
192 	void                    *root_dir,
193 	cpu_type_t              cputype,
194 	cpu_subtype_t           cpu_subtype,
195 	boolean_t               is_64bit,
196 	boolean_t               reslide,
197 	boolean_t               is_driverkit);
198 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
199 
200 static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
201 static void vm_shared_region_timeout(thread_call_param_t param0,
202     thread_call_param_t param1);
203 static kern_return_t vm_shared_region_slide_mapping(
204 	vm_shared_region_t sr,
205 	user_addr_t        slide_info_addr,
206 	mach_vm_size_t     slide_info_size,
207 	mach_vm_offset_t   start,
208 	mach_vm_size_t     size,
209 	mach_vm_offset_t   slid_mapping,
210 	uint32_t           slide,
211 	memory_object_control_t,
212 	vm_prot_t          prot); /* forward */
213 
214 static int __commpage_setup = 0;
215 #if XNU_TARGET_OS_OSX
216 static int __system_power_source = 1;   /* init to external power source */
217 static void post_sys_powersource_internal(int i, int internal);
218 #endif /* XNU_TARGET_OS_OSX */
219 
220 extern u_int32_t random(void);
221 
222 /*
223  * Retrieve a task's shared region and grab an extra reference to
224  * make sure it doesn't disappear while the caller is using it.
225  * The caller is responsible for consuming that extra reference if
226  * necessary.
227  */
228 vm_shared_region_t
229 vm_shared_region_get(
230 	task_t          task)
231 {
232 	vm_shared_region_t      shared_region;
233 
234 	SHARED_REGION_TRACE_DEBUG(
235 		("shared_region: -> get(%p)\n",
236 		(void *)VM_KERNEL_ADDRPERM(task)));
237 
238 	task_lock(task);
239 	vm_shared_region_lock();
240 	shared_region = task->shared_region;
241 	if (shared_region) {
242 		assert(shared_region->sr_ref_count > 0);
243 		vm_shared_region_reference_locked(shared_region);
244 	}
245 	vm_shared_region_unlock();
246 	task_unlock(task);
247 
248 	SHARED_REGION_TRACE_DEBUG(
249 		("shared_region: get(%p) <- %p\n",
250 		(void *)VM_KERNEL_ADDRPERM(task),
251 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
252 
253 	return shared_region;
254 }
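
/*
 * Sketch of the reference discipline expected from callers (a stackshot-style
 * consumer, for example):
 *
 *	vm_shared_region_t sr = vm_shared_region_get(task);
 *	if (sr != NULL) {
 *		// the extra reference keeps the region alive while we look at it
 *		mach_vm_offset_t base = sr->sr_base_address;
 *		// ... use the region ...
 *		vm_shared_region_deallocate(sr);	// consume the extra reference
 *	}
 */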
255 
256 vm_map_t
257 vm_shared_region_vm_map(
258 	vm_shared_region_t      shared_region)
259 {
260 	ipc_port_t              sr_handle;
261 	vm_named_entry_t        sr_mem_entry;
262 	vm_map_t                sr_map;
263 
264 	SHARED_REGION_TRACE_DEBUG(
265 		("shared_region: -> vm_map(%p)\n",
266 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
267 	assert(shared_region->sr_ref_count > 0);
268 
269 	sr_handle = shared_region->sr_mem_entry;
270 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
271 	sr_map = sr_mem_entry->backing.map;
272 	assert(sr_mem_entry->is_sub_map);
273 
274 	SHARED_REGION_TRACE_DEBUG(
275 		("shared_region: vm_map(%p) <- %p\n",
276 		(void *)VM_KERNEL_ADDRPERM(shared_region),
277 		(void *)VM_KERNEL_ADDRPERM(sr_map)));
278 	return sr_map;
279 }
280 
281 /*
282  * Set the shared region the process should use.
283  * A NULL new shared region means that we just want to release the old
284  * shared region.
285  * The caller should already have an extra reference on the new shared region
286  * (if any).  We release a reference on the old shared region (if any).
287  */
288 void
289 vm_shared_region_set(
290 	task_t                  task,
291 	vm_shared_region_t      new_shared_region)
292 {
293 	vm_shared_region_t      old_shared_region;
294 
295 	SHARED_REGION_TRACE_DEBUG(
296 		("shared_region: -> set(%p, %p)\n",
297 		(void *)VM_KERNEL_ADDRPERM(task),
298 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
299 
300 	task_lock(task);
301 	vm_shared_region_lock();
302 
303 	old_shared_region = task->shared_region;
304 	if (new_shared_region) {
305 		assert(new_shared_region->sr_ref_count > 0);
306 	}
307 
308 	task->shared_region = new_shared_region;
309 
310 	vm_shared_region_unlock();
311 	task_unlock(task);
312 
313 	if (old_shared_region) {
314 		assert(old_shared_region->sr_ref_count > 0);
315 		vm_shared_region_deallocate(old_shared_region);
316 	}
317 
318 	SHARED_REGION_TRACE_DEBUG(
319 		("shared_region: set(%p) <- old=%p new=%p\n",
320 		(void *)VM_KERNEL_ADDRPERM(task),
321 		(void *)VM_KERNEL_ADDRPERM(old_shared_region),
322 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
323 }
324 
325 /*
326  * New arm64 shared regions match with an existing arm64e region.
327  * They just get a private non-authenticating pager.
328  */
329 static inline bool
330 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
331 {
332 	if (exist == new) {
333 		return true;
334 	}
335 	if (cputype == CPU_TYPE_ARM64 &&
336 	    exist == CPU_SUBTYPE_ARM64E &&
337 	    new == CPU_SUBTYPE_ARM64_ALL) {
338 		return true;
339 	}
340 	return false;
341 }
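
/*
 * For example, per the check above, an existing arm64e region satisfies a new
 * CPU_SUBTYPE_ARM64_ALL request, but not the other way around:
 *
 *	match_subtype(CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64E,    CPU_SUBTYPE_ARM64_ALL) -> true
 *	match_subtype(CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL, CPU_SUBTYPE_ARM64E)    -> false
 */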
342 
343 
344 /*
345  * Look up the shared region for the desired environment.
346  * If none is found, create a new (empty) one.
347  * Grab an extra reference on the returned shared region, to make sure
348  * it doesn't get destroyed before the caller is done with it.  The caller
349  * is responsible for consuming that extra reference if necessary.
350  */
351 vm_shared_region_t
352 vm_shared_region_lookup(
353 	void            *root_dir,
354 	cpu_type_t      cputype,
355 	cpu_subtype_t   cpu_subtype,
356 	boolean_t       is_64bit,
357 	boolean_t       reslide,
358 	boolean_t       is_driverkit)
359 {
360 	vm_shared_region_t      shared_region;
361 	vm_shared_region_t      new_shared_region;
362 
363 	SHARED_REGION_TRACE_DEBUG(
364 		("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d)\n",
365 		(void *)VM_KERNEL_ADDRPERM(root_dir),
366 		cputype, cpu_subtype, is_64bit, reslide, is_driverkit));
367 
368 	shared_region = NULL;
369 	new_shared_region = NULL;
370 
371 	vm_shared_region_lock();
372 	for (;;) {
373 		queue_iterate(&vm_shared_region_queue,
374 		    shared_region,
375 		    vm_shared_region_t,
376 		    sr_q) {
377 			assert(shared_region->sr_ref_count > 0);
378 			if (shared_region->sr_cpu_type == cputype &&
379 			    match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
380 			    shared_region->sr_root_dir == root_dir &&
381 			    shared_region->sr_64bit == is_64bit &&
382 #if __has_feature(ptrauth_calls)
383 			    shared_region->sr_reslide == reslide &&
384 #endif /* __has_feature(ptrauth_calls) */
385 			    shared_region->sr_driverkit == is_driverkit &&
386 			    !shared_region->sr_stale) {
387 				/* found a match ! */
388 				vm_shared_region_reference_locked(shared_region);
389 				goto done;
390 			}
391 		}
392 		if (new_shared_region == NULL) {
393 			/* no match: create a new one */
394 			vm_shared_region_unlock();
395 			new_shared_region = vm_shared_region_create(root_dir,
396 			    cputype,
397 			    cpu_subtype,
398 			    is_64bit,
399 			    reslide,
400 			    is_driverkit);
401 			/* do the lookup again, in case we lost a race */
402 			vm_shared_region_lock();
403 			continue;
404 		}
405 		/* still no match: use our new one */
406 		shared_region = new_shared_region;
407 		new_shared_region = NULL;
408 		queue_enter(&vm_shared_region_queue,
409 		    shared_region,
410 		    vm_shared_region_t,
411 		    sr_q);
412 		vm_shared_region_count++;
413 		if (vm_shared_region_count > vm_shared_region_peak) {
414 			vm_shared_region_peak = vm_shared_region_count;
415 		}
416 		break;
417 	}
418 
419 done:
420 	vm_shared_region_unlock();
421 
422 	if (new_shared_region) {
423 		/*
424 		 * We lost a race with someone else to create a new shared
425 		 * region for that environment. Get rid of our unused one.
426 		 */
427 		assert(new_shared_region->sr_ref_count == 1);
428 		new_shared_region->sr_ref_count--;
429 		vm_shared_region_destroy(new_shared_region);
430 		new_shared_region = NULL;
431 	}
432 
433 	SHARED_REGION_TRACE_DEBUG(
434 		("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d) <- %p\n",
435 		(void *)VM_KERNEL_ADDRPERM(root_dir),
436 		cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
437 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
438 
439 	assert(shared_region->sr_ref_count > 0);
440 	return shared_region;
441 }
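
/*
 * Sketch of how exec-time setup is expected to pair this lookup (which returns
 * with an extra reference) with vm_shared_region_set(), which hands that
 * reference to the task; see vm_map_exec() for the actual call site.
 *
 *	vm_shared_region_t sr;
 *
 *	sr = vm_shared_region_lookup(root_dir, cputype, cpu_subtype,
 *	    is_64bit, reslide, is_driverkit);
 *	// the reference taken by the lookup now belongs to the task;
 *	// the task's previous region (if any) has one reference released
 *	vm_shared_region_set(task, sr);
 */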
442 
443 /*
444  * Take an extra reference on a shared region.
445  * The vm_shared_region_lock should already be held by the caller.
446  */
447 static void
448 vm_shared_region_reference_locked(
449 	vm_shared_region_t      shared_region)
450 {
451 	LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
452 
453 	SHARED_REGION_TRACE_DEBUG(
454 		("shared_region: -> reference_locked(%p)\n",
455 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
456 	assert(shared_region->sr_ref_count > 0);
457 	shared_region->sr_ref_count++;
458 	assert(shared_region->sr_ref_count != 0);
459 
460 	if (shared_region->sr_timer_call != NULL) {
461 		boolean_t cancelled;
462 
463 		/* cancel and free any pending timeout */
464 		cancelled = thread_call_cancel(shared_region->sr_timer_call);
465 		if (cancelled) {
466 			thread_call_free(shared_region->sr_timer_call);
467 			shared_region->sr_timer_call = NULL;
468 			/* release the reference held by the cancelled timer */
469 			shared_region->sr_ref_count--;
470 		} else {
471 			/* the timer will drop the reference and free itself */
472 		}
473 	}
474 
475 	SHARED_REGION_TRACE_DEBUG(
476 		("shared_region: reference_locked(%p) <- %d\n",
477 		(void *)VM_KERNEL_ADDRPERM(shared_region),
478 		shared_region->sr_ref_count));
479 }
480 
481 /*
482  * Take a reference on a shared region.
483  */
484 void
485 vm_shared_region_reference(vm_shared_region_t shared_region)
486 {
487 	SHARED_REGION_TRACE_DEBUG(
488 		("shared_region: -> reference(%p)\n",
489 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
490 
491 	vm_shared_region_lock();
492 	vm_shared_region_reference_locked(shared_region);
493 	vm_shared_region_unlock();
494 
495 	SHARED_REGION_TRACE_DEBUG(
496 		("shared_region: reference(%p) <- %d\n",
497 		(void *)VM_KERNEL_ADDRPERM(shared_region),
498 		shared_region->sr_ref_count));
499 }
500 
501 /*
502  * Release a reference on the shared region.
503  * Destroy it if there are no references left.
504  */
505 void
506 vm_shared_region_deallocate(
507 	vm_shared_region_t      shared_region)
508 {
509 	SHARED_REGION_TRACE_DEBUG(
510 		("shared_region: -> deallocate(%p)\n",
511 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
512 
513 	vm_shared_region_lock();
514 
515 	assert(shared_region->sr_ref_count > 0);
516 
517 	if (shared_region->sr_root_dir == NULL) {
518 		/*
519 		 * Local (i.e. based on the boot volume) shared regions
520 		 * can persist or not based on the "shared_region_persistence"
521 		 * sysctl.
522 		 * Make sure that this one complies.
523 		 *
524 		 * See comments in vm_shared_region_slide() for notes about
525 		 * shared regions we have slid (which are not torn down currently).
526 		 */
527 		if (shared_region_persistence &&
528 		    !shared_region->sr_persists) {
529 			/* make this one persistent */
530 			shared_region->sr_ref_count++;
531 			shared_region->sr_persists = TRUE;
532 		} else if (!shared_region_persistence &&
533 		    shared_region->sr_persists) {
534 			/* make this one no longer persistent */
535 			assert(shared_region->sr_ref_count > 1);
536 			shared_region->sr_ref_count--;
537 			shared_region->sr_persists = FALSE;
538 		}
539 	}
540 
541 	assert(shared_region->sr_ref_count > 0);
542 	shared_region->sr_ref_count--;
543 	SHARED_REGION_TRACE_DEBUG(
544 		("shared_region: deallocate(%p): ref now %d\n",
545 		(void *)VM_KERNEL_ADDRPERM(shared_region),
546 		shared_region->sr_ref_count));
547 
548 	if (shared_region->sr_ref_count == 0) {
549 		uint64_t deadline;
550 
551 		/*
552 		 * Even though a shared region is unused, delay a while before
553 		 * tearing it down, in case a new app launch can use it.
554 		 */
555 		if (shared_region->sr_timer_call == NULL &&
556 		    shared_region_destroy_delay != 0 &&
557 		    !shared_region->sr_stale) {
558 			/* hold one reference for the timer */
559 			assert(!shared_region->sr_mapping_in_progress);
560 			shared_region->sr_ref_count++;
561 
562 			/* set up the timer */
563 			shared_region->sr_timer_call = thread_call_allocate(
564 				(thread_call_func_t) vm_shared_region_timeout,
565 				(thread_call_param_t) shared_region);
566 
567 			/* schedule the timer */
568 			clock_interval_to_deadline(shared_region_destroy_delay,
569 			    NSEC_PER_SEC,
570 			    &deadline);
571 			thread_call_enter_delayed(shared_region->sr_timer_call,
572 			    deadline);
573 
574 			SHARED_REGION_TRACE_DEBUG(
575 				("shared_region: deallocate(%p): armed timer\n",
576 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
577 
578 			vm_shared_region_unlock();
579 		} else {
580 			/* timer expired: let go of this shared region */
581 
582 			/* Make sure there's no cached pointer to the region. */
583 			if (primary_system_shared_region == shared_region) {
584 				primary_system_shared_region = NULL;
585 			}
586 
587 			/*
588 			 * Remove it from the queue first, so no one can find
589 			 * it...
590 			 */
591 			queue_remove(&vm_shared_region_queue,
592 			    shared_region,
593 			    vm_shared_region_t,
594 			    sr_q);
595 			vm_shared_region_count--;
596 			vm_shared_region_unlock();
597 
598 			/* ... and destroy it */
599 			vm_shared_region_destroy(shared_region);
600 			shared_region = NULL;
601 		}
602 	} else {
603 		vm_shared_region_unlock();
604 	}
605 
606 	SHARED_REGION_TRACE_DEBUG(
607 		("shared_region: deallocate(%p) <-\n",
608 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
609 }
610 
611 void
612 vm_shared_region_timeout(
613 	thread_call_param_t     param0,
614 	__unused thread_call_param_t    param1)
615 {
616 	vm_shared_region_t      shared_region;
617 
618 	shared_region = (vm_shared_region_t) param0;
619 
620 	vm_shared_region_deallocate(shared_region);
621 }
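
/*
 * Note on the timer lifecycle: the thread call armed in
 * vm_shared_region_deallocate() holds its own reference on the region.  When
 * it fires, this routine drops that reference via vm_shared_region_deallocate();
 * since sr_timer_call is still set at that point, the "timer expired" branch
 * runs there and the region is finally torn down.  If a new user shows up
 * before the timer fires, vm_shared_region_reference_locked() cancels the
 * pending call and drops the timer's reference instead.
 */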
622 
623 
624 /*
625  * Create a new (empty) shared region for a new environment.
626  */
627 static vm_shared_region_t
628 vm_shared_region_create(
629 	void                    *root_dir,
630 	cpu_type_t              cputype,
631 	cpu_subtype_t           cpu_subtype,
632 	boolean_t               is_64bit,
633 #if !__has_feature(ptrauth_calls)
634 	__unused
635 #endif /* !__has_feature(ptrauth_calls) */
636 	boolean_t               reslide,
637 	boolean_t               is_driverkit)
638 {
639 	kern_return_t           kr;
640 	vm_named_entry_t        mem_entry;
641 	ipc_port_t              mem_entry_port;
642 	vm_shared_region_t      shared_region;
643 	vm_map_t                sub_map;
644 	mach_vm_offset_t        base_address, pmap_nesting_start;
645 	mach_vm_size_t          size, pmap_nesting_size;
646 
647 	SHARED_REGION_TRACE_INFO(
648 		("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d)\n",
649 		(void *)VM_KERNEL_ADDRPERM(root_dir),
650 		cputype, cpu_subtype, is_64bit, reslide, is_driverkit));
651 
652 	base_address = 0;
653 	size = 0;
654 	mem_entry = NULL;
655 	mem_entry_port = IPC_PORT_NULL;
656 	sub_map = VM_MAP_NULL;
657 
658 	/* create a new shared region structure... */
659 	shared_region = kalloc_type(struct vm_shared_region,
660 	    Z_WAITOK | Z_NOFAIL);
661 
662 	/* figure out the correct settings for the desired environment */
663 	if (is_64bit) {
664 		switch (cputype) {
665 #if defined(__arm64__)
666 		case CPU_TYPE_ARM64:
667 			base_address = SHARED_REGION_BASE_ARM64;
668 			size = SHARED_REGION_SIZE_ARM64;
669 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
670 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
671 			break;
672 #elif !defined(__arm__)
673 		case CPU_TYPE_I386:
674 			base_address = SHARED_REGION_BASE_X86_64;
675 			size = SHARED_REGION_SIZE_X86_64;
676 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
677 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
678 			break;
679 		case CPU_TYPE_POWERPC:
680 			base_address = SHARED_REGION_BASE_PPC64;
681 			size = SHARED_REGION_SIZE_PPC64;
682 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
683 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
684 			break;
685 #endif
686 		default:
687 			SHARED_REGION_TRACE_ERROR(
688 				("shared_region: create: unknown cpu type %d\n",
689 				cputype));
690 			kfree_type(struct vm_shared_region, shared_region);
691 			shared_region = NULL;
692 			goto done;
693 		}
694 	} else {
695 		switch (cputype) {
696 #if defined(__arm__) || defined(__arm64__)
697 		case CPU_TYPE_ARM:
698 			base_address = SHARED_REGION_BASE_ARM;
699 			size = SHARED_REGION_SIZE_ARM;
700 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
701 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
702 			break;
703 #else
704 		case CPU_TYPE_I386:
705 			base_address = SHARED_REGION_BASE_I386;
706 			size = SHARED_REGION_SIZE_I386;
707 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
708 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
709 			break;
710 		case CPU_TYPE_POWERPC:
711 			base_address = SHARED_REGION_BASE_PPC;
712 			size = SHARED_REGION_SIZE_PPC;
713 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
714 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
715 			break;
716 #endif
717 		default:
718 			SHARED_REGION_TRACE_ERROR(
719 				("shared_region: create: unknown cpu type %d\n",
720 				cputype));
721 			kfree_type(struct vm_shared_region, shared_region);
722 			shared_region = NULL;
723 			goto done;
724 		}
725 	}
726 
727 	/* create a memory entry structure and a Mach port handle */
728 	kr = mach_memory_entry_allocate(&mem_entry, &mem_entry_port);
729 	if (kr != KERN_SUCCESS) {
730 		kfree_type(struct vm_shared_region, shared_region);
731 		shared_region = NULL;
732 		SHARED_REGION_TRACE_ERROR(
733 			("shared_region: create: "
734 			"couldn't allocate mem_entry\n"));
735 		goto done;
736 	}
737 
738 #if     defined(__arm__) || defined(__arm64__)
739 	{
740 		struct pmap *pmap_nested;
741 		int pmap_flags = 0;
742 		pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
743 
744 
745 		pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
746 		if (pmap_nested != PMAP_NULL) {
747 			pmap_set_nested(pmap_nested);
748 			sub_map = vm_map_create(pmap_nested, 0, (vm_map_offset_t)size, TRUE);
749 			if (sub_map == VM_MAP_NULL) {
750 				pmap_destroy(pmap_nested);
751 			} else {
752 #if defined(__arm64__)
753 				if (is_64bit ||
754 				    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
755 					/* enforce 16KB alignment of VM map entries */
756 					vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
757 				}
758 
759 #elif (__ARM_ARCH_7K__ >= 2)
760 				/* enforce 16KB alignment for watch targets with new ABI */
761 				vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
762 #endif /* __arm64__ */
763 			}
764 		} else {
765 			sub_map = VM_MAP_NULL;
766 		}
767 	}
768 #else /* defined(__arm__) || defined(__arm64__) */
769 	{
770 		/* create a VM sub map and its pmap */
771 		pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
772 		if (pmap != NULL) {
773 			sub_map = vm_map_create(pmap, 0, size, TRUE);
774 			if (sub_map == VM_MAP_NULL) {
775 				pmap_destroy(pmap);
776 			}
777 		} else {
778 			sub_map = VM_MAP_NULL;
779 		}
780 	}
781 #endif /* defined(__arm__) || defined(__arm64__) */
782 	if (sub_map == VM_MAP_NULL) {
783 		ipc_port_release_send(mem_entry_port);
784 		kfree_type(struct vm_shared_region, shared_region);
785 		shared_region = NULL;
786 		SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
787 		goto done;
788 	}
789 
790 	/* shared regions should always enforce code-signing */
791 	vm_map_cs_enforcement_set(sub_map, true);
792 	assert(vm_map_cs_enforcement(sub_map));
793 	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
794 
795 	assert(!sub_map->disable_vmentry_reuse);
796 	sub_map->is_nested_map = TRUE;
797 
798 	/* make the memory entry point to the VM sub map */
799 	mem_entry->is_sub_map = TRUE;
800 	mem_entry->backing.map = sub_map;
801 	mem_entry->size = size;
802 	mem_entry->protection = VM_PROT_ALL;
803 
804 	/* make the shared region point at the memory entry */
805 	shared_region->sr_mem_entry = mem_entry_port;
806 
807 	/* fill in the shared region's environment and settings */
808 	shared_region->sr_base_address = base_address;
809 	shared_region->sr_size = size;
810 	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
811 	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
812 	shared_region->sr_cpu_type = cputype;
813 	shared_region->sr_cpu_subtype = cpu_subtype;
814 	shared_region->sr_64bit = (uint8_t)is_64bit;
815 	shared_region->sr_driverkit = (uint8_t)is_driverkit;
816 	shared_region->sr_root_dir = root_dir;
817 
818 	queue_init(&shared_region->sr_q);
819 	shared_region->sr_mapping_in_progress = FALSE;
820 	shared_region->sr_slide_in_progress = FALSE;
821 	shared_region->sr_persists = FALSE;
822 	shared_region->sr_stale = FALSE;
823 	shared_region->sr_timer_call = NULL;
824 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
825 
826 	/* grab a reference for the caller */
827 	shared_region->sr_ref_count = 1;
828 
829 	shared_region->sr_slide = 0; /* not slid yet */
830 
831 	/* Initialize UUID and other metadata */
832 	memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
833 	shared_region->sr_uuid_copied = FALSE;
834 	shared_region->sr_images_count = 0;
835 	shared_region->sr_images = NULL;
836 #if __has_feature(ptrauth_calls)
837 	shared_region->sr_reslide = reslide;
838 	shared_region->sr_num_auth_section = 0;
839 	for (uint_t i = 0; i < NUM_SR_AUTH_SECTIONS; ++i) {
840 		shared_region->sr_auth_section[i] = NULL;
841 	}
842 	shared_region->sr_num_auth_section = 0;
843 #endif /* __has_feature(ptrauth_calls) */
844 
845 done:
846 	if (shared_region) {
847 		SHARED_REGION_TRACE_INFO(
848 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
849 			"base=0x%llx,size=0x%llx) <- "
850 			"%p mem=(%p,%p) map=%p pmap=%p\n",
851 			(void *)VM_KERNEL_ADDRPERM(root_dir),
852 			cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
853 			(long long)base_address,
854 			(long long)size,
855 			(void *)VM_KERNEL_ADDRPERM(shared_region),
856 			(void *)VM_KERNEL_ADDRPERM(mem_entry_port),
857 			(void *)VM_KERNEL_ADDRPERM(mem_entry),
858 			(void *)VM_KERNEL_ADDRPERM(sub_map),
859 			(void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
860 	} else {
861 		SHARED_REGION_TRACE_INFO(
862 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
863 			"base=0x%llx,size=0x%llx) <- NULL",
864 			(void *)VM_KERNEL_ADDRPERM(root_dir),
865 			cputype, cpu_subtype, is_64bit, is_driverkit,
866 			(long long)base_address,
867 			(long long)size));
868 	}
869 	return shared_region;
870 }
871 
872 /*
873  * Destroy a now-unused shared region.
874  * The shared region is no longer in the queue and can not be looked up.
875  */
876 static void
877 vm_shared_region_destroy(
878 	vm_shared_region_t      shared_region)
879 {
880 	vm_named_entry_t        mem_entry;
881 	vm_map_t                map;
882 
883 	SHARED_REGION_TRACE_INFO(
884 		("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
885 		(void *)VM_KERNEL_ADDRPERM(shared_region),
886 		(void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
887 		shared_region->sr_cpu_type,
888 		shared_region->sr_cpu_subtype,
889 		shared_region->sr_64bit,
890 		shared_region->sr_driverkit));
891 
892 	assert(shared_region->sr_ref_count == 0);
893 	assert(!shared_region->sr_persists);
894 
895 	mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
896 	assert(mem_entry->is_sub_map);
897 	assert(!mem_entry->internal);
898 	assert(!mem_entry->is_copy);
899 	map = mem_entry->backing.map;
900 
901 	/*
902 	 * Clean up the pmap first.  The virtual addresses that were
903 	 * entered in this possibly "nested" pmap may have different values
904 	 * than the VM map's min and max offsets, if the VM sub map was
905 	 * mapped at a non-zero offset in the processes' main VM maps, which
906 	 * is usually the case, so the clean-up we do in vm_map_destroy() would
907 	 * not be enough.
908 	 */
909 	if (map->pmap) {
910 		pmap_remove(map->pmap,
911 		    (vm_map_offset_t)shared_region->sr_base_address,
912 		    (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
913 	}
914 
915 	/*
916 	 * Release our (one and only) handle on the memory entry.
917 	 * This will generate a no-senders notification, which will be processed
918 	 * by ipc_kobject_notify_no_senders(), which will release the one and only
919 	 * reference on the memory entry and cause it to be destroyed, along
920 	 * with the VM sub map and its pmap.
921 	 */
922 	mach_memory_entry_port_release(shared_region->sr_mem_entry);
923 	mem_entry = NULL;
924 	shared_region->sr_mem_entry = IPC_PORT_NULL;
925 
926 	if (shared_region->sr_timer_call) {
927 		thread_call_free(shared_region->sr_timer_call);
928 	}
929 
930 #if __has_feature(ptrauth_calls)
931 	/*
932 	 * Free the cached copies of slide_info for the AUTH regions.
933 	 */
934 	for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
935 		vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
936 		if (si != NULL) {
937 			vm_object_deallocate(si->si_slide_object);
938 			kfree_data(si->si_slide_info_entry,
939 			    si->si_slide_info_size);
940 			kfree_type(struct vm_shared_region_slide_info, si);
941 			shared_region->sr_auth_section[i] = NULL;
942 		}
943 	}
944 	shared_region->sr_num_auth_section = 0;
945 #endif /* __has_feature(ptrauth_calls) */
946 
947 	/* release the shared region structure... */
948 	kfree_type(struct vm_shared_region, shared_region);
949 
950 	SHARED_REGION_TRACE_DEBUG(
951 		("shared_region: destroy(%p) <-\n",
952 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
953 	shared_region = NULL;
954 }
955 
956 /*
957  * Gets the address of the first (in time) mapping in the shared region.
958  * If used during initial task setup by dyld, task should be non-NULL.
959  */
960 kern_return_t
961 vm_shared_region_start_address(
962 	vm_shared_region_t      shared_region,
963 	mach_vm_offset_t        *start_address,
964 	task_t                  task)
965 {
966 	kern_return_t           kr;
967 	mach_vm_offset_t        sr_base_address;
968 	mach_vm_offset_t        sr_first_mapping;
969 
970 	SHARED_REGION_TRACE_DEBUG(
971 		("shared_region: -> start_address(%p)\n",
972 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
973 
974 	vm_shared_region_lock();
975 
976 	/*
977 	 * Wait if there's another thread establishing a mapping
978 	 * in this shared region right when we're looking at it.
979 	 * We want a consistent view of the map...
980 	 */
981 	while (shared_region->sr_mapping_in_progress) {
982 		/* wait for our turn... */
983 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
984 		    THREAD_UNINT);
985 	}
986 	assert(!shared_region->sr_mapping_in_progress);
987 	assert(shared_region->sr_ref_count > 0);
988 
989 	sr_base_address = shared_region->sr_base_address;
990 	sr_first_mapping = shared_region->sr_first_mapping;
991 
992 	if (sr_first_mapping == (mach_vm_offset_t) -1) {
993 		/* shared region is empty */
994 		kr = KERN_INVALID_ADDRESS;
995 	} else {
996 		kr = KERN_SUCCESS;
997 		*start_address = sr_base_address + sr_first_mapping;
998 	}
999 
1000 
1001 	uint32_t slide = shared_region->sr_slide;
1002 
1003 	vm_shared_region_unlock();
1004 
1005 	/*
1006 	 * Cache shared region info in the task for telemetry gathering, if we're
1007 	 * passed the task. No task lock is needed here as we're still in initial task setup.
1008 	 */
1009 	if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
1010 		uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1011 		if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
1012 		    (char *)&task->task_shared_region_uuid,
1013 		    sizeof(task->task_shared_region_uuid)) == 0) {
1014 			task->task_shared_region_slide = slide;
1015 		}
1016 	}
1017 
1018 	SHARED_REGION_TRACE_DEBUG(
1019 		("shared_region: start_address(%p) <- 0x%llx\n",
1020 		(void *)VM_KERNEL_ADDRPERM(shared_region),
1021 		(long long)shared_region->sr_base_address));
1022 
1023 	return kr;
1024 }
1025 
1026 /*
1027  * Look up a pre-existing mapping in the shared region, for replacement.
1028  * Takes an extra object reference if found.
1029  */
1030 static kern_return_t
1031 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1032 {
1033 	vm_map_entry_t found;
1034 
1035 	/* find the shared region's map entry to slide */
1036 	vm_map_lock_read(map);
1037 	if (!vm_map_lookup_entry(map, addr, &found)) {
1038 		/* no mapping there */
1039 		vm_map_unlock(map);
1040 		return KERN_INVALID_ARGUMENT;
1041 	}
1042 
1043 	*entry = *found;
1044 	/* extra ref to keep object alive while map is unlocked */
1045 	vm_object_reference(VME_OBJECT(found));
1046 	vm_map_unlock_read(map);
1047 	return KERN_SUCCESS;
1048 }
1049 
1050 #if __has_feature(ptrauth_calls)
1051 
1052 /*
1053  * Determine if this task is actually using pointer signing.
1054  */
1055 static boolean_t
1056 task_sign_pointers(task_t task)
1057 {
1058 	if (task->map &&
1059 	    task->map->pmap &&
1060 	    !task->map->pmap->disable_jop) {
1061 		return TRUE;
1062 	}
1063 	return FALSE;
1064 }
1065 
1066 /*
1067  * If the shared region contains mappings that are authenticated, then
1068  * remap them into the task private map.
1069  *
1070  * Failures are possible in this routine when jetsam kills a process
1071  * just as dyld is trying to set it up. The vm_map and task shared region
1072  * info get torn down w/o waiting for this thread to finish up.
1073  */
1074 __attribute__((noinline))
1075 kern_return_t
1076 vm_shared_region_auth_remap(vm_shared_region_t sr)
1077 {
1078 	memory_object_t               sr_pager = MEMORY_OBJECT_NULL;
1079 	task_t                        task = current_task();
1080 	vm_shared_region_slide_info_t si;
1081 	uint_t                        i;
1082 	vm_object_t                   object;
1083 	vm_map_t                      sr_map;
1084 	struct vm_map_entry           tmp_entry_store = {0};
1085 	vm_map_entry_t                tmp_entry = NULL;
1086 	int                           vm_flags;
1087 	vm_map_kernel_flags_t         vmk_flags;
1088 	vm_map_offset_t               map_addr;
1089 	kern_return_t                 kr = KERN_SUCCESS;
1090 	boolean_t                     use_ptr_auth = task_sign_pointers(task);
1091 
1092 	/*
1093 	 * Don't do this more than once and avoid any race conditions in finishing it.
1094 	 */
1095 	vm_shared_region_lock();
1096 	while (sr->sr_mapping_in_progress) {
1097 		/* wait for our turn... */
1098 		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
1099 	}
1100 	assert(!sr->sr_mapping_in_progress);
1101 	assert(sr->sr_ref_count > 0);
1102 
1103 	/* Just return if already done. */
1104 	if (task->shared_region_auth_remapped) {
1105 		vm_shared_region_unlock();
1106 		return KERN_SUCCESS;
1107 	}
1108 
1109 	/* let others know to wait while we're working in this shared region */
1110 	sr->sr_mapping_in_progress = TRUE;
1111 	vm_shared_region_unlock();
1112 
1113 	/*
1114 	 * Remap any sections with pointer authentications into the private map.
1115 	 */
1116 	for (i = 0; i < sr->sr_num_auth_section; ++i) {
1117 		si = sr->sr_auth_section[i];
1118 		assert(si != NULL);
1119 		assert(si->si_ptrauth);
1120 
1121 		/*
1122 		 * We have a mapping that needs to be private.
1123 		 * Look for an existing slid mapping's pager with matching
1124 		 * object, offset, slide info and shared_region_id to reuse.
1125 		 */
1126 		object = si->si_slide_object;
1127 		sr_pager = shared_region_pager_match(object, si->si_start, si,
1128 		    use_ptr_auth ? task->jop_pid : 0);
1129 		if (sr_pager == MEMORY_OBJECT_NULL) {
1130 			kr = KERN_FAILURE;
1131 			goto done;
1132 		}
1133 
1134 		/*
1135 		 * verify matching jop_pid for this task and this pager
1136 		 */
1137 		if (use_ptr_auth) {
1138 			shared_region_pager_match_task_key(sr_pager, task);
1139 		}
1140 
1141 		sr_map = vm_shared_region_vm_map(sr);
1142 		tmp_entry = NULL;
1143 
1144 		kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1145 		if (kr != KERN_SUCCESS) {
1146 			goto done;
1147 		}
1148 		tmp_entry = &tmp_entry_store;
1149 
1150 		/*
1151 		 * Check that the object exactly covers the region to slide.
1152 		 */
1153 		if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1154 			kr = KERN_FAILURE;
1155 			goto done;
1156 		}
1157 
1158 		/*
1159 		 * map the pager over the portion of the mapping that needs sliding
1160 		 */
1161 		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
1162 		vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1163 		vmk_flags.vmkf_overwrite_immutable = TRUE;
1164 		map_addr = si->si_slid_address;
1165 		kr = vm_map_enter_mem_object(task->map,
1166 		    &map_addr,
1167 		    si->si_end - si->si_start,
1168 		    (mach_vm_offset_t) 0,
1169 		    vm_flags,
1170 		    vmk_flags,
1171 		    VM_KERN_MEMORY_NONE,
1172 		    (ipc_port_t)(uintptr_t) sr_pager,
1173 		    0,
1174 		    TRUE,
1175 		    tmp_entry->protection,
1176 		    tmp_entry->max_protection,
1177 		    tmp_entry->inheritance);
1178 		memory_object_deallocate(sr_pager);
1179 		sr_pager = MEMORY_OBJECT_NULL;
1180 		if (kr != KERN_SUCCESS) {
1181 			goto done;
1182 		}
1183 		assertf(map_addr == si->si_slid_address,
1184 		    "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1185 		    (uint64_t)map_addr,
1186 		    (uint64_t)si->si_slid_address,
1187 		    tmp_entry);
1188 
1189 		/* Drop the ref count grabbed by find_mapping_to_slide */
1190 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1191 		tmp_entry = NULL;
1192 	}
1193 
1194 done:
1195 	if (tmp_entry) {
1196 		/* Drop the ref count grabbed by find_mapping_to_slide */
1197 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1198 		tmp_entry = NULL;
1199 	}
1200 
1201 	/*
1202 	 * Drop any extra reference to the pager in case we're quitting due to an error above.
1203 	 */
1204 	if (sr_pager != MEMORY_OBJECT_NULL) {
1205 		memory_object_deallocate(sr_pager);
1206 	}
1207 
1208 	/*
1209 	 * Mark the region as having its auth sections remapped.
1210 	 */
1211 	vm_shared_region_lock();
1212 	task->shared_region_auth_remapped = TRUE;
1213 	sr->sr_mapping_in_progress = FALSE;
1214 	thread_wakeup((event_t)&sr->sr_mapping_in_progress);
1215 	vm_shared_region_unlock();
1216 	return kr;
1217 }
1218 #endif /* __has_feature(ptrauth_calls) */
1219 
1220 void
1221 vm_shared_region_undo_mappings(
1222 	vm_map_t                 sr_map,
1223 	mach_vm_offset_t         sr_base_address,
1224 	struct _sr_file_mappings *srf_mappings,
1225 	struct _sr_file_mappings *srf_mappings_current,
1226 	unsigned int             srf_current_mappings_count)
1227 {
1228 	unsigned int             j = 0;
1229 	vm_shared_region_t       shared_region = NULL;
1230 	boolean_t                reset_shared_region_state = FALSE;
1231 	struct _sr_file_mappings *srfmp;
1232 	unsigned int             mappings_count;
1233 	struct shared_file_mapping_slide_np *mappings;
1234 
1235 	shared_region = vm_shared_region_get(current_task());
1236 	if (shared_region == NULL) {
1237 		printf("Failed to undo mappings because of NULL shared region.\n");
1238 		return;
1239 	}
1240 
1241 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
1242 
1243 	if (sr_map == NULL) {
1244 		ipc_port_t              sr_handle;
1245 		vm_named_entry_t        sr_mem_entry;
1246 
1247 		vm_shared_region_lock();
1248 		assert(shared_region->sr_ref_count > 0);
1249 
1250 		while (shared_region->sr_mapping_in_progress) {
1251 			/* wait for our turn... */
1252 			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1253 			    THREAD_UNINT);
1254 		}
1255 		assert(!shared_region->sr_mapping_in_progress);
1256 		assert(shared_region->sr_ref_count > 0);
1257 		/* let others know we're working in this shared region */
1258 		shared_region->sr_mapping_in_progress = TRUE;
1259 
1260 		vm_shared_region_unlock();
1261 
1262 		reset_shared_region_state = TRUE;
1263 
1264 		/* no need to lock because this data is never modified... */
1265 		sr_handle = shared_region->sr_mem_entry;
1266 		sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1267 		sr_map = sr_mem_entry->backing.map;
1268 		sr_base_address = shared_region->sr_base_address;
1269 	}
1270 	/*
1271 	 * Undo the mappings we've established so far.
1272 	 */
1273 	for (srfmp = &srf_mappings[0];
1274 	    srfmp <= srf_mappings_current;
1275 	    srfmp++) {
1276 		mappings = srfmp->mappings;
1277 		mappings_count = srfmp->mappings_count;
1278 		if (srfmp == srf_mappings_current) {
1279 			mappings_count = srf_current_mappings_count;
1280 		}
1281 
1282 		for (j = 0; j < mappings_count; j++) {
1283 			kern_return_t kr2;
1284 
1285 			if (mappings[j].sms_size == 0) {
1286 				/*
1287 				 * We didn't establish this
1288 				 * mapping, so nothing to undo.
1289 				 */
1290 				continue;
1291 			}
1292 			SHARED_REGION_TRACE_INFO(
1293 				("shared_region: mapping[%d]: "
1294 				"address:0x%016llx "
1295 				"size:0x%016llx "
1296 				"offset:0x%016llx "
1297 				"maxprot:0x%x prot:0x%x: "
1298 				"undoing...\n",
1299 				j,
1300 				(long long)mappings[j].sms_address,
1301 				(long long)mappings[j].sms_size,
1302 				(long long)mappings[j].sms_file_offset,
1303 				mappings[j].sms_max_prot,
1304 				mappings[j].sms_init_prot));
1305 			kr2 = mach_vm_deallocate(
1306 				sr_map,
1307 				(mappings[j].sms_address -
1308 				sr_base_address),
1309 				mappings[j].sms_size);
1310 			assert(kr2 == KERN_SUCCESS);
1311 		}
1312 	}
1313 
1314 	if (reset_shared_region_state) {
1315 		vm_shared_region_lock();
1316 		assert(shared_region->sr_ref_count > 0);
1317 		assert(shared_region->sr_mapping_in_progress);
1318 		/* we're done working on that shared region */
1319 		shared_region->sr_mapping_in_progress = FALSE;
1320 		thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1321 		vm_shared_region_unlock();
1322 		reset_shared_region_state = FALSE;
1323 	}
1324 
1325 	vm_shared_region_deallocate(shared_region);
1326 }
1327 
1328 /*
1329  * First part of vm_shared_region_map_file(). Split out to
1330  * avoid kernel stack overflow.
1331  */
1332 __attribute__((noinline))
1333 static kern_return_t
1334 vm_shared_region_map_file_setup(
1335 	vm_shared_region_t              shared_region,
1336 	int                             sr_file_mappings_count,
1337 	struct _sr_file_mappings        *sr_file_mappings,
1338 	unsigned int                    *mappings_to_slide_cnt,
1339 	struct shared_file_mapping_slide_np **mappings_to_slide,
1340 	mach_vm_offset_t                *slid_mappings,
1341 	memory_object_control_t         *slid_file_controls,
1342 	mach_vm_offset_t                *sfm_min_address,
1343 	mach_vm_offset_t                *sfm_max_address,
1344 	vm_map_t                        *sr_map_ptr,
1345 	vm_map_offset_t                 *lowest_unnestable_addr_ptr,
1346 	unsigned int                    vmsr_num_slides)
1347 {
1348 	kern_return_t           kr = KERN_SUCCESS;
1349 	memory_object_control_t file_control;
1350 	vm_object_t             file_object;
1351 	ipc_port_t              sr_handle;
1352 	vm_named_entry_t        sr_mem_entry;
1353 	vm_map_t                sr_map;
1354 	mach_vm_offset_t        sr_base_address;
1355 	unsigned int            i = 0;
1356 	mach_port_t             map_port;
1357 	vm_map_offset_t         target_address;
1358 	vm_object_t             object;
1359 	vm_object_size_t        obj_size;
1360 	vm_map_offset_t         lowest_unnestable_addr = 0;
1361 	vm_map_kernel_flags_t   vmk_flags;
1362 	mach_vm_offset_t        sfm_end;
1363 	uint32_t                mappings_count;
1364 	struct shared_file_mapping_slide_np *mappings;
1365 	struct _sr_file_mappings *srfmp;
1366 
1367 	vm_shared_region_lock();
1368 	assert(shared_region->sr_ref_count > 0);
1369 
1370 	/*
1371 	 * Make sure we handle only one mapping at a time in a given
1372 	 * shared region, to avoid race conditions.  This should not
1373 	 * happen frequently...
1374 	 */
1375 	while (shared_region->sr_mapping_in_progress) {
1376 		/* wait for our turn... */
1377 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1378 		    THREAD_UNINT);
1379 	}
1380 	assert(!shared_region->sr_mapping_in_progress);
1381 	assert(shared_region->sr_ref_count > 0);
1382 
1383 
1384 	/* let others know we're working in this shared region */
1385 	shared_region->sr_mapping_in_progress = TRUE;
1386 
1387 	/*
1388 	 * Did someone race in and map this shared region already?
1389 	 */
1390 	if (shared_region->sr_first_mapping != -1) {
1391 		vm_shared_region_unlock();
1392 #if DEVELOPMENT || DEBUG
1393 		printf("shared_region: caught race in map and slide\n");
1394 #endif /* DEVELOPMENT || DEBUG */
1395 		return KERN_FAILURE;
1396 	}
1397 
1398 	vm_shared_region_unlock();
1399 
1400 	/* no need to lock because this data is never modified... */
1401 	sr_handle = shared_region->sr_mem_entry;
1402 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1403 	sr_map = sr_mem_entry->backing.map;
1404 	sr_base_address = shared_region->sr_base_address;
1405 
1406 	SHARED_REGION_TRACE_DEBUG(
1407 		("shared_region: -> map(%p)\n",
1408 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1409 
1410 	mappings_count = 0;
1411 	mappings = NULL;
1412 	srfmp = NULL;
1413 
1414 	/* process all the files to be mapped */
1415 	for (srfmp = &sr_file_mappings[0];
1416 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
1417 	    srfmp++) {
1418 		mappings_count = srfmp->mappings_count;
1419 		mappings = srfmp->mappings;
1420 		file_control = srfmp->file_control;
1421 
1422 		if (mappings_count == 0) {
1423 			/* no mappings here... */
1424 			continue;
1425 		}
1426 
1427 		/*
1428 		 * The code below can only correctly "slide" (perform relocations) for one
1429 		 * value of the slide amount. So if a file has a non-zero slide, it has to
1430 		 * match any previous value. A zero slide value is ok for things that are
1431 		 * just directly mapped.
1432 		 */
1433 		if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1434 			shared_region->sr_slide = srfmp->slide;
1435 		} else if (shared_region->sr_slide != 0 &&
1436 		    srfmp->slide != 0 &&
1437 		    shared_region->sr_slide != srfmp->slide) {
1438 			SHARED_REGION_TRACE_ERROR(
1439 			("shared_region: more than one non-zero slide value: "
1440 			"slide 1:0x%x slide 2:0x%x\n",
1441 				shared_region->sr_slide, srfmp->slide));
1442 			kr = KERN_INVALID_ARGUMENT;
1443 			break;
1444 		}
1445 
1446 #if __arm64__
1447 		if ((shared_region->sr_64bit ||
1448 		    page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
1449 		    ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
1450 			printf("FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
1451 			    __FUNCTION__, srfmp->slide);
1452 			kr = KERN_INVALID_ARGUMENT;
1453 			break;
1454 		}
1455 #endif /* __arm64__ */
1456 
1457 		/* get the VM object associated with the file to be mapped */
1458 		file_object = memory_object_control_to_vm_object(file_control);
1459 		assert(file_object);
1460 
1461 #if CONFIG_SECLUDED_MEMORY
1462 		/*
1463 		 * Camera will need the shared cache, so don't put the pages
1464 		 * on the secluded queue, assume that's the primary region.
1465 		 * Also keep DEXT shared cache pages off secluded.
1466 		 */
1467 		if (primary_system_shared_region == NULL ||
1468 		    primary_system_shared_region == shared_region ||
1469 		    shared_region->sr_driverkit) {
1470 			memory_object_mark_eligible_for_secluded(file_control, FALSE);
1471 		}
1472 #endif /* CONFIG_SECLUDED_MEMORY */
1473 
1474 		/* establish the mappings for that file */
1475 		for (i = 0; i < mappings_count; i++) {
1476 			SHARED_REGION_TRACE_INFO(
1477 				("shared_region: mapping[%d]: "
1478 				"address:0x%016llx size:0x%016llx offset:0x%016llx "
1479 				"maxprot:0x%x prot:0x%x\n",
1480 				i,
1481 				(long long)mappings[i].sms_address,
1482 				(long long)mappings[i].sms_size,
1483 				(long long)mappings[i].sms_file_offset,
1484 				mappings[i].sms_max_prot,
1485 				mappings[i].sms_init_prot));
1486 
1487 			if (mappings[i].sms_address < *sfm_min_address) {
1488 				*sfm_min_address = mappings[i].sms_address;
1489 			}
1490 
1491 			if (os_add_overflow(mappings[i].sms_address,
1492 			    mappings[i].sms_size,
1493 			    &sfm_end) ||
1494 			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1495 			    mappings[i].sms_address)) {
1496 				/* overflow */
1497 				kr = KERN_INVALID_ARGUMENT;
1498 				break;
1499 			}
1500 			if (sfm_end > *sfm_max_address) {
1501 				*sfm_max_address = sfm_end;
1502 			}
1503 
1504 			if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1505 				/* zero-filled memory */
1506 				map_port = MACH_PORT_NULL;
1507 			} else {
1508 				/* file-backed memory */
1509 				__IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1510 			}
1511 
1512 			/*
1513 			 * Remember which mappings need sliding.
1514 			 */
1515 			if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1516 				if (*mappings_to_slide_cnt == vmsr_num_slides) {
1517 					SHARED_REGION_TRACE_INFO(
1518 						("shared_region: mapping[%d]: "
1519 						"address:0x%016llx size:0x%016llx "
1520 						"offset:0x%016llx "
1521 						"maxprot:0x%x prot:0x%x "
1522 						"too many mappings to slide...\n",
1523 						i,
1524 						(long long)mappings[i].sms_address,
1525 						(long long)mappings[i].sms_size,
1526 						(long long)mappings[i].sms_file_offset,
1527 						mappings[i].sms_max_prot,
1528 						mappings[i].sms_init_prot));
1529 				} else {
1530 					mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1531 					*mappings_to_slide_cnt += 1;
1532 				}
1533 			}
1534 
1535 			/* mapping's address is relative to the shared region base */
1536 			target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);
1537 
1538 			vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1539 			vmk_flags.vmkf_already = TRUE;
1540 			/* no copy-on-read for mapped binaries */
1541 			vmk_flags.vmkf_no_copy_on_read = 1;
1542 
1543 
1544 			/* establish that mapping, OK if it's "already" there */
1545 			if (map_port == MACH_PORT_NULL) {
1546 				/*
1547 				 * We want to map some anonymous memory in a shared region.
1548 				 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1549 				 */
1550 				obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1551 				object = vm_object_allocate(obj_size);
1552 				if (object == VM_OBJECT_NULL) {
1553 					kr = KERN_RESOURCE_SHORTAGE;
1554 				} else {
1555 					kr = vm_map_enter(
1556 						sr_map,
1557 						&target_address,
1558 						vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1559 						0,
1560 						VM_FLAGS_FIXED,
1561 						vmk_flags,
1562 						VM_KERN_MEMORY_NONE,
1563 						object,
1564 						0,
1565 						TRUE,
1566 						mappings[i].sms_init_prot & VM_PROT_ALL,
1567 						mappings[i].sms_max_prot & VM_PROT_ALL,
1568 						VM_INHERIT_DEFAULT);
1569 				}
1570 			} else {
1571 				object = VM_OBJECT_NULL; /* no anonymous memory here */
1572 				kr = vm_map_enter_mem_object(
1573 					sr_map,
1574 					&target_address,
1575 					vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1576 					0,
1577 					VM_FLAGS_FIXED,
1578 					vmk_flags,
1579 					VM_KERN_MEMORY_NONE,
1580 					map_port,
1581 					mappings[i].sms_file_offset,
1582 					TRUE,
1583 					mappings[i].sms_init_prot & VM_PROT_ALL,
1584 					mappings[i].sms_max_prot & VM_PROT_ALL,
1585 					VM_INHERIT_DEFAULT);
1586 			}
1587 
1588 			if (kr == KERN_SUCCESS) {
1589 				/*
1590 				 * Record the first successful mapping(s) in the shared
1591 				 * region by file. We're protected by "sr_mapping_in_progress"
1592 				 * here, so no need to lock "shared_region".
1593 				 *
1594 				 * Note that if we have an AOT shared cache (ARM) for a
1595 				 * translated task, then it's always the first file.
1596 				 * The original "native" (i.e. x86) shared cache is the
1597 				 * second file.
1598 				 */
1599 
1600 				if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1601 					shared_region->sr_first_mapping = target_address;
1602 				}
1603 
1604 				if (*mappings_to_slide_cnt > 0 &&
1605 				    mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1606 					slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1607 					slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1608 				}
1609 
1610 				/*
1611 				 * Record the lowest writable address in this
1612 				 * sub map, to log any unexpected unnesting below
1613 				 * that address (see log_unnest_badness()).
1614 				 */
1615 				if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1616 				    sr_map->is_nested_map &&
1617 				    (lowest_unnestable_addr == 0 ||
1618 				    (target_address < lowest_unnestable_addr))) {
1619 					lowest_unnestable_addr = target_address;
1620 				}
1621 			} else {
1622 				if (map_port == MACH_PORT_NULL) {
1623 					/*
1624 					 * Get rid of the VM object we just created
1625 					 * but failed to map.
1626 					 */
1627 					vm_object_deallocate(object);
1628 					object = VM_OBJECT_NULL;
1629 				}
1630 				if (kr == KERN_MEMORY_PRESENT) {
1631 					/*
1632 					 * This exact mapping was already there:
1633 					 * that's fine.
1634 					 */
1635 					SHARED_REGION_TRACE_INFO(
1636 						("shared_region: mapping[%d]: "
1637 						"address:0x%016llx size:0x%016llx "
1638 						"offset:0x%016llx "
1639 						"maxprot:0x%x prot:0x%x "
1640 						"already mapped...\n",
1641 						i,
1642 						(long long)mappings[i].sms_address,
1643 						(long long)mappings[i].sms_size,
1644 						(long long)mappings[i].sms_file_offset,
1645 						mappings[i].sms_max_prot,
1646 						mappings[i].sms_init_prot));
1647 					/*
1648 					 * We didn't establish this mapping ourselves;
1649 					 * let's reset its size, so that we do not
1650 					 * attempt to undo it if an error occurs later.
1651 					 */
1652 					mappings[i].sms_size = 0;
1653 					kr = KERN_SUCCESS;
1654 				} else {
1655 					break;
1656 				}
1657 			}
1658 		}
1659 
1660 		if (kr != KERN_SUCCESS) {
1661 			break;
1662 		}
1663 	}
1664 
1665 	if (kr != KERN_SUCCESS) {
1666 		/* the last mapping we tried (mappings[i]) failed ! */
1667 		assert(i < mappings_count);
1668 		SHARED_REGION_TRACE_ERROR(
1669 			("shared_region: mapping[%d]: "
1670 			"address:0x%016llx size:0x%016llx "
1671 			"offset:0x%016llx "
1672 			"maxprot:0x%x prot:0x%x failed 0x%x\n",
1673 			i,
1674 			(long long)mappings[i].sms_address,
1675 			(long long)mappings[i].sms_size,
1676 			(long long)mappings[i].sms_file_offset,
1677 			mappings[i].sms_max_prot,
1678 			mappings[i].sms_init_prot,
1679 			kr));
1680 
1681 		/*
1682 		 * We are still holding sr_mapping_in_progress == TRUE here, so
1683 		 * sr_map must not be NULL: if vm_shared_region_undo_mappings()
1684 		 * had to look up the map itself, it would block waiting for
1685 		 * sr_mapping_in_progress to become FALSE and never make progress.
1686 		 */
1687 		assert(sr_map != NULL);
1688 		/* undo all the previous mappings */
1689 		vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
1690 		return kr;
1691 	}
1692 
1693 	*lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1694 	*sr_map_ptr = sr_map;
1695 	return KERN_SUCCESS;
1696 }
1697 
1698 /* forward declaration */
1699 __attribute__((noinline))
1700 static void
1701 vm_shared_region_map_file_final(
1702 	vm_shared_region_t shared_region,
1703 	vm_map_t           sr_map,
1704 	mach_vm_offset_t   sfm_min_address,
1705 	mach_vm_offset_t   sfm_max_address);
1706 
1707 /*
1708  * Establish some mappings of a file in the shared region.
1709  * This is used by "dyld" via the shared_region_map_np() system call
1710  * to populate the shared region with the appropriate shared cache.
1711  *
1712  * One could also call it several times to incrementally load several
1713  * libraries, as long as they do not overlap.
1714  * It will return KERN_SUCCESS if the mappings were successfully established
1715  * or if they were already established identically by another process.
1716  */
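/*
 * For illustration, roughly how a caller could describe one read-only,
 * file-backed mapping (every numeric value below is a made-up example,
 * not taken from any real shared cache):
 *
 *	struct shared_file_mapping_slide_np m = {
 *		.sms_address     = 0x180000000ULL,
 *		.sms_size        = 0x20000000ULL,
 *		.sms_file_offset = 0,
 *		.sms_slide_start = 0,
 *		.sms_slide_size  = 0,
 *		.sms_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE,
 *		.sms_init_prot   = VM_PROT_READ | VM_PROT_EXECUTE,
 *	};
 *
 * sms_address is an address within the shared region; the region's base
 * address is subtracted before the entry is mapped.  A writable DATA
 * mapping would also set VM_PROT_SLIDE in sms_max_prot and supply
 * sms_slide_start/sms_slide_size so the slide loop below can rebase it.
 */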
1717 __attribute__((noinline))
1718 kern_return_t
1719 vm_shared_region_map_file(
1720 	vm_shared_region_t       shared_region,
1721 	int                      sr_file_mappings_count,
1722 	struct _sr_file_mappings *sr_file_mappings)
1723 {
1724 	kern_return_t           kr = KERN_SUCCESS;
1725 	unsigned int            i;
1726 	unsigned int            mappings_to_slide_cnt = 0;
1727 	mach_vm_offset_t        sfm_min_address = (mach_vm_offset_t)-1;
1728 	mach_vm_offset_t        sfm_max_address = 0;
1729 	vm_map_t                sr_map = NULL;
1730 	vm_map_offset_t         lowest_unnestable_addr = 0;
1731 	unsigned int            vmsr_num_slides = 0;
1732 	mach_vm_offset_t        *slid_mappings = NULL;                  /* [0..vmsr_num_slides] */
1733 	memory_object_control_t *slid_file_controls = NULL;             /* [0..vmsr_num_slides] */
1734 	struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
1735 	struct _sr_file_mappings *srfmp;
1736 
1737 	/*
1738 	 * Figure out how many of the mappings have slides.
1739 	 */
1740 	for (srfmp = &sr_file_mappings[0];
1741 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
1742 	    srfmp++) {
1743 		for (i = 0; i < srfmp->mappings_count; ++i) {
1744 			if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1745 				++vmsr_num_slides;
1746 			}
1747 		}
1748 	}
1749 
1750 	/* Allocate per slide data structures */
1751 	if (vmsr_num_slides > 0) {
1752 		slid_mappings =
1753 		    kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
1754 		slid_file_controls =
1755 		    kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
1756 		mappings_to_slide =
1757 		    kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
1758 	}
1759 
1760 	kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
1761 	    &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
1762 	    &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
1763 	if (kr != KERN_SUCCESS) {
1764 		vm_shared_region_lock();
1765 		goto done;
1766 	}
1767 	assert(vmsr_num_slides == mappings_to_slide_cnt);
1768 
1769 	/*
1770 	 * The call above installed direct mappings to the shared cache file.
1771 	 * Now we go back and overwrite the mappings that need relocation
1772 	 * with a special shared region pager.
1773 	 */
1774 	for (i = 0; i < mappings_to_slide_cnt; ++i) {
1775 		kr = vm_shared_region_slide(shared_region->sr_slide,
1776 		    mappings_to_slide[i]->sms_file_offset,
1777 		    mappings_to_slide[i]->sms_size,
1778 		    mappings_to_slide[i]->sms_slide_start,
1779 		    mappings_to_slide[i]->sms_slide_size,
1780 		    slid_mappings[i],
1781 		    slid_file_controls[i],
1782 		    mappings_to_slide[i]->sms_max_prot);
1783 		if (kr != KERN_SUCCESS) {
1784 			SHARED_REGION_TRACE_ERROR(
1785 				("shared_region: region_slide("
1786 				"slide:0x%x start:0x%016llx "
1787 				"size:0x%016llx) failed 0x%x\n",
1788 				shared_region->sr_slide,
1789 				(long long)mappings_to_slide[i]->sms_slide_start,
1790 				(long long)mappings_to_slide[i]->sms_slide_size,
1791 				kr));
1792 			vm_shared_region_lock();
1793 			goto done;
1794 		}
1795 	}
1796 
1797 	assert(kr == KERN_SUCCESS);
1798 
1799 	/* adjust the map's "lowest_unnestable_start" */
1800 	lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
1801 	if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
1802 		vm_map_lock(sr_map);
1803 		sr_map->lowest_unnestable_start = lowest_unnestable_addr;
1804 		vm_map_unlock(sr_map);
1805 	}
1806 
1807 	vm_shared_region_lock();
1808 	assert(shared_region->sr_ref_count > 0);
1809 	assert(shared_region->sr_mapping_in_progress);
1810 
1811 	vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
1812 
1813 done:
1814 	/*
1815 	 * We're done working on that shared region.
1816 	 * Wake up any waiting threads.
1817 	 */
1818 	shared_region->sr_mapping_in_progress = FALSE;
1819 	thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1820 	vm_shared_region_unlock();
1821 
1822 #if __has_feature(ptrauth_calls)
1823 	if (kr == KERN_SUCCESS) {
1824 		/*
1825 		 * Since authenticated mappings were just added to the shared region,
1826 		 * go back and remap them into private mappings for this task.
1827 		 */
1828 		kr = vm_shared_region_auth_remap(shared_region);
1829 	}
1830 #endif /* __has_feature(ptrauth_calls) */
1831 
1832 	/* Cache shared region info needed for telemetry in the task */
1833 	task_t task;
1834 	if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
1835 		mach_vm_offset_t start_address;
1836 		(void)vm_shared_region_start_address(shared_region, &start_address, task);
1837 	}
1838 
1839 	SHARED_REGION_TRACE_DEBUG(
1840 		("shared_region: map(%p) <- 0x%x \n",
1841 		(void *)VM_KERNEL_ADDRPERM(shared_region), kr));
1842 	if (vmsr_num_slides > 0) {
1843 		kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
1844 		kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
1845 		kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
1846 		    mappings_to_slide);
1847 	}
1848 	return kr;
1849 }
1850 
1851 /*
1852  * Final part of vm_shared_region_map_file().
1853  * Kept in separate function to avoid blowing out the stack.
1854  */
1855 __attribute__((noinline))
1856 static void
1857 vm_shared_region_map_file_final(
1858 	vm_shared_region_t        shared_region,
1859 	vm_map_t                  sr_map,
1860 	mach_vm_offset_t          sfm_min_address,
1861 	mach_vm_offset_t          sfm_max_address)
1862 {
1863 	struct _dyld_cache_header sr_cache_header;
1864 	int                       error;
1865 	size_t                    image_array_length;
1866 	struct _dyld_cache_image_text_info *sr_image_layout;
1867 	boolean_t                 locally_built = FALSE;
1868 
1869 
1870 	/*
1871 	 * copy in the shared region UUID to the shared region structure.
1872 	 * we do this indirectly by first copying in the shared cache header
1873 	 * and then copying the UUID from there because we'll need to look
1874 	 * at other content from the shared cache header.
1875 	 */
1876 	if (!shared_region->sr_uuid_copied) {
1877 		error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
1878 		    (char *)&sr_cache_header,
1879 		    sizeof(sr_cache_header));
1880 		if (error == 0) {
1881 			memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
1882 			shared_region->sr_uuid_copied = TRUE;
1883 			locally_built = sr_cache_header.locallyBuiltCache;
1884 		} else {
1885 #if DEVELOPMENT || DEBUG
1886 			panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
1887 			    "offset:0 size:0x%016llx) failed with %d\n",
1888 			    (long long)shared_region->sr_base_address,
1889 			    (long long)shared_region->sr_first_mapping,
1890 			    (long long)sizeof(sr_cache_header),
1891 			    error);
1892 #endif /* DEVELOPMENT || DEBUG */
1893 			shared_region->sr_uuid_copied = FALSE;
1894 		}
1895 	}
1896 
1897 	/*
1898 	 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd.  This is used by
1899 	 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
1900 	 * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
1901 	 * region.  In that case, launchd re-exec's itself, so we may go through this path multiple times.  We
1902 	 * let the most recent one win.
1903 	 *
1904 	 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
1905 	 */
1906 	bool is_init_task = (task_pid(current_task()) == 1);
1907 	if (shared_region->sr_uuid_copied && is_init_task) {
1908 		/* Copy in the shared cache layout if we're running with a locally built shared cache */
1909 		if (locally_built) {
1910 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
1911 			image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
1912 			sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
1913 			error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
1914 			    sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
1915 			if (error == 0) {
1916 				if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
1917 					panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
1918 				}
1919 				shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
1920 				for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
1921 					memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
1922 					    sizeof(shared_region->sr_images[index].imageUUID));
1923 					shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
1924 				}
1925 
1926 				shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
1927 			} else {
1928 #if DEVELOPMENT || DEBUG
1929 				panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
1930 				    "offset:0x%016llx size:0x%016llx) failed with %d\n",
1931 				    (long long)shared_region->sr_base_address,
1932 				    (long long)shared_region->sr_first_mapping,
1933 				    (long long)sr_cache_header.imagesTextOffset,
1934 				    (long long)image_array_length,
1935 				    error);
1936 #endif /* DEVELOPMENT || DEBUG */
1937 			}
1938 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
1939 			kfree_data(sr_image_layout, image_array_length);
1940 			sr_image_layout = NULL;
1941 		}
1942 		primary_system_shared_region = shared_region;
1943 	}
1944 
1945 	/*
1946 	 * If we succeeded, we know the bounds of the shared region.
1947 	 * Trim our pmaps to only cover this range (if applicable to
1948 	 * this platform).
1949 	 */
1950 	if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
1951 		pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
1952 	}
1953 }
1954 
1955 /*
1956  * Retrieve a task's shared region and grab an extra reference to
1957  * make sure it doesn't disappear while the caller is using it.
1958  * The caller is responsible for consuming that extra reference if
1959  * necessary.
1960  *
1961  * This also tries to trim the pmap for the shared region.
1962  */
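/*
 * Typical usage, as a minimal sketch (the caller is assumed to already
 * hold a reference on "task"):
 *
 *	vm_shared_region_t sr = vm_shared_region_trim_and_get(task);
 *	if (sr != NULL) {
 *		... use the shared region ...
 *		vm_shared_region_deallocate(sr);
 *	}
 *
 * The vm_shared_region_deallocate() call consumes the extra reference
 * taken on the caller's behalf.
 */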
1963 vm_shared_region_t
1964 vm_shared_region_trim_and_get(task_t task)
1965 {
1966 	vm_shared_region_t shared_region;
1967 	ipc_port_t sr_handle;
1968 	vm_named_entry_t sr_mem_entry;
1969 	vm_map_t sr_map;
1970 
1971 	/* Get the shared region and the map. */
1972 	shared_region = vm_shared_region_get(task);
1973 	if (shared_region == NULL) {
1974 		return NULL;
1975 	}
1976 
1977 	sr_handle = shared_region->sr_mem_entry;
1978 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1979 	sr_map = sr_mem_entry->backing.map;
1980 
1981 	/* Trim the pmap if possible. */
1982 	if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
1983 		pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
1984 	}
1985 
1986 	return shared_region;
1987 }
1988 
1989 /*
1990  * Enter the appropriate shared region into "map" for "task".
1991  * This involves looking up the shared region (and possibly creating a new
1992  * one) for the desired environment, then mapping the VM sub map into the
1993  * task's VM "map", with the appropriate level of pmap-nesting.
1994  */
1995 kern_return_t
1996 vm_shared_region_enter(
1997 	struct _vm_map          *map,
1998 	struct task             *task,
1999 	boolean_t               is_64bit,
2000 	void                    *fsroot,
2001 	cpu_type_t              cpu,
2002 	cpu_subtype_t           cpu_subtype,
2003 	boolean_t               reslide,
2004 	boolean_t               is_driverkit)
2005 {
2006 	kern_return_t           kr;
2007 	vm_shared_region_t      shared_region;
2008 	vm_map_offset_t         sr_address, sr_offset, target_address;
2009 	vm_map_size_t           sr_size, mapping_size;
2010 	vm_map_offset_t         sr_pmap_nesting_start;
2011 	vm_map_size_t           sr_pmap_nesting_size;
2012 	ipc_port_t              sr_handle;
2013 	vm_prot_t               cur_prot, max_prot;
2014 
2015 	SHARED_REGION_TRACE_DEBUG(
2016 		("shared_region: -> "
2017 		"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2018 		(void *)VM_KERNEL_ADDRPERM(map),
2019 		(void *)VM_KERNEL_ADDRPERM(task),
2020 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2021 		cpu, cpu_subtype, is_64bit, is_driverkit));
2022 
2023 	/* lookup (create if needed) the shared region for this environment */
2024 	shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, reslide, is_driverkit);
2025 	if (shared_region == NULL) {
2026 		/* this should not happen ! */
2027 		SHARED_REGION_TRACE_ERROR(
2028 			("shared_region: -> "
2029 			"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2030 			"lookup failed !\n",
2031 			(void *)VM_KERNEL_ADDRPERM(map),
2032 			(void *)VM_KERNEL_ADDRPERM(task),
2033 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2034 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2035 		//panic("shared_region_enter: lookup failed");
2036 		return KERN_FAILURE;
2037 	}
2038 
2039 	kr = KERN_SUCCESS;
2040 	/* no need to lock since this data is never modified */
2041 	sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2042 	sr_size = (vm_map_size_t)shared_region->sr_size;
2043 	sr_handle = shared_region->sr_mem_entry;
2044 	sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2045 	sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2046 
2047 	cur_prot = VM_PROT_READ;
2048 	if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2049 		/*
2050 		 * XXX BINARY COMPATIBILITY
2051 		 * java6 apparently needs to modify some code in the
2052 		 * dyld shared cache and needs to be allowed to add
2053 		 * write access...
2054 		 */
2055 		max_prot = VM_PROT_ALL;
2056 	} else {
2057 		max_prot = VM_PROT_READ;
2058 	}
2059 
2060 	/*
2061 	 * Start mapping the shared region's VM sub map into the task's VM map.
2062 	 */
2063 	sr_offset = 0;
2064 
2065 	if (sr_pmap_nesting_start > sr_address) {
2066 		/* we need to map a range without pmap-nesting first */
2067 		target_address = sr_address;
2068 		mapping_size = sr_pmap_nesting_start - sr_address;
2069 		kr = vm_map_enter_mem_object(
2070 			map,
2071 			&target_address,
2072 			mapping_size,
2073 			0,
2074 			VM_FLAGS_FIXED,
2075 			VM_MAP_KERNEL_FLAGS_NONE,
2076 			VM_KERN_MEMORY_NONE,
2077 			sr_handle,
2078 			sr_offset,
2079 			TRUE,
2080 			cur_prot,
2081 			max_prot,
2082 			VM_INHERIT_SHARE);
2083 		if (kr != KERN_SUCCESS) {
2084 			SHARED_REGION_TRACE_ERROR(
2085 				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2086 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2087 				(void *)VM_KERNEL_ADDRPERM(map),
2088 				(void *)VM_KERNEL_ADDRPERM(task),
2089 				(void *)VM_KERNEL_ADDRPERM(fsroot),
2090 				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2091 				(long long)target_address,
2092 				(long long)mapping_size,
2093 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2094 			goto done;
2095 		}
2096 		SHARED_REGION_TRACE_DEBUG(
2097 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2098 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2099 			(void *)VM_KERNEL_ADDRPERM(map),
2100 			(void *)VM_KERNEL_ADDRPERM(task),
2101 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2102 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2103 			(long long)target_address, (long long)mapping_size,
2104 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2105 		sr_offset += mapping_size;
2106 		sr_size -= mapping_size;
2107 	}
2108 
2109 	/* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2110 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2111 	vmk_flags.vmkf_nested_pmap = TRUE;
2112 
2113 	/*
2114 	 * Use pmap-nesting to map the majority of the shared region into the task's
2115 	 * VM space. Very rarely will architectures have a shared region that isn't
2116 	 * the same size as the pmap-nesting region, or start at a different address
2117 	 * than the pmap-nesting region, so this code will map the entirety of the
2118 	 * shared region for most architectures.
2119 	 */
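	/*
	 * Schematically, the task ends up with up to three consecutive
	 * mappings of the shared region submap (layout illustrative only):
	 *
	 *	sr_address                                       sr_address + sr_size
	 *	|-- unnested head --|==== pmap-nested range ====|-- unnested tail --|
	 *	                    ^ sr_pmap_nesting_start     ^ + sr_pmap_nesting_size
	 *
	 * On most configurations the head and tail are empty and the nested
	 * range covers the entire shared region.
	 */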
2120 	assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2121 	target_address = sr_pmap_nesting_start;
2122 	kr = vm_map_enter_mem_object(
2123 		map,
2124 		&target_address,
2125 		sr_pmap_nesting_size,
2126 		0,
2127 		VM_FLAGS_FIXED,
2128 		vmk_flags,
2129 		VM_MEMORY_SHARED_PMAP,
2130 		sr_handle,
2131 		sr_offset,
2132 		TRUE,
2133 		cur_prot,
2134 		max_prot,
2135 		VM_INHERIT_SHARE);
2136 	if (kr != KERN_SUCCESS) {
2137 		SHARED_REGION_TRACE_ERROR(
2138 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2139 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2140 			(void *)VM_KERNEL_ADDRPERM(map),
2141 			(void *)VM_KERNEL_ADDRPERM(task),
2142 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2143 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2144 			(long long)target_address,
2145 			(long long)sr_pmap_nesting_size,
2146 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2147 		goto done;
2148 	}
2149 	SHARED_REGION_TRACE_DEBUG(
2150 		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2151 		"nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2152 		(void *)VM_KERNEL_ADDRPERM(map),
2153 		(void *)VM_KERNEL_ADDRPERM(task),
2154 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2155 		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2156 		(long long)target_address, (long long)sr_pmap_nesting_size,
2157 		(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2158 
2159 	sr_offset += sr_pmap_nesting_size;
2160 	sr_size -= sr_pmap_nesting_size;
2161 
2162 	if (sr_size > 0) {
2163 		/* and there's some left to be mapped without pmap-nesting */
2164 		target_address = sr_address + sr_offset;
2165 		mapping_size = sr_size;
2166 		kr = vm_map_enter_mem_object(
2167 			map,
2168 			&target_address,
2169 			mapping_size,
2170 			0,
2171 			VM_FLAGS_FIXED,
2172 			VM_MAP_KERNEL_FLAGS_NONE,
2173 			VM_KERN_MEMORY_NONE,
2174 			sr_handle,
2175 			sr_offset,
2176 			TRUE,
2177 			cur_prot,
2178 			max_prot,
2179 			VM_INHERIT_SHARE);
2180 		if (kr != KERN_SUCCESS) {
2181 			SHARED_REGION_TRACE_ERROR(
2182 				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2183 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2184 				(void *)VM_KERNEL_ADDRPERM(map),
2185 				(void *)VM_KERNEL_ADDRPERM(task),
2186 				(void *)VM_KERNEL_ADDRPERM(fsroot),
2187 				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2188 				(long long)target_address,
2189 				(long long)mapping_size,
2190 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2191 			goto done;
2192 		}
2193 		SHARED_REGION_TRACE_DEBUG(
2194 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2195 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2196 			(void *)VM_KERNEL_ADDRPERM(map),
2197 			(void *)VM_KERNEL_ADDRPERM(task),
2198 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2199 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2200 			(long long)target_address, (long long)mapping_size,
2201 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2202 		sr_offset += mapping_size;
2203 		sr_size -= mapping_size;
2204 	}
2205 	assert(sr_size == 0);
2206 
2207 done:
2208 	if (kr == KERN_SUCCESS) {
2209 		/* let the task use that shared region */
2210 		vm_shared_region_set(task, shared_region);
2211 	} else {
2212 		/* drop our reference since we're not using it */
2213 		vm_shared_region_deallocate(shared_region);
2214 		vm_shared_region_set(task, NULL);
2215 	}
2216 
2217 	SHARED_REGION_TRACE_DEBUG(
2218 		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2219 		(void *)VM_KERNEL_ADDRPERM(map),
2220 		(void *)VM_KERNEL_ADDRPERM(task),
2221 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2222 		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2223 		kr));
2224 	return kr;
2225 }
2226 
2227 #define SANE_SLIDE_INFO_SIZE            (2560*1024) /* Can be changed if needed */
2228 struct vm_shared_region_slide_info      slide_info;
2229 
2230 kern_return_t
2231 vm_shared_region_sliding_valid(uint32_t slide)
2232 {
2233 	kern_return_t kr = KERN_SUCCESS;
2234 	vm_shared_region_t sr = vm_shared_region_get(current_task());
2235 
2236 	/* No region yet? we're fine. */
2237 	if (sr == NULL) {
2238 		return kr;
2239 	}
2240 
2241 	if (sr->sr_slide != 0 && slide != 0) {
2242 		if (slide == sr->sr_slide) {
2243 			/*
2244 			 * Request for sliding when we've
2245 			 * already done it with exactly the
2246 			 * same slide value before.
2247 			 * This isn't technically wrong, but
2248 			 * we don't want to slide again, so
2249 			 * we return KERN_INVALID_ARGUMENT.
2250 			 */
2251 			kr = KERN_INVALID_ARGUMENT;
2252 		} else {
2253 			printf("Mismatched shared region slide\n");
2254 			kr = KERN_FAILURE;
2255 		}
2256 	}
2257 	vm_shared_region_deallocate(sr);
2258 	return kr;
2259 }
2260 
2261 /*
2262  * Actually create (really overwrite) the mapping to part of the shared cache which
2263  * undergoes relocation.  This routine reads in the relocation info from dyld and
2264  * verifies it. It then creates a (or finds a matching) shared region pager which
2265  * handles the actual modification of the page contents and installs the mapping
2266  * using that pager.
2267  */
2268 kern_return_t
2269 vm_shared_region_slide_mapping(
2270 	vm_shared_region_t      sr,
2271 	user_addr_t             slide_info_addr,
2272 	mach_vm_size_t          slide_info_size,
2273 	mach_vm_offset_t        start,
2274 	mach_vm_size_t          size,
2275 	mach_vm_offset_t        slid_mapping,
2276 	uint32_t                slide,
2277 	memory_object_control_t sr_file_control,
2278 	vm_prot_t               prot)
2279 {
2280 	kern_return_t           kr;
2281 	vm_object_t             object = VM_OBJECT_NULL;
2282 	vm_shared_region_slide_info_t si = NULL;
2283 	vm_map_entry_t          tmp_entry = VM_MAP_ENTRY_NULL;
2284 	struct vm_map_entry     tmp_entry_store;
2285 	memory_object_t         sr_pager = MEMORY_OBJECT_NULL;
2286 	vm_map_t                sr_map;
2287 	int                     vm_flags;
2288 	vm_map_kernel_flags_t   vmk_flags;
2289 	vm_map_offset_t         map_addr;
2290 	void                    *slide_info_entry = NULL;
2291 	int                     error;
2292 
2293 	assert(sr->sr_slide_in_progress);
2294 
2295 	if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2296 		return KERN_INVALID_ARGUMENT;
2297 	}
2298 
2299 	/*
2300 	 * Copy in and verify the relocation information.
2301 	 */
2302 	if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2303 		printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2304 		return KERN_FAILURE;
2305 	}
2306 	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2307 		printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2308 		return KERN_FAILURE;
2309 	}
2310 
2311 	slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2312 	if (slide_info_entry == NULL) {
2313 		return KERN_RESOURCE_SHORTAGE;
2314 	}
2315 	error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2316 	if (error) {
2317 		printf("copyin of slide_info failed\n");
2318 		kr = KERN_INVALID_ADDRESS;
2319 		goto done;
2320 	}
2321 
2322 	if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2323 		printf("Sanity Check failed for slide_info\n");
2324 		goto done;
2325 	}
2326 
2327 	/*
2328 	 * Allocate and fill in a vm_shared_region_slide_info.
2329 	 * This will either be used by a new pager, or used to find
2330 	 * a pre-existing matching pager.
2331 	 */
2332 	object = memory_object_control_to_vm_object(sr_file_control);
2333 	if (object == VM_OBJECT_NULL || object->internal) {
2334 		object = VM_OBJECT_NULL;
2335 		kr = KERN_INVALID_ADDRESS;
2336 		goto done;
2337 	}
2338 
2339 	si = kalloc_type(struct vm_shared_region_slide_info,
2340 	    Z_WAITOK | Z_NOFAIL);
2341 	vm_object_lock(object);
2342 
2343 	vm_object_reference_locked(object);     /* for si->slide_object */
2344 	object->object_is_shared_cache = TRUE;
2345 	vm_object_unlock(object);
2346 
2347 	si->si_slide_info_entry = slide_info_entry;
2348 	si->si_slide_info_size = slide_info_size;
2349 
2350 	assert(slid_mapping != (mach_vm_offset_t) -1);
2351 	si->si_slid_address = slid_mapping + sr->sr_base_address;
2352 	si->si_slide_object = object;
2353 	si->si_start = start;
2354 	si->si_end = si->si_start + size;
2355 	si->si_slide = slide;
2356 #if __has_feature(ptrauth_calls)
2357 	/*
2358 	 * If there is authenticated pointer data in this slid mapping,
2359 	 * then just add the information needed to create new pagers for
2360 	 * different shared_region_id's later.
2361 	 */
2362 	if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2363 	    sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2364 	    !(prot & VM_PROT_NOAUTH)) {
2365 		if (sr->sr_num_auth_section == NUM_SR_AUTH_SECTIONS) {
2366 			printf("Too many auth/private sections for shared region!!\n");
2367 			kr = KERN_INVALID_ARGUMENT;
2368 			goto done;
2369 		}
2370 		si->si_ptrauth = TRUE;
2371 		sr->sr_auth_section[sr->sr_num_auth_section++] = si;
2372 		/*
2373 		 * Remember the shared region, since that's where we'll
2374 		 * stash this info for all auth pagers to share. Each pager
2375 		 * will need to take a reference to it.
2376 		 */
2377 		si->si_shared_region = sr;
2378 		kr = KERN_SUCCESS;
2379 		goto done;
2380 	}
2381 	si->si_shared_region = NULL;
2382 	si->si_ptrauth = FALSE;
2383 #else /* __has_feature(ptrauth_calls) */
2384 	(void)prot;     /* silence unused warning */
2385 #endif /* __has_feature(ptrauth_calls) */
2386 
2387 	/*
2388 	 * find the pre-existing shared region's map entry to slide
2389 	 */
2390 	sr_map = vm_shared_region_vm_map(sr);
2391 	kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2392 	if (kr != KERN_SUCCESS) {
2393 		goto done;
2394 	}
2395 	tmp_entry = &tmp_entry_store;
2396 
2397 	/*
2398 	 * The object must exactly cover the region to slide.
2399 	 */
2400 	assert(VME_OFFSET(tmp_entry) == start);
2401 	assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2402 
2403 	/* create a "shared_region" sliding pager */
2404 	sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2405 	if (sr_pager == MEMORY_OBJECT_NULL) {
2406 		kr = KERN_RESOURCE_SHORTAGE;
2407 		goto done;
2408 	}
2409 
2410 #if CONFIG_SECLUDED_MEMORY
2411 	/*
2412 	 * Pages provided by the shared region pager for the camera's (primary)
2413 	 * shared region, or for a DEXT shared region, must stay off the secluded queue.
2414 	 */
2415 	if (primary_system_shared_region == NULL ||
2416 	    primary_system_shared_region == sr ||
2417 	    sr->sr_driverkit) {
2418 		memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2419 	}
2420 #endif /* CONFIG_SECLUDED_MEMORY */
2421 
2422 	/* map that pager over the portion of the mapping that needs sliding */
2423 	vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
2424 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2425 	vmk_flags.vmkf_overwrite_immutable = TRUE;
2426 	map_addr = tmp_entry->vme_start;
2427 	kr = vm_map_enter_mem_object(sr_map,
2428 	    &map_addr,
2429 	    (tmp_entry->vme_end - tmp_entry->vme_start),
2430 	    (mach_vm_offset_t) 0,
2431 	    vm_flags,
2432 	    vmk_flags,
2433 	    VM_KERN_MEMORY_NONE,
2434 	    (ipc_port_t)(uintptr_t) sr_pager,
2435 	    0,
2436 	    TRUE,
2437 	    tmp_entry->protection,
2438 	    tmp_entry->max_protection,
2439 	    tmp_entry->inheritance);
2440 	assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2441 	assertf(map_addr == tmp_entry->vme_start,
2442 	    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2443 	    (uint64_t)map_addr,
2444 	    (uint64_t) tmp_entry->vme_start,
2445 	    tmp_entry);
2446 
2447 	/* success! */
2448 	kr = KERN_SUCCESS;
2449 
2450 done:
2451 	if (sr_pager != NULL) {
2452 		/*
2453 		 * Release the sr_pager reference obtained by shared_region_pager_setup().
2454 		 * The mapping, if it succeeded, is now holding a reference on the memory object.
2455 		 */
2456 		memory_object_deallocate(sr_pager);
2457 		sr_pager = MEMORY_OBJECT_NULL;
2458 	}
2459 	if (tmp_entry != NULL) {
2460 		/* release extra ref on tmp_entry's VM object */
2461 		vm_object_deallocate(VME_OBJECT(tmp_entry));
2462 		tmp_entry = VM_MAP_ENTRY_NULL;
2463 	}
2464 
2465 	if (kr != KERN_SUCCESS) {
2466 		/* cleanup */
2467 		if (si != NULL) {
2468 			if (si->si_slide_object) {
2469 				vm_object_deallocate(si->si_slide_object);
2470 				si->si_slide_object = VM_OBJECT_NULL;
2471 			}
2472 			kfree_type(struct vm_shared_region_slide_info, si);
2473 			si = NULL;
2474 		}
2475 		if (slide_info_entry != NULL) {
2476 			kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2477 			slide_info_entry = NULL;
2478 		}
2479 	}
2480 	return kr;
2481 }
2482 
2483 static kern_return_t
2484 vm_shared_region_slide_sanity_check_v2(
2485 	vm_shared_region_slide_info_entry_v2_t s_info,
2486 	mach_vm_size_t slide_info_size)
2487 {
2488 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2489 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2490 		return KERN_FAILURE;
2491 	}
2492 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2493 		return KERN_FAILURE;
2494 	}
2495 
2496 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
2497 
2498 	uint32_t page_starts_count = s_info->page_starts_count;
2499 	uint32_t page_extras_count = s_info->page_extras_count;
2500 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2501 	if (num_trailing_entries < page_starts_count) {
2502 		return KERN_FAILURE;
2503 	}
2504 
2505 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2506 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2507 	if (trailing_size >> 1 != num_trailing_entries) {
2508 		return KERN_FAILURE;
2509 	}
2510 
2511 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2512 	if (required_size < sizeof(*s_info)) {
2513 		return KERN_FAILURE;
2514 	}
2515 
2516 	if (required_size > slide_info_size) {
2517 		return KERN_FAILURE;
2518 	}
2519 
2520 	return KERN_SUCCESS;
2521 }
2522 
2523 static kern_return_t
2524 vm_shared_region_slide_sanity_check_v3(
2525 	vm_shared_region_slide_info_entry_v3_t s_info,
2526 	mach_vm_size_t slide_info_size)
2527 {
2528 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2529 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2530 		return KERN_FAILURE;
2531 	}
2532 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2533 		printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2534 		return KERN_FAILURE;
2535 	}
2536 
2537 	uint32_t page_starts_count = s_info->page_starts_count;
2538 	mach_vm_size_t num_trailing_entries = page_starts_count;
2539 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2540 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2541 	if (required_size < sizeof(*s_info)) {
2542 		printf("vm_shared_region_slide_sanity_check_v3: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2543 		return KERN_FAILURE;
2544 	}
2545 
2546 	if (required_size > slide_info_size) {
2547 		printf("vm_shared_region_slide_sanity_check_v3: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2548 		return KERN_FAILURE;
2549 	}
2550 
2551 	return KERN_SUCCESS;
2552 }
2553 
2554 static kern_return_t
2555 vm_shared_region_slide_sanity_check_v4(
2556 	vm_shared_region_slide_info_entry_v4_t s_info,
2557 	mach_vm_size_t slide_info_size)
2558 {
2559 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2560 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2561 		return KERN_FAILURE;
2562 	}
2563 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2564 		return KERN_FAILURE;
2565 	}
2566 
2567 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
2568 
2569 	uint32_t page_starts_count = s_info->page_starts_count;
2570 	uint32_t page_extras_count = s_info->page_extras_count;
2571 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2572 	if (num_trailing_entries < page_starts_count) {
2573 		return KERN_FAILURE;
2574 	}
2575 
2576 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2577 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2578 	if (trailing_size >> 1 != num_trailing_entries) {
2579 		return KERN_FAILURE;
2580 	}
2581 
2582 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2583 	if (required_size < sizeof(*s_info)) {
2584 		return KERN_FAILURE;
2585 	}
2586 
2587 	if (required_size > slide_info_size) {
2588 		return KERN_FAILURE;
2589 	}
2590 
2591 	return KERN_SUCCESS;
2592 }
2593 
2594 
2595 static kern_return_t
2596 vm_shared_region_slide_sanity_check(
2597 	vm_shared_region_slide_info_entry_t s_info,
2598 	mach_vm_size_t s_info_size)
2599 {
2600 	kern_return_t kr;
2601 
2602 	switch (s_info->version) {
2603 	case 2:
2604 		kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2605 		break;
2606 	case 3:
2607 		kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2608 		break;
2609 	case 4:
2610 		kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2611 		break;
2612 	default:
2613 		kr = KERN_FAILURE;
2614 	}
2615 	return kr;
2616 }
2617 
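/*
 * Worked example of the v2 chain encoding (the delta_mask below is chosen
 * purely for illustration; the real value comes from the slide info):
 * with delta_mask = 0xC0000000, delta_shift = ctz(0xC0000000) - 2 = 28, so
 * each 32-bit word yields delta = (value & delta_mask) >> 28, i.e. 0, 4, 8
 * or 12 bytes to the next pointer in the chain, and a delta of 0 ends the
 * chain.  The remaining bits (value & ~delta_mask) are the unslid target,
 * which gets value_add plus the slide added unless it is 0 (a NULL pointer).
 */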
2618 static kern_return_t
2619 rebase_chain_32(
2620 	uint8_t *page_content,
2621 	uint16_t start_offset,
2622 	uint32_t slide_amount,
2623 	vm_shared_region_slide_info_entry_v2_t s_info)
2624 {
2625 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
2626 
2627 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
2628 	const uint32_t value_mask = ~delta_mask;
2629 	const uint32_t value_add = (uint32_t)(s_info->value_add);
2630 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
2631 
2632 	uint32_t page_offset = start_offset;
2633 	uint32_t delta = 1;
2634 
2635 	while (delta != 0 && page_offset <= last_page_offset) {
2636 		uint8_t *loc;
2637 		uint32_t value;
2638 
2639 		loc = page_content + page_offset;
2640 		memcpy(&value, loc, sizeof(value));
2641 		delta = (value & delta_mask) >> delta_shift;
2642 		value &= value_mask;
2643 
2644 		if (value != 0) {
2645 			value += value_add;
2646 			value += slide_amount;
2647 		}
2648 		memcpy(loc, &value, sizeof(value));
2649 		page_offset += delta;
2650 	}
2651 
2652 	/* If the offset went past the end of the page, then the slide data is invalid. */
2653 	if (page_offset > last_page_offset) {
2654 		return KERN_FAILURE;
2655 	}
2656 	return KERN_SUCCESS;
2657 }
2658 
2659 static kern_return_t
2660 rebase_chain_64(
2661 	uint8_t *page_content,
2662 	uint16_t start_offset,
2663 	uint32_t slide_amount,
2664 	vm_shared_region_slide_info_entry_v2_t s_info)
2665 {
2666 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
2667 
2668 	const uint64_t delta_mask = s_info->delta_mask;
2669 	const uint64_t value_mask = ~delta_mask;
2670 	const uint64_t value_add = s_info->value_add;
2671 	const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
2672 
2673 	uint32_t page_offset = start_offset;
2674 	uint32_t delta = 1;
2675 
2676 	while (delta != 0 && page_offset <= last_page_offset) {
2677 		uint8_t *loc;
2678 		uint64_t value;
2679 
2680 		loc = page_content + page_offset;
2681 		memcpy(&value, loc, sizeof(value));
2682 		delta = (uint32_t)((value & delta_mask) >> delta_shift);
2683 		value &= value_mask;
2684 
2685 		if (value != 0) {
2686 			value += value_add;
2687 			value += slide_amount;
2688 		}
2689 		memcpy(loc, &value, sizeof(value));
2690 		page_offset += delta;
2691 	}
2692 
2693 	if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
2694 		/* If a pointer straddling the page boundary needs to be adjusted, then
2695 		 * add the slide to the lower half. The encoding guarantees that the upper
2696 		 * half on the next page will need no masking.
2697 		 *
2698 		 * This assumes a little-endian machine and that the region being slid
2699 		 * never crosses a 4 GB boundary. */
2700 
2701 		uint8_t *loc = page_content + page_offset;
2702 		uint32_t value;
2703 
2704 		memcpy(&value, loc, sizeof(value));
2705 		value += slide_amount;
2706 		memcpy(loc, &value, sizeof(value));
2707 	} else if (page_offset > last_page_offset) {
2708 		return KERN_FAILURE;
2709 	}
2710 
2711 	return KERN_SUCCESS;
2712 }
2713 
2714 static kern_return_t
2715 rebase_chain(
2716 	boolean_t is_64,
2717 	uint32_t pageIndex,
2718 	uint8_t *page_content,
2719 	uint16_t start_offset,
2720 	uint32_t slide_amount,
2721 	vm_shared_region_slide_info_entry_v2_t s_info)
2722 {
2723 	kern_return_t kr;
2724 	if (is_64) {
2725 		kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
2726 	} else {
2727 		kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
2728 	}
2729 
2730 	if (kr != KERN_SUCCESS) {
2731 		printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
2732 		    pageIndex, start_offset, slide_amount);
2733 	}
2734 	return kr;
2735 }
2736 
2737 static kern_return_t
2738 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2739 {
2740 	vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
2741 	const uint32_t slide_amount = si->si_slide;
2742 
2743 	/* The high bits of the delta_mask field are nonzero precisely when the shared
2744 	 * cache is 64-bit. */
2745 	const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
2746 
2747 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
2748 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
2749 
2750 	uint8_t *page_content = (uint8_t *)vaddr;
2751 	uint16_t page_entry;
2752 
2753 	if (pageIndex >= s_info->page_starts_count) {
2754 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
2755 		    pageIndex, s_info->page_starts_count);
2756 		return KERN_FAILURE;
2757 	}
2758 	page_entry = page_starts[pageIndex];
2759 
2760 	if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
2761 		return KERN_SUCCESS;
2762 	}
2763 
2764 	if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
2765 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
2766 		uint16_t info;
2767 
2768 		do {
2769 			uint16_t page_start_offset;
2770 			kern_return_t kr;
2771 
2772 			if (chain_index >= s_info->page_extras_count) {
2773 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
2774 				    chain_index, s_info->page_extras_count);
2775 				return KERN_FAILURE;
2776 			}
2777 			info = page_extras[chain_index];
2778 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
2779 
2780 			kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
2781 			if (kr != KERN_SUCCESS) {
2782 				return KERN_FAILURE;
2783 			}
2784 
2785 			chain_index++;
2786 		} while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
2787 	} else {
2788 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
2789 		kern_return_t kr;
2790 
2791 		kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
2792 		if (kr != KERN_SUCCESS) {
2793 			return KERN_FAILURE;
2794 		}
2795 	}
2796 
2797 	return KERN_SUCCESS;
2798 }
2799 
2800 
2801 static kern_return_t
2802 vm_shared_region_slide_page_v3(
2803 	vm_shared_region_slide_info_t si,
2804 	vm_offset_t vaddr,
2805 	__unused mach_vm_offset_t uservaddr,
2806 	uint32_t pageIndex,
2807 #if !__has_feature(ptrauth_calls)
2808 	__unused
2809 #endif /* !__has_feature(ptrauth_calls) */
2810 	uint64_t jop_key)
2811 {
2812 	vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
2813 	const uint32_t slide_amount = si->si_slide;
2814 
2815 	uint8_t *page_content = (uint8_t *)vaddr;
2816 	uint16_t page_entry;
2817 
2818 	if (pageIndex >= s_info->page_starts_count) {
2819 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
2820 		    pageIndex, s_info->page_starts_count);
2821 		return KERN_FAILURE;
2822 	}
2823 	page_entry = s_info->page_starts[pageIndex];
2824 
2825 	if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
2826 		return KERN_SUCCESS;
2827 	}
2828 
2829 	uint8_t* rebaseLocation = page_content;
2830 	uint64_t delta = page_entry;
2831 	do {
2832 		rebaseLocation += delta;
2833 		uint64_t value;
2834 		memcpy(&value, rebaseLocation, sizeof(value));
2835 		delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
2836 
2837 		// A pointer is one of :
2838 		// {
2839 		//	 uint64_t pointerValue : 51;
2840 		//	 uint64_t offsetToNextPointer : 11;
2841 		//	 uint64_t isBind : 1 = 0;
2842 		//	 uint64_t authenticated : 1 = 0;
2843 		// }
2844 		// {
2845 		//	 uint32_t offsetFromSharedCacheBase;
2846 		//	 uint16_t diversityData;
2847 		//	 uint16_t hasAddressDiversity : 1;
2848 		//	 uint16_t hasDKey : 1;
2849 		//	 uint16_t hasBKey : 1;
2850 		//	 uint16_t offsetToNextPointer : 11;
2851 		//	 uint16_t isBind : 1;
2852 		//	 uint16_t authenticated : 1 = 1;
2853 		// }
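		// Illustrative decode (bits chosen as an example, not from a real
		// cache): a non-authenticated entry of 0x0010000000004000 has
		// offsetToNextPointer = (value >> 51) & 0x7FF = 2, so the next
		// pointer lives 2 * 8 = 16 bytes further into the page; its target
		// is rebuilt from bits 43..50 and the low 43 bits before the slide
		// is added.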
2854 
2855 		bool isBind = (value & (1ULL << 62)) != 0;
2856 		if (isBind) {
2857 			return KERN_FAILURE;
2858 		}
2859 
2860 #if __has_feature(ptrauth_calls)
2861 		uint16_t diversity_data = (uint16_t)(value >> 32);
2862 		bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
2863 		ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
2864 #endif /* __has_feature(ptrauth_calls) */
2865 		bool isAuthenticated = (value & (1ULL << 63)) != 0;
2866 
2867 		if (isAuthenticated) {
2868 			// The new value for a rebase is the low 32-bits of the threaded value plus the slide.
2869 			value = (value & 0xFFFFFFFF) + slide_amount;
2870 			// Add in the offset from the mach_header
2871 			const uint64_t value_add = s_info->value_add;
2872 			value += value_add;
2873 
2874 #if __has_feature(ptrauth_calls)
2875 			uint64_t discriminator = diversity_data;
2876 			if (hasAddressDiversity) {
2877 				// First calculate a new discriminator using the address of where we are trying to store the value
2878 				uintptr_t pageOffset = rebaseLocation - page_content;
2879 				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
2880 			}
2881 
2882 			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
2883 				/*
2884 				 * these pointers are used in user mode. disable the kernel key diversification
2885 				 * so we can sign them for use in user mode.
2886 				 */
2887 				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
2888 			}
2889 #endif /* __has_feature(ptrauth_calls) */
2890 		} else {
2891 			// The new value for a rebase is the low 51-bits of the threaded value plus the slide.
2892 			// Regular pointer which needs to fit in 51-bits of value.
2893 			// C++ RTTI uses the top bit, so we'll allow the whole top-byte
2894 			// and the bottom 43-bits to fit into 51-bits.
2895 			uint64_t top8Bits = value & 0x0007F80000000000ULL;
2896 			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
2897 			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
2898 			value = targetValue + slide_amount;
2899 		}
2900 
2901 		memcpy(rebaseLocation, &value, sizeof(value));
2902 	} while (delta != 0);
2903 
2904 	return KERN_SUCCESS;
2905 }
2906 
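/*
 * The v4 format packs three kinds of 32-bit values.  Worked examples
 * (values chosen for illustration only), after the delta bits have been
 * masked off:
 *
 *	0x00003842  (value & 0xFFFF8000) == 0           small positive literal, kept as-is
 *	0x3FFF8042  (value & 0x3FFF8000) == 0x3FFF8000  small negative literal,
 *	            sign-extended to 0xFFFF8042
 *	0x00104000  neither pattern matches             pointer: value_add and the
 *	            slide are added
 */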
2907 static kern_return_t
2908 rebase_chainv4(
2909 	uint8_t *page_content,
2910 	uint16_t start_offset,
2911 	uint32_t slide_amount,
2912 	vm_shared_region_slide_info_entry_v4_t s_info)
2913 {
2914 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
2915 
2916 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
2917 	const uint32_t value_mask = ~delta_mask;
2918 	const uint32_t value_add = (uint32_t)(s_info->value_add);
2919 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
2920 
2921 	uint32_t page_offset = start_offset;
2922 	uint32_t delta = 1;
2923 
2924 	while (delta != 0 && page_offset <= last_page_offset) {
2925 		uint8_t *loc;
2926 		uint32_t value;
2927 
2928 		loc = page_content + page_offset;
2929 		memcpy(&value, loc, sizeof(value));
2930 		delta = (value & delta_mask) >> delta_shift;
2931 		value &= value_mask;
2932 
2933 		if ((value & 0xFFFF8000) == 0) {
2934 			// small positive non-pointer, use as-is
2935 		} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
2936 			// small negative non-pointer
2937 			value |= 0xC0000000;
2938 		} else {
2939 			// pointer that needs rebasing
2940 			value += value_add;
2941 			value += slide_amount;
2942 		}
2943 		memcpy(loc, &value, sizeof(value));
2944 		page_offset += delta;
2945 	}
2946 
2947 	/* If the offset went past the end of the page, then the slide data is invalid. */
2948 	if (page_offset > last_page_offset) {
2949 		return KERN_FAILURE;
2950 	}
2951 	return KERN_SUCCESS;
2952 }
2953 
2954 static kern_return_t
2955 vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2956 {
2957 	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
2958 	const uint32_t slide_amount = si->si_slide;
2959 
2960 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
2961 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
2962 
2963 	uint8_t *page_content = (uint8_t *)vaddr;
2964 	uint16_t page_entry;
2965 
2966 	if (pageIndex >= s_info->page_starts_count) {
2967 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
2968 		    pageIndex, s_info->page_starts_count);
2969 		return KERN_FAILURE;
2970 	}
2971 	page_entry = page_starts[pageIndex];
2972 
2973 	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
2974 		return KERN_SUCCESS;
2975 	}
2976 
2977 	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
2978 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
2979 		uint16_t info;
2980 
2981 		do {
2982 			uint16_t page_start_offset;
2983 			kern_return_t kr;
2984 
2985 			if (chain_index >= s_info->page_extras_count) {
2986 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
2987 				    chain_index, s_info->page_extras_count);
2988 				return KERN_FAILURE;
2989 			}
2990 			info = page_extras[chain_index];
2991 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
2992 
2993 			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
2994 			if (kr != KERN_SUCCESS) {
2995 				return KERN_FAILURE;
2996 			}
2997 
2998 			chain_index++;
2999 		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
3000 	} else {
3001 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3002 		kern_return_t kr;
3003 
3004 		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3005 		if (kr != KERN_SUCCESS) {
3006 			return KERN_FAILURE;
3007 		}
3008 	}
3009 
3010 	return KERN_SUCCESS;
3011 }
3012 
3013 
3014 
3015 kern_return_t
3016 vm_shared_region_slide_page(
3017 	vm_shared_region_slide_info_t si,
3018 	vm_offset_t vaddr,
3019 	mach_vm_offset_t uservaddr,
3020 	uint32_t pageIndex,
3021 	uint64_t jop_key)
3022 {
3023 	switch (si->si_slide_info_entry->version) {
3024 	case 2:
3025 		return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3026 	case 3:
3027 		return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3028 	case 4:
3029 		return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3030 	default:
3031 		return KERN_FAILURE;
3032 	}
3033 }
3034 
3035 /******************************************************************************/
3036 /* Comm page support                                                          */
3037 /******************************************************************************/
3038 
3039 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
3040 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
3041 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
3042 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
3043 SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
3044 SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;
3045 
3046 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
3047 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
3048 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
3049 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
3050 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
3051 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;
3052 
3053 SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
3054 SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3055 
3056 #if defined(__i386__) || defined(__x86_64__)
3057 /*
3058  * Create a memory entry, VM submap and pmap for one commpage.
3059  */
3060 static void
3061 _vm_commpage_init(
3062 	ipc_port_t      *handlep,
3063 	vm_map_size_t   size)
3064 {
3065 	kern_return_t           kr;
3066 	vm_named_entry_t        mem_entry;
3067 	vm_map_t                new_map;
3068 
3069 	SHARED_REGION_TRACE_DEBUG(
3070 		("commpage: -> _init(0x%llx)\n",
3071 		(long long)size));
3072 
3073 	kr = mach_memory_entry_allocate(&mem_entry,
3074 	    handlep);
3075 	if (kr != KERN_SUCCESS) {
3076 		panic("_vm_commpage_init: could not allocate mem_entry");
3077 	}
3078 	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
3079 	if (new_pmap == NULL) {
3080 		panic("_vm_commpage_init: could not allocate pmap");
3081 	}
3082 	new_map = vm_map_create(new_pmap, 0, size, PMAP_CREATE_64BIT);
3083 	if (new_map == VM_MAP_NULL) {
3084 		panic("_vm_commpage_init: could not allocate VM map");
3085 	}
3086 	mem_entry->backing.map = new_map;
3087 	mem_entry->internal = TRUE;
3088 	mem_entry->is_sub_map = TRUE;
3089 	mem_entry->offset = 0;
3090 	mem_entry->protection = VM_PROT_ALL;
3091 	mem_entry->size = size;
3092 
3093 	SHARED_REGION_TRACE_DEBUG(
3094 		("commpage: _init(0x%llx) <- %p\n",
3095 		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
3096 }
3097 #endif
3098 
3099 
3100 /*
3101  * Initialize the comm text pages at boot time
3102  */
3103 void
3104 vm_commpage_text_init(void)
3105 {
3106 	SHARED_REGION_TRACE_DEBUG(
3107 		("commpage text: ->init()\n"));
3108 #if defined(__i386__) || defined(__x86_64__)
3109 	/* create the 32-bit comm text page */
3110 	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricted to 32-bit max minus 2 pages */
3111 	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3112 	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
3113 	commpage_text32_map = commpage_text32_entry->backing.map;
3114 	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
3115 	/* XXX if (cpu_is_64bit_capable()) ? */
3116 	/* create the 64-bit comm page */
3117 	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to a range of up to 2MB */
3118 	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3119 	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
3120 	commpage_text64_map = commpage_text64_entry->backing.map;
3121 	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
3122 #endif
3123 
3124 	/* populate the routines in here */
3125 	commpage_text_populate();
3126 
3127 	SHARED_REGION_TRACE_DEBUG(
3128 		("commpage text: init() <-\n"));
3129 }
3130 
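/*
 * The random text-page slide computed in vm_commpage_text_init() is a whole
 * number of pages: (random() % slide_range) picks a page index inside the
 * allowed window and "<< PAGE_SHIFT" turns it into a byte offset.  A
 * standalone sketch of that computation; the parameters are illustrative, the
 * kernel uses the _PFZ32_SLIDE_RANGE / _PFZ64_SLIDE_RANGE constants and its
 * own random().
 */
#include <stdint.h>
#include <stdlib.h>

static uint64_t
random_page_aligned_offset(uint32_t slide_range_pages, uint32_t page_shift)
{
	/* e.g. slide_range_pages == 16, page_shift == 12 yields one of
	 * 0x0000, 0x1000, ..., 0xF000 */
	return (uint64_t)(random() % slide_range_pages) << page_shift;
}
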
3131 /*
3132  * Initialize the comm pages at boot time.
3133  */
3134 void
3135 vm_commpage_init(void)
3136 {
3137 	SHARED_REGION_TRACE_DEBUG(
3138 		("commpage: -> init()\n"));
3139 
3140 #if defined(__i386__) || defined(__x86_64__)
3141 	/* create the 32-bit comm page */
3142 	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
3143 	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
3144 	commpage32_map = commpage32_entry->backing.map;
3145 
3146 	/* XXX if (cpu_is_64bit_capable()) ? */
3147 	/* create the 64-bit comm page */
3148 	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
3149 	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
3150 	commpage64_map = commpage64_entry->backing.map;
3151 
3152 #endif /* __i386__ || __x86_64__ */
3153 
3154 	/* populate them according to this specific platform */
3155 	commpage_populate();
3156 	__commpage_setup = 1;
3157 #if XNU_TARGET_OS_OSX
3158 	if (__system_power_source == 0) {
3159 		post_sys_powersource_internal(0, 1);
3160 	}
3161 #endif /* XNU_TARGET_OS_OSX */
3162 
3163 	SHARED_REGION_TRACE_DEBUG(
3164 		("commpage: init() <-\n"));
3165 }
3166 
3167 /*
3168  * Enter the appropriate comm page into the task's address space.
3169  * This is called at exec() time via vm_map_exec().
3170  */
3171 kern_return_t
3172 vm_commpage_enter(
3173 	vm_map_t        map,
3174 	task_t          task,
3175 	boolean_t       is64bit)
3176 {
3177 #if     defined(__arm__)
3178 #pragma unused(is64bit)
3179 	(void)task;
3180 	(void)map;
3181 	return KERN_SUCCESS;
3182 #elif   defined(__arm64__)
3183 #pragma unused(is64bit)
3184 	(void)task;
3185 	(void)map;
3186 	pmap_insert_sharedpage(vm_map_pmap(map));
3187 	return KERN_SUCCESS;
3188 #else
3189 	ipc_port_t              commpage_handle, commpage_text_handle;
3190 	vm_map_offset_t         commpage_address, objc_address, commpage_text_address;
3191 	vm_map_size_t           commpage_size, objc_size, commpage_text_size;
3192 	int                     vm_flags;
3193 	vm_map_kernel_flags_t   vmk_flags;
3194 	kern_return_t           kr;
3195 
3196 	SHARED_REGION_TRACE_DEBUG(
3197 		("commpage: -> enter(%p,%p)\n",
3198 		(void *)VM_KERNEL_ADDRPERM(map),
3199 		(void *)VM_KERNEL_ADDRPERM(task)));
3200 
3201 	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
3202 	/* the comm page is likely to be beyond the actual end of the VM map */
3203 	vm_flags = VM_FLAGS_FIXED;
3204 	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
3205 	vmk_flags.vmkf_beyond_max = TRUE;
3206 
3207 	/* select the appropriate comm page for this task */
3208 	assert(!(is64bit ^ vm_map_is_64bit(map)));
3209 	if (is64bit) {
3210 		commpage_handle = commpage64_handle;
3211 		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
3212 		commpage_size = _COMM_PAGE64_AREA_LENGTH;
3213 		objc_size = _COMM_PAGE64_OBJC_SIZE;
3214 		objc_address = _COMM_PAGE64_OBJC_BASE;
3215 		commpage_text_handle = commpage_text64_handle;
3216 		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
3217 	} else {
3218 		commpage_handle = commpage32_handle;
3219 		commpage_address =
3220 		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
3221 		commpage_size = _COMM_PAGE32_AREA_LENGTH;
3222 		objc_size = _COMM_PAGE32_OBJC_SIZE;
3223 		objc_address = _COMM_PAGE32_OBJC_BASE;
3224 		commpage_text_handle = commpage_text32_handle;
3225 		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
3226 	}
3227 
3228 	vm_tag_t tag = VM_KERN_MEMORY_NONE;
3229 	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
3230 	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
3231 		/* the commpage is properly aligned and sized for pmap-nesting */
3232 		tag = VM_MEMORY_SHARED_PMAP;
3233 		vmk_flags.vmkf_nested_pmap = TRUE;
3234 	}
3235 	/* map the comm page in the task's address space */
3236 	assert(commpage_handle != IPC_PORT_NULL);
3237 	kr = vm_map_enter_mem_object(
3238 		map,
3239 		&commpage_address,
3240 		commpage_size,
3241 		0,
3242 		vm_flags,
3243 		vmk_flags,
3244 		tag,
3245 		commpage_handle,
3246 		0,
3247 		FALSE,
3248 		VM_PROT_READ,
3249 		VM_PROT_READ,
3250 		VM_INHERIT_SHARE);
3251 	if (kr != KERN_SUCCESS) {
3252 		SHARED_REGION_TRACE_ERROR(
3253 			("commpage: enter(%p,0x%llx,0x%llx) "
3254 			"commpage %p mapping failed 0x%x\n",
3255 			(void *)VM_KERNEL_ADDRPERM(map),
3256 			(long long)commpage_address,
3257 			(long long)commpage_size,
3258 			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
3259 	}
3260 
3261 	/* map the comm text page in the task's address space */
3262 	assert(commpage_text_handle != IPC_PORT_NULL);
3263 	kr = vm_map_enter_mem_object(
3264 		map,
3265 		&commpage_text_address,
3266 		commpage_text_size,
3267 		0,
3268 		vm_flags,
3269 		vmk_flags,
3270 		tag,
3271 		commpage_text_handle,
3272 		0,
3273 		FALSE,
3274 		VM_PROT_READ | VM_PROT_EXECUTE,
3275 		VM_PROT_READ | VM_PROT_EXECUTE,
3276 		VM_INHERIT_SHARE);
3277 	if (kr != KERN_SUCCESS) {
3278 		SHARED_REGION_TRACE_ERROR(
3279 			("commpage text: enter(%p,0x%llx,0x%llx) "
3280 			"commpage text %p mapping failed 0x%x\n",
3281 			(void *)VM_KERNEL_ADDRPERM(map),
3282 			(long long)commpage_text_address,
3283 			(long long)commpage_text_size,
3284 			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
3285 	}
3286 
3287 	/*
3288 	 * Since we're here, we also pre-allocate some virtual space for the
3289 	 * Objective-C run-time, if needed...
3290 	 */
3291 	if (objc_size != 0) {
3292 		kr = vm_map_enter_mem_object(
3293 			map,
3294 			&objc_address,
3295 			objc_size,
3296 			0,
3297 			VM_FLAGS_FIXED,
3298 			vmk_flags,
3299 			tag,
3300 			IPC_PORT_NULL,
3301 			0,
3302 			FALSE,
3303 			VM_PROT_ALL,
3304 			VM_PROT_ALL,
3305 			VM_INHERIT_DEFAULT);
3306 		if (kr != KERN_SUCCESS) {
3307 			SHARED_REGION_TRACE_ERROR(
3308 				("commpage: enter(%p,0x%llx,0x%llx) "
3309 				"objc mapping failed 0x%x\n",
3310 				(void *)VM_KERNEL_ADDRPERM(map),
3311 				(long long)objc_address,
3312 				(long long)objc_size, kr));
3313 		}
3314 	}
3315 
3316 	SHARED_REGION_TRACE_DEBUG(
3317 		("commpage: enter(%p,%p) <- 0x%x\n",
3318 		(void *)VM_KERNEL_ADDRPERM(map),
3319 		(void *)VM_KERNEL_ADDRPERM(task), kr));
3320 	return kr;
3321 #endif
3322 }
3323 
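/*
 * The pmap-nesting check in vm_commpage_enter() only tags the commpage
 * mapping for a shared (nested) pmap when both the base address and the size
 * are multiples of the pmap's minimum commpage nesting granule.  A minimal
 * sketch of that test; "nesting_granule" is an illustrative parameter here,
 * not a kernel API, and is assumed to be a power of two as nesting sizes are.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
commpage_can_nest_sketch(uint64_t address, uint64_t size, uint64_t nesting_granule)
{
	uint64_t mask = nesting_granule - 1;

	/* both the start address and the length must be granule-aligned */
	return (address & mask) == 0 && (size & mask) == 0;
}
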
3324 int
3325 vm_shared_region_slide(
3326 	uint32_t slide,
3327 	mach_vm_offset_t        entry_start_address,
3328 	mach_vm_size_t          entry_size,
3329 	mach_vm_offset_t        slide_start,
3330 	mach_vm_size_t          slide_size,
3331 	mach_vm_offset_t        slid_mapping,
3332 	memory_object_control_t sr_file_control,
3333 	vm_prot_t               prot)
3334 {
3335 	vm_shared_region_t      sr;
3336 	kern_return_t           error;
3337 
3338 	SHARED_REGION_TRACE_DEBUG(
3339 		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
3340 		slide, entry_start_address, entry_size, slide_start, slide_size));
3341 
3342 	sr = vm_shared_region_get(current_task());
3343 	if (sr == NULL) {
3344 		printf("%s: no shared region?\n", __FUNCTION__);
3345 		SHARED_REGION_TRACE_DEBUG(
3346 			("vm_shared_region_slide: <- %d (no shared region)\n",
3347 			KERN_FAILURE));
3348 		return KERN_FAILURE;
3349 	}
3350 
3351 	/*
3352 	 * Protect from concurrent access.
3353 	 */
3354 	vm_shared_region_lock();
3355 	while (sr->sr_slide_in_progress) {
3356 		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
3357 	}
3358 
3359 	sr->sr_slide_in_progress = TRUE;
3360 	vm_shared_region_unlock();
3361 
3362 	error = vm_shared_region_slide_mapping(sr,
3363 	    (user_addr_t)slide_start,
3364 	    slide_size,
3365 	    entry_start_address,
3366 	    entry_size,
3367 	    slid_mapping,
3368 	    slide,
3369 	    sr_file_control,
3370 	    prot);
3371 	if (error) {
3372 		printf("slide_info initialization failed with kr=%d\n", error);
3373 	}
3374 
3375 	vm_shared_region_lock();
3376 
3377 	assert(sr->sr_slide_in_progress);
3378 	sr->sr_slide_in_progress = FALSE;
3379 	thread_wakeup(&sr->sr_slide_in_progress);
3380 
3381 #if XNU_TARGET_OS_OSX
3382 	if (error == KERN_SUCCESS) {
3383 		shared_region_completed_slide = TRUE;
3384 	}
3385 #endif /* XNU_TARGET_OS_OSX */
3386 	vm_shared_region_unlock();
3387 
3388 	vm_shared_region_deallocate(sr);
3389 
3390 	SHARED_REGION_TRACE_DEBUG(
3391 		("vm_shared_region_slide: <- %d\n",
3392 		error));
3393 
3394 	return error;
3395 }
3396 
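/*
 * vm_shared_region_slide() serializes sliders with the sr_slide_in_progress
 * flag: waiters sleep on the flag while it is set, the winner does the work,
 * then clears it and wakes everyone.  A minimal user-space analogue of that
 * pattern, using pthreads in place of vm_shared_region_sleep() and
 * thread_wakeup(); all names below are illustrative.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t slide_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  slide_cv   = PTHREAD_COND_INITIALIZER;
static bool            slide_in_progress = false;

static void
slide_once_sketch(void (*do_slide_work)(void))
{
	pthread_mutex_lock(&slide_lock);
	while (slide_in_progress) {
		/* analogous to vm_shared_region_sleep(&sr->sr_slide_in_progress, ...) */
		pthread_cond_wait(&slide_cv, &slide_lock);
	}
	slide_in_progress = true;
	pthread_mutex_unlock(&slide_lock);

	do_slide_work();        /* the actual rebase runs outside the lock */

	pthread_mutex_lock(&slide_lock);
	slide_in_progress = false;
	/* analogous to thread_wakeup(&sr->sr_slide_in_progress) */
	pthread_cond_broadcast(&slide_cv);
	pthread_mutex_unlock(&slide_lock);
}
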
3397 /*
3398  * Used during Authenticated Root Volume macOS boot.
3399  * Launchd re-execs itself and wants the new launchd to use
3400  * the shared cache from the new root volume. This call
3401  * makes all the existing shared caches stale to allow
3402  * that to happen.
3403  */
3404 void
3405 vm_shared_region_pivot(void)
3406 {
3407 	vm_shared_region_t      shared_region = NULL;
3408 
3409 	vm_shared_region_lock();
3410 
3411 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3412 		assert(shared_region->sr_ref_count > 0);
3413 		shared_region->sr_stale = TRUE;
3414 		if (shared_region->sr_timer_call) {
3415 			/*
3416 			 * We have a shared region ready to be destroyed
3417 			 * and just waiting for a delayed timer to fire.
3418 			 * Marking it stale cements its ineligibility to
3419 			 * be used ever again. So let's shorten the timer
3420 			 * aggressively down to 10 milliseconds and get rid of it.
3421 			 * This is a single quantum and we don't need to go
3422 			 * shorter than this duration. We want it to be short
3423 			 * enough, however, because we could have an unmount
3424 			 * of the volume hosting this shared region just behind
3425 			 * us.
3426 			 */
3427 			uint64_t deadline;
3428 			assert(shared_region->sr_ref_count == 1);
3429 
3430 			/*
3431 			 * Free the old timer call. Returns with a reference held.
3432 			 * If the old timer has fired and is waiting for the vm_shared_region_lock
3433 			 * lock, we will just return with an additional ref_count i.e. 2.
3434 			 * The old timer will then fire and just drop the ref count down to 1
3435 			 * with no other modifications.
3436 			 */
3437 			vm_shared_region_reference_locked(shared_region);
3438 
3439 			/* set up the timer. Keep the reference from above for this timer.*/
3440 			shared_region->sr_timer_call = thread_call_allocate(
3441 				(thread_call_func_t) vm_shared_region_timeout,
3442 				(thread_call_param_t) shared_region);
3443 
3444 			/* schedule the timer */
3445 			clock_interval_to_deadline(10, /* 10 milliseconds */
3446 			    NSEC_PER_MSEC,
3447 			    &deadline);
3448 			thread_call_enter_delayed(shared_region->sr_timer_call,
3449 			    deadline);
3450 
3451 			SHARED_REGION_TRACE_DEBUG(
3452 				("shared_region: pivot(%p): armed timer\n",
3453 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
3454 		}
3455 	}
3456 
3457 	vm_shared_region_unlock();
3458 }
3459 
3460 /*
3461  * Routine to mark any non-standard slide shared cache region as stale.
3462  * This causes the next "reslide" spawn to create a new shared region.
3463  */
3464 void
3465 vm_shared_region_reslide_stale(void)
3466 {
3467 #if __has_feature(ptrauth_calls)
3468 	vm_shared_region_t      shared_region = NULL;
3469 
3470 	vm_shared_region_lock();
3471 
3472 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3473 		assert(shared_region->sr_ref_count > 0);
3474 		if (!shared_region->sr_stale && shared_region->sr_reslide) {
3475 			shared_region->sr_stale = TRUE;
3476 			vm_shared_region_reslide_count++;
3477 		}
3478 	}
3479 
3480 	vm_shared_region_unlock();
3481 #endif /* __has_feature(ptrauth_calls) */
3482 }
3483 
3484 /*
3485  * Report whether the task is using a reslide shared cache region.
3486  */
3487 bool
3488 vm_shared_region_is_reslide(__unused struct task *task)
3489 {
3490 	bool is_reslide = FALSE;
3491 #if __has_feature(ptrauth_calls)
3492 	vm_shared_region_t sr = vm_shared_region_get(task);
3493 
3494 	if (sr != NULL) {
3495 		is_reslide = sr->sr_reslide;
3496 		vm_shared_region_deallocate(sr);
3497 	}
3498 #endif /* __has_feature(ptrauth_calls) */
3499 	return is_reslide;
3500 }
3501 
3502 /*
3503  * This is called from power management code to let the kernel know the current source of power:
3504  * 0 if it is an external source (connected to AC power),
3505  * 1 if it is an internal power source, i.e. the battery.
3506  */
3507 void
3508 #if XNU_TARGET_OS_OSX
3509 post_sys_powersource(int i)
3510 #else /* XNU_TARGET_OS_OSX */
3511 post_sys_powersource(__unused int i)
3512 #endif /* XNU_TARGET_OS_OSX */
3513 {
3514 #if XNU_TARGET_OS_OSX
3515 	post_sys_powersource_internal(i, 0);
3516 #endif /* XNU_TARGET_OS_OSX */
3517 }
3518 
3519 
3520 #if XNU_TARGET_OS_OSX
3521 static void
3522 post_sys_powersource_internal(int i, int internal)
3523 {
3524 	if (internal == 0) {
3525 		__system_power_source = i;
3526 	}
3527 }
3528 #endif /* XNU_TARGET_OS_OSX */
3529 
3530 void *
3531 vm_shared_region_root_dir(
3532 	struct vm_shared_region *sr)
3533 {
3534 	void *vnode;
3535 
3536 	vm_shared_region_lock();
3537 	vnode = sr->sr_root_dir;
3538 	vm_shared_region_unlock();
3539 	return vnode;
3540 }
3541