xref: /xnu-11215.1.10/osfmk/vm/vm_shared_region.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
10  * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 
24 /*
25  * Shared region (... and comm page)
26  *
27  * This file handles the VM shared region and comm page.
28  *
29  */
30 /*
31  * SHARED REGIONS
32  * --------------
33  *
34  * A shared region is a submap that contains the most common system shared
35  * libraries for a given environment which is defined by:
36  * - cpu-type
37  * - 64-bitness
38  * - root directory
39  * - Team ID - when we have pointer authentication.
40  *
41  * The point of a shared region is to reduce the setup overhead when exec'ing
42  * a new process. A shared region uses a shared VM submap that gets mapped
43  * automatically at exec() time, see vm_map_exec().  The first process of a given
44  * environment sets up the shared region and all further processes in that
45  * environment can re-use that shared region without having to re-create
46  * the same mappings in their VM map.  All they need is contained in the shared
47  * region.
48  *
49  * The region can also share a pmap (mostly for read-only parts but also for the
50  * initial version of some writable parts), which gets "nested" into the
51  * process's pmap.  This reduces the number of soft faults:  once one process
52  * brings in a page in the shared region, all the other processes can access
53  * it without having to enter it in their own pmap.
54  *
55  * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56  * to map the appropriate shared region in the process's address space.
57  * We look up the appropriate shared region for the process's environment.
58  * If we can't find one, we create a new (empty) one and add it to the list.
59  * Otherwise, we just take an extra reference on the shared region we found.
60  *
61  * The "dyld" runtime, mapped into the process's address space at exec() time,
62  * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
63  * system calls to validate and/or populate the shared region with the
64  * appropriate dyld_shared_cache file.
65  *
66  * The shared region is inherited on fork() and the child simply takes an
67  * extra reference on its parent's shared region.
68  *
69  * When the task terminates, we release the reference on its shared region.
70  * When the last reference is released, we destroy the shared region.
71  *
72  * After a chroot(), the calling process keeps using its original shared region,
73  * since that's what was mapped when it was started.  But its children
74  * will use a different shared region, because they need to use the shared
75  * cache that's relative to the new root directory.
76  */
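
/*
 * Illustrative sketch of the lifecycle described above (an assumption about
 * the call sites, which live in vm_map_exec() and the exec/fork/exit paths,
 * not a verbatim copy of them):
 *
 *	vm_shared_region_t sr;
 *
 *	sr = vm_shared_region_lookup(root_dir, cputype, cpu_subtype, is_64bit,
 *	    target_page_shift, reslide, is_driverkit, rsr_version);
 *	                                  // returns with an extra reference held
 *	vm_shared_region_set(task, sr);   // the task now owns that reference
 *	...
 *	// when the task goes away, its reference is dropped:
 *	vm_shared_region_set(task, NULL); // may trigger delayed destruction
 */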
77 
78 /*
79  * COMM PAGE
80  *
81  * A "comm page" is an area of memory that is populated by the kernel with
82  * the appropriate platform-specific version of some commonly used code.
83  * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84  * for the native cpu-type.  No need to overly optimize translated code
85  * for hardware that is not really there !
86  *
87  * The comm pages are created and populated at boot time.
88  *
89  * The appropriate comm page is mapped into a process's address space
90  * at exec() time, in vm_map_exec(). It is then inherited on fork().
91  *
92  * The comm page is shared between the kernel and all applications of
93  * a given platform. Only the kernel can modify it.
94  *
95  * Applications just branch to fixed addresses in the comm page and find
96  * the right version of the code for the platform.  There is also some
97  * data provided and updated by the kernel for processes to retrieve easily
98  * without having to do a system call.
99  */
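
/*
 * Illustrative example (an assumption, not an actual call site): user code
 * typically consumes the comm page with a plain load from one of the fixed
 * addresses published in <machine/cpu_capabilities.h>, for instance:
 *
 *	uint64_t caps = *(volatile uint64_t *)_COMM_PAGE_CPU_CAPABILITIES64;
 *
 * No system call is needed; the kernel keeps those values up to date.
 */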
100 
101 #include <debug.h>
102 
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106 
107 #include <mach/mach_vm.h>
108 #include <mach/machine.h>
109 
110 #include <vm/vm_map_internal.h>
111 #include <vm/vm_memory_entry_xnu.h>
112 #include <vm/vm_shared_region_internal.h>
113 #include <vm/vm_kern_xnu.h>
114 #include <vm/memory_object_internal.h>
115 #include <vm/vm_protos_internal.h>
116 #include <vm/vm_object_internal.h>
117 
118 #include <machine/commpage.h>
119 #include <machine/cpu_capabilities.h>
120 #include <sys/random.h>
121 #include <sys/errno.h>
122 
123 #if defined(__arm64__)
124 #include <arm/cpu_data_internal.h>
125 #include <arm/misc_protos.h>
126 #endif
127 
128 /*
129  * the following codes are used in the  subclass
130  * of the DBG_MACH_SHAREDREGION class
131  */
132 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
133 
134 #if __has_feature(ptrauth_calls)
135 #include <ptrauth.h>
136 #endif /* __has_feature(ptrauth_calls) */
137 
138 /* "dyld" uses this to figure out what the kernel supports */
139 int shared_region_version = 3;
140 
141 /* trace level, output is sent to the system log file */
142 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
143 
144 /* should local (non-chroot) shared regions persist when no task uses them ? */
145 int shared_region_persistence = 0;      /* no by default */
146 
147 
148 /* delay in seconds before reclaiming an unused shared region */
149 TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
150 
151 /*
152  * Cached pointer to the most recently mapped shared region from PID 1, which should
153  * be the most commonly mapped shared region in the system.  There are many processes
154  * which do not use this, for a variety of reasons.
155  *
156  * The main consumer of this is stackshot.
157  */
158 struct vm_shared_region *primary_system_shared_region = NULL;
159 
160 #if XNU_TARGET_OS_OSX
161 /*
162  * Only one cache gets to slide on Desktop, since we can't
163  * tear down slide info properly today and the desktop actually
164  * produces lots of shared caches.
165  */
166 boolean_t shared_region_completed_slide = FALSE;
167 #endif /* XNU_TARGET_OS_OSX */
168 
169 /* this lock protects all the shared region data structures */
170 static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
171 static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
172 
173 #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
174 #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
175 #define vm_shared_region_sleep(event, interruptible)                    \
176 	lck_mtx_sleep_with_inheritor(&vm_shared_region_lock,            \
177 	              LCK_SLEEP_DEFAULT,                                \
178 	              (event_t) (event),                                \
179 	              *(event),                                         \
180 	              (interruptible) | THREAD_WAIT_NOREPORT,           \
181 	              TIMEOUT_WAIT_FOREVER)
182 #define vm_shared_region_wakeup(event)                                  \
183 	wakeup_all_with_inheritor((event), THREAD_AWAKENED)
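
/*
 * The sleep/wakeup pair above is used throughout this file to serialize
 * work on a shared region.  The canonical pattern, as in the mapping and
 * sliding paths below, looks like:
 *
 *	vm_shared_region_lock();
 *	while (shared_region->sr_mapping_in_progress) {
 *		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
 *		    THREAD_UNINT);
 *	}
 *	shared_region->sr_mapping_in_progress = current_thread();
 *	vm_shared_region_unlock();
 *	... do the work without holding the lock ...
 *	vm_shared_region_lock();
 *	shared_region->sr_mapping_in_progress = THREAD_NULL;
 *	vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
 *	vm_shared_region_unlock();
 */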
184 
185 /* the list of currently available shared regions (one per environment) */
186 queue_head_t    vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
187 int             vm_shared_region_count = 0;
188 int             vm_shared_region_peak = 0;
189 static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */
190 
191 /*
192  * the number of times an event has forced the recalculation of the reslide
193  * shared region slide.
194  */
195 #if __has_feature(ptrauth_calls)
196 int                             vm_shared_region_reslide_count = 0;
197 #endif /* __has_feature(ptrauth_calls) */
198 
199 static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
200 static vm_shared_region_t vm_shared_region_create(
201 	void          *root_dir,
202 	cpu_type_t    cputype,
203 	cpu_subtype_t cpu_subtype,
204 	boolean_t     is_64bit,
205 	int           target_page_shift,
206 	boolean_t     reslide,
207 	boolean_t     is_driverkit,
208 	uint32_t      rsr_version);
209 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
210 
211 static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
212 static void vm_shared_region_timeout(thread_call_param_t param0,
213     thread_call_param_t param1);
214 static kern_return_t vm_shared_region_slide_mapping(
215 	vm_shared_region_t sr,
216 	user_addr_t        slide_info_addr,
217 	mach_vm_size_t     slide_info_size,
218 	mach_vm_offset_t   start,
219 	mach_vm_size_t     size,
220 	mach_vm_offset_t   slid_mapping,
221 	uint32_t           slide,
222 	memory_object_control_t,
223 	vm_prot_t          prot); /* forward */
224 
225 static int __commpage_setup = 0;
226 #if XNU_TARGET_OS_OSX
227 static int __system_power_source = 1;   /* init to external power source */
228 static void post_sys_powersource_internal(int i, int internal);
229 #endif /* XNU_TARGET_OS_OSX */
230 
231 extern u_int32_t random(void);
232 
233 /*
234  * Retrieve a task's shared region and grab an extra reference to
235  * make sure it doesn't disappear while the caller is using it.
236  * The caller is responsible for consuming that extra reference if
237  * necessary.
238  */
239 vm_shared_region_t
240 vm_shared_region_get(
241 	task_t          task)
242 {
243 	vm_shared_region_t      shared_region;
244 
245 	SHARED_REGION_TRACE_DEBUG(
246 		("shared_region: -> get(%p)\n",
247 		(void *)VM_KERNEL_ADDRPERM(task)));
248 
249 	task_lock(task);
250 	vm_shared_region_lock();
251 	shared_region = task->shared_region;
252 	if (shared_region) {
253 		assert(shared_region->sr_ref_count > 0);
254 		vm_shared_region_reference_locked(shared_region);
255 	}
256 	vm_shared_region_unlock();
257 	task_unlock(task);
258 
259 	SHARED_REGION_TRACE_DEBUG(
260 		("shared_region: get(%p) <- %p\n",
261 		(void *)VM_KERNEL_ADDRPERM(task),
262 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
263 
264 	return shared_region;
265 }
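
/*
 * Illustrative caller pattern (an assumption, not an actual call site):
 * the extra reference returned by vm_shared_region_get() must eventually
 * be dropped by the caller.
 *
 *	vm_shared_region_t sr = vm_shared_region_get(task);
 *	if (sr != NULL) {
 *		vm_map_t sr_map = vm_shared_region_vm_map(sr);
 *		... use sr / sr_map ...
 *		vm_shared_region_deallocate(sr);
 *	}
 */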
266 
267 vm_map_t
268 vm_shared_region_vm_map(
269 	vm_shared_region_t      shared_region)
270 {
271 	ipc_port_t              sr_handle;
272 	vm_named_entry_t        sr_mem_entry;
273 	vm_map_t                sr_map;
274 
275 	SHARED_REGION_TRACE_DEBUG(
276 		("shared_region: -> vm_map(%p)\n",
277 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
278 	assert(shared_region->sr_ref_count > 0);
279 
280 	sr_handle = shared_region->sr_mem_entry;
281 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
282 	sr_map = sr_mem_entry->backing.map;
283 	assert(sr_mem_entry->is_sub_map);
284 
285 	SHARED_REGION_TRACE_DEBUG(
286 		("shared_region: vm_map(%p) <- %p\n",
287 		(void *)VM_KERNEL_ADDRPERM(shared_region),
288 		(void *)VM_KERNEL_ADDRPERM(sr_map)));
289 	return sr_map;
290 }
291 
292 /*
293  * Set the shared region the process should use.
294  * A NULL new shared region means that we just want to release the old
295  * shared region.
296  * The caller should already have an extra reference on the new shared region
297  * (if any).  We release a reference on the old shared region (if any).
298  */
299 void
300 vm_shared_region_set(
301 	task_t                  task,
302 	vm_shared_region_t      new_shared_region)
303 {
304 	vm_shared_region_t      old_shared_region;
305 
306 	SHARED_REGION_TRACE_DEBUG(
307 		("shared_region: -> set(%p, %p)\n",
308 		(void *)VM_KERNEL_ADDRPERM(task),
309 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
310 
311 	task_lock(task);
312 	vm_shared_region_lock();
313 
314 	old_shared_region = task->shared_region;
315 	if (new_shared_region) {
316 		assert(new_shared_region->sr_ref_count > 0);
317 	}
318 
319 	task->shared_region = new_shared_region;
320 
321 	vm_shared_region_unlock();
322 	task_unlock(task);
323 
324 	if (old_shared_region) {
325 		assert(old_shared_region->sr_ref_count > 0);
326 		vm_shared_region_deallocate(old_shared_region);
327 	}
328 
329 	SHARED_REGION_TRACE_DEBUG(
330 		("shared_region: set(%p) <- old=%p new=%p\n",
331 		(void *)VM_KERNEL_ADDRPERM(task),
332 		(void *)VM_KERNEL_ADDRPERM(old_shared_region),
333 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
334 }
335 
336 /*
337  * New arm64 shared regions match with an existing arm64e region.
338  * They just get a private non-authenticating pager.
339  */
340 static inline bool
341 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
342 {
343 	if (exist == new) {
344 		return true;
345 	}
346 	if (cputype == CPU_TYPE_ARM64 &&
347 	    exist == CPU_SUBTYPE_ARM64E &&
348 	    new == CPU_SUBTYPE_ARM64_ALL) {
349 		return true;
350 	}
351 	return false;
352 }
353 
354 
355 /*
356  * Look up the shared region for the desired environment.
357  * If none is found, create a new (empty) one.
358  * Grab an extra reference on the returned shared region, to make sure
359  * it doesn't get destroyed before the caller is done with it.  The caller
360  * is responsible for consuming that extra reference if necessary.
361  */
362 vm_shared_region_t
363 vm_shared_region_lookup(
364 	void            *root_dir,
365 	cpu_type_t      cputype,
366 	cpu_subtype_t   cpu_subtype,
367 	boolean_t       is_64bit,
368 	int             target_page_shift,
369 	boolean_t       reslide,
370 	boolean_t       is_driverkit,
371 	uint32_t        rsr_version)
372 {
373 	vm_shared_region_t      shared_region;
374 	vm_shared_region_t      new_shared_region;
375 
376 	SHARED_REGION_TRACE_DEBUG(
377 		("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
378 		(void *)VM_KERNEL_ADDRPERM(root_dir),
379 		cputype, cpu_subtype, is_64bit, target_page_shift,
380 		reslide, is_driverkit));
381 
382 	shared_region = NULL;
383 	new_shared_region = NULL;
384 
385 	vm_shared_region_lock();
386 	for (;;) {
387 		queue_iterate(&vm_shared_region_queue,
388 		    shared_region,
389 		    vm_shared_region_t,
390 		    sr_q) {
391 			assert(shared_region->sr_ref_count > 0);
392 			if (shared_region->sr_cpu_type == cputype &&
393 			    match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
394 			    shared_region->sr_root_dir == root_dir &&
395 			    shared_region->sr_64bit == is_64bit &&
396 #if __ARM_MIXED_PAGE_SIZE__
397 			    shared_region->sr_page_shift == target_page_shift &&
398 #endif /* __ARM_MIXED_PAGE_SIZE__ */
399 #if __has_feature(ptrauth_calls)
400 			    shared_region->sr_reslide == reslide &&
401 #endif /* __has_feature(ptrauth_calls) */
402 			    shared_region->sr_driverkit == is_driverkit &&
403 			    shared_region->sr_rsr_version == rsr_version &&
404 			    !shared_region->sr_stale) {
405 				/* found a match ! */
406 				vm_shared_region_reference_locked(shared_region);
407 				goto done;
408 			}
409 		}
410 		if (new_shared_region == NULL) {
411 			/* no match: create a new one */
412 			vm_shared_region_unlock();
413 			new_shared_region = vm_shared_region_create(root_dir,
414 			    cputype,
415 			    cpu_subtype,
416 			    is_64bit,
417 			    target_page_shift,
418 			    reslide,
419 			    is_driverkit,
420 			    rsr_version);
421 			/* do the lookup again, in case we lost a race */
422 			vm_shared_region_lock();
423 			continue;
424 		}
425 		/* still no match: use our new one */
426 		shared_region = new_shared_region;
427 		new_shared_region = NULL;
428 		uint32_t newid = ++vm_shared_region_lastid;
429 		if (newid == 0) {
430 			panic("shared_region: vm_shared_region_lastid wrapped");
431 		}
432 		shared_region->sr_id = newid;
433 		shared_region->sr_install_time = mach_absolute_time();
434 		queue_enter(&vm_shared_region_queue,
435 		    shared_region,
436 		    vm_shared_region_t,
437 		    sr_q);
438 		vm_shared_region_count++;
439 		if (vm_shared_region_count > vm_shared_region_peak) {
440 			vm_shared_region_peak = vm_shared_region_count;
441 		}
442 		break;
443 	}
444 
445 done:
446 	vm_shared_region_unlock();
447 
448 	if (new_shared_region) {
449 		/*
450 		 * We lost a race with someone else to create a new shared
451 		 * region for that environment. Get rid of our unused one.
452 		 */
453 		assert(new_shared_region->sr_ref_count == 1);
454 		new_shared_region->sr_ref_count--;
455 		vm_shared_region_destroy(new_shared_region);
456 		new_shared_region = NULL;
457 	}
458 
459 	SHARED_REGION_TRACE_DEBUG(
460 		("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
461 		(void *)VM_KERNEL_ADDRPERM(root_dir),
462 		cputype, cpu_subtype, is_64bit, target_page_shift,
463 		reslide, is_driverkit,
464 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
465 
466 	assert(shared_region->sr_ref_count > 0);
467 	return shared_region;
468 }
469 
470 /*
471  * Take an extra reference on a shared region.
472  * The vm_shared_region_lock should already be held by the caller.
473  */
474 static void
475 vm_shared_region_reference_locked(
476 	vm_shared_region_t      shared_region)
477 {
478 	LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
479 
480 	SHARED_REGION_TRACE_DEBUG(
481 		("shared_region: -> reference_locked(%p)\n",
482 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
483 	assert(shared_region->sr_ref_count > 0);
484 	shared_region->sr_ref_count++;
485 	assert(shared_region->sr_ref_count != 0);
486 
487 	if (shared_region->sr_timer_call != NULL) {
488 		boolean_t cancelled;
489 
490 		/* cancel and free any pending timeout */
491 		cancelled = thread_call_cancel(shared_region->sr_timer_call);
492 		if (cancelled) {
493 			thread_call_free(shared_region->sr_timer_call);
494 			shared_region->sr_timer_call = NULL;
495 			/* release the reference held by the cancelled timer */
496 			shared_region->sr_ref_count--;
497 		} else {
498 			/* the timer will drop the reference and free itself */
499 		}
500 	}
501 
502 	SHARED_REGION_TRACE_DEBUG(
503 		("shared_region: reference_locked(%p) <- %d\n",
504 		(void *)VM_KERNEL_ADDRPERM(shared_region),
505 		shared_region->sr_ref_count));
506 }
507 
508 /*
509  * Take a reference on a shared region.
510  */
511 void
512 vm_shared_region_reference(vm_shared_region_t shared_region)
513 {
514 	SHARED_REGION_TRACE_DEBUG(
515 		("shared_region: -> reference(%p)\n",
516 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
517 
518 	vm_shared_region_lock();
519 	vm_shared_region_reference_locked(shared_region);
520 	vm_shared_region_unlock();
521 
522 	SHARED_REGION_TRACE_DEBUG(
523 		("shared_region: reference(%p) <- %d\n",
524 		(void *)VM_KERNEL_ADDRPERM(shared_region),
525 		shared_region->sr_ref_count));
526 }
527 
528 /*
529  * Release a reference on the shared region.
530  * Destroy it if there are no references left.
531  */
532 void
533 vm_shared_region_deallocate(
534 	vm_shared_region_t      shared_region)
535 {
536 	SHARED_REGION_TRACE_DEBUG(
537 		("shared_region: -> deallocate(%p)\n",
538 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
539 
540 	vm_shared_region_lock();
541 
542 	assert(shared_region->sr_ref_count > 0);
543 
544 	if (shared_region->sr_root_dir == NULL) {
545 		/*
546 		 * Local (i.e. based on the boot volume) shared regions
547 		 * can persist or not based on the "shared_region_persistence"
548 		 * sysctl.
549 		 * Make sure that this one complies.
550 		 *
551 		 * See comments in vm_shared_region_slide() for notes about
552 		 * shared regions we have slid (which are not torn down currently).
553 		 */
554 		if (shared_region_persistence &&
555 		    !shared_region->sr_persists) {
556 			/* make this one persistent */
557 			shared_region->sr_ref_count++;
558 			shared_region->sr_persists = TRUE;
559 		} else if (!shared_region_persistence &&
560 		    shared_region->sr_persists) {
561 			/* make this one no longer persistent */
562 			assert(shared_region->sr_ref_count > 1);
563 			shared_region->sr_ref_count--;
564 			shared_region->sr_persists = FALSE;
565 		}
566 	}
567 
568 	assert(shared_region->sr_ref_count > 0);
569 	shared_region->sr_ref_count--;
570 	SHARED_REGION_TRACE_DEBUG(
571 		("shared_region: deallocate(%p): ref now %d\n",
572 		(void *)VM_KERNEL_ADDRPERM(shared_region),
573 		shared_region->sr_ref_count));
574 
575 	if (shared_region->sr_ref_count == 0) {
576 		uint64_t deadline;
577 
578 		/*
579 		 * Even though a shared region is unused, delay a while before
580 		 * tearing it down, in case a new app launch can use it.
581 		 * We don't keep around stale shared regions, nor older RSR ones.
582 		 */
583 		if (shared_region->sr_timer_call == NULL &&
584 		    shared_region_destroy_delay != 0 &&
585 		    !shared_region->sr_stale &&
586 		    !(shared_region->sr_rsr_version != 0 &&
587 		    shared_region->sr_rsr_version != rsr_get_version())) {
588 			/* hold one reference for the timer */
589 			assert(!shared_region->sr_mapping_in_progress);
590 			shared_region->sr_ref_count++;
591 
592 			/* set up the timer */
593 			shared_region->sr_timer_call = thread_call_allocate(
594 				(thread_call_func_t) vm_shared_region_timeout,
595 				(thread_call_param_t) shared_region);
596 
597 			/* schedule the timer */
598 			clock_interval_to_deadline(shared_region_destroy_delay,
599 			    NSEC_PER_SEC,
600 			    &deadline);
601 			thread_call_enter_delayed(shared_region->sr_timer_call,
602 			    deadline);
603 
604 			SHARED_REGION_TRACE_DEBUG(
605 				("shared_region: deallocate(%p): armed timer\n",
606 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
607 
608 			vm_shared_region_unlock();
609 		} else {
610 			/* timer expired: let go of this shared region */
611 
612 			/* Make sure there's no cached pointer to the region. */
613 			if (primary_system_shared_region == shared_region) {
614 				primary_system_shared_region = NULL;
615 			}
616 
617 			/*
618 			 * Remove it from the queue first, so no one can find
619 			 * it...
620 			 */
621 			queue_remove(&vm_shared_region_queue,
622 			    shared_region,
623 			    vm_shared_region_t,
624 			    sr_q);
625 			vm_shared_region_count--;
626 			vm_shared_region_unlock();
627 
628 			/* ... and destroy it */
629 			vm_shared_region_destroy(shared_region);
630 			shared_region = NULL;
631 		}
632 	} else {
633 		vm_shared_region_unlock();
634 	}
635 
636 	SHARED_REGION_TRACE_DEBUG(
637 		("shared_region: deallocate(%p) <-\n",
638 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
639 }
640 
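/*
 * Timer callback armed by vm_shared_region_deallocate() above.  When the
 * destroy delay expires, this drops the reference that was taken on behalf
 * of the timer, which tears the region down if nothing started using it
 * again in the meantime.
 */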
641 void
642 vm_shared_region_timeout(
643 	thread_call_param_t     param0,
644 	__unused thread_call_param_t    param1)
645 {
646 	vm_shared_region_t      shared_region;
647 
648 	shared_region = (vm_shared_region_t) param0;
649 
650 	vm_shared_region_deallocate(shared_region);
651 }
652 
653 
654 /*
655  * Create a new (empty) shared region for a new environment.
656  */
657 static vm_shared_region_t
658 vm_shared_region_create(
659 	void                    *root_dir,
660 	cpu_type_t              cputype,
661 	cpu_subtype_t           cpu_subtype,
662 	boolean_t               is_64bit,
663 	int                     target_page_shift,
664 #if !__has_feature(ptrauth_calls)
665 	__unused
666 #endif /* __has_feature(ptrauth_calls) */
667 	boolean_t               reslide,
668 	boolean_t               is_driverkit,
669 	uint32_t                rsr_version)
670 {
671 	vm_named_entry_t        mem_entry;
672 	ipc_port_t              mem_entry_port;
673 	vm_shared_region_t      shared_region;
674 	vm_map_t                sub_map;
675 	mach_vm_offset_t        base_address, pmap_nesting_start;
676 	mach_vm_size_t          size, pmap_nesting_size;
677 
678 	SHARED_REGION_TRACE_INFO(
679 		("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
680 		(void *)VM_KERNEL_ADDRPERM(root_dir),
681 		cputype, cpu_subtype, is_64bit, target_page_shift,
682 		reslide, is_driverkit));
683 
684 	base_address = 0;
685 	size = 0;
686 	mem_entry = NULL;
687 	mem_entry_port = IPC_PORT_NULL;
688 	sub_map = VM_MAP_NULL;
689 
690 	/* create a new shared region structure... */
691 	shared_region = kalloc_type(struct vm_shared_region,
692 	    Z_WAITOK | Z_NOFAIL);
693 
694 	/* figure out the correct settings for the desired environment */
695 	if (is_64bit) {
696 		switch (cputype) {
697 #if defined(__arm64__)
698 		case CPU_TYPE_ARM64:
699 			base_address = SHARED_REGION_BASE_ARM64;
700 			size = SHARED_REGION_SIZE_ARM64;
701 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
702 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
703 			break;
704 #else
705 		case CPU_TYPE_I386:
706 			base_address = SHARED_REGION_BASE_X86_64;
707 			size = SHARED_REGION_SIZE_X86_64;
708 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
709 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
710 			break;
711 		case CPU_TYPE_POWERPC:
712 			base_address = SHARED_REGION_BASE_PPC64;
713 			size = SHARED_REGION_SIZE_PPC64;
714 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
715 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
716 			break;
717 #endif
718 		default:
719 			SHARED_REGION_TRACE_ERROR(
720 				("shared_region: create: unknown cpu type %d\n",
721 				cputype));
722 			kfree_type(struct vm_shared_region, shared_region);
723 			shared_region = NULL;
724 			goto done;
725 		}
726 	} else {
727 		switch (cputype) {
728 #if defined(__arm64__)
729 		case CPU_TYPE_ARM:
730 			base_address = SHARED_REGION_BASE_ARM;
731 			size = SHARED_REGION_SIZE_ARM;
732 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
733 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
734 			break;
735 #else
736 		case CPU_TYPE_I386:
737 			base_address = SHARED_REGION_BASE_I386;
738 			size = SHARED_REGION_SIZE_I386;
739 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
740 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
741 			break;
742 		case CPU_TYPE_POWERPC:
743 			base_address = SHARED_REGION_BASE_PPC;
744 			size = SHARED_REGION_SIZE_PPC;
745 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
746 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
747 			break;
748 #endif
749 		default:
750 			SHARED_REGION_TRACE_ERROR(
751 				("shared_region: create: unknown cpu type %d\n",
752 				cputype));
753 			kfree_type(struct vm_shared_region, shared_region);
754 			shared_region = NULL;
755 			goto done;
756 		}
757 	}
758 
759 	/* create a memory entry structure and a Mach port handle */
760 	mem_entry = mach_memory_entry_allocate(&mem_entry_port);
761 
762 #if defined(__arm64__)
763 	{
764 		struct pmap *pmap_nested;
765 		int pmap_flags = 0;
766 		pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
767 
768 
769 #if __ARM_MIXED_PAGE_SIZE__
770 		if (cputype == CPU_TYPE_ARM64 &&
771 		    target_page_shift == FOURK_PAGE_SHIFT) {
772 			/* arm64/4k address space */
773 			pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
774 		}
775 #endif /* __ARM_MIXED_PAGE_SIZE__ */
776 
777 		pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
778 		if (pmap_nested != PMAP_NULL) {
779 			pmap_set_nested(pmap_nested);
780 			sub_map = vm_map_create_options(pmap_nested, 0,
781 			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
782 
783 			if (is_64bit ||
784 			    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
785 				/* enforce 16KB alignment of VM map entries */
786 				vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
787 			}
788 #if __ARM_MIXED_PAGE_SIZE__
789 			if (cputype == CPU_TYPE_ARM64 &&
790 			    target_page_shift == FOURK_PAGE_SHIFT) {
791 				/* arm64/4k address space */
792 				vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
793 			}
794 #endif /* __ARM_MIXED_PAGE_SIZE__ */
795 		} else {
796 			sub_map = VM_MAP_NULL;
797 		}
798 	}
799 #else /* defined(__arm64__) */
800 	{
801 		/* create a VM sub map and its pmap */
802 		pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
803 		if (pmap != NULL) {
804 			sub_map = vm_map_create_options(pmap, 0,
805 			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
806 		} else {
807 			sub_map = VM_MAP_NULL;
808 		}
809 	}
810 #endif /* defined(__arm64__) */
811 	if (sub_map == VM_MAP_NULL) {
812 		ipc_port_release_send(mem_entry_port);
813 		kfree_type(struct vm_shared_region, shared_region);
814 		shared_region = NULL;
815 		SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
816 		goto done;
817 	}
818 
819 	/* shared regions should always enforce code-signing */
820 	vm_map_cs_enforcement_set(sub_map, true);
821 	assert(vm_map_cs_enforcement(sub_map));
822 	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
823 
824 	assert(!sub_map->disable_vmentry_reuse);
825 	sub_map->is_nested_map = TRUE;
826 
827 	/* make the memory entry point to the VM sub map */
828 	mem_entry->is_sub_map = TRUE;
829 	mem_entry->backing.map = sub_map;
830 	mem_entry->size = size;
831 	mem_entry->protection = VM_PROT_ALL;
832 
833 	/* make the shared region point at the memory entry */
834 	shared_region->sr_mem_entry = mem_entry_port;
835 
836 	/* fill in the shared region's environment and settings */
837 	shared_region->sr_base_address = base_address;
838 	shared_region->sr_size = size;
839 	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
840 	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
841 	shared_region->sr_cpu_type = cputype;
842 	shared_region->sr_cpu_subtype = cpu_subtype;
843 	shared_region->sr_64bit = (uint8_t)is_64bit;
844 #if __ARM_MIXED_PAGE_SIZE__
845 	shared_region->sr_page_shift = (uint8_t)target_page_shift;
846 #endif /* __ARM_MIXED_PAGE_SIZE__ */
847 	shared_region->sr_driverkit = (uint8_t)is_driverkit;
848 	shared_region->sr_rsr_version = rsr_version;
849 	shared_region->sr_root_dir = root_dir;
850 
851 	queue_init(&shared_region->sr_q);
852 	shared_region->sr_mapping_in_progress = THREAD_NULL;
853 	shared_region->sr_slide_in_progress = THREAD_NULL;
854 	shared_region->sr_persists = FALSE;
855 	shared_region->sr_stale = FALSE;
856 	shared_region->sr_timer_call = NULL;
857 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
858 
859 	/* grab a reference for the caller */
860 	shared_region->sr_ref_count = 1;
861 
862 	shared_region->sr_slide = 0; /* not slid yet */
863 
864 	/* Initialize UUID and other metadata */
865 	memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
866 	shared_region->sr_uuid_copied = FALSE;
867 	shared_region->sr_images_count = 0;
868 	shared_region->sr_images = NULL;
869 #if __has_feature(ptrauth_calls)
870 	shared_region->sr_reslide = reslide;
871 	shared_region->sr_num_auth_section = 0;
872 	shared_region->sr_next_auth_section = 0;
873 	shared_region->sr_auth_section = NULL;
874 #endif /* __has_feature(ptrauth_calls) */
875 
876 done:
877 	if (shared_region) {
878 		SHARED_REGION_TRACE_INFO(
879 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
880 			"base=0x%llx,size=0x%llx) <- "
881 			"%p mem=(%p,%p) map=%p pmap=%p\n",
882 			(void *)VM_KERNEL_ADDRPERM(root_dir),
883 			cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
884 			(long long)base_address,
885 			(long long)size,
886 			(void *)VM_KERNEL_ADDRPERM(shared_region),
887 			(void *)VM_KERNEL_ADDRPERM(mem_entry_port),
888 			(void *)VM_KERNEL_ADDRPERM(mem_entry),
889 			(void *)VM_KERNEL_ADDRPERM(sub_map),
890 			(void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
891 	} else {
892 		SHARED_REGION_TRACE_INFO(
893 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
894 			"base=0x%llx,size=0x%llx) <- NULL",
895 			(void *)VM_KERNEL_ADDRPERM(root_dir),
896 			cputype, cpu_subtype, is_64bit, is_driverkit,
897 			(long long)base_address,
898 			(long long)size));
899 	}
900 	return shared_region;
901 }
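
/*
 * Note: a region returned by vm_shared_region_create() starts out empty.
 * It only becomes useful once dyld populates it with the shared cache via
 * the shared_region_map_and_slide_2_np() path (see vm_shared_region_map_file()
 * and the mapping/sliding routines below).
 */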
902 
903 /*
904  * Destroy a now-unused shared region.
905  * The shared region is no longer in the queue and can not be looked up.
906  */
907 static void
908 vm_shared_region_destroy(
909 	vm_shared_region_t      shared_region)
910 {
911 	vm_named_entry_t        mem_entry;
912 	vm_map_t                map;
913 
914 	SHARED_REGION_TRACE_INFO(
915 		("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
916 		(void *)VM_KERNEL_ADDRPERM(shared_region),
917 		(void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
918 		shared_region->sr_cpu_type,
919 		shared_region->sr_cpu_subtype,
920 		shared_region->sr_64bit,
921 		shared_region->sr_driverkit));
922 
923 	assert(shared_region->sr_ref_count == 0);
924 	assert(!shared_region->sr_persists);
925 
926 	mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
927 	assert(mem_entry->is_sub_map);
928 	assert(!mem_entry->internal);
929 	assert(!mem_entry->is_copy);
930 	map = mem_entry->backing.map;
931 
932 	/*
933 	 * Clean up the pmap first.  The virtual addresses that were
934 	 * entered in this possibly "nested" pmap may have different values
935 	 * than the VM map's min and max offsets, if the VM sub map was
936 	 * mapped at a non-zero offset in the processes' main VM maps, which
937 	 * is usually the case, so the clean-up we do in vm_map_destroy() would
938 	 * not be enough.
939 	 */
940 	if (map->pmap) {
941 		pmap_remove(map->pmap,
942 		    (vm_map_offset_t)shared_region->sr_base_address,
943 		    (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
944 	}
945 
946 	/*
947 	 * Release our (one and only) handle on the memory entry.
948 	 * This will generate a no-senders notification, which will be processed
949 	 * by ipc_kobject_notify_no_senders(), which will release the one and only
950 	 * reference on the memory entry and cause it to be destroyed, along
951 	 * with the VM sub map and its pmap.
952 	 */
953 	mach_memory_entry_port_release(shared_region->sr_mem_entry);
954 	mem_entry = NULL;
955 	shared_region->sr_mem_entry = IPC_PORT_NULL;
956 
957 	if (shared_region->sr_timer_call) {
958 		thread_call_free(shared_region->sr_timer_call);
959 	}
960 
961 #if __has_feature(ptrauth_calls)
962 	/*
963 	 * Free the cached copies of slide_info for the AUTH regions.
964 	 */
965 	for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
966 		vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
967 		if (si != NULL) {
968 			vm_object_deallocate(si->si_slide_object);
969 			kfree_data(si->si_slide_info_entry,
970 			    si->si_slide_info_size);
971 			kfree_type(struct vm_shared_region_slide_info, si);
972 			shared_region->sr_auth_section[i] = NULL;
973 		}
974 	}
975 	if (shared_region->sr_auth_section != NULL) {
976 		assert(shared_region->sr_num_auth_section > 0);
977 		kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
978 		shared_region->sr_auth_section = NULL;
979 		shared_region->sr_num_auth_section = 0;
980 	}
981 #endif /* __has_feature(ptrauth_calls) */
982 
983 	/* release the shared region structure... */
984 	kfree_type(struct vm_shared_region, shared_region);
985 
986 	SHARED_REGION_TRACE_DEBUG(
987 		("shared_region: destroy(%p) <-\n",
988 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
989 	shared_region = NULL;
990 }
991 
992 /*
993  * Gets the address of the first (in time) mapping in the shared region.
994  * If used during initial task setup by dyld, task should be non-NULL.
995  */
996 kern_return_t
997 vm_shared_region_start_address(
998 	vm_shared_region_t      shared_region,
999 	mach_vm_offset_t        *start_address,
1000 	task_t                  task)
1001 {
1002 	kern_return_t           kr;
1003 	mach_vm_offset_t        sr_base_address;
1004 	mach_vm_offset_t        sr_first_mapping;
1005 
1006 	SHARED_REGION_TRACE_DEBUG(
1007 		("shared_region: -> start_address(%p)\n",
1008 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1009 
1010 	vm_shared_region_lock();
1011 
1012 	/*
1013 	 * Wait if there's another thread establishing a mapping
1014 	 * in this shared region right when we're looking at it.
1015 	 * We want a consistent view of the map...
1016 	 */
1017 	while (shared_region->sr_mapping_in_progress) {
1018 		/* wait for our turn... */
1019 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1020 		    THREAD_UNINT);
1021 	}
1022 	assert(!shared_region->sr_mapping_in_progress);
1023 	assert(shared_region->sr_ref_count > 0);
1024 
1025 	sr_base_address = shared_region->sr_base_address;
1026 	sr_first_mapping = shared_region->sr_first_mapping;
1027 
1028 	if (sr_first_mapping == (mach_vm_offset_t) -1) {
1029 		/* shared region is empty */
1030 		kr = KERN_INVALID_ADDRESS;
1031 	} else {
1032 		kr = KERN_SUCCESS;
1033 		*start_address = sr_base_address + sr_first_mapping;
1034 	}
1035 
1036 
1037 	uint32_t slide = shared_region->sr_slide;
1038 
1039 	vm_shared_region_unlock();
1040 
1041 	/*
1042 	 * Cache shared region info in the task for telemetry gathering, if we're
1043  * passed in the task. No task lock here as we're still in initial task setup.
1044 	 */
1045 	if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
1046 		uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1047 		if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
1048 		    (char *)&task->task_shared_region_uuid,
1049 		    sizeof(task->task_shared_region_uuid)) == 0) {
1050 			task->task_shared_region_slide = slide;
1051 		}
1052 	}
1053 
1054 	SHARED_REGION_TRACE_DEBUG(
1055 		("shared_region: start_address(%p) <- 0x%llx\n",
1056 		(void *)VM_KERNEL_ADDRPERM(shared_region),
1057 		(long long)shared_region->sr_base_address));
1058 
1059 	return kr;
1060 }
1061 
1062 /*
1063  * Look up a pre-existing mapping in shared region, for replacement.
1064  * Takes an extra object reference if found.
1065  */
1066 static kern_return_t
1067 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1068 {
1069 	vm_map_entry_t found;
1070 
1071 	/* find the shared region's map entry to slide */
1072 	vm_map_lock_read(map);
1073 	if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
1074 		/* no mapping there */
1075 		vm_map_unlock(map);
1076 		return KERN_INVALID_ARGUMENT;
1077 	}
1078 
1079 	*entry = *found;
1080 	/* extra ref to keep object alive while map is unlocked */
1081 	vm_object_reference(VME_OBJECT(found));
1082 	vm_map_unlock_read(map);
1083 	return KERN_SUCCESS;
1084 }
1085 
1086 static bool
1087 shared_region_make_permanent(
1088 	vm_shared_region_t sr,
1089 	vm_prot_t max_prot)
1090 {
1091 	if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1092 		return false;
1093 	}
1094 	if (max_prot & VM_PROT_WRITE) {
1095 		/*
1096 		 * Potentially writable mapping: no major issue with allowing
1097 		 * it to be replaced since its contents could be modified
1098 		 * anyway.
1099 		 */
1100 		return false;
1101 	}
1102 	if (max_prot & VM_PROT_EXECUTE) {
1103 		/*
1104 		 * Potentially executable mapping: some software might want
1105 		 * to try and replace it to interpose their own code when a
1106 		 * given routine is called or returns, for example.
1107 		 * So let's not make it "permanent".
1108 		 */
1109 		return false;
1110 	}
1111 	/*
1112 	 * Make this mapping "permanent" to prevent it from being deleted
1113 	 * and/or replaced with another mapping.
1114 	 */
1115 	return true;
1116 }
1117 
1118 static bool
1119 shared_region_tpro_protect(
1120 	vm_shared_region_t sr,
1121 	vm_prot_t max_prot __unused)
1122 {
1123 	if (sr->sr_cpu_type != CPU_TYPE_ARM64 ||
1124 	    (sr->sr_cpu_subtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E) {
1125 		return false;
1126 	}
1127 
1128 
1129 	/*
1130 	 * Unless otherwise explicitly requested all other mappings do not get
1131 	 * TPRO protection.
1132 	 */
1133 	return false;
1134 }
1135 
1136 #if __has_feature(ptrauth_calls)
1137 
1138 /*
1139  * Determine if this task is actually using pointer signing.
1140  */
1141 static boolean_t
1142 task_sign_pointers(task_t task)
1143 {
1144 	if (task->map &&
1145 	    task->map->pmap &&
1146 	    !task->map->pmap->disable_jop) {
1147 		return TRUE;
1148 	}
1149 	return FALSE;
1150 }
1151 
1152 /*
1153  * If the shared region contains mappings that are authenticated, then
1154  * remap them into the task private map.
1155  *
1156  * Failures are possible in this routine when jetsam kills a process
1157  * just as dyld is trying to set it up. The vm_map and task shared region
1158  * info get torn down w/o waiting for this thread to finish up.
1159  */
1160 __attribute__((noinline))
1161 kern_return_t
1162 vm_shared_region_auth_remap(vm_shared_region_t sr)
1163 {
1164 	memory_object_t               sr_pager = MEMORY_OBJECT_NULL;
1165 	task_t                        task = current_task();
1166 	vm_shared_region_slide_info_t si;
1167 	uint_t                        i;
1168 	vm_object_t                   object;
1169 	vm_map_t                      sr_map;
1170 	struct vm_map_entry           tmp_entry_store = {0};
1171 	vm_map_entry_t                tmp_entry = NULL;
1172 	vm_map_kernel_flags_t         vmk_flags;
1173 	vm_map_offset_t               map_addr;
1174 	kern_return_t                 kr = KERN_SUCCESS;
1175 	boolean_t                     use_ptr_auth = task_sign_pointers(task);
1176 
1177 	/*
1178 	 * Don't do this more than once and avoid any race conditions in finishing it.
1179 	 */
1180 	vm_shared_region_lock();
1181 	while (sr->sr_mapping_in_progress) {
1182 		/* wait for our turn... */
1183 		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
1184 	}
1185 	assert(!sr->sr_mapping_in_progress);
1186 	assert(sr->sr_ref_count > 0);
1187 
1188 	/* Just return if already done. */
1189 	if (task->shared_region_auth_remapped) {
1190 		vm_shared_region_unlock();
1191 		return KERN_SUCCESS;
1192 	}
1193 
1194 	/* let others know to wait while we're working in this shared region */
1195 	sr->sr_mapping_in_progress = current_thread();
1196 	vm_shared_region_unlock();
1197 
1198 	/*
1199 	 * Remap any sections with pointer authentications into the private map.
1200 	 */
1201 	for (i = 0; i < sr->sr_num_auth_section; ++i) {
1202 		si = sr->sr_auth_section[i];
1203 		assert(si != NULL);
1204 		assert(si->si_ptrauth);
1205 
1206 		/*
1207 		 * We have a mapping that needs to be private.
1208 		 * Look for an existing slid mapping's pager with matching
1209 		 * object, offset, slide info and shared_region_id to reuse.
1210 		 */
1211 		object = si->si_slide_object;
1212 		sr_pager = shared_region_pager_match(object, si->si_start, si,
1213 		    use_ptr_auth ? task->jop_pid : 0);
1214 		if (sr_pager == MEMORY_OBJECT_NULL) {
1215 			printf("%s(): shared_region_pager_match() failed\n", __func__);
1216 			kr = KERN_FAILURE;
1217 			goto done;
1218 		}
1219 
1220 		/*
1221 		 * verify matching jop_pid for this task and this pager
1222 		 */
1223 		if (use_ptr_auth) {
1224 			shared_region_pager_match_task_key(sr_pager, task);
1225 		}
1226 
1227 		sr_map = vm_shared_region_vm_map(sr);
1228 		tmp_entry = NULL;
1229 
1230 		kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1231 		if (kr != KERN_SUCCESS) {
1232 			printf("%s(): find_mapping_to_slide() failed\n", __func__);
1233 			goto done;
1234 		}
1235 		tmp_entry = &tmp_entry_store;
1236 
1237 		/*
1238 		 * Check that the object exactly covers the region to slide.
1239 		 */
1240 		if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1241 			printf("%s(): doesn't fully cover\n", __func__);
1242 			kr = KERN_FAILURE;
1243 			goto done;
1244 		}
1245 
1246 		/*
1247 		 * map the pager over the portion of the mapping that needs sliding
1248 		 */
1249 		vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
1250 		vmk_flags.vmkf_overwrite_immutable = true;
1251 		vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
1252 		    tmp_entry->max_protection);
1253 
1254 		/* Preserve the TPRO flag if task has TPRO enabled */
1255 		vmk_flags.vmf_tpro = (vm_map_tpro(task->map) &&
1256 		    tmp_entry->used_for_tpro &&
1257 		    task_is_hardened_binary(task));
1258 
1259 		map_addr = si->si_slid_address;
1260 		kr = mach_vm_map_kernel(task->map,
1261 		    vm_sanitize_wrap_addr_ref(&map_addr),
1262 		    si->si_end - si->si_start,
1263 		    0,
1264 		    vmk_flags,
1265 		    (ipc_port_t)(uintptr_t) sr_pager,
1266 		    0,
1267 		    TRUE,
1268 		    tmp_entry->protection,
1269 		    tmp_entry->max_protection,
1270 		    tmp_entry->inheritance);
1271 		memory_object_deallocate(sr_pager);
1272 		sr_pager = MEMORY_OBJECT_NULL;
1273 		if (kr != KERN_SUCCESS) {
1274 			printf("%s(): mach_vm_map_kernel() failed\n", __func__);
1275 			goto done;
1276 		}
1277 		assertf(map_addr == si->si_slid_address,
1278 		    "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1279 		    (uint64_t)map_addr,
1280 		    (uint64_t)si->si_slid_address,
1281 		    tmp_entry);
1282 
1283 		/* Drop the ref count grabbed by find_mapping_to_slide */
1284 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1285 		tmp_entry = NULL;
1286 	}
1287 
1288 done:
1289 	if (tmp_entry) {
1290 		/* Drop the ref count grabbed by find_mapping_to_slide */
1291 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1292 		tmp_entry = NULL;
1293 	}
1294 
1295 	/*
1296 	 * Drop any extra reference to the pager in case we're quitting due to an error above.
1297 	 */
1298 	if (sr_pager != MEMORY_OBJECT_NULL) {
1299 		memory_object_deallocate(sr_pager);
1300 	}
1301 
1302 	/*
1303 	 * Mark the region as having its auth sections remapped.
1304 	 */
1305 	vm_shared_region_lock();
1306 	task->shared_region_auth_remapped = TRUE;
1307 	assert(sr->sr_mapping_in_progress == current_thread());
1308 	sr->sr_mapping_in_progress = THREAD_NULL;
1309 	vm_shared_region_wakeup((event_t)&sr->sr_mapping_in_progress);
1310 	vm_shared_region_unlock();
1311 	return kr;
1312 }
1313 #endif /* __has_feature(ptrauth_calls) */
1314 
1315 void
1316 vm_shared_region_undo_mappings(
1317 	vm_map_t                 sr_map,
1318 	mach_vm_offset_t         sr_base_address,
1319 	struct _sr_file_mappings *srf_mappings,
1320 	struct _sr_file_mappings *srf_mappings_current,
1321 	unsigned int             srf_current_mappings_count)
1322 {
1323 	unsigned int             j = 0;
1324 	vm_shared_region_t       shared_region = NULL;
1325 	boolean_t                reset_shared_region_state = FALSE;
1326 	struct _sr_file_mappings *srfmp;
1327 	unsigned int             mappings_count;
1328 	struct shared_file_mapping_slide_np *mappings;
1329 
1330 	shared_region = vm_shared_region_get(current_task());
1331 	if (shared_region == NULL) {
1332 		printf("Failed to undo mappings because of NULL shared region.\n");
1333 		return;
1334 	}
1335 
1336 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
1337 
1338 	if (sr_map == NULL) {
1339 		ipc_port_t              sr_handle;
1340 		vm_named_entry_t        sr_mem_entry;
1341 
1342 		vm_shared_region_lock();
1343 		assert(shared_region->sr_ref_count > 0);
1344 
1345 		while (shared_region->sr_mapping_in_progress) {
1346 			/* wait for our turn... */
1347 			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1348 			    THREAD_UNINT);
1349 		}
1350 		assert(!shared_region->sr_mapping_in_progress);
1351 		assert(shared_region->sr_ref_count > 0);
1352 		/* let others know we're working in this shared region */
1353 		shared_region->sr_mapping_in_progress = current_thread();
1354 
1355 		vm_shared_region_unlock();
1356 
1357 		reset_shared_region_state = TRUE;
1358 
1359 		/* no need to lock because this data is never modified... */
1360 		sr_handle = shared_region->sr_mem_entry;
1361 		sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1362 		sr_map = sr_mem_entry->backing.map;
1363 		sr_base_address = shared_region->sr_base_address;
1364 	}
1365 	/*
1366 	 * Undo the mappings we've established so far.
1367 	 */
1368 	for (srfmp = &srf_mappings[0];
1369 	    srfmp <= srf_mappings_current;
1370 	    srfmp++) {
1371 		mappings = srfmp->mappings;
1372 		mappings_count = srfmp->mappings_count;
1373 		if (srfmp == srf_mappings_current) {
1374 			mappings_count = srf_current_mappings_count;
1375 		}
1376 
1377 		for (j = 0; j < mappings_count; j++) {
1378 			kern_return_t kr2;
1379 			mach_vm_offset_t start, end;
1380 
1381 			if (mappings[j].sms_size == 0) {
1382 				/*
1383 				 * We didn't establish this
1384 				 * mapping, so nothing to undo.
1385 				 */
1386 				continue;
1387 			}
1388 			SHARED_REGION_TRACE_INFO(
1389 				("shared_region: mapping[%d]: "
1390 				"address:0x%016llx "
1391 				"size:0x%016llx "
1392 				"offset:0x%016llx "
1393 				"maxprot:0x%x prot:0x%x: "
1394 				"undoing...\n",
1395 				j,
1396 				(long long)mappings[j].sms_address,
1397 				(long long)mappings[j].sms_size,
1398 				(long long)mappings[j].sms_file_offset,
1399 				mappings[j].sms_max_prot,
1400 				mappings[j].sms_init_prot));
1401 			start = (mappings[j].sms_address - sr_base_address);
1402 			end = start + mappings[j].sms_size;
1403 			start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
1404 			end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
1405 			kr2 = vm_map_remove_guard(sr_map,
1406 			    start,
1407 			    end,
1408 			    VM_MAP_REMOVE_IMMUTABLE,
1409 			    KMEM_GUARD_NONE).kmr_return;
1410 			assert(kr2 == KERN_SUCCESS);
1411 		}
1412 	}
1413 
1414 	if (reset_shared_region_state) {
1415 		vm_shared_region_lock();
1416 		assert(shared_region->sr_ref_count > 0);
1417 		assert(shared_region->sr_mapping_in_progress == current_thread());
1418 		/* we're done working on that shared region */
1419 		shared_region->sr_mapping_in_progress = THREAD_NULL;
1420 		vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1421 		vm_shared_region_unlock();
1422 		reset_shared_region_state = FALSE;
1423 	}
1424 
1425 	vm_shared_region_deallocate(shared_region);
1426 }
1427 
1428 /*
1429  * First part of vm_shared_region_map_file(). Split out to
1430  * avoid kernel stack overflow.
1431  */
1432 __attribute__((noinline))
1433 static kern_return_t
1434 vm_shared_region_map_file_setup(
1435 	vm_shared_region_t              shared_region,
1436 	int                             sr_file_mappings_count,
1437 	struct _sr_file_mappings        *sr_file_mappings,
1438 	unsigned int                    *mappings_to_slide_cnt,
1439 	struct shared_file_mapping_slide_np **mappings_to_slide,
1440 	mach_vm_offset_t                *slid_mappings,
1441 	memory_object_control_t         *slid_file_controls,
1442 	mach_vm_offset_t                *sfm_min_address,
1443 	mach_vm_offset_t                *sfm_max_address,
1444 	vm_map_t                        *sr_map_ptr,
1445 	vm_map_offset_t                 *lowest_unnestable_addr_ptr,
1446 	unsigned int                    vmsr_num_slides)
1447 {
1448 	kern_return_t           kr = KERN_SUCCESS;
1449 	memory_object_control_t file_control;
1450 	vm_object_t             file_object;
1451 	ipc_port_t              sr_handle;
1452 	vm_named_entry_t        sr_mem_entry;
1453 	vm_map_t                sr_map;
1454 	mach_vm_offset_t        sr_base_address;
1455 	unsigned int            i = 0;
1456 	mach_port_t             map_port;
1457 	vm_map_offset_t         target_address;
1458 	vm_object_t             object;
1459 	vm_object_size_t        obj_size;
1460 	vm_map_offset_t         lowest_unnestable_addr = 0;
1461 	vm_map_kernel_flags_t   vmk_flags;
1462 	mach_vm_offset_t        sfm_end;
1463 	uint32_t                mappings_count;
1464 	struct shared_file_mapping_slide_np *mappings;
1465 	struct _sr_file_mappings *srfmp;
1466 
1467 	vm_shared_region_lock();
1468 	assert(shared_region->sr_ref_count > 0);
1469 
1470 	/*
1471 	 * Make sure we handle only one mapping at a time in a given
1472 	 * shared region, to avoid race conditions.  This should not
1473 	 * happen frequently...
1474 	 */
1475 	while (shared_region->sr_mapping_in_progress) {
1476 		/* wait for our turn... */
1477 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1478 		    THREAD_UNINT);
1479 	}
1480 	assert(!shared_region->sr_mapping_in_progress);
1481 	assert(shared_region->sr_ref_count > 0);
1482 
1483 
1484 	/* let others know we're working in this shared region */
1485 	shared_region->sr_mapping_in_progress = current_thread();
1486 
1487 	/*
1488 	 * Did someone race in and map this shared region already?
1489 	 */
1490 	if (shared_region->sr_first_mapping != -1) {
1491 		vm_shared_region_unlock();
1492 #if DEVELOPMENT || DEBUG
1493 		printf("shared_region: caught race in map and slide\n");
1494 #endif /* DEVELOPMENT || DEBUG */
1495 		return KERN_FAILURE;
1496 	}
1497 
1498 	vm_shared_region_unlock();
1499 
1500 	/* no need to lock because this data is never modified... */
1501 	sr_handle = shared_region->sr_mem_entry;
1502 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1503 	sr_map = sr_mem_entry->backing.map;
1504 	sr_base_address = shared_region->sr_base_address;
1505 
1506 	SHARED_REGION_TRACE_DEBUG(
1507 		("shared_region: -> map(%p)\n",
1508 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1509 
1510 	mappings_count = 0;
1511 	mappings = NULL;
1512 	srfmp = NULL;
1513 
1514 	/* process all the files to be mapped */
1515 	for (srfmp = &sr_file_mappings[0];
1516 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
1517 	    srfmp++) {
1518 		mappings_count = srfmp->mappings_count;
1519 		mappings = srfmp->mappings;
1520 		file_control = srfmp->file_control;
1521 
1522 		if (mappings_count == 0) {
1523 			/* no mappings here... */
1524 			continue;
1525 		}
1526 
1527 		/*
1528 		 * The code below can only correctly "slide" (perform relocations) for one
1529 		 * value of the slide amount. So if a file has a non-zero slide, it has to
1530 		 * match any previous value. A zero slide value is ok for things that are
1531 		 * just directly mapped.
1532 		 */
1533 		if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1534 			shared_region->sr_slide = srfmp->slide;
1535 		} else if (shared_region->sr_slide != 0 &&
1536 		    srfmp->slide != 0 &&
1537 		    shared_region->sr_slide != srfmp->slide) {
1538 			SHARED_REGION_TRACE_ERROR(
1539 				("shared_region: more than one non-zero slide value "
1540 				"slide 1:0x%x slide 2:0x%x\n ",
1541 				shared_region->sr_slide, srfmp->slide));
1542 			kr = KERN_INVALID_ARGUMENT;
1543 			break;
1544 		}
1545 
1546 		/*
1547 		 * An FD of -1 means we need to copyin the data to an anonymous object.
1548 		 */
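		/*
		 * Rough flow for this case: allocate an anonymous VM object,
		 * map it temporarily into the kernel so the data supplied by
		 * dyld can be copyin()'ed (sms_file_offset is reused as the
		 * user address here), then map the object into the shared
		 * region at its requested address.
		 */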
1549 		if (srfmp->fd == -1) {
1550 			assert(mappings_count == 1);
1551 			SHARED_REGION_TRACE_INFO(
1552 				("shared_region: mapping[0]: "
1553 				"address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
1554 				"maxprot:0x%x prot:0x%x fd==-1\n",
1555 				(long long)mappings[0].sms_address,
1556 				(long long)mappings[0].sms_size,
1557 				(long long)mappings[0].sms_file_offset,
1558 				mappings[0].sms_max_prot,
1559 				mappings[0].sms_init_prot));
1560 
1561 			/*
1562 			 * We need an anon object to hold the data in the shared region.
1563 			 * The size needs to be suitable for mapping into the kernel.
1564 			 */
1565 			obj_size = vm_object_round_page(mappings->sms_size);
1566 			object = vm_object_allocate(obj_size);
1567 			if (object == VM_OBJECT_NULL) {
1568 				printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
1569 				kr = KERN_RESOURCE_SHORTAGE;
1570 				break;
1571 			}
1572 
1573 			/*
1574 			 * map the object into the kernel
1575 			 */
1576 			vm_map_offset_t kaddr = 0;
1577 			vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
1578 			vmk_flags.vmkf_no_copy_on_read = 1;
1579 			vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1580 
1581 			kr = vm_map_enter(kernel_map,
1582 			    &kaddr,
1583 			    obj_size,
1584 			    0,
1585 			    vmk_flags,
1586 			    object,
1587 			    0,
1588 			    FALSE,
1589 			    (VM_PROT_READ | VM_PROT_WRITE),
1590 			    (VM_PROT_READ | VM_PROT_WRITE),
1591 			    VM_INHERIT_NONE);
1592 			if (kr != KERN_SUCCESS) {
1593 				printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
1594 				vm_object_deallocate(object);
1595 				object = VM_OBJECT_NULL;
1596 				break;
1597 			}
1598 
1599 			/*
1600 			 * We'll need another reference to keep the object alive after
1601 			 * we vm_map_remove() it from the kernel.
1602 			 */
1603 			vm_object_reference(object);
1604 
1605 			/*
1606 			 * Zero out the object's pages, so we can't leak data.
1607 			 */
1608 			bzero((void *)kaddr, obj_size);
1609 
1610 			/*
1611 			 * Copyin the data from dyld to the new object.
1612 			 * Then remove the kernel mapping.
1613 			 */
1614 			int copyin_err =
1615 			    copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
1616 			vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
1617 			if (copyin_err) {
1618 				printf("%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
1619 				switch (copyin_err) {
1620 				case EPERM:
1621 				case EACCES:
1622 					kr = KERN_PROTECTION_FAILURE;
1623 					break;
1624 				case EFAULT:
1625 					kr = KERN_INVALID_ADDRESS;
1626 					break;
1627 				default:
1628 					kr = KERN_FAILURE;
1629 					break;
1630 				}
1631 				vm_object_deallocate(object);
1632 				object = VM_OBJECT_NULL;
1633 				break;
1634 			}
1635 
1636 			/*
1637 			 * Finally map the object into the shared region.
1638 			 */
1639 			target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
1640 			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1641 			vmk_flags.vmkf_already = TRUE;
1642 			vmk_flags.vmkf_no_copy_on_read = 1;
1643 			vmk_flags.vmf_permanent = shared_region_make_permanent(shared_region,
1644 			    mappings[0].sms_max_prot);
1645 
1646 			kr = vm_map_enter(
1647 				sr_map,
1648 				&target_address,
1649 				vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1650 				0,
1651 				vmk_flags,
1652 				object,
1653 				0,
1654 				TRUE,
1655 				mappings[0].sms_init_prot & VM_PROT_ALL,
1656 				mappings[0].sms_max_prot & VM_PROT_ALL,
1657 				VM_INHERIT_DEFAULT);
1658 			if (kr != KERN_SUCCESS) {
1659 				printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
1660 				vm_object_deallocate(object);
1661 				break;
1662 			}
1663 
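			/*
			 * Track the overall [sfm_min_address, sfm_max_address)
			 * bounds of everything mapped so far, rejecting any
			 * address + size combination that would wrap around.
			 */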
1664 			if (mappings[0].sms_address < *sfm_min_address) {
1665 				*sfm_min_address = mappings[0].sms_address;
1666 			}
1667 
1668 			if (os_add_overflow(mappings[0].sms_address,
1669 			    mappings[0].sms_size,
1670 			    &sfm_end) ||
1671 			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1672 			    mappings[0].sms_address)) {
1673 				/* overflow */
1674 				kr = KERN_INVALID_ARGUMENT;
1675 				break;
1676 			}
1677 
1678 			if (sfm_end > *sfm_max_address) {
1679 				*sfm_max_address = sfm_end;
1680 			}
1681 
1682 			continue;
1683 		}
1684 
1685 		/* get the VM object associated with the file to be mapped */
1686 		file_object = memory_object_control_to_vm_object(file_control);
1687 		assert(file_object);
1688 
1689 		if (!file_object->object_is_shared_cache) {
1690 			vm_object_lock(file_object);
1691 			file_object->object_is_shared_cache = true;
1692 			vm_object_unlock(file_object);
1693 		}
1694 
1695 #if CONFIG_SECLUDED_MEMORY
1696 		/*
1697 		 * Camera will need the shared cache, so don't put the pages
1698 		 * on the secluded queue, assume that's the primary region.
1699 		 * Also keep DEXT shared cache pages off secluded.
1700 		 */
1701 		if (primary_system_shared_region == NULL ||
1702 		    primary_system_shared_region == shared_region ||
1703 		    shared_region->sr_driverkit) {
1704 			memory_object_mark_eligible_for_secluded(file_control, FALSE);
1705 		}
1706 #endif /* CONFIG_SECLUDED_MEMORY */
1707 
1708 		/* establish the mappings for that file */
1709 		for (i = 0; i < mappings_count; i++) {
1710 			SHARED_REGION_TRACE_INFO(
1711 				("shared_region: mapping[%d]: "
1712 				"address:0x%016llx size:0x%016llx offset:0x%016llx "
1713 				"maxprot:0x%x prot:0x%x\n",
1714 				i,
1715 				(long long)mappings[i].sms_address,
1716 				(long long)mappings[i].sms_size,
1717 				(long long)mappings[i].sms_file_offset,
1718 				mappings[i].sms_max_prot,
1719 				mappings[i].sms_init_prot));
1720 
1721 			if (mappings[i].sms_address < *sfm_min_address) {
1722 				*sfm_min_address = mappings[i].sms_address;
1723 			}
1724 
1725 			if (os_add_overflow(mappings[i].sms_address,
1726 			    mappings[i].sms_size,
1727 			    &sfm_end) ||
1728 			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1729 			    mappings[i].sms_address)) {
1730 				/* overflow */
1731 				kr = KERN_INVALID_ARGUMENT;
1732 				break;
1733 			}
1734 
1735 			if (sfm_end > *sfm_max_address) {
1736 				*sfm_max_address = sfm_end;
1737 			}
1738 
1739 			if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1740 				/* zero-filled memory */
1741 				map_port = MACH_PORT_NULL;
1742 			} else {
1743 				/* file-backed memory */
1744 				__IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1745 			}
1746 
1747 			/*
1748 			 * Remember which mappings need sliding.
1749 			 */
1750 			if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1751 				if (*mappings_to_slide_cnt == vmsr_num_slides) {
1752 					SHARED_REGION_TRACE_INFO(
1753 						("shared_region: mapping[%d]: "
1754 						"address:0x%016llx size:0x%016llx "
1755 						"offset:0x%016llx "
1756 						"maxprot:0x%x prot:0x%x "
1757 						"too many mappings to slide...\n",
1758 						i,
1759 						(long long)mappings[i].sms_address,
1760 						(long long)mappings[i].sms_size,
1761 						(long long)mappings[i].sms_file_offset,
1762 						mappings[i].sms_max_prot,
1763 						mappings[i].sms_init_prot));
1764 				} else {
1765 					mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1766 					*mappings_to_slide_cnt += 1;
1767 				}
1768 			}
1769 
1770 			/* mapping's address is relative to the shared region base */
1771 			target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);
1772 
1773 			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1774 			vmk_flags.vmkf_already = TRUE;
1775 			/* no copy-on-read for mapped binaries */
1776 			vmk_flags.vmkf_no_copy_on_read = 1;
1777 			vmk_flags.vmf_permanent = shared_region_make_permanent(
1778 				shared_region,
1779 				mappings[i].sms_max_prot);
1780 			vmk_flags.vmf_tpro = shared_region_tpro_protect(
1781 				shared_region,
1782 				mappings[i].sms_max_prot);
1783 
1784 			/* establish that mapping, OK if it's "already" there */
1785 			if (map_port == MACH_PORT_NULL) {
1786 				/*
1787 				 * We want to map some anonymous memory in a shared region.
1788 				 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1789 				 */
1790 				obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1791 				object = vm_object_allocate(obj_size);
1792 				if (object == VM_OBJECT_NULL) {
1793 					kr = KERN_RESOURCE_SHORTAGE;
1794 				} else {
1795 					kr = vm_map_enter(
1796 						sr_map,
1797 						&target_address,
1798 						vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1799 						0,
1800 						vmk_flags,
1801 						object,
1802 						0,
1803 						TRUE,
1804 						mappings[i].sms_init_prot & VM_PROT_ALL,
1805 						mappings[i].sms_max_prot & VM_PROT_ALL,
1806 						VM_INHERIT_DEFAULT);
1807 				}
1808 			} else {
1809 				object = VM_OBJECT_NULL; /* no anonymous memory here */
1810 				kr = mach_vm_map_kernel(
1811 					sr_map,
1812 					vm_sanitize_wrap_addr_ref(&target_address),
1813 					vm_map_round_page(
1814 						mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1815 					0,
1816 					vmk_flags,
1817 					map_port,
1818 					mappings[i].sms_file_offset,
1819 					TRUE,
1820 					mappings[i].sms_init_prot & VM_PROT_ALL,
1821 					mappings[i].sms_max_prot & VM_PROT_ALL,
1822 					VM_INHERIT_DEFAULT);
1823 			}
1824 
1825 			if (kr == KERN_SUCCESS) {
1826 				/*
1827 				 * Record the first successful mapping(s) in the shared
1828 				 * region by file. We're protected by "sr_mapping_in_progress"
1829 				 * here, so no need to lock "shared_region".
1830 				 *
1831 				 * Note that if we have an AOT shared cache (ARM) for a
1832 				 * translated task, then it's always the first file.
1833 				 * The original "native" (i.e. x86) shared cache is the
1834 				 * second file.
1835 				 */
1836 
1837 				if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1838 					shared_region->sr_first_mapping = target_address;
1839 				}
1840 
1841 				if (*mappings_to_slide_cnt > 0 &&
1842 				    mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1843 					slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1844 					slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1845 				}
1846 
1847 				/*
1848 				 * Record the lowest writable address in this
1849 				 * sub map, to log any unexpected unnesting below
1850 				 * that address (see log_unnest_badness()).
1851 				 */
1852 				if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1853 				    sr_map->is_nested_map &&
1854 				    (lowest_unnestable_addr == 0 ||
1855 				    (target_address < lowest_unnestable_addr))) {
1856 					lowest_unnestable_addr = target_address;
1857 				}
1858 			} else {
1859 				if (map_port == MACH_PORT_NULL) {
1860 					/*
1861 					 * Get rid of the VM object we just created
1862 					 * but failed to map.
1863 					 */
1864 					vm_object_deallocate(object);
1865 					object = VM_OBJECT_NULL;
1866 				}
1867 				if (kr == KERN_MEMORY_PRESENT) {
1868 					/*
1869 					 * This exact mapping was already there:
1870 					 * that's fine.
1871 					 */
1872 					SHARED_REGION_TRACE_INFO(
1873 						("shared_region: mapping[%d]: "
1874 						"address:0x%016llx size:0x%016llx "
1875 						"offset:0x%016llx "
1876 						"maxprot:0x%x prot:0x%x "
1877 						"already mapped...\n",
1878 						i,
1879 						(long long)mappings[i].sms_address,
1880 						(long long)mappings[i].sms_size,
1881 						(long long)mappings[i].sms_file_offset,
1882 						mappings[i].sms_max_prot,
1883 						mappings[i].sms_init_prot));
1884 					/*
1885 					 * We didn't establish this mapping ourselves;
1886 					 * let's reset its size, so that we do not
1887 					 * attempt to undo it if an error occurs later.
1888 					 */
1889 					mappings[i].sms_size = 0;
1890 					kr = KERN_SUCCESS;
1891 				} else {
1892 					break;
1893 				}
1894 			}
1895 		}
1896 
1897 		if (kr != KERN_SUCCESS) {
1898 			break;
1899 		}
1900 	}
1901 
1902 	if (kr != KERN_SUCCESS) {
1903 		/* the last mapping we tried (mappings[i]) failed ! */
1904 		assert(i < mappings_count);
1905 		SHARED_REGION_TRACE_ERROR(
1906 			("shared_region: mapping[%d]: "
1907 			"address:0x%016llx size:0x%016llx "
1908 			"offset:0x%016llx "
1909 			"maxprot:0x%x prot:0x%x failed 0x%x\n",
1910 			i,
1911 			(long long)mappings[i].sms_address,
1912 			(long long)mappings[i].sms_size,
1913 			(long long)mappings[i].sms_file_offset,
1914 			mappings[i].sms_max_prot,
1915 			mappings[i].sms_init_prot,
1916 			kr));
1917 
1918 		/*
1919 	 * Respect the design of vm_shared_region_undo_mappings():
1920 	 * since we are holding sr_mapping_in_progress here, sr_map must not
1921 	 * be NULL, otherwise vm_shared_region_undo_mappings() would block
1922 	 * waiting for sr_mapping_in_progress to become NULL.
1923 		 */
1924 		assert(sr_map != NULL);
1925 		/* undo all the previous mappings */
1926 		vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
1927 		return kr;
1928 	}
1929 
1930 	*lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1931 	*sr_map_ptr = sr_map;
1932 	return KERN_SUCCESS;
1933 }
1934 
1935 /* forward declaration */
1936 __attribute__((noinline))
1937 static void
1938 vm_shared_region_map_file_final(
1939 	vm_shared_region_t shared_region,
1940 	vm_map_t           sr_map,
1941 	mach_vm_offset_t   sfm_min_address,
1942 	mach_vm_offset_t   sfm_max_address);
1943 
1944 /*
1945  * Establish some mappings of a file in the shared region.
1946  * This is used by "dyld" via the shared_region_map_np() system call
1947  * to populate the shared region with the appropriate shared cache.
1948  *
1949  * One could also call it several times to incrementally load several
1950  * libraries, as long as they do not overlap.
1951  * It will return KERN_SUCCESS if the mappings were successfully established
1952  * or if they were already established identically by another process.
1953  */
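/*
 * Roughly: vm_shared_region_map_file_setup() installs the direct file
 * mappings, vm_shared_region_slide() then replaces the mappings that need
 * relocation with a shared region pager, and vm_shared_region_map_file_final()
 * records the cache UUID/layout and trims the pmaps.
 */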
1954 __attribute__((noinline))
1955 kern_return_t
1956 vm_shared_region_map_file(
1957 	vm_shared_region_t       shared_region,
1958 	int                      sr_file_mappings_count,
1959 	struct _sr_file_mappings *sr_file_mappings)
1960 {
1961 	kern_return_t           kr = KERN_SUCCESS;
1962 	unsigned int            i;
1963 	unsigned int            mappings_to_slide_cnt = 0;
1964 	mach_vm_offset_t        sfm_min_address = (mach_vm_offset_t)-1;
1965 	mach_vm_offset_t        sfm_max_address = 0;
1966 	vm_map_t                sr_map = NULL;
1967 	vm_map_offset_t         lowest_unnestable_addr = 0;
1968 	unsigned int            vmsr_num_slides = 0;
1969 	typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
1970 	slid_mappings_t         *slid_mappings = NULL;                  /* [0..vmsr_num_slides] */
1971 	memory_object_control_t *slid_file_controls = NULL;             /* [0..vmsr_num_slides] */
1972 	struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
1973 	struct _sr_file_mappings *srfmp;
1974 
1975 	/*
1976 	 * Figure out how many of the mappings have slides.
1977 	 */
1978 	for (srfmp = &sr_file_mappings[0];
1979 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
1980 	    srfmp++) {
1981 		for (i = 0; i < srfmp->mappings_count; ++i) {
1982 			if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1983 				++vmsr_num_slides;
1984 			}
1985 		}
1986 	}
1987 
1988 	/* Allocate per slide data structures */
1989 	if (vmsr_num_slides > 0) {
1990 		slid_mappings =
1991 		    kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
1992 		slid_file_controls =
1993 		    kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
1994 		mappings_to_slide =
1995 		    kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
1996 	}
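	/*
	 * These three arrays are parallel: entry i holds the target address,
	 * file control and mapping descriptor of the i-th mapping that needs
	 * sliding, as filled in by vm_shared_region_map_file_setup() below.
	 */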
1997 
1998 	kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
1999 	    &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
2000 	    &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
2001 	if (kr != KERN_SUCCESS) {
2002 		vm_shared_region_lock();
2003 		goto done;
2004 	}
2005 	assert(vmsr_num_slides == mappings_to_slide_cnt);
2006 
2007 	/*
2008 	 * The call above installed direct mappings to the shared cache file.
2009 	 * Now we go back and overwrite the mappings that need relocation
2010 	 * with a special shared region pager.
2011 	 *
2012 	 * Note that this does copyin() of data, needed by the pager, which
2013 	 * the previous code just established mappings for. This is why we
2014 	 * do it in a separate pass.
2015 	 */
2016 #if __has_feature(ptrauth_calls)
2017 	/*
2018 	 * Allocate the storage needed for any sr_auth_sections.
2019 	 */
2020 	for (i = 0; i < mappings_to_slide_cnt; ++i) {
2021 		if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
2022 		    shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2023 		    !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
2024 			++shared_region->sr_num_auth_section;
2025 		}
2026 	}
2027 	if (shared_region->sr_num_auth_section > 0) {
2028 		shared_region->sr_auth_section =
2029 		    kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
2030 		    Z_WAITOK | Z_ZERO);
2031 	}
2032 #endif /* __has_feature(ptrauth_calls) */
2033 	for (i = 0; i < mappings_to_slide_cnt; ++i) {
2034 		kr = vm_shared_region_slide(shared_region->sr_slide,
2035 		    mappings_to_slide[i]->sms_file_offset,
2036 		    mappings_to_slide[i]->sms_size,
2037 		    mappings_to_slide[i]->sms_slide_start,
2038 		    mappings_to_slide[i]->sms_slide_size,
2039 		    slid_mappings[i],
2040 		    slid_file_controls[i],
2041 		    mappings_to_slide[i]->sms_max_prot);
2042 		if (kr != KERN_SUCCESS) {
2043 			SHARED_REGION_TRACE_ERROR(
2044 				("shared_region: region_slide("
2045 				"slide:0x%x start:0x%016llx "
2046 				"size:0x%016llx) failed 0x%x\n",
2047 				shared_region->sr_slide,
2048 				(long long)mappings_to_slide[i]->sms_slide_start,
2049 				(long long)mappings_to_slide[i]->sms_slide_size,
2050 				kr));
2051 			vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
2052 			    &sr_file_mappings[0],
2053 			    &sr_file_mappings[sr_file_mappings_count - 1],
2054 			    sr_file_mappings_count);
2055 			vm_shared_region_lock();
2056 			goto done;
2057 		}
2058 	}
2059 
2060 	assert(kr == KERN_SUCCESS);
2061 
2062 	/* adjust the map's "lowest_unnestable_start" */
2063 	lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
2064 	if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
2065 		vm_map_lock(sr_map);
2066 		sr_map->lowest_unnestable_start = lowest_unnestable_addr;
2067 		vm_map_unlock(sr_map);
2068 	}
2069 
2070 	vm_shared_region_lock();
2071 	assert(shared_region->sr_ref_count > 0);
2072 	assert(shared_region->sr_mapping_in_progress == current_thread());
2073 
2074 	vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
2075 
2076 done:
2077 	/*
2078 	 * We're done working on that shared region.
2079 	 * Wake up any waiting threads.
2080 	 */
2081 	assert(shared_region->sr_mapping_in_progress == current_thread());
2082 	shared_region->sr_mapping_in_progress = THREAD_NULL;
2083 	vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
2084 	vm_shared_region_unlock();
2085 
2086 #if __has_feature(ptrauth_calls)
2087 	if (kr == KERN_SUCCESS) {
2088 		/*
2089 		 * Since authenticated mappings were just added to the shared region,
2090 		 * go back and remap them into private mappings for this task.
2091 		 */
2092 		kr = vm_shared_region_auth_remap(shared_region);
2093 	}
2094 #endif /* __has_feature(ptrauth_calls) */
2095 
2096 	/* Cache shared region info needed for telemetry in the task */
2097 	task_t task;
2098 	if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
2099 		mach_vm_offset_t start_address;
2100 		(void)vm_shared_region_start_address(shared_region, &start_address, task);
2101 	}
2102 
2103 	SHARED_REGION_TRACE_DEBUG(
2104 		("shared_region: map(%p) <- 0x%x \n",
2105 		(void *)VM_KERNEL_ADDRPERM(shared_region), kr));
2106 	if (vmsr_num_slides > 0) {
2107 		kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
2108 		kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
2109 		kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
2110 		    mappings_to_slide);
2111 	}
2112 	return kr;
2113 }
2114 
2115 /*
2116  * Final part of vm_shared_region_map_file().
2117  * Kept in separate function to avoid blowing out the stack.
2118  */
2119 __attribute__((noinline))
2120 static void
2121 vm_shared_region_map_file_final(
2122 	vm_shared_region_t        shared_region,
2123 	vm_map_t                  sr_map __unused,
2124 	mach_vm_offset_t          sfm_min_address __unused,
2125 	mach_vm_offset_t          sfm_max_address __unused)
2126 {
2127 	struct _dyld_cache_header sr_cache_header;
2128 	int                       error;
2129 	size_t                    image_array_length;
2130 	struct _dyld_cache_image_text_info *sr_image_layout;
2131 	boolean_t                 locally_built = FALSE;
2132 
2133 
2134 	/*
2135 	 * Copy the shared region UUID into the shared region structure.
2136 	 * We do this indirectly by first copying in the shared cache header
2137 	 * and then copying the UUID from there, because we'll also need to
2138 	 * look at other content from the shared cache header.
2139 	 */
2140 	if (!shared_region->sr_uuid_copied) {
2141 		error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
2142 		    (char *)&sr_cache_header,
2143 		    sizeof(sr_cache_header));
2144 		if (error == 0) {
2145 			memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
2146 			shared_region->sr_uuid_copied = TRUE;
2147 			locally_built = sr_cache_header.locallyBuiltCache;
2148 		} else {
2149 #if DEVELOPMENT || DEBUG
2150 			panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2151 			    "offset:0 size:0x%016llx) failed with %d\n",
2152 			    (long long)shared_region->sr_base_address,
2153 			    (long long)shared_region->sr_first_mapping,
2154 			    (long long)sizeof(sr_cache_header),
2155 			    error);
2156 #endif /* DEVELOPMENT || DEBUG */
2157 			shared_region->sr_uuid_copied = FALSE;
2158 		}
2159 	}
2160 
2161 	/*
2162 	 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd.  This is used by
2163 	 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
2164 	 * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
2165 	 * region.  In that case, launchd re-exec's itself, so we may go through this path multiple times.  We
2166 	 * let the most recent one win.
2167 	 *
2168 	 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
2169 	 */
2170 	bool is_init_task = (task_pid(current_task()) == 1);
2171 	if (shared_region->sr_uuid_copied && is_init_task) {
2172 		/* Copy in the shared cache layout if we're running with a locally built shared cache */
2173 		if (locally_built) {
2174 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
2175 			image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
2176 			sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
2177 			error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
2178 			    sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
2179 			if (error == 0) {
2180 				if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
2181 					panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
2182 				}
2183 				shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
2184 				for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
2185 					memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
2186 					    sizeof(shared_region->sr_images[index].imageUUID));
2187 					shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
2188 				}
2189 
2190 				shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
2191 			} else {
2192 #if DEVELOPMENT || DEBUG
2193 				panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2194 				    "offset:0x%016llx size:0x%016llx) failed with %d\n",
2195 				    (long long)shared_region->sr_base_address,
2196 				    (long long)shared_region->sr_first_mapping,
2197 				    (long long)sr_cache_header.imagesTextOffset,
2198 				    (long long)image_array_length,
2199 				    error);
2200 #endif /* DEVELOPMENT || DEBUG */
2201 			}
2202 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
2203 			kfree_data(sr_image_layout, image_array_length);
2204 			sr_image_layout = NULL;
2205 		}
2206 		primary_system_shared_region = shared_region;
2207 	}
2208 
2209 #ifndef NO_NESTED_PMAP
2210 	/*
2211 	 * If we succeeded, we know the bounds of the shared region.
2212 	 * Trim our pmaps to only cover this range (if applicable to
2213 	 * this platform).
2214 	 */
2215 	if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
2216 		pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
2217 	}
2218 #endif
2219 }
2220 
2221 /*
2222  * Retrieve a task's shared region and grab an extra reference to
2223  * make sure it doesn't disappear while the caller is using it.
2224  * The caller is responsible for consuming that extra reference if
2225  * necessary.
2226  *
2227  * This also tries to trim the pmap for the shared region.
2228  */
2229 vm_shared_region_t
2230 vm_shared_region_trim_and_get(task_t task)
2231 {
2232 	vm_shared_region_t shared_region;
2233 	ipc_port_t sr_handle;
2234 	vm_named_entry_t sr_mem_entry;
2235 	vm_map_t sr_map;
2236 
2237 	/* Get the shared region and the map. */
2238 	shared_region = vm_shared_region_get(task);
2239 	if (shared_region == NULL) {
2240 		return NULL;
2241 	}
2242 
2243 	sr_handle = shared_region->sr_mem_entry;
2244 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
2245 	sr_map = sr_mem_entry->backing.map;
2246 
2247 #ifndef NO_NESTED_PMAP
2248 	/* Trim the pmap if possible. */
2249 	if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
2250 		pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
2251 	}
2252 #endif
2253 
2254 	return shared_region;
2255 }
2256 
2257 /*
2258  * Enter the appropriate shared region into "map" for "task".
2259  * This involves looking up the shared region (and possibly creating a new
2260  * one) for the desired environment, then mapping the VM sub map into the
2261  * task's VM "map", with the appropriate level of pmap-nesting.
2262  */
2263 kern_return_t
2264 vm_shared_region_enter(
2265 	struct _vm_map          *map,
2266 	struct task             *task,
2267 	boolean_t               is_64bit,
2268 	void                    *fsroot,
2269 	cpu_type_t              cpu,
2270 	cpu_subtype_t           cpu_subtype,
2271 	boolean_t               reslide,
2272 	boolean_t               is_driverkit,
2273 	uint32_t                rsr_version)
2274 {
2275 	kern_return_t           kr;
2276 	vm_shared_region_t      shared_region;
2277 	vm_map_offset_t         sr_address, sr_offset, target_address;
2278 	vm_map_size_t           sr_size, mapping_size;
2279 	vm_map_offset_t         sr_pmap_nesting_start;
2280 	vm_map_size_t           sr_pmap_nesting_size;
2281 	ipc_port_t              sr_handle;
2282 	vm_prot_t               cur_prot, max_prot;
2283 	vm_map_kernel_flags_t   vmk_flags;
2284 
2285 	SHARED_REGION_TRACE_DEBUG(
2286 		("shared_region: -> "
2287 		"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2288 		(void *)VM_KERNEL_ADDRPERM(map),
2289 		(void *)VM_KERNEL_ADDRPERM(task),
2290 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2291 		cpu, cpu_subtype, is_64bit, is_driverkit));
2292 
2293 	/* lookup (create if needed) the shared region for this environment */
2294 	shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
2295 	if (shared_region == NULL) {
2296 		/* this should not happen ! */
2297 		SHARED_REGION_TRACE_ERROR(
2298 			("shared_region: -> "
2299 			"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2300 			"lookup failed !\n",
2301 			(void *)VM_KERNEL_ADDRPERM(map),
2302 			(void *)VM_KERNEL_ADDRPERM(task),
2303 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2304 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2305 		//panic("shared_region_enter: lookup failed");
2306 		return KERN_FAILURE;
2307 	}
2308 
2309 	kr = KERN_SUCCESS;
2310 	/* no need to lock since this data is never modified */
2311 	sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2312 	sr_size = (vm_map_size_t)shared_region->sr_size;
2313 	sr_handle = shared_region->sr_mem_entry;
2314 	sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2315 	sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2316 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
2317 
2318 	cur_prot = VM_PROT_READ;
2319 	if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2320 		/*
2321 		 * XXX BINARY COMPATIBILITY
2322 		 * java6 apparently needs to modify some code in the
2323 		 * dyld shared cache and needs to be allowed to add
2324 		 * write access...
2325 		 */
2326 		max_prot = VM_PROT_ALL;
2327 	} else {
2328 		max_prot = VM_PROT_READ;
2329 		/* make it "permanent" to protect against re-mappings */
2330 		vmk_flags.vmf_permanent = true;
2331 	}
2332 
2333 	/*
2334 	 * Start mapping the shared region's VM sub map into the task's VM map.
2335 	 */
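	/*
	 * The sub map is entered in up to three pieces:
	 *   [sr_address, sr_pmap_nesting_start)                          not pmap-nested
	 *   [sr_pmap_nesting_start, sr_pmap_nesting_start + size)        pmap-nested
	 *   anything left after the nested range                         not pmap-nested
	 * On most configurations the nested range covers the whole shared
	 * region, so only the middle mapping is made.
	 */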
2336 	sr_offset = 0;
2337 
2338 	if (sr_pmap_nesting_start > sr_address) {
2339 		/* we need to map a range without pmap-nesting first */
2340 		target_address = sr_address;
2341 		mapping_size = sr_pmap_nesting_start - sr_address;
2342 		kr = mach_vm_map_kernel(
2343 			map,
2344 			vm_sanitize_wrap_addr_ref(&target_address),
2345 			mapping_size,
2346 			0,
2347 			vmk_flags,
2348 			sr_handle,
2349 			sr_offset,
2350 			TRUE,
2351 			cur_prot,
2352 			max_prot,
2353 			VM_INHERIT_SHARE);
2354 		if (kr != KERN_SUCCESS) {
2355 			SHARED_REGION_TRACE_ERROR(
2356 				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2357 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2358 				(void *)VM_KERNEL_ADDRPERM(map),
2359 				(void *)VM_KERNEL_ADDRPERM(task),
2360 				(void *)VM_KERNEL_ADDRPERM(fsroot),
2361 				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2362 				(long long)target_address,
2363 				(long long)mapping_size,
2364 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2365 			goto done;
2366 		}
2367 		SHARED_REGION_TRACE_DEBUG(
2368 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2369 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2370 			(void *)VM_KERNEL_ADDRPERM(map),
2371 			(void *)VM_KERNEL_ADDRPERM(task),
2372 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2373 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2374 			(long long)target_address, (long long)mapping_size,
2375 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2376 		sr_offset += mapping_size;
2377 		sr_size -= mapping_size;
2378 	}
2379 
2380 	/* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2381 	vmk_flags.vmkf_nested_pmap = true;
2382 	vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
2383 
2384 	/*
2385 	 * Use pmap-nesting to map the majority of the shared region into the task's
2386 	 * VM space. Very rarely will architectures have a shared region that isn't
2387 	 * the same size as the pmap-nesting region, or start at a different address
2388 	 * than the pmap-nesting region, so this code will map the entirety of the
2389 	 * shared region for most architectures.
2390 	 */
2391 	assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2392 	target_address = sr_pmap_nesting_start;
2393 	kr = mach_vm_map_kernel(
2394 		map,
2395 		vm_sanitize_wrap_addr_ref(&target_address),
2396 		sr_pmap_nesting_size,
2397 		0,
2398 		vmk_flags,
2399 		sr_handle,
2400 		sr_offset,
2401 		TRUE,
2402 		cur_prot,
2403 		max_prot,
2404 		VM_INHERIT_SHARE);
2405 	if (kr != KERN_SUCCESS) {
2406 		SHARED_REGION_TRACE_ERROR(
2407 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2408 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2409 			(void *)VM_KERNEL_ADDRPERM(map),
2410 			(void *)VM_KERNEL_ADDRPERM(task),
2411 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2412 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2413 			(long long)target_address,
2414 			(long long)sr_pmap_nesting_size,
2415 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2416 		goto done;
2417 	}
2418 	SHARED_REGION_TRACE_DEBUG(
2419 		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2420 		"nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2421 		(void *)VM_KERNEL_ADDRPERM(map),
2422 		(void *)VM_KERNEL_ADDRPERM(task),
2423 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2424 		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2425 		(long long)target_address, (long long)sr_pmap_nesting_size,
2426 		(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2427 
2428 	sr_offset += sr_pmap_nesting_size;
2429 	sr_size -= sr_pmap_nesting_size;
2430 
2431 	if (sr_size > 0) {
2432 		/* and there's some left to be mapped without pmap-nesting */
2433 		vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
2434 		target_address = sr_address + sr_offset;
2435 		mapping_size = sr_size;
2436 		kr = mach_vm_map_kernel(
2437 			map,
2438 			vm_sanitize_wrap_addr_ref(&target_address),
2439 			mapping_size,
2440 			0,
2441 			VM_MAP_KERNEL_FLAGS_FIXED(),
2442 			sr_handle,
2443 			sr_offset,
2444 			TRUE,
2445 			cur_prot,
2446 			max_prot,
2447 			VM_INHERIT_SHARE);
2448 		if (kr != KERN_SUCCESS) {
2449 			SHARED_REGION_TRACE_ERROR(
2450 				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2451 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2452 				(void *)VM_KERNEL_ADDRPERM(map),
2453 				(void *)VM_KERNEL_ADDRPERM(task),
2454 				(void *)VM_KERNEL_ADDRPERM(fsroot),
2455 				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2456 				(long long)target_address,
2457 				(long long)mapping_size,
2458 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2459 			goto done;
2460 		}
2461 		SHARED_REGION_TRACE_DEBUG(
2462 			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2463 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2464 			(void *)VM_KERNEL_ADDRPERM(map),
2465 			(void *)VM_KERNEL_ADDRPERM(task),
2466 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2467 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2468 			(long long)target_address, (long long)mapping_size,
2469 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2470 		sr_offset += mapping_size;
2471 		sr_size -= mapping_size;
2472 	}
2473 	assert(sr_size == 0);
2474 
2475 done:
2476 	if (kr == KERN_SUCCESS) {
2477 		/* let the task use that shared region */
2478 		vm_shared_region_set(task, shared_region);
2479 	} else {
2480 		/* drop our reference since we're not using it */
2481 		vm_shared_region_deallocate(shared_region);
2482 		vm_shared_region_set(task, NULL);
2483 	}
2484 
2485 	SHARED_REGION_TRACE_DEBUG(
2486 		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2487 		(void *)VM_KERNEL_ADDRPERM(map),
2488 		(void *)VM_KERNEL_ADDRPERM(task),
2489 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2490 		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2491 		kr));
2492 	return kr;
2493 }
2494 
2495 void
2496 vm_shared_region_remove(
2497 	task_t task,
2498 	vm_shared_region_t sr)
2499 {
2500 	vm_map_t map;
2501 	mach_vm_offset_t start;
2502 	mach_vm_size_t size;
2503 	vm_map_kernel_flags_t vmk_flags;
2504 	kern_return_t kr;
2505 
2506 	if (sr == NULL) {
2507 		return;
2508 	}
2509 	map = get_task_map(task);
2510 	start = sr->sr_base_address;
2511 	size = sr->sr_size;
2512 
2513 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2514 	vmk_flags.vmkf_overwrite_immutable = true;
2515 	vmk_flags.vm_tag = VM_MEMORY_DYLD;
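	/*
	 * Mapping MACH_PORT_NULL with vmf_overwrite replaces the previously
	 * entered shared region sub map with an inaccessible (VM_PROT_NONE)
	 * placeholder covering the same range, rather than leaving a hole.
	 */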
2516 
2517 	/* range_id is set by mach_vm_map_kernel */
2518 	kr = mach_vm_map_kernel(map,
2519 	    vm_sanitize_wrap_addr_ref(&start),
2520 	    size,
2521 	    0,                     /* mask */
2522 	    vmk_flags,
2523 	    MACH_PORT_NULL,
2524 	    0,
2525 	    FALSE,                     /* copy */
2526 	    VM_PROT_NONE,
2527 	    VM_PROT_NONE,
2528 	    VM_INHERIT_DEFAULT);
2529 	if (kr != KERN_SUCCESS) {
2530 		printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2531 	}
2532 }
2533 
2534 #define SANE_SLIDE_INFO_SIZE            (2560*1024) /* Can be changed if needed */
2535 
2536 kern_return_t
2537 vm_shared_region_sliding_valid(uint32_t slide)
2538 {
2539 	kern_return_t kr = KERN_SUCCESS;
2540 	vm_shared_region_t sr = vm_shared_region_get(current_task());
2541 
2542 	/* No region yet? We're fine. */
2543 	if (sr == NULL) {
2544 		return kr;
2545 	}
2546 
2547 	if (sr->sr_slide != 0 && slide != 0) {
2548 		if (slide == sr->sr_slide) {
2549 			/*
2550 			 * A request to slide when we've
2551 			 * already done it with exactly the
2552 			 * same slide value before.
2553 			 * This isn't technically wrong, but
2554 			 * we don't want to slide again, so
2555 			 * we return KERN_INVALID_ARGUMENT.
2556 			 */
2557 			kr = KERN_INVALID_ARGUMENT;
2558 		} else {
2559 			printf("Mismatched shared region slide\n");
2560 			kr = KERN_FAILURE;
2561 		}
2562 	}
2563 	vm_shared_region_deallocate(sr);
2564 	return kr;
2565 }
2566 
2567 /*
2568  * Actually create (really overwrite) the mapping to part of the shared cache which
2569  * undergoes relocation.  This routine reads in the relocation info from dyld and
2570  * verifies it. It then creates a (or finds a matching) shared region pager which
2571  * handles the actual modification of the page contents and installs the mapping
2572  * using that pager.
2573  */
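/*
 * Sketch of the flow: copyin and sanity-check the slide info, allocate a
 * vm_shared_region_slide_info, then either stash it on the shared region for
 * later per-task auth pagers (arm64e authenticated sections) or immediately
 * set up a shared_region_pager and overwrite the existing mapping with it.
 */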
2574 kern_return_t
2575 vm_shared_region_slide_mapping(
2576 	vm_shared_region_t      sr,
2577 	user_addr_t             slide_info_addr,
2578 	mach_vm_size_t          slide_info_size,
2579 	mach_vm_offset_t        start,
2580 	mach_vm_size_t          size,
2581 	mach_vm_offset_t        slid_mapping,
2582 	uint32_t                slide,
2583 	memory_object_control_t sr_file_control,
2584 	vm_prot_t               prot)
2585 {
2586 	kern_return_t           kr;
2587 	vm_object_t             object = VM_OBJECT_NULL;
2588 	vm_shared_region_slide_info_t si = NULL;
2589 	vm_map_entry_t          tmp_entry = VM_MAP_ENTRY_NULL;
2590 	struct vm_map_entry     tmp_entry_store;
2591 	memory_object_t         sr_pager = MEMORY_OBJECT_NULL;
2592 	vm_map_t                sr_map;
2593 	vm_map_kernel_flags_t   vmk_flags;
2594 	vm_map_offset_t         map_addr;
2595 	void                    *slide_info_entry = NULL;
2596 	int                     error;
2597 
2598 	assert(sr->sr_slide_in_progress);
2599 
2600 	if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2601 		return KERN_INVALID_ARGUMENT;
2602 	}
2603 
2604 	/*
2605 	 * Copy in and verify the relocation information.
2606 	 */
2607 	if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2608 		printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2609 		return KERN_FAILURE;
2610 	}
2611 	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2612 		printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2613 		return KERN_FAILURE;
2614 	}
2615 
2616 	slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2617 	if (slide_info_entry == NULL) {
2618 		return KERN_RESOURCE_SHORTAGE;
2619 	}
2620 	error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2621 	if (error) {
2622 		printf("copyin of slide_info failed\n");
2623 		kr = KERN_INVALID_ADDRESS;
2624 		goto done;
2625 	}
2626 
2627 	if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2628 		printf("Sanity Check failed for slide_info\n");
2629 		goto done;
2630 	}
2631 
2632 	/*
2633 	 * Allocate and fill in a vm_shared_region_slide_info.
2634 	 * This will either be used by a new pager, or used to find
2635 	 * a pre-existing matching pager.
2636 	 */
2637 	object = memory_object_control_to_vm_object(sr_file_control);
2638 	if (object == VM_OBJECT_NULL || object->internal) {
2639 		object = VM_OBJECT_NULL;
2640 		kr = KERN_INVALID_ADDRESS;
2641 		goto done;
2642 	}
2643 
2644 	si = kalloc_type(struct vm_shared_region_slide_info,
2645 	    Z_WAITOK | Z_NOFAIL);
2646 	vm_object_lock(object);
2647 
2648 	vm_object_reference_locked(object);     /* for si->slide_object */
2649 	object->object_is_shared_cache = TRUE;
2650 	vm_object_unlock(object);
2651 
2652 	si->si_slide_info_entry = slide_info_entry;
2653 	si->si_slide_info_size = slide_info_size;
2654 
2655 	assert(slid_mapping != (mach_vm_offset_t) -1);
2656 	si->si_slid_address = slid_mapping + sr->sr_base_address;
2657 	si->si_slide_object = object;
2658 	si->si_start = start;
2659 	si->si_end = si->si_start + size;
2660 	si->si_slide = slide;
2661 #if __has_feature(ptrauth_calls)
2662 	/*
2663 	 * If there is authenticated pointer data in this slid mapping,
2664 	 * then just add the information needed to create new pagers for
2665 	 * different shared_region_id's later.
2666 	 */
2667 	if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2668 	    sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2669 	    !(prot & VM_PROT_NOAUTH)) {
2670 		if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
2671 			printf("Too many auth/private sections for shared region!!\n");
2672 			kr = KERN_INVALID_ARGUMENT;
2673 			goto done;
2674 		}
2675 		si->si_ptrauth = TRUE;
2676 		sr->sr_auth_section[sr->sr_next_auth_section++] = si;
2677 		/*
2678 		 * Remember the shared region, since that's where we'll
2679 		 * stash this info for all auth pagers to share. Each pager
2680 		 * will need to take a reference to it.
2681 		 */
2682 		si->si_shared_region = sr;
2683 		kr = KERN_SUCCESS;
2684 		goto done;
2685 	}
2686 	si->si_shared_region = NULL;
2687 	si->si_ptrauth = FALSE;
2688 #endif /* __has_feature(ptrauth_calls) */
2689 
2690 	/*
2691 	 * find the pre-existing shared region's map entry to slide
2692 	 */
2693 	sr_map = vm_shared_region_vm_map(sr);
2694 	kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2695 	if (kr != KERN_SUCCESS) {
2696 		goto done;
2697 	}
2698 	tmp_entry = &tmp_entry_store;
2699 
2700 	/*
2701 	 * The object must exactly cover the region to slide.
2702 	 */
2703 	assert(VME_OFFSET(tmp_entry) == start);
2704 	assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2705 
2706 	/* create a "shared_region" sliding pager */
2707 	sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2708 	if (sr_pager == MEMORY_OBJECT_NULL) {
2709 		kr = KERN_RESOURCE_SHORTAGE;
2710 		goto done;
2711 	}
2712 
2713 #if CONFIG_SECLUDED_MEMORY
2714 	 * The shared region pagers used by camera or DEXTs should not have
2715 	 * their pages go on the secluded queue.
2716 	 * pagers that won't go on the secluded queue.
2717 	 */
2718 	if (primary_system_shared_region == NULL ||
2719 	    primary_system_shared_region == sr ||
2720 	    sr->sr_driverkit) {
2721 		memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2722 	}
2723 #endif /* CONFIG_SECLUDED_MEMORY */
2724 
2725 	/* map that pager over the portion of the mapping that needs sliding */
2726 	map_addr = tmp_entry->vme_start;
2727 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2728 	vmk_flags.vmkf_overwrite_immutable = true;
2729 	vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
2730 	    tmp_entry->max_protection);
2731 	vmk_flags.vmf_tpro = shared_region_tpro_protect(sr,
2732 	    prot);
2733 	kr = mach_vm_map_kernel(sr_map,
2734 	    vm_sanitize_wrap_addr_ref(&map_addr),
2735 	    tmp_entry->vme_end - tmp_entry->vme_start,
2736 	    0,
2737 	    vmk_flags,
2738 	    (ipc_port_t)(uintptr_t) sr_pager,
2739 	    0,
2740 	    TRUE,
2741 	    tmp_entry->protection,
2742 	    tmp_entry->max_protection,
2743 	    tmp_entry->inheritance);
2744 	assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2745 	assertf(map_addr == tmp_entry->vme_start,
2746 	    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2747 	    (uint64_t)map_addr,
2748 	    (uint64_t) tmp_entry->vme_start,
2749 	    tmp_entry);
2750 
2751 	/* success! */
2752 	kr = KERN_SUCCESS;
2753 
2754 done:
2755 	if (sr_pager != NULL) {
2756 		/*
2757 		 * Release the sr_pager reference obtained by shared_region_pager_setup().
2758 		 * The mapping, if it succeeded, is now holding a reference on the memory object.
2759 		 */
2760 		memory_object_deallocate(sr_pager);
2761 		sr_pager = MEMORY_OBJECT_NULL;
2762 	}
2763 	if (tmp_entry != NULL) {
2764 		/* release extra ref on tmp_entry's VM object */
2765 		vm_object_deallocate(VME_OBJECT(tmp_entry));
2766 		tmp_entry = VM_MAP_ENTRY_NULL;
2767 	}
2768 
2769 	if (kr != KERN_SUCCESS) {
2770 		/* cleanup */
2771 		if (si != NULL) {
2772 			if (si->si_slide_object) {
2773 				vm_object_deallocate(si->si_slide_object);
2774 				si->si_slide_object = VM_OBJECT_NULL;
2775 			}
2776 			kfree_type(struct vm_shared_region_slide_info, si);
2777 			si = NULL;
2778 		}
2779 		if (slide_info_entry != NULL) {
2780 			kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2781 			slide_info_entry = NULL;
2782 		}
2783 	}
2784 	return kr;
2785 }
2786 
2787 static kern_return_t
2788 vm_shared_region_slide_sanity_check_v1(
2789 	vm_shared_region_slide_info_entry_v1_t s_info)
2790 {
2791 	uint32_t pageIndex = 0;
2792 	uint16_t entryIndex = 0;
2793 	uint16_t *toc = NULL;
2794 
2795 	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2796 	for (; pageIndex < s_info->toc_count; pageIndex++) {
2797 		entryIndex =  (uint16_t)(toc[pageIndex]);
2798 
2799 		if (entryIndex >= s_info->entry_count) {
2800 			printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2801 			return KERN_FAILURE;
2802 		}
2803 	}
2804 	return KERN_SUCCESS;
2805 }
2806 
2807 static kern_return_t
2808 vm_shared_region_slide_sanity_check_v2(
2809 	vm_shared_region_slide_info_entry_v2_t s_info,
2810 	mach_vm_size_t slide_info_size)
2811 {
2812 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2813 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2814 		return KERN_FAILURE;
2815 	}
2816 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2817 		return KERN_FAILURE;
2818 	}
2819 
2820 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
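	/*
	 * i.e. the blob must provide at least
	 * sizeof(*s_info) + (page_starts_count + page_extras_count) * sizeof(uint16_t)
	 * bytes, with each intermediate computation checked for overflow below.
	 */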
2821 
2822 	uint32_t page_starts_count = s_info->page_starts_count;
2823 	uint32_t page_extras_count = s_info->page_extras_count;
2824 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2825 	if (num_trailing_entries < page_starts_count) {
2826 		return KERN_FAILURE;
2827 	}
2828 
2829 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2830 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2831 	if (trailing_size >> 1 != num_trailing_entries) {
2832 		return KERN_FAILURE;
2833 	}
2834 
2835 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2836 	if (required_size < sizeof(*s_info)) {
2837 		return KERN_FAILURE;
2838 	}
2839 
2840 	if (required_size > slide_info_size) {
2841 		return KERN_FAILURE;
2842 	}
2843 
2844 	return KERN_SUCCESS;
2845 }
2846 
2847 static kern_return_t
2848 vm_shared_region_slide_sanity_check_v3(
2849 	vm_shared_region_slide_info_entry_v3_t s_info,
2850 	mach_vm_size_t slide_info_size)
2851 {
2852 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2853 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2854 		return KERN_FAILURE;
2855 	}
2856 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2857 		printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2858 		return KERN_FAILURE;
2859 	}
2860 
2861 	uint32_t page_starts_count = s_info->page_starts_count;
2862 	mach_vm_size_t num_trailing_entries = page_starts_count;
2863 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2864 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2865 	if (required_size < sizeof(*s_info)) {
2866 		printf("vm_shared_region_slide_sanity_check_v3: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2867 		return KERN_FAILURE;
2868 	}
2869 
2870 	if (required_size > slide_info_size) {
2871 		printf("vm_shared_region_slide_sanity_check_v3: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2872 		return KERN_FAILURE;
2873 	}
2874 
2875 	return KERN_SUCCESS;
2876 }
2877 
2878 static kern_return_t
2879 vm_shared_region_slide_sanity_check_v4(
2880 	vm_shared_region_slide_info_entry_v4_t s_info,
2881 	mach_vm_size_t slide_info_size)
2882 {
2883 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2884 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2885 		return KERN_FAILURE;
2886 	}
2887 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2888 		return KERN_FAILURE;
2889 	}
2890 
2891 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
2892 
2893 	uint32_t page_starts_count = s_info->page_starts_count;
2894 	uint32_t page_extras_count = s_info->page_extras_count;
2895 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2896 	if (num_trailing_entries < page_starts_count) {
2897 		return KERN_FAILURE;
2898 	}
2899 
2900 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2901 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2902 	if (trailing_size >> 1 != num_trailing_entries) {
2903 		return KERN_FAILURE;
2904 	}
2905 
2906 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2907 	if (required_size < sizeof(*s_info)) {
2908 		return KERN_FAILURE;
2909 	}
2910 
2911 	if (required_size > slide_info_size) {
2912 		return KERN_FAILURE;
2913 	}
2914 
2915 	return KERN_SUCCESS;
2916 }
2917 
2918 static kern_return_t
2919 vm_shared_region_slide_sanity_check_v5(
2920 	vm_shared_region_slide_info_entry_v5_t s_info,
2921 	mach_vm_size_t slide_info_size)
2922 {
2923 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v5)) {
2924 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2925 		return KERN_FAILURE;
2926 	}
2927 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB) {
2928 		printf("vm_shared_region_slide_sanity_check_v5: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE_16KB);
2929 		return KERN_FAILURE;
2930 	}
2931 
2932 	uint32_t page_starts_count = s_info->page_starts_count;
2933 	mach_vm_size_t num_trailing_entries = page_starts_count;
2934 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2935 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2936 	if (required_size < sizeof(*s_info)) {
2937 		printf("vm_shared_region_slide_sanity_check_v5: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2938 		return KERN_FAILURE;
2939 	}
2940 
2941 	if (required_size > slide_info_size) {
2942 		printf("vm_shared_region_slide_sanity_check_v5: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2943 		return KERN_FAILURE;
2944 	}
2945 
2946 	return KERN_SUCCESS;
2947 }
2948 
2949 
2950 static kern_return_t
2951 vm_shared_region_slide_sanity_check(
2952 	vm_shared_region_slide_info_entry_t s_info,
2953 	mach_vm_size_t s_info_size)
2954 {
2955 	kern_return_t kr;
2956 
2957 	switch (s_info->version) {
2958 	case 1:
2959 		kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
2960 		break;
2961 	case 2:
2962 		kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2963 		break;
2964 	case 3:
2965 		kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2966 		break;
2967 	case 4:
2968 		kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2969 		break;
2970 	case 5:
2971 		kr = vm_shared_region_slide_sanity_check_v5(&s_info->v5, s_info_size);
2972 		break;
2973 	default:
2974 		kr = KERN_FAILURE;
2975 	}
2976 	return kr;
2977 }
2978 
2979 static kern_return_t
2980 vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2981 {
2982 	uint16_t *toc = NULL;
2983 	slide_info_entry_toc_t bitmap = NULL;
2984 	uint32_t i = 0, j = 0;
2985 	uint8_t b = 0;
2986 	uint32_t slide = si->si_slide;
2987 	int is_64 = task_has_64Bit_addr(current_task());
2988 
2989 	vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
2990 	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2991 
2992 	if (pageIndex >= s_info->toc_count) {
2993 		printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
2994 	} else {
2995 		uint16_t entryIndex =  (uint16_t)(toc[pageIndex]);
2996 		slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
2997 
2998 		if (entryIndex >= s_info->entry_count) {
2999 			printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
3000 		} else {
3001 			bitmap = &slide_info_entries[entryIndex];
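			/*
			 * Each bit of the bitmap covers one 32-bit word of the
			 * page: bit (i * 8 + j) set means the word at that index
			 * gets the slide added to it.
			 */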
3002 
3003 			for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
3004 				b = bitmap->entry[i];
3005 				if (b != 0) {
3006 					for (j = 0; j < 8; ++j) {
3007 						if (b & (1 << j)) {
3008 							uint32_t *ptr_to_slide;
3009 							uint32_t old_value;
3010 
3011 							ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
3012 							old_value = *ptr_to_slide;
3013 							*ptr_to_slide += slide;
3014 							if (is_64 && *ptr_to_slide < old_value) {
3015 								/*
3016 								 * We just slid the low 32 bits of a 64-bit pointer
3017 								 * and it looks like there should have been a carry-over
3018 								 * to the upper 32 bits.
3019 								 * The sliding failed...
3020 								 */
3021 								printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
3022 								    i, j, b, slide, old_value, *ptr_to_slide);
3023 								return KERN_FAILURE;
3024 							}
3025 						}
3026 					}
3027 				}
3028 			}
3029 		}
3030 	}
3031 
3032 	return KERN_SUCCESS;
3033 }
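
#if 0
/*
 * Illustrative sketch, not part of the original source or the kernel build:
 * a worked example of the v1 carry-over check in
 * vm_shared_region_slide_page_v1(), using hypothetical values.  Only the low
 * 32 bits of a 64-bit pointer are slid; if the 32-bit addition wraps, the
 * upper half would have needed a carry, so the slide is rejected.
 */
static int
v1_slide_low32_example(void)
{
	uint32_t low32     = 0xFFFFF000u; /* hypothetical low half of a 64-bit pointer */
	uint32_t slide     = 0x00002000u; /* hypothetical slide amount */
	uint32_t old_value = low32;

	low32 += slide;                   /* wraps around to 0x00001000 */
	if (low32 < old_value) {
		/* carry lost: the same condition the v1 slider treats as KERN_FAILURE */
		return -1;
	}
	return 0;
}
#endif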
3034 
3035 static kern_return_t
3036 rebase_chain_32(
3037 	uint8_t *page_content,
3038 	uint16_t start_offset,
3039 	uint32_t slide_amount,
3040 	vm_shared_region_slide_info_entry_v2_t s_info)
3041 {
3042 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3043 
3044 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3045 	const uint32_t value_mask = ~delta_mask;
3046 	const uint32_t value_add = (uint32_t)(s_info->value_add);
3047 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3048 
3049 	uint32_t page_offset = start_offset;
3050 	uint32_t delta = 1;
3051 
3052 	while (delta != 0 && page_offset <= last_page_offset) {
3053 		uint8_t *loc;
3054 		uint32_t value;
3055 
3056 		loc = page_content + page_offset;
3057 		memcpy(&value, loc, sizeof(value));
3058 		delta = (value & delta_mask) >> delta_shift;
3059 		value &= value_mask;
3060 
3061 		if (value != 0) {
3062 			value += value_add;
3063 			value += slide_amount;
3064 		}
3065 		memcpy(loc, &value, sizeof(value));
3066 		page_offset += delta;
3067 	}
3068 
3069 	/* If the offset went past the end of the page, then the slide data is invalid. */
3070 	if (page_offset > last_page_offset) {
3071 		return KERN_FAILURE;
3072 	}
3073 	return KERN_SUCCESS;
3074 }
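
#if 0
/*
 * Illustrative sketch, not part of the original source or the kernel build:
 * one step of the v2 delta encoding walked by rebase_chain_32()/_64(), with a
 * hypothetical delta_mask and slot value.  Because delta_shift is
 * __builtin_ctzll(delta_mask) - 2, the extracted field comes out already
 * multiplied by 4: chain links are stored in 4-byte units but walked as byte
 * offsets.
 */
static void
v2_chain_step_example(void)
{
	const uint32_t delta_mask  = 0xC0000000u;                      /* hypothetical */
	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;  /* 30 - 2 = 28 */
	uint32_t value = 0x80001000u;                                  /* hypothetical slot contents */

	uint32_t delta  = (value & delta_mask) >> delta_shift;         /* field 2 -> 8 bytes to the next slot */
	uint32_t target = value & ~delta_mask;                         /* 0x00001000: the value to rebase */
	(void)delta;
	(void)target;
}
#endif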
3075 
3076 static kern_return_t
3077 rebase_chain_64(
3078 	uint8_t *page_content,
3079 	uint16_t start_offset,
3080 	uint32_t slide_amount,
3081 	vm_shared_region_slide_info_entry_v2_t s_info)
3082 {
3083 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
3084 
3085 	const uint64_t delta_mask = s_info->delta_mask;
3086 	const uint64_t value_mask = ~delta_mask;
3087 	const uint64_t value_add = s_info->value_add;
3088 	const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3089 
3090 	uint32_t page_offset = start_offset;
3091 	uint32_t delta = 1;
3092 
3093 	while (delta != 0 && page_offset <= last_page_offset) {
3094 		uint8_t *loc;
3095 		uint64_t value;
3096 
3097 		loc = page_content + page_offset;
3098 		memcpy(&value, loc, sizeof(value));
3099 		delta = (uint32_t)((value & delta_mask) >> delta_shift);
3100 		value &= value_mask;
3101 
3102 		if (value != 0) {
3103 			value += value_add;
3104 			value += slide_amount;
3105 		}
3106 		memcpy(loc, &value, sizeof(value));
3107 		page_offset += delta;
3108 	}
3109 
3110 	if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
3111 		/* If a pointer straddling the page boundary needs to be adjusted, then
3112 		 * add the slide to the lower half. The encoding guarantees that the upper
3113 		 * half on the next page will need no masking.
3114 		 *
3115 		 * This assumes a little-endian machine and that the region being slid
3116 		 * never crosses a 4 GB boundary. */
3117 
3118 		uint8_t *loc = page_content + page_offset;
3119 		uint32_t value;
3120 
3121 		memcpy(&value, loc, sizeof(value));
3122 		value += slide_amount;
3123 		memcpy(loc, &value, sizeof(value));
3124 	} else if (page_offset > last_page_offset) {
3125 		return KERN_FAILURE;
3126 	}
3127 
3128 	return KERN_SUCCESS;
3129 }
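
#if 0
/*
 * Illustrative sketch, not part of the original source or the kernel build:
 * the offsets involved in the page-straddling case at the end of
 * rebase_chain_64(), assuming a 4KB slide page.  An offset of 4092 both ends
 * the walk (4092 > 4088) and satisfies 4092 + 4 == 4096, so only the low
 * 4 bytes of that pointer live on this page and only they get the slide,
 * per the little-endian assumption above.
 */
static void
v2_straddle_offsets_example(void)
{
	const uint32_t page_size        = 4096;                          /* hypothetical slide page size */
	const uint32_t last_page_offset = page_size - sizeof(uint64_t);  /* 4088 */
	const uint32_t straddle_offset  = page_size - sizeof(uint32_t);  /* 4092 */
	(void)last_page_offset;
	(void)straddle_offset;
}
#endif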
3130 
3131 static kern_return_t
3132 rebase_chain(
3133 	boolean_t is_64,
3134 	uint32_t pageIndex,
3135 	uint8_t *page_content,
3136 	uint16_t start_offset,
3137 	uint32_t slide_amount,
3138 	vm_shared_region_slide_info_entry_v2_t s_info)
3139 {
3140 	kern_return_t kr;
3141 	if (is_64) {
3142 		kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3143 	} else {
3144 		kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3145 	}
3146 
3147 	if (kr != KERN_SUCCESS) {
3148 		printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3149 		    pageIndex, start_offset, slide_amount);
3150 	}
3151 	return kr;
3152 }
3153 
3154 static kern_return_t
3155 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3156 {
3157 	vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
3158 	const uint32_t slide_amount = si->si_slide;
3159 
3160 	/* The high bits of the delta_mask field are nonzero precisely when the shared
3161 	 * cache is 64-bit. */
3162 	const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
3163 
3164 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3165 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3166 
3167 	uint8_t *page_content = (uint8_t *)vaddr;
3168 	uint16_t page_entry;
3169 
3170 	if (pageIndex >= s_info->page_starts_count) {
3171 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3172 		    pageIndex, s_info->page_starts_count);
3173 		return KERN_FAILURE;
3174 	}
3175 	page_entry = page_starts[pageIndex];
3176 
3177 	if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
3178 		return KERN_SUCCESS;
3179 	}
3180 
3181 	if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
3182 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
3183 		uint16_t info;
3184 
3185 		do {
3186 			uint16_t page_start_offset;
3187 			kern_return_t kr;
3188 
3189 			if (chain_index >= s_info->page_extras_count) {
3190 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3191 				    chain_index, s_info->page_extras_count);
3192 				return KERN_FAILURE;
3193 			}
3194 			info = page_extras[chain_index];
3195 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3196 
3197 			kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3198 			if (kr != KERN_SUCCESS) {
3199 				return KERN_FAILURE;
3200 			}
3201 
3202 			chain_index++;
3203 		} while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
3204 	} else {
3205 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3206 		kern_return_t kr;
3207 
3208 		kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3209 		if (kr != KERN_SUCCESS) {
3210 			return KERN_FAILURE;
3211 		}
3212 	}
3213 
3214 	return KERN_SUCCESS;
3215 }
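
#if 0
/*
 * Illustrative sketch, not part of the original source or the kernel build:
 * the shape of the v2 page_starts/page_extras lookup above, with hypothetical
 * table contents and stand-in bit values for the DYLD_CACHE_SLIDE_PAGE_*
 * constants.  A start entry either encodes one chain start directly or, with
 * the EXTRA bit set, an index into page_extras, where the run of chain starts
 * for that page ends at the entry carrying the END bit.
 */
static void
v2_page_start_example(void)
{
	const uint16_t EXTRA_BIT = 0x8000;  /* stand-in for DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA */
	const uint16_t END_BIT   = 0x8000;  /* stand-in for DYLD_CACHE_SLIDE_PAGE_ATTR_END */

	/* page 0: a single chain start; page 1: chain starts listed in page_extras[0..1] */
	uint16_t page_starts[] = { 0x0004, EXTRA_BIT | 0x0000 };
	uint16_t page_extras[] = { 0x0002, END_BIT | 0x0040 };
	(void)page_starts;
	(void)page_extras;
}
#endif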
3216 
3217 
3218 static kern_return_t
3219 vm_shared_region_slide_page_v3(
3220 	vm_shared_region_slide_info_t si,
3221 	vm_offset_t vaddr,
3222 	__unused mach_vm_offset_t uservaddr,
3223 	uint32_t pageIndex,
3224 #if !__has_feature(ptrauth_calls)
3225 	__unused
3226 #endif /* !__has_feature(ptrauth_calls) */
3227 	uint64_t jop_key)
3228 {
3229 	vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
3230 	const uint32_t slide_amount = si->si_slide;
3231 
3232 	uint8_t *page_content = (uint8_t *)vaddr;
3233 	uint16_t page_entry;
3234 
3235 	if (pageIndex >= s_info->page_starts_count) {
3236 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3237 		    pageIndex, s_info->page_starts_count);
3238 		return KERN_FAILURE;
3239 	}
3240 	page_entry = s_info->page_starts[pageIndex];
3241 
3242 	if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
3243 		return KERN_SUCCESS;
3244 	}
3245 
3246 	uint8_t* rebaseLocation = page_content;
3247 	uint64_t delta = page_entry;
3248 	do {
3249 		rebaseLocation += delta;
3250 		uint64_t value;
3251 		memcpy(&value, rebaseLocation, sizeof(value));
3252 		delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
3253 
3254 		// A pointer is one of :
3255 		// {
3256 		//	 uint64_t pointerValue : 51;
3257 		//	 uint64_t offsetToNextPointer : 11;
3258 		//	 uint64_t isBind : 1 = 0;
3259 		//	 uint64_t authenticated : 1 = 0;
3260 		// }
3261 		// {
3262 		//	 uint32_t offsetFromSharedCacheBase;
3263 		//	 uint16_t diversityData;
3264 		//	 uint16_t hasAddressDiversity : 1;
3265 		//	 uint16_t hasDKey : 1;
3266 		//	 uint16_t hasBKey : 1;
3267 		//	 uint16_t offsetToNextPointer : 11;
3268 		//	 uint16_t isBind : 1;
3269 		//	 uint16_t authenticated : 1 = 1;
3270 		// }
3271 
3272 		bool isBind = (value & (1ULL << 62)) != 0;
3273 		if (isBind) {
3274 			return KERN_FAILURE;
3275 		}
3276 
3277 #if __has_feature(ptrauth_calls)
3278 		uint16_t diversity_data = (uint16_t)(value >> 32);
3279 		bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
3280 		ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
3281 #endif /* __has_feature(ptrauth_calls) */
3282 		bool isAuthenticated = (value & (1ULL << 63)) != 0;
3283 
3284 		if (isAuthenticated) {
3285 			// The new value for a rebase is the low 32-bits of the threaded value plus the slide.
3286 			value = (value & 0xFFFFFFFF) + slide_amount;
3287 			// Add in the offset from the mach_header
3288 			const uint64_t value_add = s_info->value_add;
3289 			value += value_add;
3290 
3291 #if __has_feature(ptrauth_calls)
3292 			uint64_t discriminator = diversity_data;
3293 			if (hasAddressDiversity) {
3294 				// First calculate a new discriminator using the address of where we are trying to store the value
3295 				uintptr_t pageOffset = rebaseLocation - page_content;
3296 				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3297 			}
3298 
3299 			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3300 				/*
3301 				 * these pointers are used in user mode. disable the kernel key diversification
3302 				 * so we can sign them for use in user mode.
3303 				 */
3304 				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3305 			}
3306 #endif /* __has_feature(ptrauth_calls) */
3307 		} else {
3308 			// The new value for a rebase is the low 51-bits of the threaded value plus the slide.
3309 			// Regular pointer, which needs to fit into 51 bits of value.
3310 			// C++ RTTI uses the top bit, so we allow the whole top byte
3311 			// and the bottom 43 bits to be packed into those 51 bits.
3312 			uint64_t top8Bits = value & 0x0007F80000000000ULL;
3313 			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
3314 			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
3315 			value = targetValue + slide_amount;
3316 		}
3317 
3318 		memcpy(rebaseLocation, &value, sizeof(value));
3319 	} while (delta != 0);
3320 
3321 	return KERN_SUCCESS;
3322 }
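
#if 0
/*
 * Illustrative sketch, not part of the original source or the kernel build:
 * a worked example of the non-authenticated v3 decode above, with a
 * hypothetical encoded value.  The 8 "high" bits stored at bits 43..50 are
 * shifted up by 13 to become the top byte of the pointer, and the low 43 bits
 * are kept as-is before the slide is added.
 */
static void
v3_plain_pointer_example(void)
{
	uint64_t value        = 0x0000400012345678ULL;            /* hypothetical: high8 = 0x08, low 43 bits = 0x12345678 */
	uint64_t top8Bits     = value & 0x0007F80000000000ULL;
	uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
	uint64_t targetValue  = (top8Bits << 13) | bottom43Bits;  /* 0x0800000012345678 */
	(void)targetValue;
}
#endif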
3323 
3324 static kern_return_t
3325 rebase_chainv4(
3326 	uint8_t *page_content,
3327 	uint16_t start_offset,
3328 	uint32_t slide_amount,
3329 	vm_shared_region_slide_info_entry_v4_t s_info)
3330 {
3331 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3332 
3333 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3334 	const uint32_t value_mask = ~delta_mask;
3335 	const uint32_t value_add = (uint32_t)(s_info->value_add);
3336 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3337 
3338 	uint32_t page_offset = start_offset;
3339 	uint32_t delta = 1;
3340 
3341 	while (delta != 0 && page_offset <= last_page_offset) {
3342 		uint8_t *loc;
3343 		uint32_t value;
3344 
3345 		loc = page_content + page_offset;
3346 		memcpy(&value, loc, sizeof(value));
3347 		delta = (value & delta_mask) >> delta_shift;
3348 		value &= value_mask;
3349 
3350 		if ((value & 0xFFFF8000) == 0) {
3351 			// small positive non-pointer, use as-is
3352 		} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
3353 			// small negative non-pointer
3354 			value |= 0xC0000000;
3355 		} else {
3356 			// pointer that needs rebasing
3357 			value += value_add;
3358 			value += slide_amount;
3359 		}
3360 		memcpy(loc, &value, sizeof(value));
3361 		page_offset += delta;
3362 	}
3363 
3364 	/* If the offset went past the end of the page, then the slide data is invalid. */
3365 	if (page_offset > last_page_offset) {
3366 		return KERN_FAILURE;
3367 	}
3368 	return KERN_SUCCESS;
3369 }
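
#if 0
/*
 * Illustrative sketch, not part of the original source or the kernel build:
 * the v4 non-pointer cases handled by rebase_chainv4(), with hypothetical
 * slot values.  Small negative constants have lost their top two bits to the
 * delta field, so the slider restores them by OR-ing in 0xC0000000 instead of
 * rebasing the value.
 */
static void
v4_small_value_example(void)
{
	uint32_t small_positive = 0x00001234u;   /* (v & 0xFFFF8000) == 0 -> used as-is */
	uint32_t small_negative = 0x3FFFFFF0u;   /* (v & 0x3FFF8000) == 0x3FFF8000 */

	small_negative |= 0xC0000000u;           /* restored to 0xFFFFFFF0, i.e. -16 */
	(void)small_positive;
	(void)small_negative;
}
#endif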
3370 
3371 static kern_return_t
3372 vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3373 {
3374 	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
3375 	const uint32_t slide_amount = si->si_slide;
3376 
3377 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3378 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3379 
3380 	uint8_t *page_content = (uint8_t *)vaddr;
3381 	uint16_t page_entry;
3382 
3383 	if (pageIndex >= s_info->page_starts_count) {
3384 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3385 		    pageIndex, s_info->page_starts_count);
3386 		return KERN_FAILURE;
3387 	}
3388 	page_entry = page_starts[pageIndex];
3389 
3390 	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
3391 		return KERN_SUCCESS;
3392 	}
3393 
3394 	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
3395 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
3396 		uint16_t info;
3397 
3398 		do {
3399 			uint16_t page_start_offset;
3400 			kern_return_t kr;
3401 
3402 			if (chain_index >= s_info->page_extras_count) {
3403 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3404 				    chain_index, s_info->page_extras_count);
3405 				return KERN_FAILURE;
3406 			}
3407 			info = page_extras[chain_index];
3408 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3409 
3410 			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3411 			if (kr != KERN_SUCCESS) {
3412 				return KERN_FAILURE;
3413 			}
3414 
3415 			chain_index++;
3416 		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
3417 	} else {
3418 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3419 		kern_return_t kr;
3420 
3421 		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3422 		if (kr != KERN_SUCCESS) {
3423 			return KERN_FAILURE;
3424 		}
3425 	}
3426 
3427 	return KERN_SUCCESS;
3428 }
3429 
3430 
3431 static kern_return_t
3432 vm_shared_region_slide_page_v5(
3433 	vm_shared_region_slide_info_t si,
3434 	vm_offset_t vaddr,
3435 	__unused mach_vm_offset_t uservaddr,
3436 	uint32_t pageIndex,
3437 #if !__has_feature(ptrauth_calls)
3438 	__unused
3439 #endif /* !__has_feature(ptrauth_calls) */
3440 	uint64_t jop_key)
3441 {
3442 	vm_shared_region_slide_info_entry_v5_t s_info = &si->si_slide_info_entry->v5;
3443 	const uint32_t slide_amount = si->si_slide;
3444 	const uint64_t value_add = s_info->value_add;
3445 
3446 	uint8_t *page_content = (uint8_t *)vaddr;
3447 	uint16_t page_entry;
3448 
3449 	if (pageIndex >= s_info->page_starts_count) {
3450 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3451 		    pageIndex, s_info->page_starts_count);
3452 		return KERN_FAILURE;
3453 	}
3454 	page_entry = s_info->page_starts[pageIndex];
3455 
3456 	if (page_entry == DYLD_CACHE_SLIDE_V5_PAGE_ATTR_NO_REBASE) {
3457 		return KERN_SUCCESS;
3458 	}
3459 
3460 	uint8_t* rebaseLocation = page_content;
3461 	uint64_t delta = page_entry;
3462 	do {
3463 		rebaseLocation += delta;
3464 		uint64_t value;
3465 		memcpy(&value, rebaseLocation, sizeof(value));
3466 		delta = ((value & 0x7FF0000000000000ULL) >> 52) * sizeof(uint64_t);
3467 
3468 		// A pointer is one of :
3469 		// {
3470 		//   uint64_t    runtimeOffset   : 34,   // offset from the start of the shared cache
3471 		//               high8           :  8,
3472 		//               unused          : 10,
3473 		//               next            : 11,   // 8-byte stride
3474 		//               auth            :  1;   // == 0
3475 		// }
3476 		// {
3477 		//   uint64_t    runtimeOffset   : 34,   // offset from the start of the shared cache
3478 		//               diversity       : 16,
3479 		//               addrDiv         :  1,
3480 		//               keyIsData       :  1,   // implicitly always the 'A' key.  0 -> IA.  1 -> DA
3481 		//               next            : 11,   // 8-byte stride
3482 		//               auth            :  1;   // == 1
3483 		// }
3484 
3485 #if __has_feature(ptrauth_calls)
3486 		bool        addrDiv = ((value & (1ULL << 50)) != 0);
3487 		bool        keyIsData = ((value & (1ULL << 51)) != 0);
3488 		// the key is always an A key, and the bit tells us whether it's IA or DA
3489 		ptrauth_key key = keyIsData ? ptrauth_key_asda : ptrauth_key_asia;
3490 		uint16_t    diversity = (uint16_t)((value >> 34) & 0xFFFF);
3491 #endif /* __has_feature(ptrauth_calls) */
3492 		uint64_t    high8 = (value << 22) & 0xFF00000000000000ULL;
3493 		bool        isAuthenticated = (value & (1ULL << 63)) != 0;
3494 
3495 		// The new value for a rebase is the low 34-bits of the threaded value plus the base plus slide.
3496 		value = (value & 0x3FFFFFFFFULL) + value_add + slide_amount;
3497 		if (isAuthenticated) {
3498 #if __has_feature(ptrauth_calls)
3499 			uint64_t discriminator = diversity;
3500 			if (addrDiv) {
3501 				// First calculate a new discriminator using the address of where we are trying to store the value
3502 				uintptr_t pageOffset = rebaseLocation - page_content;
3503 				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3504 			}
3505 
3506 			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3507 				/*
3508 				 * these pointers are used in user mode. disable the kernel key diversification
3509 				 * so we can sign them for use in user mode.
3510 				 */
3511 				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3512 			}
3513 #endif /* __has_feature(ptrauth_calls) */
3514 		} else {
3515 			// the value already has the correct low bits, so just add in the high8 if it exists
3516 			value += high8;
3517 		}
3518 
3519 		memcpy(rebaseLocation, &value, sizeof(value));
3520 	} while (delta != 0);
3521 
3522 	return KERN_SUCCESS;
3523 }
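
#if 0
/*
 * Illustrative sketch, not part of the original source or the kernel build:
 * a worked example of the non-authenticated v5 decode above, with hypothetical
 * field values.  "next" (bits 52..62) is a stride in 8-byte units to the
 * following slot, and "high8" (bits 34..41) is shifted up by 22 so it lands
 * in the top byte of the rebased pointer.
 */
static void
v5_plain_pointer_example(void)
{
	uint64_t value = (2ULL << 52)     /* next = 2 -> 16 bytes to the next slot */
	    | (0x12ULL << 34)             /* high8 = 0x12 */
	    | 0x1000ULL;                  /* runtimeOffset = 0x1000 */

	uint64_t delta = ((value & 0x7FF0000000000000ULL) >> 52) * sizeof(uint64_t); /* 16 */
	uint64_t high8 = (value << 22) & 0xFF00000000000000ULL;                      /* 0x1200000000000000 */
	uint64_t runtimeOffset = value & 0x3FFFFFFFFULL;                             /* 0x1000 */
	(void)delta;
	(void)high8;
	(void)runtimeOffset;
}
#endif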
3524 
3525 
3526 
3527 kern_return_t
3528 vm_shared_region_slide_page(
3529 	vm_shared_region_slide_info_t si,
3530 	vm_offset_t vaddr,
3531 	mach_vm_offset_t uservaddr,
3532 	uint32_t pageIndex,
3533 	uint64_t jop_key)
3534 {
3535 	switch (si->si_slide_info_entry->version) {
3536 	case 1:
3537 		return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
3538 	case 2:
3539 		return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3540 	case 3:
3541 		return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3542 	case 4:
3543 		return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3544 	case 5:
3545 		return vm_shared_region_slide_page_v5(si, vaddr, uservaddr, pageIndex, jop_key);
3546 	default:
3547 		return KERN_FAILURE;
3548 	}
3549 }
3550 
3551 /******************************************************************************/
3552 /* Comm page support                                                          */
3553 /******************************************************************************/
3554 
3555 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
3556 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
3557 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
3558 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
3559 SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
3560 SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;
3561 
3562 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
3563 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
3564 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
3565 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
3566 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
3567 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;
3568 
3569 SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
3570 SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3571 
3572 #if defined(__i386__) || defined(__x86_64__)
3573 /*
3574  * Create a memory entry, VM submap and pmap for one commpage.
3575  */
3576 static void
3577 _vm_commpage_init(
3578 	ipc_port_t      *handlep,
3579 	vm_map_size_t   size)
3580 {
3581 	vm_named_entry_t        mem_entry;
3582 	vm_map_t                new_map;
3583 
3584 	SHARED_REGION_TRACE_DEBUG(
3585 		("commpage: -> _init(0x%llx)\n",
3586 		(long long)size));
3587 
3588 	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
3589 	if (new_pmap == NULL) {
3590 		panic("_vm_commpage_init: could not allocate pmap");
3591 	}
3592 	new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);
3593 
3594 	mem_entry = mach_memory_entry_allocate(handlep);
3595 	mem_entry->backing.map = new_map;
3596 	mem_entry->internal = TRUE;
3597 	mem_entry->is_sub_map = TRUE;
3598 	mem_entry->offset = 0;
3599 	mem_entry->protection = VM_PROT_ALL;
3600 	mem_entry->size = size;
3601 
3602 	SHARED_REGION_TRACE_DEBUG(
3603 		("commpage: _init(0x%llx) <- %p\n",
3604 		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
3605 }
3606 #endif
3607 
3608 
3609 /*
3610  * Initialize the comm text pages at boot time
3611  */
3612 void
3613 vm_commpage_text_init(void)
3614 {
3615 	SHARED_REGION_TRACE_DEBUG(
3616 		("commpage text: ->init()\n"));
3617 #if defined(__i386__) || defined(__x86_64__)
3618 	/* create the 32-bit comm text page */
3619 	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restrict to 32-bit max minus 2 pages */
3620 	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3621 	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
3622 	commpage_text32_map = commpage_text32_entry->backing.map;
3623 	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
3624 	/* XXX if (cpu_is_64bit_capable()) ? */
3625 	/* create the 64-bit comm text page */
3626 	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict sliding to at most a 2MB range */
3627 	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3628 	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
3629 	commpage_text64_map = commpage_text64_entry->backing.map;
3630 	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
3631 #endif
3632 
3633 	commpage_text_populate();
3634 
3635 	/* populate the routines in here */
3636 	SHARED_REGION_TRACE_DEBUG(
3637 		("commpage text: init() <-\n"));
3638 }
3639 
3640 /*
3641  * Initialize the comm pages at boot time.
3642  */
3643 void
3644 vm_commpage_init(void)
3645 {
3646 	SHARED_REGION_TRACE_DEBUG(
3647 		("commpage: -> init()\n"));
3648 
3649 #if defined(__i386__) || defined(__x86_64__)
3650 	/* create the 32-bit comm page */
3651 	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
3652 	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
3653 	commpage32_map = commpage32_entry->backing.map;
3654 
3655 	/* XXX if (cpu_is_64bit_capable()) ? */
3656 	/* create the 64-bit comm page */
3657 	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
3658 	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
3659 	commpage64_map = commpage64_entry->backing.map;
3660 
3661 #endif /* __i386__ || __x86_64__ */
3662 
3663 	/* populate them according to this specific platform */
3664 	commpage_populate();
3665 	__commpage_setup = 1;
3666 #if XNU_TARGET_OS_OSX
3667 	if (__system_power_source == 0) {
3668 		post_sys_powersource_internal(0, 1);
3669 	}
3670 #endif /* XNU_TARGET_OS_OSX */
3671 
3672 	SHARED_REGION_TRACE_DEBUG(
3673 		("commpage: init() <-\n"));
3674 }
3675 
3676 /*
3677  * Enter the appropriate comm page into the task's address space.
3678  * This is called at exec() time via vm_map_exec().
3679  */
3680 kern_return_t
3681 vm_commpage_enter(
3682 	vm_map_t        map,
3683 	task_t          task,
3684 	boolean_t       is64bit)
3685 {
3686 #if   defined(__arm64__)
3687 #pragma unused(is64bit)
3688 	(void)task;
3689 	(void)map;
3690 	pmap_insert_commpage(vm_map_pmap(map));
3691 	return KERN_SUCCESS;
3692 #else
3693 	ipc_port_t              commpage_handle, commpage_text_handle;
3694 	vm_map_offset_t         commpage_address, objc_address, commpage_text_address;
3695 	vm_map_size_t           commpage_size, objc_size, commpage_text_size;
3696 	vm_map_kernel_flags_t   vmk_flags;
3697 	kern_return_t           kr;
3698 
3699 	SHARED_REGION_TRACE_DEBUG(
3700 		("commpage: -> enter(%p,%p)\n",
3701 		(void *)VM_KERNEL_ADDRPERM(map),
3702 		(void *)VM_KERNEL_ADDRPERM(task)));
3703 
3704 	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
3705 	/* the comm page is likely to be beyond the actual end of the VM map */
3706 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
3707 	vmk_flags.vmkf_beyond_max = TRUE;
3708 
3709 	/* select the appropriate comm page for this task */
3710 	assert(!(is64bit ^ vm_map_is_64bit(map)));
3711 	if (is64bit) {
3712 		commpage_handle = commpage64_handle;
3713 		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
3714 		commpage_size = _COMM_PAGE64_AREA_LENGTH;
3715 		objc_size = _COMM_PAGE64_OBJC_SIZE;
3716 		objc_address = _COMM_PAGE64_OBJC_BASE;
3717 		commpage_text_handle = commpage_text64_handle;
3718 		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
3719 	} else {
3720 		commpage_handle = commpage32_handle;
3721 		commpage_address =
3722 		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
3723 		commpage_size = _COMM_PAGE32_AREA_LENGTH;
3724 		objc_size = _COMM_PAGE32_OBJC_SIZE;
3725 		objc_address = _COMM_PAGE32_OBJC_BASE;
3726 		commpage_text_handle = commpage_text32_handle;
3727 		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
3728 	}
3729 
3730 	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
3731 	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
3732 		/* the commpage is properly aligned and sized for pmap-nesting */
3733 		vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
3734 		vmk_flags.vmkf_nested_pmap = TRUE;
3735 	}
3736 
3737 	/* map the comm page in the task's address space */
3738 	assert(commpage_handle != IPC_PORT_NULL);
3739 	kr = mach_vm_map_kernel(
3740 		map,
3741 		vm_sanitize_wrap_addr_ref(&commpage_address),
3742 		commpage_size,
3743 		0,
3744 		vmk_flags,
3745 		commpage_handle,
3746 		0,
3747 		FALSE,
3748 		VM_PROT_READ,
3749 		VM_PROT_READ,
3750 		VM_INHERIT_SHARE);
3751 	if (kr != KERN_SUCCESS) {
3752 		SHARED_REGION_TRACE_ERROR(
3753 			("commpage: enter(%p,0x%llx,0x%llx) "
3754 			"commpage %p mapping failed 0x%x\n",
3755 			(void *)VM_KERNEL_ADDRPERM(map),
3756 			(long long)commpage_address,
3757 			(long long)commpage_size,
3758 			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
3759 	}
3760 
3761 	/* map the comm text page in the task's address space */
3762 	assert(commpage_text_handle != IPC_PORT_NULL);
3763 	kr = mach_vm_map_kernel(
3764 		map,
3765 		vm_sanitize_wrap_addr_ref(&commpage_text_address),
3766 		commpage_text_size,
3767 		0,
3768 		vmk_flags,
3769 		commpage_text_handle,
3770 		0,
3771 		FALSE,
3772 		VM_PROT_READ | VM_PROT_EXECUTE,
3773 		VM_PROT_READ | VM_PROT_EXECUTE,
3774 		VM_INHERIT_SHARE);
3775 	if (kr != KERN_SUCCESS) {
3776 		SHARED_REGION_TRACE_ERROR(
3777 			("commpage text: enter(%p,0x%llx,0x%llx) "
3778 			"commpage text %p mapping failed 0x%x\n",
3779 			(void *)VM_KERNEL_ADDRPERM(map),
3780 			(long long)commpage_text_address,
3781 			(long long)commpage_text_size,
3782 			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
3783 	}
3784 
3785 	/*
3786 	 * Since we're here, we also pre-allocate some virtual space for the
3787 	 * Objective-C run-time, if needed...
3788 	 */
3789 	if (objc_size != 0) {
3790 		kr = mach_vm_map_kernel(
3791 			map,
3792 			vm_sanitize_wrap_addr_ref(&objc_address),
3793 			objc_size,
3794 			0,
3795 			vmk_flags,
3796 			IPC_PORT_NULL,
3797 			0,
3798 			FALSE,
3799 			VM_PROT_ALL,
3800 			VM_PROT_ALL,
3801 			VM_INHERIT_DEFAULT);
3802 		if (kr != KERN_SUCCESS) {
3803 			SHARED_REGION_TRACE_ERROR(
3804 				("commpage: enter(%p,0x%llx,0x%llx) "
3805 				"objc mapping failed 0x%x\n",
3806 				(void *)VM_KERNEL_ADDRPERM(map),
3807 				(long long)objc_address,
3808 				(long long)objc_size, kr));
3809 		}
3810 	}
3811 
3812 	SHARED_REGION_TRACE_DEBUG(
3813 		("commpage: enter(%p,%p) <- 0x%x\n",
3814 		(void *)VM_KERNEL_ADDRPERM(map),
3815 		(void *)VM_KERNEL_ADDRPERM(task), kr));
3816 	return kr;
3817 #endif
3818 }
3819 
3820 int
3821 vm_shared_region_slide(
3822 	uint32_t slide,
3823 	mach_vm_offset_t        entry_start_address,
3824 	mach_vm_size_t          entry_size,
3825 	mach_vm_offset_t        slide_start,
3826 	mach_vm_size_t          slide_size,
3827 	mach_vm_offset_t        slid_mapping,
3828 	memory_object_control_t sr_file_control,
3829 	vm_prot_t               prot)
3830 {
3831 	vm_shared_region_t      sr;
3832 	kern_return_t           error;
3833 
3834 	SHARED_REGION_TRACE_DEBUG(
3835 		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
3836 		slide, entry_start_address, entry_size, slide_start, slide_size));
3837 
3838 	sr = vm_shared_region_get(current_task());
3839 	if (sr == NULL) {
3840 		printf("%s: no shared region?\n", __FUNCTION__);
3841 		SHARED_REGION_TRACE_DEBUG(
3842 			("vm_shared_region_slide: <- %d (no shared region)\n",
3843 			KERN_FAILURE));
3844 		return KERN_FAILURE;
3845 	}
3846 
3847 	/*
3848 	 * Protect from concurrent access.
3849 	 */
3850 	vm_shared_region_lock();
3851 	while (sr->sr_slide_in_progress) {
3852 		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
3853 	}
3854 
3855 	sr->sr_slide_in_progress = current_thread();
3856 	vm_shared_region_unlock();
3857 
3858 	error = vm_shared_region_slide_mapping(sr,
3859 	    (user_addr_t)slide_start,
3860 	    slide_size,
3861 	    entry_start_address,
3862 	    entry_size,
3863 	    slid_mapping,
3864 	    slide,
3865 	    sr_file_control,
3866 	    prot);
3867 	if (error) {
3868 		printf("slide_info initialization failed with kr=%d\n", error);
3869 	}
3870 
3871 	vm_shared_region_lock();
3872 
3873 	assert(sr->sr_slide_in_progress == current_thread());
3874 	sr->sr_slide_in_progress = THREAD_NULL;
3875 	vm_shared_region_wakeup(&sr->sr_slide_in_progress);
3876 
3877 #if XNU_TARGET_OS_OSX
3878 	if (error == KERN_SUCCESS) {
3879 		shared_region_completed_slide = TRUE;
3880 	}
3881 #endif /* XNU_TARGET_OS_OSX */
3882 	vm_shared_region_unlock();
3883 
3884 	vm_shared_region_deallocate(sr);
3885 
3886 	SHARED_REGION_TRACE_DEBUG(
3887 		("vm_shared_region_slide: <- %d\n",
3888 		error));
3889 
3890 	return error;
3891 }
3892 
3893 /*
3894  * Used during Authenticated Root Volume macOS boot.
3895  * Launchd re-execs itself and wants the new launchd to use
3896  * the shared cache from the new root volume. This call
3897  * makes all the existing shared caches stale to allow
3898  * that to happen.
3899  */
3900 void
3901 vm_shared_region_pivot(void)
3902 {
3903 	vm_shared_region_t      shared_region = NULL;
3904 
3905 	vm_shared_region_lock();
3906 
3907 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3908 		assert(shared_region->sr_ref_count > 0);
3909 		shared_region->sr_stale = TRUE;
3910 		if (shared_region->sr_timer_call) {
3911 			/*
3912 			 * We have a shared region ready to be destroyed
3913 			 * and just waiting for a delayed timer to fire.
3914 			 * Marking it stale cements its ineligibility to
3915 			 * be used ever again. So let's shorten the timer
3916 			 * aggressively down to 10 milliseconds and get rid of it.
3917 			 * This is a single quantum and we don't need to go
3918 			 * shorter than this duration. We want it to be short
3919 			 * enough, however, because we could have an unmount
3920 			 * of the volume hosting this shared region just behind
3921 			 * us.
3922 			 */
3923 			uint64_t deadline;
3924 			assert(shared_region->sr_ref_count == 1);
3925 
3926 			/*
3927 			 * Free the old timer call. Returns with a reference held.
3928 			 * If the old timer has fired and is waiting for the vm_shared_region_lock
3929 			 * lock, we will just return with an additional ref_count i.e. 2.
3930 			 * The old timer will then fire and just drop the ref count down to 1
3931 			 * with no other modifications.
3932 			 */
3933 			vm_shared_region_reference_locked(shared_region);
3934 
3935 			/* set up the timer. Keep the reference from above for this timer.*/
3936 			shared_region->sr_timer_call = thread_call_allocate(
3937 				(thread_call_func_t) vm_shared_region_timeout,
3938 				(thread_call_param_t) shared_region);
3939 
3940 			/* schedule the timer */
3941 			clock_interval_to_deadline(10, /* 10 milliseconds */
3942 			    NSEC_PER_MSEC,
3943 			    &deadline);
3944 			thread_call_enter_delayed(shared_region->sr_timer_call,
3945 			    deadline);
3946 
3947 			SHARED_REGION_TRACE_DEBUG(
3948 				("shared_region: pivot(%p): armed timer\n",
3949 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
3950 		}
3951 	}
3952 
3953 	vm_shared_region_unlock();
3954 }
3955 
3956 /*
3957  * Routine to mark any non-standard slide shared cache region as stale.
3958  * This causes the next "reslide" spawn to create a new shared region.
3959  */
3960 void
3961 vm_shared_region_reslide_stale(boolean_t driverkit)
3962 {
3963 #if __has_feature(ptrauth_calls)
3964 	vm_shared_region_t      shared_region = NULL;
3965 
3966 	vm_shared_region_lock();
3967 
3968 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3969 		assert(shared_region->sr_ref_count > 0);
3970 		if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
3971 			shared_region->sr_stale = TRUE;
3972 			vm_shared_region_reslide_count++;
3973 		}
3974 	}
3975 
3976 	vm_shared_region_unlock();
3977 #else
3978 	(void)driverkit;
3979 #endif /* __has_feature(ptrauth_calls) */
3980 }
3981 
3982 /*
3983  * report if the task is using a reslide shared cache region.
3984  */
3985 bool
3986 vm_shared_region_is_reslide(__unused struct task *task)
3987 {
3988 	bool is_reslide = FALSE;
3989 #if __has_feature(ptrauth_calls)
3990 	vm_shared_region_t sr = vm_shared_region_get(task);
3991 
3992 	if (sr != NULL) {
3993 		is_reslide = sr->sr_reslide;
3994 		vm_shared_region_deallocate(sr);
3995 	}
3996 #endif /* __has_feature(ptrauth_calls) */
3997 	return is_reslide;
3998 }
3999 
4000 /*
4001  * This is called from power management code to let the kernel know the current source of power:
4002  * 0 if it is an external source (connected to power),
4003  * 1 if it is an internal power source, i.e. battery.
4004  */
4005 void
4006 #if XNU_TARGET_OS_OSX
4007 post_sys_powersource(int i)
4008 #else /* XNU_TARGET_OS_OSX */
4009 post_sys_powersource(__unused int i)
4010 #endif /* XNU_TARGET_OS_OSX */
4011 {
4012 #if XNU_TARGET_OS_OSX
4013 	post_sys_powersource_internal(i, 0);
4014 #endif /* XNU_TARGET_OS_OSX */
4015 }
4016 
4017 
4018 #if XNU_TARGET_OS_OSX
4019 static void
4020 post_sys_powersource_internal(int i, int internal)
4021 {
4022 	if (internal == 0) {
4023 		__system_power_source = i;
4024 	}
4025 }
4026 #endif /* XNU_TARGET_OS_OSX */
4027 
4028 void *
4029 vm_shared_region_root_dir(
4030 	struct vm_shared_region *sr)
4031 {
4032 	void *vnode;
4033 
4034 	vm_shared_region_lock();
4035 	vnode = sr->sr_root_dir;
4036 	vm_shared_region_unlock();
4037 	return vnode;
4038 }
4039