xref: /xnu-12377.61.12/osfmk/vm/vm_shared_region.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
10  * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 
24 /*
25  * Shared region (... and comm page)
26  *
27  * This file handles the VM shared region and comm page.
28  *
29  */
30 /*
31  * SHARED REGIONS
32  * --------------
33  *
34  * A shared region is a submap that contains the most common system shared
35  * libraries for a given environment which is defined by:
36  * - cpu-type
37  * - 64-bitness
38  * - root directory
39  * - Team ID - when we have pointer authentication.
40  *
41  * The point of a shared region is to reduce the setup overhead when exec'ing
42  * a new process. A shared region uses a shared VM submap that gets mapped
43  * automatically at exec() time, see vm_map_exec().  The first process of a given
44  * environment sets up the shared region and all further processes in that
45  * environment can re-use that shared region without having to re-create
46  * the same mappings in their VM map.  All they need is contained in the shared
47  * region.
48  *
49  * The region can also share a pmap (mostly for read-only parts but also for the
50  * initial version of some writable parts), which gets "nested" into the
51  * process's pmap.  This reduces the number of soft faults:  once one process
52  * brings in a page in the shared region, all the other processes can access
53  * it without having to enter it in their own pmap.
54  *
55  * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56  * to associate the appropriate shared region with the process's address space.
57  * We look up the appropriate shared region for the process's environment.
58  * If we can't find one, we create a new (empty) one and add it to the list.
59  * Otherwise, we just take an extra reference on the shared region we found.
60  * At this point, the shared region is not actually mapped into the process's
61  * address space, but rather a permanent VM_PROT_NONE placeholder covering the
62  * same VA region as the shared region is inserted.
63  *
64  * The "dyld" runtime, mapped into the process's address space at exec() time,
65  * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
66  * system calls to validate and/or populate the shared region with the
67  * appropriate dyld_shared_cache file.  If the initial call to shared_region_check_np()
68  * indicates that the shared region has not been configured, dyld will then call
69  * shared_region_map_and_slide_2_np() to configure the shared region.  It's possible
70  * that multiple tasks may simultaneously issue this call sequence for the same shared
71  * region, but the synchronization done by shared_region_acquire() will ensure that
72  * only one task will ultimately configure the shared region.  All other tasks will
73  * wait for that task to finish its configuration step, at which point (assuming
74  * successful configuration) they will observe the configured shared region and
75  * re-issue the shared_region_check_np() system call to obtain the final shared
76  * region info.
77  *
78  * For the task that ends up configuring the shared region, the mapping and
79  * sliding of the shared region is performed against a temporary configuration-only
80  * vm_map, which is temporarily activated for the calling thread using
81  * vm_map_switch_to().  Once mapping and sliding completes successfully, the shared
82  * region will be "sealed" by stabilizing all its vm_map_entrys using COPY_DELAY
83  * objects, which eliminates the need for later modification of shared region map
84  * entries and thus simplifies the shared region's runtime locking requirements.
85  * After this sealing step, the original task vm_map will be restored.  Since this
86  * entire configuration sequence happens within the context of a single system call,
87  * use of the temporary vm_map effectively guarantees that the shared region will
88  * not be visible in the task's address space (either to other threads in the task
89  * or to other tasks attempting to query the address space e.g. for debugging purposes)
90  * until it has been fully configured and sealed.
91  *
92  * The shared region is only inserted into a task's address space when the
93  * shared_region_check_np() system call detects that the shared region has been fully
94  * configured.  Only at this point will the placeholder entry inserted at exec()
95  * time be replaced with the real shared region submap entry.  This step is required
96  * of all tasks; even the task that previously configured the shared region must
97  * issue a final shared_region_check_np() system call to obtain the real shared
98  * region mapping.
99  *
100  * The shared region is inherited on fork() and the child simply takes an
101  * extra reference on its parent's shared region.
102  *
103  * When the task terminates, we release the reference on its shared region.
104  * When the last reference is released, we destroy the shared region.
105  *
106  * After a chroot(), the calling process keeps using its original shared region,
107  * since that's what was mapped when it was started.  But its children
108  * will use a different shared region, because they need to use the shared
109  * cache that's relative to the new root directory.
110  */
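/*
 * Illustrative sketch (not compiled): the dyld-side call sequence described
 * above, as it might look from userspace.  The local variable names and the
 * error handling are hypothetical; the two system calls are the
 * shared_region_check_np() and shared_region_map_and_slide_2_np() interfaces
 * referenced in this comment.
 *
 *	uint64_t sr_start_address = 0;
 *	if (shared_region_check_np(&sr_start_address) != 0) {
 *		// not configured yet: map and slide the dyld_shared_cache file(s)
 *		shared_region_map_and_slide_2_np(files_count, files,
 *		    mappings_count, mappings);
 *		// re-check to observe the fully configured, sealed shared region
 *		shared_region_check_np(&sr_start_address);
 *	}
 *	// sr_start_address now points at the shared cache's first mapping
 */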
111 
112 /*
113  * COMM PAGE
114  *
115  * A "comm page" is an area of memory that is populated by the kernel with
116  * the appropriate platform-specific version of some commonly used code.
117  * There is one "comm page" per platform (cpu-type, 64-bitness) but only
118  * for the native cpu-type.  No need to overly optimize translated code
 119  * for hardware that is not really there!
120  *
121  * The comm pages are created and populated at boot time.
122  *
123  * The appropriate comm page is mapped into a process's address space
124  * at exec() time, in vm_map_exec(). It is then inherited on fork().
125  *
126  * The comm page is shared between the kernel and all applications of
127  * a given platform. Only the kernel can modify it.
128  *
129  * Applications just branch to fixed addresses in the comm page and find
130  * the right version of the code for the platform.  There is also some
131  * data provided and updated by the kernel for processes to retrieve easily
132  * without having to do a system call.
133  */
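/*
 * Illustrative sketch (not compiled): userspace reading a kernel-maintained
 * value straight out of the comm page, with no system call.  The
 * _COMM_PAGE_NCPUS constant comes from <machine/cpu_capabilities.h>; the
 * surrounding code is hypothetical.
 *
 *	#include <machine/cpu_capabilities.h>
 *
 *	uint8_t ncpus = *(volatile uint8_t *)(uintptr_t)_COMM_PAGE_NCPUS;
 */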
134 
135 #include <debug.h>
136 
137 #include <kern/ipc_tt.h>
138 #include <kern/kalloc.h>
139 #include <kern/thread_call.h>
140 
141 #include <mach/mach_vm.h>
142 #include <mach/machine.h>
143 
144 #include <vm/vm_map_internal.h>
145 #include <vm/vm_memory_entry_xnu.h>
146 #include <vm/vm_shared_region_internal.h>
147 #include <vm/vm_kern_xnu.h>
148 #include <vm/memory_object_internal.h>
149 #include <vm/vm_protos_internal.h>
150 #include <vm/vm_object_internal.h>
151 
152 #include <machine/commpage.h>
153 #include <machine/cpu_capabilities.h>
154 #include <sys/random.h>
155 #include <sys/errno.h>
156 #include <sys/code_signing.h>
157 
158 #if defined(__arm64__)
159 #include <arm/cpu_data_internal.h>
160 #include <arm/misc_protos.h>
161 #endif
162 
163 /*
 164  * The following codes are used in the subclass
 165  * of the DBG_MACH_SHAREDREGION class.
166  */
167 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
168 
169 #if __has_feature(ptrauth_calls)
170 #include <ptrauth.h>
171 #endif /* __has_feature(ptrauth_calls) */
172 
173 /* "dyld" uses this to figure out what the kernel supports */
174 int shared_region_version = 3;
175 
176 /* trace level, output is sent to the system log file */
177 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
178 
 179 /* should local (non-chroot) shared regions persist when no task uses them? */
180 int shared_region_persistence = 0;      /* no by default */
181 
182 
183 /* delay in seconds before reclaiming an unused shared region */
184 TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
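/*
 * Illustrative example (assuming writable tunables are honored in this
 * configuration): the delay can be overridden with the boot-arg named above,
 * e.g. "vm_shared_region_destroy_delay=300" in the boot-args.
 */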
185 
186 #if DEVELOPMENT || DEBUG
187 #define PANIC_ON_DYLD_ISSUE_DEFAULT 0
188 #else /* DEVELOPMENT || DEBUG */
189 #define PANIC_ON_DYLD_ISSUE_DEFAULT 0
190 #endif /* DEVELOPMENT || DEBUG */
191 TUNABLE_WRITEABLE(int, panic_on_dyld_issue, "panic_on_dyld_issue", PANIC_ON_DYLD_ISSUE_DEFAULT);
192 
193 /*
194  * Cached pointer to the most recently mapped shared region from PID 1, which should
195  * be the most commonly mapped shared region in the system.  There are many processes
196  * which do not use this, for a variety of reasons.
197  *
198  * The main consumer of this is stackshot.
199  */
200 struct vm_shared_region *primary_system_shared_region = NULL;
201 
202 #if XNU_TARGET_OS_OSX
203 /*
204  * Only one cache gets to slide on Desktop, since we can't
205  * tear down slide info properly today and the desktop actually
206  * produces lots of shared caches.
207  */
208 boolean_t shared_region_completed_slide = FALSE;
209 #endif /* XNU_TARGET_OS_OSX */
210 
211 /* this lock protects all the shared region data structures */
212 static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
213 static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
214 
215 #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
216 #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
217 #define vm_shared_region_sleep(event, interruptible)                    \
218 	lck_mtx_sleep_with_inheritor(&vm_shared_region_lock,            \
219 	              LCK_SLEEP_DEFAULT,                                \
220 	              (event_t) (event),                                \
221 	              *(event),                                         \
222 	              (interruptible) | THREAD_WAIT_NOREPORT,           \
223 	              TIMEOUT_WAIT_FOREVER)
224 #define vm_shared_region_wakeup(event)                                  \
225 	wakeup_all_with_inheritor((event), THREAD_AWAKENED)
226 
227 /* the list of currently available shared regions (one per environment) */
228 queue_head_t    vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
229 int             vm_shared_region_count = 0;
230 int             vm_shared_region_peak = 0;
231 static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */
232 
233 /*
234  * the number of times an event has forced the recalculation of the reslide
235  * shared region slide.
236  */
237 #if __has_feature(ptrauth_calls)
238 int                             vm_shared_region_reslide_count = 0;
239 #endif /* __has_feature(ptrauth_calls) */
240 
241 static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
242 static vm_shared_region_t vm_shared_region_create(
243 	void          *root_dir,
244 	cpu_type_t    cputype,
245 	cpu_subtype_t cpu_subtype,
246 	boolean_t     is_64bit,
247 	int           target_page_shift,
248 	boolean_t     reslide,
249 	boolean_t     is_driverkit,
250 	uint32_t      rsr_version);
251 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
252 
253 static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
254 static void vm_shared_region_timeout(thread_call_param_t param0,
255     thread_call_param_t param1);
256 static kern_return_t vm_shared_region_slide_mapping(
257 	vm_shared_region_t sr,
258 	user_addr_t        slide_info_addr,
259 	mach_vm_size_t     slide_info_size,
260 	mach_vm_offset_t   start,
261 	mach_vm_size_t     size,
262 	mach_vm_offset_t   slid_mapping,
263 	uint32_t           slide,
264 	memory_object_control_t,
265 	vm_prot_t          prot); /* forward */
266 static kern_return_t vm_shared_region_insert_placeholder(vm_map_t map, vm_shared_region_t shared_region);
267 static kern_return_t vm_shared_region_insert_submap(vm_map_t map, vm_shared_region_t shared_region, bool overwrite);
268 
269 static int __commpage_setup = 0;
270 #if XNU_TARGET_OS_OSX
 271 static int __system_power_source = 1;   /* init to external power source */
272 static void post_sys_powersource_internal(int i, int internal);
273 #endif /* XNU_TARGET_OS_OSX */
274 
275 extern u_int32_t random(void);
276 
277 /*
278  * Retrieve a task's shared region and grab an extra reference to
279  * make sure it doesn't disappear while the caller is using it.
280  * The caller is responsible for consuming that extra reference if
281  * necessary.
282  */
283 vm_shared_region_t
 284 vm_shared_region_get(
285 	task_t          task)
286 {
287 	vm_shared_region_t      shared_region;
288 
289 	SHARED_REGION_TRACE_DEBUG(
290 		("shared_region: -> get(%p)\n",
291 		(void *)VM_KERNEL_ADDRPERM(task)));
292 
293 	task_lock(task);
294 	vm_shared_region_lock();
295 	shared_region = task->shared_region;
296 	if (shared_region != NULL) {
297 		assert(shared_region->sr_ref_count > 0);
298 		vm_shared_region_reference_locked(shared_region);
299 	}
300 	vm_shared_region_unlock();
301 	task_unlock(task);
302 
303 	SHARED_REGION_TRACE_DEBUG(
304 		("shared_region: get(%p) <- %p\n",
305 		(void *)VM_KERNEL_ADDRPERM(task),
306 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
307 
308 	return shared_region;
309 }
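/*
 * Illustrative sketch (not compiled): typical pairing of vm_shared_region_get()
 * with vm_shared_region_deallocate(), consuming the extra reference described
 * above.  The surrounding caller is hypothetical.
 *
 *	vm_shared_region_t sr = vm_shared_region_get(task);
 *	if (sr != NULL) {
 *		vm_map_t sr_map = vm_shared_region_vm_map(sr);
 *		// ... inspect sr_map ...
 *		vm_shared_region_deallocate(sr);	// drop the extra reference
 *	}
 */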
310 
311 static void
 312 vm_shared_region_acquire(vm_shared_region_t shared_region)
313 {
314 	vm_shared_region_lock();
315 	assert(shared_region->sr_ref_count > 0);
316 	while (shared_region->sr_mapping_in_progress != NULL) {
317 		/* wait for our turn... */
318 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
319 		    THREAD_UNINT);
320 	}
321 	assert(shared_region->sr_mapping_in_progress == NULL);
322 	assert(shared_region->sr_ref_count > 0);
323 
324 	/* let others know to wait while we're working in this shared region */
325 	shared_region->sr_mapping_in_progress = current_thread();
326 	vm_shared_region_unlock();
327 }
328 
329 static void
 330 vm_shared_region_release(vm_shared_region_t shared_region)
331 {
332 	vm_shared_region_lock();
333 	assert(shared_region->sr_mapping_in_progress == current_thread());
334 	shared_region->sr_mapping_in_progress = THREAD_NULL;
335 	vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
336 	vm_shared_region_unlock();
337 }
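/*
 * Illustrative sketch (not compiled): the acquire/release pair brackets any
 * work that must be serialized with other threads mapping or configuring this
 * shared region, e.g. (hypothetical caller):
 *
 *	vm_shared_region_acquire(sr);
 *	// ... examine or populate the shared region ...
 *	vm_shared_region_release(sr);
 */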
338 
339 static void
 340 vm_shared_region_seal(
341 	struct vm_shared_region *sr)
342 {
343 	vm_map_t sr_map;
344 
345 	sr_map = vm_shared_region_vm_map(sr);
346 	vm_map_seal(sr_map, true /* nested_pmap */);
347 }
348 
349 vm_map_t
 350 vm_shared_region_vm_map(
351 	vm_shared_region_t      shared_region)
352 {
353 	ipc_port_t              sr_handle;
354 	vm_named_entry_t        sr_mem_entry;
355 	vm_map_t                sr_map;
356 
357 	SHARED_REGION_TRACE_DEBUG(
358 		("shared_region: -> vm_map(%p)\n",
359 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
360 	assert(shared_region->sr_ref_count > 0);
361 
362 	sr_handle = shared_region->sr_mem_entry;
363 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
364 	sr_map = sr_mem_entry->backing.map;
365 	assert(sr_mem_entry->is_sub_map);
366 
367 	SHARED_REGION_TRACE_DEBUG(
368 		("shared_region: vm_map(%p) <- %p\n",
369 		(void *)VM_KERNEL_ADDRPERM(shared_region),
370 		(void *)VM_KERNEL_ADDRPERM(sr_map)));
371 	return sr_map;
372 }
373 
374 /*
375  * Set the shared region the process should use.
376  * A NULL new shared region means that we just want to release the old
377  * shared region.
378  * The caller should already have an extra reference on the new shared region
379  * (if any).  We release a reference on the old shared region (if any).
380  */
381 void
 382 vm_shared_region_set(
383 	task_t                  task,
384 	vm_shared_region_t      new_shared_region)
385 {
386 	vm_shared_region_t      old_shared_region;
387 
388 	SHARED_REGION_TRACE_DEBUG(
389 		("shared_region: -> set(%p, %p)\n",
390 		(void *)VM_KERNEL_ADDRPERM(task),
391 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
392 
393 	task_lock(task);
394 	vm_shared_region_lock();
395 
396 	old_shared_region = task->shared_region;
397 	if (new_shared_region) {
398 		assert(new_shared_region->sr_ref_count > 0);
399 	}
400 
401 	task->shared_region = new_shared_region;
402 
403 	vm_shared_region_unlock();
404 	task_unlock(task);
405 
406 	if (old_shared_region) {
407 		assert(old_shared_region->sr_ref_count > 0);
408 		vm_shared_region_deallocate(old_shared_region);
409 	}
410 
411 	SHARED_REGION_TRACE_DEBUG(
412 		("shared_region: set(%p) <- old=%p new=%p\n",
413 		(void *)VM_KERNEL_ADDRPERM(task),
414 		(void *)VM_KERNEL_ADDRPERM(old_shared_region),
415 		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
416 }
417 
418 /*
419  * New arm64 shared regions match with an existing arm64e region.
420  * They just get a private non-authenticating pager.
421  */
422 static inline bool
 423 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
424 {
425 	if (exist == new) {
426 		return true;
427 	}
428 	if (cputype == CPU_TYPE_ARM64 &&
429 	    exist == CPU_SUBTYPE_ARM64E &&
430 	    new == CPU_SUBTYPE_ARM64_ALL) {
431 		return true;
432 	}
433 	return false;
434 }
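/*
 * Illustrative example: a lookup for an arm64 (CPU_SUBTYPE_ARM64_ALL) process
 * is satisfied by an existing arm64e region, i.e.
 * match_subtype(CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64E, CPU_SUBTYPE_ARM64_ALL)
 * returns true, while the reverse direction (a new arm64e request against an
 * existing arm64_all region) does not.
 */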
435 
436 
437 /*
438  * Lookup up the shared region for the desired environment.
439  * If none is found, create a new (empty) one.
440  * Grab an extra reference on the returned shared region, to make sure
441  * it doesn't get destroyed before the caller is done with it.  The caller
442  * is responsible for consuming that extra reference if necessary.
443  */
444 vm_shared_region_t
 445 vm_shared_region_lookup(
446 	void            *root_dir,
447 	cpu_type_t      cputype,
448 	cpu_subtype_t   cpu_subtype,
449 	boolean_t       is_64bit,
450 	int             target_page_shift,
451 	boolean_t       reslide,
452 	boolean_t       is_driverkit,
453 	uint32_t        rsr_version)
454 {
455 	vm_shared_region_t      shared_region;
456 	vm_shared_region_t      new_shared_region;
457 
458 	SHARED_REGION_TRACE_DEBUG(
459 		("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
460 		(void *)VM_KERNEL_ADDRPERM(root_dir),
461 		cputype, cpu_subtype, is_64bit, target_page_shift,
462 		reslide, is_driverkit));
463 
464 	shared_region = NULL;
465 	new_shared_region = NULL;
466 
467 	vm_shared_region_lock();
468 	for (;;) {
469 		queue_iterate(&vm_shared_region_queue,
470 		    shared_region,
471 		    vm_shared_region_t,
472 		    sr_q) {
473 			assert(shared_region->sr_ref_count > 0);
474 			if (shared_region->sr_cpu_type == cputype &&
475 			    match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
476 			    shared_region->sr_root_dir == root_dir &&
477 			    shared_region->sr_64bit == is_64bit &&
478 #if __ARM_MIXED_PAGE_SIZE__
479 			    shared_region->sr_page_shift == target_page_shift &&
480 #endif /* __ARM_MIXED_PAGE_SIZE__ */
481 #if __has_feature(ptrauth_calls)
482 			    shared_region->sr_reslide == reslide &&
483 #endif /* __has_feature(ptrauth_calls) */
484 			    shared_region->sr_driverkit == is_driverkit &&
485 			    shared_region->sr_rsr_version == rsr_version &&
486 			    !shared_region->sr_stale) {
487 				/* found a match ! */
488 				vm_shared_region_reference_locked(shared_region);
489 				goto done;
490 			}
491 		}
492 		if (new_shared_region == NULL) {
493 			/* no match: create a new one */
494 			vm_shared_region_unlock();
495 			new_shared_region = vm_shared_region_create(root_dir,
496 			    cputype,
497 			    cpu_subtype,
498 			    is_64bit,
499 			    target_page_shift,
500 			    reslide,
501 			    is_driverkit,
502 			    rsr_version);
503 			/* do the lookup again, in case we lost a race */
504 			vm_shared_region_lock();
505 			continue;
506 		}
507 		/* still no match: use our new one */
508 		shared_region = new_shared_region;
509 		new_shared_region = NULL;
510 		uint32_t newid = ++vm_shared_region_lastid;
511 		if (newid == 0) {
512 			panic("shared_region: vm_shared_region_lastid wrapped");
513 		}
514 		shared_region->sr_id = newid;
515 		shared_region->sr_install_time = mach_absolute_time();
516 		queue_enter(&vm_shared_region_queue,
517 		    shared_region,
518 		    vm_shared_region_t,
519 		    sr_q);
520 		vm_shared_region_count++;
521 		if (vm_shared_region_count > vm_shared_region_peak) {
522 			vm_shared_region_peak = vm_shared_region_count;
523 		}
524 		break;
525 	}
526 
527 done:
528 	vm_shared_region_unlock();
529 
530 	if (new_shared_region) {
531 		/*
532 		 * We lost a race with someone else to create a new shared
533 		 * region for that environment. Get rid of our unused one.
534 		 */
535 		assert(new_shared_region->sr_ref_count == 1);
536 		new_shared_region->sr_ref_count--;
537 		vm_shared_region_destroy(new_shared_region);
538 		new_shared_region = NULL;
539 	}
540 
541 	SHARED_REGION_TRACE_DEBUG(
542 		("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
543 		(void *)VM_KERNEL_ADDRPERM(root_dir),
544 		cputype, cpu_subtype, is_64bit, target_page_shift,
545 		reslide, is_driverkit,
546 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
547 
548 	assert(shared_region->sr_ref_count > 0);
549 	return shared_region;
550 }
551 
552 /*
553  * Take an extra reference on a shared region.
554  * The vm_shared_region_lock should already be held by the caller.
555  */
556 static void
 557 vm_shared_region_reference_locked(
558 	vm_shared_region_t      shared_region)
559 {
560 	LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
561 
562 	SHARED_REGION_TRACE_DEBUG(
563 		("shared_region: -> reference_locked(%p)\n",
564 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
565 	assert(shared_region->sr_ref_count > 0);
566 	shared_region->sr_ref_count++;
567 	assert(shared_region->sr_ref_count != 0);
568 
569 	if (shared_region->sr_timer_call != NULL) {
570 		boolean_t cancelled;
571 
572 		/* cancel and free any pending timeout */
573 		cancelled = thread_call_cancel(shared_region->sr_timer_call);
574 		if (cancelled) {
575 			thread_call_free(shared_region->sr_timer_call);
576 			shared_region->sr_timer_call = NULL;
577 			/* release the reference held by the cancelled timer */
578 			shared_region->sr_ref_count--;
579 		} else {
580 			/* the timer will drop the reference and free itself */
581 		}
582 	}
583 
584 	SHARED_REGION_TRACE_DEBUG(
585 		("shared_region: reference_locked(%p) <- %d\n",
586 		(void *)VM_KERNEL_ADDRPERM(shared_region),
587 		shared_region->sr_ref_count));
588 }
589 
590 /*
591  * Take a reference on a shared region.
592  */
593 void
 594 vm_shared_region_reference(vm_shared_region_t shared_region)
595 {
596 	SHARED_REGION_TRACE_DEBUG(
597 		("shared_region: -> reference(%p)\n",
598 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
599 
600 	vm_shared_region_lock();
601 	vm_shared_region_reference_locked(shared_region);
602 	vm_shared_region_unlock();
603 
604 	SHARED_REGION_TRACE_DEBUG(
605 		("shared_region: reference(%p) <- %d\n",
606 		(void *)VM_KERNEL_ADDRPERM(shared_region),
607 		shared_region->sr_ref_count));
608 }
609 
610 /*
611  * Release a reference on the shared region.
612  * Destroy it if there are no references left.
613  */
614 void
 615 vm_shared_region_deallocate(
616 	vm_shared_region_t      shared_region)
617 {
618 	SHARED_REGION_TRACE_DEBUG(
619 		("shared_region: -> deallocate(%p)\n",
620 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
621 
622 	vm_shared_region_lock();
623 
624 	assert(shared_region->sr_ref_count > 0);
625 
626 	if (shared_region->sr_root_dir == NULL) {
627 		/*
628 		 * Local (i.e. based on the boot volume) shared regions
629 		 * can persist or not based on the "shared_region_persistence"
630 		 * sysctl.
631 		 * Make sure that this one complies.
632 		 *
633 		 * See comments in vm_shared_region_slide() for notes about
634 		 * shared regions we have slid (which are not torn down currently).
635 		 */
636 		if (shared_region_persistence &&
637 		    !shared_region->sr_persists) {
638 			/* make this one persistent */
639 			shared_region->sr_ref_count++;
640 			shared_region->sr_persists = TRUE;
641 		} else if (!shared_region_persistence &&
642 		    shared_region->sr_persists) {
643 			/* make this one no longer persistent */
644 			assert(shared_region->sr_ref_count > 1);
645 			shared_region->sr_ref_count--;
646 			shared_region->sr_persists = FALSE;
647 		}
648 	}
649 
650 	assert(shared_region->sr_ref_count > 0);
651 	shared_region->sr_ref_count--;
652 	SHARED_REGION_TRACE_DEBUG(
653 		("shared_region: deallocate(%p): ref now %d\n",
654 		(void *)VM_KERNEL_ADDRPERM(shared_region),
655 		shared_region->sr_ref_count));
656 
657 	if (shared_region->sr_ref_count == 0) {
658 		uint64_t deadline;
659 
660 		/*
661 		 * Even though a shared region is unused, delay a while before
662 		 * tearing it down, in case a new app launch can use it.
663 		 * We don't keep around stale shared regions, nor older RSR ones.
664 		 */
665 		if (shared_region->sr_timer_call == NULL &&
666 		    shared_region_destroy_delay != 0 &&
667 		    !shared_region->sr_stale &&
668 		    !(shared_region->sr_rsr_version != 0 &&
669 		    shared_region->sr_rsr_version != rsr_get_version())) {
670 			/* hold one reference for the timer */
671 			assert(!shared_region->sr_mapping_in_progress);
672 			shared_region->sr_ref_count++;
673 
674 			/* set up the timer */
675 			shared_region->sr_timer_call = thread_call_allocate(
676 				(thread_call_func_t) vm_shared_region_timeout,
677 				(thread_call_param_t) shared_region);
678 
679 			/* schedule the timer */
680 			clock_interval_to_deadline(shared_region_destroy_delay,
681 			    NSEC_PER_SEC,
682 			    &deadline);
683 			thread_call_enter_delayed(shared_region->sr_timer_call,
684 			    deadline);
685 
686 			SHARED_REGION_TRACE_DEBUG(
687 				("shared_region: deallocate(%p): armed timer\n",
688 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
689 
690 			vm_shared_region_unlock();
691 		} else {
692 			/* timer expired: let go of this shared region */
693 
694 			/* Make sure there's no cached pointer to the region. */
695 			if (primary_system_shared_region == shared_region) {
696 				primary_system_shared_region = NULL;
697 			}
698 
699 			/*
700 			 * Remove it from the queue first, so no one can find
701 			 * it...
702 			 */
703 			queue_remove(&vm_shared_region_queue,
704 			    shared_region,
705 			    vm_shared_region_t,
706 			    sr_q);
707 			vm_shared_region_count--;
708 			vm_shared_region_unlock();
709 
710 			/* ... and destroy it */
711 			vm_shared_region_destroy(shared_region);
712 			shared_region = NULL;
713 		}
714 	} else {
715 		vm_shared_region_unlock();
716 	}
717 
718 	SHARED_REGION_TRACE_DEBUG(
719 		("shared_region: deallocate(%p) <-\n",
720 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
721 }
722 
723 void
 724 vm_shared_region_timeout(
725 	thread_call_param_t     param0,
726 	__unused thread_call_param_t    param1)
727 {
728 	vm_shared_region_t      shared_region;
729 
730 	shared_region = (vm_shared_region_t) param0;
731 
732 	vm_shared_region_deallocate(shared_region);
733 }
734 
735 
736 /*
737  * Create a new (empty) shared region for a new environment.
738  */
739 static vm_shared_region_t
 740 vm_shared_region_create(
741 	void                    *root_dir,
742 	cpu_type_t              cputype,
743 	cpu_subtype_t           cpu_subtype,
744 	boolean_t               is_64bit,
745 	int                     target_page_shift,
746 #if !__has_feature(ptrauth_calls)
747 	__unused
748 #endif /* __has_feature(ptrauth_calls) */
749 	boolean_t               reslide,
750 	boolean_t               is_driverkit,
751 	uint32_t                rsr_version)
752 {
753 	vm_named_entry_t        mem_entry;
754 	ipc_port_t              mem_entry_port;
755 	vm_shared_region_t      shared_region;
756 	vm_map_t                sub_map, config_map;
757 	pmap_t                  nested_pmap, config_pmap;
758 	mach_vm_offset_t        base_address, pmap_nesting_start;
759 	mach_vm_size_t          size, pmap_nesting_size;
760 
761 	SHARED_REGION_TRACE_INFO(
762 		("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
763 		(void *)VM_KERNEL_ADDRPERM(root_dir),
764 		cputype, cpu_subtype, is_64bit, target_page_shift,
765 		reslide, is_driverkit));
766 
767 	base_address = 0;
768 	size = 0;
769 	mem_entry = NULL;
770 	mem_entry_port = IPC_PORT_NULL;
771 	sub_map = VM_MAP_NULL;
772 	config_map = VM_MAP_NULL;
773 	nested_pmap = PMAP_NULL;
774 	config_pmap = PMAP_NULL;
775 
776 	/* create a new shared region structure... */
777 	shared_region = kalloc_type(struct vm_shared_region,
778 	    Z_WAITOK | Z_NOFAIL);
779 
780 	/* figure out the correct settings for the desired environment */
781 	if (is_64bit) {
782 		switch (cputype) {
783 #if defined(__arm64__)
784 		case CPU_TYPE_ARM64:
785 			base_address = SHARED_REGION_BASE_ARM64;
786 			size = SHARED_REGION_SIZE_ARM64;
787 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
788 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
789 			break;
790 #else
791 		case CPU_TYPE_I386:
792 			base_address = SHARED_REGION_BASE_X86_64;
793 			size = SHARED_REGION_SIZE_X86_64;
794 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
795 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
796 			break;
797 		case CPU_TYPE_POWERPC:
798 			base_address = SHARED_REGION_BASE_PPC64;
799 			size = SHARED_REGION_SIZE_PPC64;
800 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
801 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
802 			break;
803 #endif
804 		default:
805 			SHARED_REGION_TRACE_ERROR(
806 				("shared_region: create: unknown cpu type %d\n",
807 				cputype));
808 			kfree_type(struct vm_shared_region, shared_region);
809 			shared_region = NULL;
810 			goto done;
811 		}
812 	} else {
813 		switch (cputype) {
814 #if defined(__arm64__)
815 		case CPU_TYPE_ARM:
816 			base_address = SHARED_REGION_BASE_ARM;
817 			size = SHARED_REGION_SIZE_ARM;
818 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
819 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
820 			break;
821 #else
822 		case CPU_TYPE_I386:
823 			base_address = SHARED_REGION_BASE_I386;
824 			size = SHARED_REGION_SIZE_I386;
825 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
826 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
827 			break;
828 		case CPU_TYPE_POWERPC:
829 			base_address = SHARED_REGION_BASE_PPC;
830 			size = SHARED_REGION_SIZE_PPC;
831 			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
832 			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
833 			break;
834 #endif
835 		default:
836 			SHARED_REGION_TRACE_ERROR(
837 				("shared_region: create: unknown cpu type %d\n",
838 				cputype));
839 			kfree_type(struct vm_shared_region, shared_region);
840 			shared_region = NULL;
841 			goto done;
842 		}
843 	}
844 
845 	/* create a memory entry structure and a Mach port handle */
846 	mem_entry = mach_memory_entry_allocate(&mem_entry_port);
847 
848 #if defined(__arm64__)
849 	{
850 		int pmap_flags = 0;
851 		pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
852 
853 
854 #if __ARM_MIXED_PAGE_SIZE__
855 		if (cputype == CPU_TYPE_ARM64 &&
856 		    target_page_shift == FOURK_PAGE_SHIFT) {
857 			/* arm64/4k address space */
858 			pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
859 		}
860 #endif /* __ARM_MIXED_PAGE_SIZE__ */
861 
862 		nested_pmap = pmap_create_options(NULL, 0, pmap_flags | PMAP_CREATE_NESTED);
863 		config_pmap = pmap_create_options(NULL, 0, pmap_flags);
864 		if ((nested_pmap != PMAP_NULL) && (config_pmap != PMAP_NULL)) {
865 			pmap_set_nested(nested_pmap);
866 #if CODE_SIGNING_MONITOR
867 			csm_setup_nested_address_space(nested_pmap, base_address, size);
868 #endif /* CODE_SIGNING_MONITOR */
869 			pmap_set_shared_region(config_pmap, nested_pmap, base_address, size);
870 			sub_map = vm_map_create_options(nested_pmap, 0,
871 			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
872 			config_map = vm_map_create_options(config_pmap, base_address,
873 			    base_address + size, VM_MAP_CREATE_PAGEABLE);
874 
875 			if (is_64bit ||
876 			    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
877 				/* enforce 16KB alignment of VM map entries */
878 				vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
879 				vm_map_set_page_shift(config_map, SIXTEENK_PAGE_SHIFT);
880 			}
881 #if __ARM_MIXED_PAGE_SIZE__
882 			if (cputype == CPU_TYPE_ARM64 &&
883 			    target_page_shift == FOURK_PAGE_SHIFT) {
884 				/* arm64/4k address space */
885 				vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
886 				vm_map_set_page_shift(config_map, FOURK_PAGE_SHIFT);
887 			}
888 #endif /* __ARM_MIXED_PAGE_SIZE__ */
889 		}
890 	}
891 #else /* defined(__arm64__) */
892 	{
893 		/* create a VM sub map and its pmap */
894 		nested_pmap = pmap_create_options(NULL, 0, is_64bit);
895 		config_pmap = pmap_create_options(NULL, 0, is_64bit);
896 		if ((nested_pmap != NULL) && (config_pmap != NULL)) {
897 			pmap_set_shared_region(config_pmap, nested_pmap, base_address, size);
898 			sub_map = vm_map_create_options(nested_pmap, 0,
899 			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
900 			config_map = vm_map_create_options(config_pmap, base_address,
901 			    base_address + size, VM_MAP_CREATE_PAGEABLE);
902 		}
903 	}
904 #endif /* defined(__arm64__) */
905 
906 	if (sub_map != VM_MAP_NULL) {
907 		nested_pmap = PMAP_NULL;
908 	}
909 	if (config_map != VM_MAP_NULL) {
910 		config_pmap = PMAP_NULL;
911 	}
912 	if (nested_pmap != PMAP_NULL) {
913 		pmap_destroy(nested_pmap);
914 	}
915 	if (config_pmap != PMAP_NULL) {
916 		pmap_destroy(config_pmap);
917 	}
918 
919 	if ((sub_map == VM_MAP_NULL) || (config_map == VM_MAP_NULL)) {
920 		if (sub_map != VM_MAP_NULL) {
921 			vm_map_deallocate(sub_map);
922 		}
923 		if (config_map != VM_MAP_NULL) {
924 			vm_map_deallocate(config_map);
925 		}
926 		ipc_port_release_send(mem_entry_port);
927 		kfree_type(struct vm_shared_region, shared_region);
928 		shared_region = NULL;
929 		SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate maps\n"));
930 		goto done;
931 	}
932 
933 	/* shared regions should always enforce code-signing */
934 	vm_map_cs_enforcement_set(sub_map, true);
935 	assert(vm_map_cs_enforcement(sub_map));
936 	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
937 	vm_map_cs_enforcement_set(config_map, true);
938 	assert(vm_map_cs_enforcement(config_map));
939 	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(config_map)));
940 
941 	assert(!sub_map->disable_vmentry_reuse);
942 	sub_map->is_nested_map = TRUE;
943 	sub_map->vmmap_sealed = VM_MAP_WILL_BE_SEALED;
944 
945 	/* make the memory entry point to the VM sub map */
946 	mem_entry->is_sub_map = TRUE;
947 	mem_entry->backing.map = sub_map;
948 	mem_entry->size = size;
949 	mem_entry->protection = VM_PROT_ALL;
950 
951 	/* make the shared region point at the memory entry */
952 	shared_region->sr_mem_entry = mem_entry_port;
953 
954 	/* fill in the shared region's environment and settings */
955 	shared_region->sr_config_map = config_map;
956 	shared_region->sr_base_address = base_address;
957 	shared_region->sr_size = size;
958 	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
959 	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
960 	shared_region->sr_cpu_type = cputype;
961 	shared_region->sr_cpu_subtype = cpu_subtype;
962 	shared_region->sr_64bit = (uint8_t)is_64bit;
963 #if __ARM_MIXED_PAGE_SIZE__
964 	shared_region->sr_page_shift = (uint8_t)target_page_shift;
965 #endif /* __ARM_MIXED_PAGE_SIZE__ */
966 	shared_region->sr_driverkit = (uint8_t)is_driverkit;
967 	shared_region->sr_rsr_version = rsr_version;
968 	shared_region->sr_root_dir = root_dir;
969 
970 	queue_init(&shared_region->sr_q);
971 	shared_region->sr_mapping_in_progress = THREAD_NULL;
972 	shared_region->sr_slide_in_progress = THREAD_NULL;
973 	shared_region->sr_persists = FALSE;
974 	shared_region->sr_stale = FALSE;
975 	shared_region->sr_timer_call = NULL;
976 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
977 
978 	/* grab a reference for the caller */
979 	shared_region->sr_ref_count = 1;
980 
981 	shared_region->sr_slide = 0; /* not slid yet */
982 
983 	/* Initialize UUID and other metadata */
984 	memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
985 	shared_region->sr_uuid_copied = FALSE;
986 	shared_region->sr_images_count = 0;
987 	shared_region->sr_images = NULL;
988 #if __has_feature(ptrauth_calls)
989 	shared_region->sr_reslide = reslide;
990 	shared_region->sr_num_auth_section = 0;
991 	shared_region->sr_next_auth_section = 0;
992 	shared_region->sr_auth_section = NULL;
993 #endif /* __has_feature(ptrauth_calls) */
994 	kern_return_t kr = vm_shared_region_insert_submap(config_map, shared_region, false);
995 	if (kr != KERN_SUCCESS) {
996 		SHARED_REGION_TRACE_ERROR(
997 			("shared_region: create(%p): insert_submap returned 0x%x\n", shared_region, kr));
998 		shared_region->sr_ref_count = 0;
999 		vm_shared_region_destroy(shared_region);
1000 		shared_region = NULL;
1001 	}
1002 
1003 done:
1004 	if (shared_region) {
1005 		SHARED_REGION_TRACE_INFO(
1006 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
1007 			"base=0x%llx,size=0x%llx) <- "
1008 			"%p mem=(%p,%p) map=%p pmap=%p\n",
1009 			(void *)VM_KERNEL_ADDRPERM(root_dir),
1010 			cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
1011 			(long long)base_address,
1012 			(long long)size,
1013 			(void *)VM_KERNEL_ADDRPERM(shared_region),
1014 			(void *)VM_KERNEL_ADDRPERM(mem_entry_port),
1015 			(void *)VM_KERNEL_ADDRPERM(mem_entry),
1016 			(void *)VM_KERNEL_ADDRPERM(sub_map),
1017 			(void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
1018 	} else {
1019 		SHARED_REGION_TRACE_INFO(
1020 			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
1021 			"base=0x%llx,size=0x%llx) <- NULL",
1022 			(void *)VM_KERNEL_ADDRPERM(root_dir),
1023 			cputype, cpu_subtype, is_64bit, is_driverkit,
1024 			(long long)base_address,
1025 			(long long)size));
1026 	}
1027 	return shared_region;
1028 }
1029 
1030 /*
1031  * Destroy a now-unused shared region.
1032  * The shared region is no longer in the queue and can not be looked up.
1033  */
1034 static void
 1035 vm_shared_region_destroy(
1036 	vm_shared_region_t      shared_region)
1037 {
1038 	vm_named_entry_t        mem_entry;
1039 	vm_map_t                map;
1040 
1041 	SHARED_REGION_TRACE_INFO(
1042 		("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
1043 		(void *)VM_KERNEL_ADDRPERM(shared_region),
1044 		(void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
1045 		shared_region->sr_cpu_type,
1046 		shared_region->sr_cpu_subtype,
1047 		shared_region->sr_64bit,
1048 		shared_region->sr_driverkit));
1049 
1050 	assert(shared_region->sr_ref_count == 0);
1051 	assert(!shared_region->sr_persists);
1052 
1053 	mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
1054 	assert(mem_entry->is_sub_map);
1055 	assert(!mem_entry->internal);
1056 	assert(!mem_entry->is_copy);
1057 
1058 	if (shared_region->sr_config_map != VM_MAP_NULL) {
1059 		vm_map_deallocate(shared_region->sr_config_map);
1060 		shared_region->sr_config_map = VM_MAP_NULL;
1061 	}
1062 
1063 	map = mem_entry->backing.map;
1064 
1065 	/*
1066 	 * Clean up the pmap first.  The virtual addresses that were
1067 	 * entered in this possibly "nested" pmap may have different values
1068 	 * than the VM map's min and max offsets, if the VM sub map was
1069 	 * mapped at a non-zero offset in the processes' main VM maps, which
1070 	 * is usually the case, so the clean-up we do in vm_map_destroy() would
1071 	 * not be enough.
1072 	 */
1073 	if (map->pmap) {
1074 		pmap_remove(map->pmap,
1075 		    (vm_map_offset_t)shared_region->sr_base_address,
1076 		    (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
1077 	}
1078 
1079 	/*
1080 	 * Release our (one and only) handle on the memory entry.
1081 	 * This will generate a no-senders notification, which will be processed
1082 	 * by ipc_notify_no_senders_kobject(), which will release the one and only
1083 	 * reference on the memory entry and cause it to be destroyed, along
1084 	 * with the VM sub map and its pmap.
1085 	 */
1086 	mach_memory_entry_port_release(shared_region->sr_mem_entry);
1087 	mem_entry = NULL;
1088 	shared_region->sr_mem_entry = IPC_PORT_NULL;
1089 
1090 	if (shared_region->sr_timer_call) {
1091 		thread_call_free(shared_region->sr_timer_call);
1092 	}
1093 
1094 #if __has_feature(ptrauth_calls)
1095 	/*
1096 	 * Free the cached copies of slide_info for the AUTH regions.
1097 	 */
1098 	for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
1099 		vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
1100 		if (si != NULL) {
1101 			vm_object_deallocate(si->si_slide_object);
1102 			kfree_data(si->si_slide_info_entry,
1103 			    si->si_slide_info_size);
1104 			kfree_type(struct vm_shared_region_slide_info, si);
1105 			shared_region->sr_auth_section[i] = NULL;
1106 		}
1107 	}
1108 	if (shared_region->sr_auth_section != NULL) {
1109 		assert(shared_region->sr_num_auth_section > 0);
1110 		kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
1111 		shared_region->sr_auth_section = NULL;
1112 		shared_region->sr_num_auth_section = 0;
1113 	}
1114 #endif /* __has_feature(ptrauth_calls) */
1115 
1116 	/* release the shared region structure... */
1117 	kfree_type(struct vm_shared_region, shared_region);
1118 
1119 	SHARED_REGION_TRACE_DEBUG(
1120 		("shared_region: destroy(%p) <-\n",
1121 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1122 	shared_region = NULL;
1123 }
1124 
1125 /*
1126  * Gets the address of the first (in time) mapping in the shared region.
 1127  * If used during initial task setup by dyld, task should be non-NULL.
1128  */
1129 kern_return_t
 1130 vm_shared_region_start_address(
1131 	vm_shared_region_t      shared_region,
1132 	mach_vm_offset_t        *start_address)
1133 {
1134 	kern_return_t           kr;
1135 	mach_vm_offset_t        sr_base_address;
1136 	mach_vm_offset_t        sr_first_mapping;
1137 
1138 	SHARED_REGION_TRACE_DEBUG(
1139 		("shared_region: -> start_address(%p)\n",
1140 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1141 
1142 	vm_shared_region_lock();
1143 
1144 	/*
1145 	 * Wait if there's another thread establishing a mapping
1146 	 * in this shared region right when we're looking at it.
1147 	 * We want a consistent view of the map...
1148 	 */
1149 	while (shared_region->sr_mapping_in_progress != NULL) {
1150 		/* wait for our turn... */
1151 		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1152 		    THREAD_UNINT);
1153 	}
1154 	assert(shared_region->sr_mapping_in_progress == NULL);
1155 	assert(shared_region->sr_ref_count > 0);
1156 
1157 	sr_base_address = shared_region->sr_base_address;
1158 	sr_first_mapping = shared_region->sr_first_mapping;
1159 
1160 	if (sr_first_mapping == (mach_vm_offset_t) -1) {
1161 		/* shared region is empty */
1162 		kr = KERN_INVALID_ADDRESS;
1163 	} else {
1164 		kr = KERN_SUCCESS;
1165 		*start_address = sr_base_address + sr_first_mapping;
1166 	}
1167 
1168 
1169 	vm_shared_region_unlock();
1170 
1171 	SHARED_REGION_TRACE_DEBUG(
1172 		("shared_region: start_address(%p) <- 0x%llx\n",
1173 		(void *)VM_KERNEL_ADDRPERM(shared_region),
1174 		(long long)shared_region->sr_base_address));
1175 
1176 	return kr;
1177 }
1178 
1179 kern_return_t
 1180 vm_shared_region_update_task(task_t task, vm_shared_region_t shared_region, mach_vm_offset_t start_address)
1181 {
1182 	kern_return_t kr = KERN_SUCCESS;
1183 	uuid_t shared_region_uuid;
1184 	_Static_assert(sizeof(shared_region_uuid) == sizeof(task->task_shared_region_uuid),
1185 	    "sizeof task_shared_region_uuid != sizeof uuid_t");
1186 	task_lock(task);
1187 	if (task->task_shared_region_slide == -1) {
1188 		assert(vm_map_is_sealed(vm_shared_region_vm_map(shared_region)));
1189 		kr = vm_shared_region_insert_submap(task->map, shared_region, true);
1190 		if (kr == KERN_SUCCESS) {
1191 			task->task_shared_region_slide = shared_region->sr_slide;
1192 			/*
1193 			 * Drop the task lock to avoid potential deadlock if copyin() faults.
1194 			 * With the lock dropped, another thread in the task could theoretically
1195 			 * call this function, observe task_shared_region_slide != -1, and
1196 			 * return before the UUID has been copied to the task, but in practice
1197 			 * dyld should only issue the shared_region_check_np() syscall that ends
1198 			 * up invoking this function exactly once, and while the task is still
1199 			 * single-threaded at that.
1200 			 */
1201 			task_unlock(task);
1202 			/*
1203 			 * Now that shared region is accessible in the task's address space,
1204 			 * copyin the UUID for debugging/telemetry purposes.
1205 			 * copyin had better succeed here.  We've already inserted the submap,
1206 			 * which can't be undone or re-done later.  If the shared region header
1207 			 * isn't accessible at this point, we have big problems.
1208 			 */
1209 			const uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1210 			if (copyin((user_addr_t)(start_address + sc_header_uuid_offset),
1211 			    (char *)&shared_region_uuid, sizeof(shared_region_uuid)) != 0) {
1212 				SHARED_REGION_TRACE_ERROR(
1213 					("shared_region: update_task(%p) copyin failed\n",
1214 					(void *)VM_KERNEL_ADDRPERM(shared_region)));
1215 			}
1216 			task_lock(task);
1217 			memcpy(&task->task_shared_region_uuid, shared_region_uuid, sizeof(shared_region_uuid));
1218 		}
1219 	}
1220 
1221 	task_unlock(task);
1222 	return kr;
1223 }
1224 
1225 /*
 1226  * Look up a pre-existing mapping in the shared region, for replacement.
1227  * Takes an extra object reference if found.
1228  */
1229 static kern_return_t
 1230 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1231 {
1232 	vm_map_entry_t found;
1233 
1234 	vmlp_api_start(FIND_MAPPING_TO_SLIDE);
1235 
1236 	/* find the shared region's map entry to slide */
1237 	vm_map_lock_read(map);
1238 	if (!vm_map_lookup_entry(map, addr, &found)) {
1239 		/* no mapping there */
1240 		vm_map_unlock(map);
1241 		vmlp_api_end(FIND_MAPPING_TO_SLIDE, KERN_INVALID_ARGUMENT);
1242 		return KERN_INVALID_ARGUMENT;
1243 	}
1244 
1245 	*entry = *found;
1246 
1247 	vmlp_range_event_entry(map, entry);
1248 
1249 	/* extra ref to keep object alive while map is unlocked */
1250 	vm_object_reference(VME_OBJECT(found));
1251 	vm_map_unlock_read(map);
1252 	vmlp_api_end(FIND_MAPPING_TO_SLIDE, KERN_SUCCESS);
1253 	return KERN_SUCCESS;
1254 }
1255 
1256 static bool
 1257 shared_region_make_permanent(
1258 	vm_shared_region_t sr,
1259 	vm_prot_t max_prot)
1260 {
1261 	if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1262 		return false;
1263 	}
1264 	if (max_prot & VM_PROT_WRITE) {
1265 		/*
1266 		 * Potentially writable mapping: no major issue with allowing
1267 		 * it to be replaced since its contents could be modified
1268 		 * anyway.
1269 		 */
1270 		return false;
1271 	}
1272 	if (max_prot & VM_PROT_EXECUTE) {
1273 		/*
1274 		 * Potentially executable mapping: some software might want
1275 		 * to try and replace it to interpose their own code when a
1276 		 * given routine is called or returns, for example.
1277 		 * So let's not make it "permanent".
1278 		 */
1279 		return false;
1280 	}
1281 	/*
1282 	 * Make this mapping "permanent" to prevent it from being deleted
1283 	 * and/or replaced with another mapping.
1284 	 */
1285 	return true;
1286 }
1287 
1288 static bool
 1289 shared_region_tpro_protect(
1290 	vm_shared_region_t sr,
1291 	vm_prot_t max_prot __unused)
1292 {
1293 	if (sr->sr_cpu_type != CPU_TYPE_ARM64) {
1294 		return false;
1295 	}
1296 
1297 
1298 	/*
1299 	 * Unless otherwise explicitly requested all other mappings do not get
1300 	 * TPRO protection.
1301 	 */
1302 	return false;
1303 }
1304 
1305 #if __has_feature(ptrauth_calls)
1306 
1307 /*
1308  * Determine if this task is actually using pointer signing.
1309  */
1310 static boolean_t
 1311 task_sign_pointers(task_t task)
1312 {
1313 	if (task->map &&
1314 	    task->map->pmap &&
1315 	    !task->map->pmap->disable_jop) {
1316 		return TRUE;
1317 	}
1318 	return FALSE;
1319 }
1320 
1321 /*
1322  * If the shared region contains mappings that are authenticated, then
1323  * remap them into the task private map.
1324  *
1325  * Failures are possible in this routine when jetsam kills a process
1326  * just as dyld is trying to set it up. The vm_map and task shared region
1327  * info get torn down w/o waiting for this thread to finish up.
1328  */
1329 __attribute__((noinline))
1330 kern_return_t
 1331 vm_shared_region_auth_remap(vm_shared_region_t sr)
1332 {
1333 	memory_object_t               sr_pager = MEMORY_OBJECT_NULL;
1334 	task_t                        task = current_task();
1335 	vm_shared_region_slide_info_t si;
1336 	uint_t                        i;
1337 	vm_object_t                   object;
1338 	vm_map_t                      sr_map;
1339 	struct vm_map_entry           tmp_entry_store = {0};
1340 	vm_map_entry_t                tmp_entry = NULL;
1341 	vm_map_kernel_flags_t         vmk_flags;
1342 	vm_map_offset_t               map_addr;
1343 	kern_return_t                 kr = KERN_SUCCESS;
1344 	boolean_t                     use_ptr_auth = task_sign_pointers(task);
1345 
1346 	/*
 1347 	 * Taking the full shared region lock here shouldn't be necessary for
 1348 	 * functional correctness, so we could potentially gain some scalability
1349 	 * by only taking the task lock here which would avoid the possibility of
1350 	 * serializing multiple tasks at the auth_remap step.  But shared_region_pager_match()
1351 	 * is slightly racy and can produce duplicate pagers without shared-region-wide
1352 	 * synchronization, which is a potential memory footprint issue.
1353 	 */
1354 	vm_shared_region_acquire(sr);
1355 
1356 	/* Just return if already done. */
1357 	if (task->shared_region_auth_remapped) {
1358 		vm_shared_region_release(sr);
1359 		return KERN_SUCCESS;
1360 	}
1361 
1362 	/*
1363 	 * Remap any sections with pointer authentications into the private map.
1364 	 */
1365 	for (i = 0; i < sr->sr_num_auth_section; ++i) {
1366 		si = sr->sr_auth_section[i];
1367 		assert(si != NULL);
1368 		assert(si->si_ptrauth);
1369 
1370 		/*
 1371 		 * We have a mapping that needs to be private.
1372 		 * Look for an existing slid mapping's pager with matching
1373 		 * object, offset, slide info and shared_region_id to reuse.
1374 		 */
1375 		object = si->si_slide_object;
1376 		sr_pager = shared_region_pager_match(object, si->si_start, si,
1377 		    use_ptr_auth ? task->jop_pid : 0);
1378 		if (sr_pager == MEMORY_OBJECT_NULL) {
1379 			printf("%s(): shared_region_pager_match() failed\n", __func__);
1380 			kr = KERN_FAILURE;
1381 			goto done;
1382 		}
1383 
1384 		/*
1385 		 * verify matching jop_pid for this task and this pager
1386 		 */
1387 		if (use_ptr_auth) {
1388 			shared_region_pager_match_task_key(sr_pager, task);
1389 		}
1390 
1391 		sr_map = vm_shared_region_vm_map(sr);
1392 		tmp_entry = NULL;
1393 
1394 		kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1395 		if (kr != KERN_SUCCESS) {
1396 			printf("%s(): find_mapping_to_slide() failed\n", __func__);
1397 			goto done;
1398 		}
1399 		tmp_entry = &tmp_entry_store;
1400 
1401 		/*
1402 		 * Check that the object exactly covers the region to slide.
1403 		 */
1404 		if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1405 			printf("%s(): doesn't fully cover\n", __func__);
1406 			kr = KERN_FAILURE;
1407 			goto done;
1408 		}
1409 
1410 		/*
1411 		 * map the pager over the portion of the mapping that needs sliding
1412 		 */
1413 		vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
1414 		vmk_flags.vmkf_overwrite_immutable = true;
1415 		vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
1416 		    tmp_entry->max_protection);
1417 
1418 		/* Preserve the TPRO flag if task has TPRO enabled */
1419 		vmk_flags.vmf_tpro = (vm_map_tpro(task->map) &&
1420 		    tmp_entry->used_for_tpro &&
1421 		    task_has_tpro(task));
1422 
1423 		map_addr = si->si_slid_address;
1424 		kr = mach_vm_map_kernel(task->map,
1425 		    vm_sanitize_wrap_addr_ref(&map_addr),
1426 		    si->si_end - si->si_start,
1427 		    0,
1428 		    vmk_flags,
1429 		    (ipc_port_t)(uintptr_t) sr_pager,
1430 		    0,
1431 		    TRUE,
1432 		    tmp_entry->protection,
1433 		    tmp_entry->max_protection,
1434 		    tmp_entry->inheritance);
1435 		memory_object_deallocate(sr_pager);
1436 		sr_pager = MEMORY_OBJECT_NULL;
1437 		if (kr != KERN_SUCCESS) {
1438 			printf("%s(): mach_vm_map_kernel() failed\n", __func__);
1439 			goto done;
1440 		}
1441 		assertf(map_addr == si->si_slid_address,
1442 		    "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1443 		    (uint64_t)map_addr,
1444 		    (uint64_t)si->si_slid_address,
1445 		    tmp_entry);
1446 
1447 		/* Drop the ref count grabbed by find_mapping_to_slide */
1448 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1449 		tmp_entry = NULL;
1450 	}
1451 
1452 done:
1453 	if (tmp_entry) {
1454 		/* Drop the ref count grabbed by find_mapping_to_slide */
1455 		vm_object_deallocate(VME_OBJECT(tmp_entry));
1456 		tmp_entry = NULL;
1457 	}
1458 
1459 	/*
1460 	 * Drop any extra reference to the pager in case we're quitting due to an error above.
1461 	 */
1462 	if (sr_pager != MEMORY_OBJECT_NULL) {
1463 		memory_object_deallocate(sr_pager);
1464 	}
1465 
1466 	/*
 1467 	 * Mark the task as having its auth sections remapped.
1468 	 */
1469 	task->shared_region_auth_remapped = TRUE;
1470 	vm_shared_region_release(sr);
1471 	return kr;
1472 }
1473 #endif /* __has_feature(ptrauth_calls) */
1474 
1475 void
 1476 vm_shared_region_undo_mappings(
1477 	vm_map_t                 sr_map,
1478 	mach_vm_offset_t         sr_base_address,
1479 	struct _sr_file_mappings *srf_mappings,
1480 	struct _sr_file_mappings *srf_mappings_current,
1481 	unsigned int             srf_current_mappings_count)
1482 {
1483 	unsigned int             j = 0;
1484 	vm_shared_region_t       shared_region = NULL;
1485 	struct _sr_file_mappings *srfmp;
1486 	unsigned int             mappings_count;
1487 	struct shared_file_mapping_slide_np *mappings;
1488 
1489 	shared_region = vm_shared_region_get(current_task());
1490 	if (shared_region == NULL) {
1491 		printf("Failed to undo mappings because of NULL shared region.\n");
1492 		return;
1493 	}
1494 
1495 	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
1496 
1497 	if (sr_map == NULL) {
1498 		ipc_port_t              sr_handle;
1499 		vm_named_entry_t        sr_mem_entry;
1500 
1501 		/* no need to lock because this data is never modified... */
1502 		sr_handle = shared_region->sr_mem_entry;
1503 		sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1504 		sr_map = sr_mem_entry->backing.map;
1505 		sr_base_address = shared_region->sr_base_address;
1506 	}
1507 	/*
1508 	 * Undo the mappings we've established so far.
1509 	 */
1510 	for (srfmp = &srf_mappings[0];
1511 	    srfmp <= srf_mappings_current;
1512 	    srfmp++) {
1513 		mappings = srfmp->mappings;
1514 		mappings_count = srfmp->mappings_count;
1515 		if (srfmp == srf_mappings_current) {
1516 			mappings_count = srf_current_mappings_count;
1517 		}
1518 
1519 		for (j = 0; j < mappings_count; j++) {
1520 			kern_return_t kr2;
1521 			mach_vm_offset_t start, end;
1522 
1523 			if (mappings[j].sms_size == 0) {
1524 				/*
1525 				 * We didn't establish this
1526 				 * mapping, so nothing to undo.
1527 				 */
1528 				continue;
1529 			}
1530 			SHARED_REGION_TRACE_INFO(
1531 				("shared_region: mapping[%d]: "
1532 				"address:0x%016llx "
1533 				"size:0x%016llx "
1534 				"offset:0x%016llx "
1535 				"maxprot:0x%x prot:0x%x: "
1536 				"undoing...\n",
1537 				j,
1538 				(long long)mappings[j].sms_address,
1539 				(long long)mappings[j].sms_size,
1540 				(long long)mappings[j].sms_file_offset,
1541 				mappings[j].sms_max_prot,
1542 				mappings[j].sms_init_prot));
1543 			start = (mappings[j].sms_address - sr_base_address);
1544 			end = start + mappings[j].sms_size;
1545 			start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
1546 			end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
1547 			kr2 = vm_map_remove_guard(sr_map,
1548 			    start,
1549 			    end,
1550 			    VM_MAP_REMOVE_IMMUTABLE,
1551 			    KMEM_GUARD_NONE).kmr_return;
1552 			assert(kr2 == KERN_SUCCESS);
1553 		}
1554 	}
1555 
1556 	vm_shared_region_deallocate(shared_region);
1557 }
1558 
1559 /*
1560  * First part of vm_shared_region_map_file(). Split out to
1561  * avoid kernel stack overflow.
1562  */
1563 __attribute__((noinline))
1564 static kern_return_t
1565 vm_shared_region_map_file_setup(
1566 	vm_shared_region_t              shared_region,
1567 	int                             sr_file_mappings_count,
1568 	struct _sr_file_mappings        *sr_file_mappings,
1569 	unsigned int                    *mappings_to_slide_cnt,
1570 	struct shared_file_mapping_slide_np **mappings_to_slide,
1571 	mach_vm_offset_t                *slid_mappings,
1572 	memory_object_control_t         *slid_file_controls,
1573 	mach_vm_offset_t                *sfm_min_address,
1574 	mach_vm_offset_t                *sfm_max_address,
1575 	vm_map_t                        *sr_map_ptr,
1576 	vm_map_offset_t                 *lowest_unnestable_addr_ptr,
1577 	unsigned int                    vmsr_num_slides)
1578 {
1579 	kern_return_t           kr = KERN_SUCCESS;
1580 	memory_object_control_t file_control;
1581 	vm_object_t             file_object;
1582 	ipc_port_t              sr_handle;
1583 	vm_named_entry_t        sr_mem_entry;
1584 	vm_map_t                sr_map;
1585 	mach_vm_offset_t        sr_base_address;
1586 	unsigned int            i = 0;
1587 	mach_port_t             map_port;
1588 	vm_map_offset_t         target_address;
1589 	vm_object_t             object;
1590 	vm_object_size_t        obj_size;
1591 	vm_map_offset_t         lowest_unnestable_addr = 0;
1592 	vm_map_kernel_flags_t   vmk_flags;
1593 	mach_vm_offset_t        sfm_end;
1594 	uint32_t                mappings_count;
1595 	struct shared_file_mapping_slide_np *mappings;
1596 	struct _sr_file_mappings *srfmp;
1597 
1598 	assert(shared_region->sr_mapping_in_progress == current_thread());
1599 
1600 	/* no need to lock because this data is never modified... */
1601 	sr_handle = shared_region->sr_mem_entry;
1602 	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1603 	sr_map = sr_mem_entry->backing.map;
1604 	sr_base_address = shared_region->sr_base_address;
1605 
1606 	SHARED_REGION_TRACE_DEBUG(
1607 		("shared_region: -> map(%p)\n",
1608 		(void *)VM_KERNEL_ADDRPERM(shared_region)));
1609 
1610 	mappings_count = 0;
1611 	mappings = NULL;
1612 	srfmp = NULL;
1613 
1614 	/* process all the files to be mapped */
1615 	for (srfmp = &sr_file_mappings[0];
1616 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
1617 	    srfmp++) {
1618 		i = 0; /* reset i early because it's used in the error recovery path */
1619 		mappings_count = srfmp->mappings_count;
1620 		mappings = srfmp->mappings;
1621 		file_control = srfmp->file_control;
1622 
1623 		if (mappings_count == 0) {
1624 			/* no mappings here... */
1625 			continue;
1626 		}
1627 
1628 		/*
1629 		 * The code below can only correctly "slide" (perform relocations) for one
1630 		 * value of the slide amount. So if a file has a non-zero slide, it has to
1631 		 * match any previous value. A zero slide value is ok for things that are
1632 		 * just directly mapped.
1633 		 */
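		/*
		 * For example, per-file slide values of {0, 0x4000, 0x4000}
		 * would be accepted here (zero slides are ignored and all
		 * non-zero slides agree), while {0x4000, 0x8000} would be
		 * rejected below with KERN_INVALID_ARGUMENT.
		 */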
1634 		if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1635 			shared_region->sr_slide = srfmp->slide;
1636 		} else if (shared_region->sr_slide != 0 &&
1637 		    srfmp->slide != 0 &&
1638 		    shared_region->sr_slide != srfmp->slide) {
1639 			SHARED_REGION_TRACE_ERROR(
1640 				("shared_region: more than one non-zero slide value "
1641 				"slide 1:0x%x slide 2:0x%x\n",
1642 				shared_region->sr_slide, srfmp->slide));
1643 			kr = KERN_INVALID_ARGUMENT;
1644 			break;
1645 		}
1646 
1647 		/*
1648 		 * An FD of -1 means we need to copyin the data to an anonymous object.
1649 		 */
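		/*
		 * Roughly, the steps taken below for this case are:
		 *   1. allocate an anonymous VM object sized for the mapping;
		 *   2. map it read/write into the kernel map and zero it;
		 *   3. copyin() the data from dyld's address space (for fd == -1,
		 *      sms_file_offset carries the user source address);
		 *   4. unmap the object from the kernel again;
		 *   5. map the object into the shared region at the requested
		 *      address.
		 */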
1650 		if (srfmp->fd == -1) {
1651 			assert(mappings_count == 1);
1652 			SHARED_REGION_TRACE_INFO(
1653 				("shared_region: mapping[0]: "
1654 				"address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
1655 				"maxprot:0x%x prot:0x%x fd==-1\n",
1656 				(long long)mappings[0].sms_address,
1657 				(long long)mappings[0].sms_size,
1658 				(long long)mappings[0].sms_file_offset,
1659 				mappings[0].sms_max_prot,
1660 				mappings[0].sms_init_prot));
1661 
1662 			/*
1663 			 * We need an anon object to hold the data in the shared region.
1664 			 * The size needs to be suitable for mapping into the kernel.
1665 			 */
1666 			obj_size = vm_object_round_page(mappings->sms_size);
1667 			object = vm_object_allocate(obj_size, kernel_map->serial_id);
1668 			if (object == VM_OBJECT_NULL) {
1669 				printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
1670 				kr = KERN_RESOURCE_SHORTAGE;
1671 				break;
1672 			}
1673 
1674 			/*
1675 			 * map the object into the kernel
1676 			 */
1677 			vm_map_offset_t kaddr = 0;
1678 			vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
1679 			vmk_flags.vmkf_no_copy_on_read = 1;
1680 			vmk_flags.vmkf_range_id = kmem_needs_data_share_range() ?
1681 			    KMEM_RANGE_ID_DATA_SHARED : KMEM_RANGE_ID_DATA;
1682 
1683 			kr = vm_map_enter(kernel_map,
1684 			    &kaddr,
1685 			    obj_size,
1686 			    0,
1687 			    vmk_flags,
1688 			    object,
1689 			    0,
1690 			    FALSE,
1691 			    (VM_PROT_READ | VM_PROT_WRITE),
1692 			    (VM_PROT_READ | VM_PROT_WRITE),
1693 			    VM_INHERIT_NONE);
1694 			if (kr != KERN_SUCCESS) {
1695 				printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
1696 				vm_object_deallocate(object);
1697 				object = VM_OBJECT_NULL;
1698 				break;
1699 			}
1700 
1701 			/*
1702 			 * We'll need another reference to keep the object alive after
1703 			 * we vm_map_remove() it from the kernel.
1704 			 */
1705 			vm_object_reference(object);
1706 
1707 			/*
1708 			 * Zero out the object's pages, so we can't leak data.
1709 			 */
1710 			bzero((void *)kaddr, obj_size);
1711 
1712 			/*
1713 			 * Copyin the data from dyld to the new object.
1714 			 * Then remove the kernel mapping.
1715 			 */
1716 			int copyin_err =
1717 			    copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
1718 			vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
1719 			if (copyin_err) {
1720 				printf("%s(): for fd==-1 copyin(%p) failed, errno=%d\n", __func__, (void*)mappings->sms_file_offset, copyin_err);
1721 				switch (copyin_err) {
1722 				case EPERM:
1723 				case EACCES:
1724 					kr = KERN_PROTECTION_FAILURE;
1725 					break;
1726 				case EFAULT:
1727 					kr = KERN_INVALID_ADDRESS;
1728 					break;
1729 				default:
1730 					kr = KERN_FAILURE;
1731 					break;
1732 				}
1733 				vm_object_deallocate(object);
1734 				object = VM_OBJECT_NULL;
1735 				break;
1736 			}
1737 
1738 			/*
1739 			 * Finally map the object into the shared region.
1740 			 */
1741 			target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
1742 			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1743 			vmk_flags.vmkf_already = TRUE;
1744 			vmk_flags.vmkf_no_copy_on_read = 1;
1745 			vmk_flags.vmf_permanent = shared_region_make_permanent(shared_region,
1746 			    mappings[0].sms_max_prot);
1747 
1748 			kr = vm_map_enter(
1749 				sr_map,
1750 				&target_address,
1751 				vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1752 				0,
1753 				vmk_flags,
1754 				object,
1755 				0,
1756 				TRUE,
1757 				mappings[0].sms_init_prot & VM_PROT_ALL,
1758 				mappings[0].sms_max_prot & VM_PROT_ALL,
1759 				VM_INHERIT_DEFAULT);
1760 			if (kr != KERN_SUCCESS) {
1761 				printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
1762 				vm_object_deallocate(object);
1763 				break;
1764 			}
1765 
1766 			if (mappings[0].sms_address < *sfm_min_address) {
1767 				*sfm_min_address = mappings[0].sms_address;
1768 			}
1769 
1770 			if (os_add_overflow(mappings[0].sms_address,
1771 			    mappings[0].sms_size,
1772 			    &sfm_end) ||
1773 			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1774 			    mappings[0].sms_address)) {
1775 				/* overflow */
1776 				kr = KERN_INVALID_ARGUMENT;
1777 				break;
1778 			}
1779 
1780 			if (sfm_end > *sfm_max_address) {
1781 				*sfm_max_address = sfm_end;
1782 			}
1783 
1784 			continue;
1785 		}
1786 
1787 		/* get the VM object associated with the file to be mapped */
1788 		file_object = memory_object_control_to_vm_object(file_control);
1789 		assert(file_object);
1790 
1791 		if (!file_object->object_is_shared_cache) {
1792 			vm_object_lock(file_object);
1793 			file_object->object_is_shared_cache = true;
1794 			vm_object_unlock(file_object);
1795 		}
1796 
1797 #if CONFIG_SECLUDED_MEMORY
1798 		/*
1799 		 * Camera will need the shared cache, so don't put the pages
1800 		 * on the secluded queue; assume the primary region is the one
1801 		 * camera uses.  Also keep DEXT shared cache pages off secluded.
1802 		 */
1803 		if (primary_system_shared_region == NULL ||
1804 		    primary_system_shared_region == shared_region ||
1805 		    shared_region->sr_driverkit) {
1806 			memory_object_mark_eligible_for_secluded(file_control, FALSE);
1807 		}
1808 #endif /* CONFIG_SECLUDED_MEMORY */
1809 
1810 		/* establish the mappings for that file */
1811 		for (i = 0; i < mappings_count; i++) {
1812 			SHARED_REGION_TRACE_INFO(
1813 				("shared_region: mapping[%d]: "
1814 				"address:0x%016llx size:0x%016llx offset:0x%016llx "
1815 				"maxprot:0x%x prot:0x%x\n",
1816 				i,
1817 				(long long)mappings[i].sms_address,
1818 				(long long)mappings[i].sms_size,
1819 				(long long)mappings[i].sms_file_offset,
1820 				mappings[i].sms_max_prot,
1821 				mappings[i].sms_init_prot));
1822 
1823 			if (mappings[i].sms_address < *sfm_min_address) {
1824 				*sfm_min_address = mappings[i].sms_address;
1825 			}
1826 
1827 			if (os_add_overflow(mappings[i].sms_address,
1828 			    mappings[i].sms_size,
1829 			    &sfm_end) ||
1830 			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1831 			    mappings[i].sms_address)) {
1832 				/* overflow */
1833 				kr = KERN_INVALID_ARGUMENT;
1834 				break;
1835 			}
1836 
1837 			if (sfm_end > *sfm_max_address) {
1838 				*sfm_max_address = sfm_end;
1839 			}
1840 
1841 			if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1842 				/* zero-filled memory */
1843 				map_port = MACH_PORT_NULL;
1844 			} else {
1845 				/* file-backed memory */
1846 				__IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1847 			}
1848 
1849 			/*
1850 			 * Remember which mappings need sliding.
1851 			 */
1852 			if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1853 				if (*mappings_to_slide_cnt == vmsr_num_slides) {
1854 					SHARED_REGION_TRACE_INFO(
1855 						("shared_region: mapping[%d]: "
1856 						"address:0x%016llx size:0x%016llx "
1857 						"offset:0x%016llx "
1858 						"maxprot:0x%x prot:0x%x "
1859 						"too many mappings to slide...\n",
1860 						i,
1861 						(long long)mappings[i].sms_address,
1862 						(long long)mappings[i].sms_size,
1863 						(long long)mappings[i].sms_file_offset,
1864 						mappings[i].sms_max_prot,
1865 						mappings[i].sms_init_prot));
1866 				} else {
1867 					mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1868 					*mappings_to_slide_cnt += 1;
1869 				}
1870 			}
1871 
1872 			/* mapping's address is relative to the shared region base */
1873 			if (__improbable(
1874 				    os_sub_overflow(
1875 					    mappings[i].sms_address,
1876 					    sr_base_address,
1877 					    &target_address))) {
1878 				kr = KERN_INVALID_ARGUMENT;
1879 				break;
1880 			}
1881 
1882 			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1883 			vmk_flags.vmkf_already = TRUE;
1884 			/* no copy-on-read for mapped binaries */
1885 			vmk_flags.vmkf_no_copy_on_read = 1;
1886 			vmk_flags.vmf_permanent = shared_region_make_permanent(
1887 				shared_region,
1888 				mappings[i].sms_max_prot);
1889 			vmk_flags.vmf_tpro = shared_region_tpro_protect(
1890 				shared_region,
1891 				mappings[i].sms_max_prot);
1892 
1893 			/* establish that mapping, OK if it's "already" there */
1894 			if (map_port == MACH_PORT_NULL) {
1895 				/*
1896 				 * We want to map some anonymous memory in a shared region.
1897 				 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1898 				 */
1899 				obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1900 				object = vm_object_allocate(obj_size, sr_map->serial_id);
1901 				if (object == VM_OBJECT_NULL) {
1902 					kr = KERN_RESOURCE_SHORTAGE;
1903 				} else {
1904 					kr = vm_map_enter(
1905 						sr_map,
1906 						&target_address,
1907 						vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1908 						0,
1909 						vmk_flags,
1910 						object,
1911 						0,
1912 						TRUE,
1913 						mappings[i].sms_init_prot & VM_PROT_ALL,
1914 						mappings[i].sms_max_prot & VM_PROT_ALL,
1915 						VM_INHERIT_DEFAULT);
1916 				}
1917 			} else {
1918 				object = VM_OBJECT_NULL; /* no anonymous memory here */
1919 				kr = mach_vm_map_kernel(
1920 					sr_map,
1921 					vm_sanitize_wrap_addr_ref(&target_address),
1922 					vm_map_round_page(
1923 						mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1924 					0,
1925 					vmk_flags,
1926 					map_port,
1927 					mappings[i].sms_file_offset,
1928 					TRUE,
1929 					mappings[i].sms_init_prot & VM_PROT_ALL,
1930 					mappings[i].sms_max_prot & VM_PROT_ALL,
1931 					VM_INHERIT_DEFAULT);
1932 			}
1933 
1934 			if (kr == KERN_SUCCESS) {
1935 				/*
1936 				 * Record the first successful mapping(s) in the shared
1937 				 * region by file. We're protected by "sr_mapping_in_progress"
1938 				 * here, so no need to lock "shared_region".
1939 				 *
1940 				 * Note that if we have an AOT shared cache (ARM) for a
1941 				 * translated task, then it's always the first file.
1942 				 * The original "native" (i.e. x86) shared cache is the
1943 				 * second file.
1944 				 */
1945 
1946 				if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1947 					shared_region->sr_first_mapping = target_address;
1948 				}
1949 
1950 				if (*mappings_to_slide_cnt > 0 &&
1951 				    mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1952 					slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1953 					slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1954 				}
1955 
1956 				/*
1957 				 * Record the lowest writable address in this
1958 				 * sub map, to log any unexpected unnesting below
1959 				 * that address (see log_unnest_badness()).
1960 				 */
1961 				if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1962 				    sr_map->is_nested_map &&
1963 				    (lowest_unnestable_addr == 0 ||
1964 				    (target_address < lowest_unnestable_addr))) {
1965 					lowest_unnestable_addr = target_address;
1966 				}
1967 			} else {
1968 				if (map_port == MACH_PORT_NULL) {
1969 					/*
1970 					 * Get rid of the VM object we just created
1971 					 * but failed to map.
1972 					 */
1973 					vm_object_deallocate(object);
1974 					object = VM_OBJECT_NULL;
1975 				}
1976 				if (kr == KERN_MEMORY_PRESENT) {
1977 					/*
1978 					 * This exact mapping was already there:
1979 					 * that's fine.
1980 					 */
1981 					SHARED_REGION_TRACE_INFO(
1982 						("shared_region: mapping[%d]: "
1983 						"address:0x%016llx size:0x%016llx "
1984 						"offset:0x%016llx "
1985 						"maxprot:0x%x prot:0x%x "
1986 						"already mapped...\n",
1987 						i,
1988 						(long long)mappings[i].sms_address,
1989 						(long long)mappings[i].sms_size,
1990 						(long long)mappings[i].sms_file_offset,
1991 						mappings[i].sms_max_prot,
1992 						mappings[i].sms_init_prot));
1993 					/*
1994 					 * We didn't establish this mapping ourselves;
1995 					 * let's reset its size, so that we do not
1996 					 * attempt to undo it if an error occurs later.
1997 					 */
1998 					mappings[i].sms_size = 0;
1999 					kr = KERN_SUCCESS;
2000 				} else {
2001 					break;
2002 				}
2003 			}
2004 		}
2005 
2006 		if (kr != KERN_SUCCESS) {
2007 			break;
2008 		}
2009 	}
2010 
2011 	if (kr != KERN_SUCCESS) {
2012 		/* the last mapping we tried (mappings[i]) failed ! */
2013 		assert(i < mappings_count);
2014 		SHARED_REGION_TRACE_ERROR(
2015 			("shared_region: mapping[%d]: "
2016 			"address:0x%016llx size:0x%016llx "
2017 			"offset:0x%016llx "
2018 			"maxprot:0x%x prot:0x%x failed 0x%x\n",
2019 			i,
2020 			(long long)mappings[i].sms_address,
2021 			(long long)mappings[i].sms_size,
2022 			(long long)mappings[i].sms_file_offset,
2023 			mappings[i].sms_max_prot,
2024 			mappings[i].sms_init_prot,
2025 			kr));
2026 
2027 		/*
2028 		 * Respect the design of vm_shared_region_undo_mappings():
2029 		 * since we are holding sr_mapping_in_progress here, sr_map must
2030 		 * not be NULL, otherwise vm_shared_region_undo_mappings() would
2031 		 * block waiting for sr_mapping_in_progress to become NULL.
2032 		 */
2033 		assert(sr_map != NULL);
2034 		/* undo all the previous mappings */
2035 		vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
2036 		return kr;
2037 	}
2038 
2039 	*lowest_unnestable_addr_ptr = lowest_unnestable_addr;
2040 	*sr_map_ptr = sr_map;
2041 	return KERN_SUCCESS;
2042 }
2043 
2044 /* forward declaration */
2045 __attribute__((noinline))
2046 static void
2047 vm_shared_region_map_file_final(
2048 	vm_shared_region_t shared_region,
2049 	vm_map_t           sr_map,
2050 	mach_vm_offset_t   sfm_min_address,
2051 	mach_vm_offset_t   sfm_max_address);
2052 
2053 /*
2054  * Establish some mappings of a file in the shared region.
2055  * This is used by "dyld" via the shared_region_map_np() system call
2056  * to populate the shared region with the appropriate shared cache.
2057  *
2058  * One could also call it several times to incrementally load several
2059  * libraries, as long as they do not overlap.
2060  * It will return KERN_SUCCESS if the mappings were successfully established
2061  * or if they were already established identically by another process.
2062  */
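/*
 * Rough flow of the implementation below:
 *
 *	vm_shared_region_map_file_setup()  - establish the direct file mappings
 *	vm_shared_region_slide()           - re-map the relocatable portions
 *	                                     through a shared region pager
 *	vm_shared_region_map_file_final()  - record the shared cache UUID/layout
 *
 * This is only a summary; see each helper for the details and the error
 * recovery paths.
 */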
2063 __attribute__((noinline))
2064 kern_return_t
2065 vm_shared_region_map_file(
2066 	vm_shared_region_t       shared_region,
2067 	int                      sr_file_mappings_count,
2068 	struct _sr_file_mappings *sr_file_mappings)
2069 {
2070 	kern_return_t           kr = KERN_SUCCESS;
2071 	unsigned int            i;
2072 	unsigned int            mappings_to_slide_cnt = 0;
2073 	mach_vm_offset_t        sfm_min_address = (mach_vm_offset_t)-1;
2074 	mach_vm_offset_t        sfm_max_address = 0;
2075 	vm_map_t                sr_map = NULL;
2076 	vm_map_offset_t         lowest_unnestable_addr = 0;
2077 	unsigned int            vmsr_num_slides = 0;
2078 	typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
2079 	slid_mappings_t         *slid_mappings = NULL;                  /* [0..vmsr_num_slides] */
2080 	memory_object_control_t *slid_file_controls = NULL;             /* [0..vmsr_num_slides] */
2081 	struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
2082 	struct _sr_file_mappings *srfmp;
2083 	vm_map_switch_context_t switch_ctx;
2084 	bool                    map_switched = false;
2085 
2086 	vmlp_api_start(VM_SHARED_REGION_MAP_FILE);
2087 
2088 	/*
2089 	 * Figure out how many of the mappings have slides.
2090 	 */
2091 	for (srfmp = &sr_file_mappings[0];
2092 	    srfmp < &sr_file_mappings[sr_file_mappings_count];
2093 	    srfmp++) {
2094 		for (i = 0; i < srfmp->mappings_count; ++i) {
2095 			if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
2096 				++vmsr_num_slides;
2097 			}
2098 		}
2099 	}
2100 
2101 	/* Allocate per slide data structures */
2102 	if (vmsr_num_slides > 0) {
2103 		slid_mappings =
2104 		    kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
2105 		slid_file_controls =
2106 		    kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
2107 		mappings_to_slide =
2108 		    kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
2109 	}
2110 
2111 	vm_shared_region_acquire(shared_region);
2112 
2113 	/*
2114 	 * Did someone race in and map this shared region already, or did an earlier mapping fail?
2115 	 */
2116 	if (shared_region->sr_first_mapping != -1) {
2117 #if DEVELOPMENT || DEBUG
2118 		printf("shared_region: caught race in map and slide\n");
2119 #endif /* DEVELOPMENT || DEBUG */
2120 		kr = KERN_FAILURE;
2121 		goto done;
2122 	}
2123 
2124 	kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
2125 	    &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
2126 	    &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
2127 	if (kr != KERN_SUCCESS) {
2128 		goto done;
2129 	}
2130 	assert(vmsr_num_slides == mappings_to_slide_cnt);
2131 
2132 	assert(shared_region->sr_config_map != NULL);
2133 	switch_ctx = vm_map_switch_to(shared_region->sr_config_map);
2134 	map_switched = true;
2135 
2136 	/*
2137 	 * The call above installed direct mappings to the shared cache file.
2138 	 * Now we go back and overwrite the mappings that need relocation
2139 	 * with a special shared region pager.
2140 	 *
2141 	 * Note that this does copyin() of data, needed by the pager, which
2142 	 * the previous code just established mappings for. This is why we
2143 	 * do it in a separate pass.
2144 	 */
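	/*
	 * Roughly: on arm64e the block below first counts the slid mappings
	 * that carry authenticated pointers (no VM_PROT_NOAUTH) and sizes
	 * sr_auth_section[] accordingly; the loop that follows then calls
	 * vm_shared_region_slide() once per mapping flagged VM_PROT_SLIDE,
	 * using the slid address and file control recorded by the setup pass.
	 */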
2145 #if __has_feature(ptrauth_calls)
2146 	/*
2147 	 * allocate the storage needed for any sr_auth_sections
2148 	 */
2149 	for (i = 0; i < mappings_to_slide_cnt; ++i) {
2150 		if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
2151 		    shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2152 		    !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
2153 			++shared_region->sr_num_auth_section;
2154 		}
2155 	}
2156 	if (shared_region->sr_num_auth_section > 0) {
2157 		shared_region->sr_auth_section =
2158 		    kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
2159 		    Z_WAITOK | Z_ZERO);
2160 	}
2161 #endif /* __has_feature(ptrauth_calls) */
2162 	for (i = 0; i < mappings_to_slide_cnt; ++i) {
2163 		kr = vm_shared_region_slide(shared_region->sr_slide,
2164 		    mappings_to_slide[i]->sms_file_offset,
2165 		    mappings_to_slide[i]->sms_size,
2166 		    mappings_to_slide[i]->sms_slide_start,
2167 		    mappings_to_slide[i]->sms_slide_size,
2168 		    slid_mappings[i],
2169 		    slid_file_controls[i],
2170 		    mappings_to_slide[i]->sms_max_prot);
2171 		if (kr != KERN_SUCCESS) {
2172 			SHARED_REGION_TRACE_ERROR(
2173 				("shared_region: region_slide("
2174 				"slide:0x%x start:0x%016llx "
2175 				"size:0x%016llx) failed 0x%x\n",
2176 				shared_region->sr_slide,
2177 				(long long)mappings_to_slide[i]->sms_slide_start,
2178 				(long long)mappings_to_slide[i]->sms_slide_size,
2179 				kr));
2180 			vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
2181 			    &sr_file_mappings[0],
2182 			    &sr_file_mappings[sr_file_mappings_count - 1],
2183 			    sr_file_mappings_count);
2184 			goto done;
2185 		}
2186 	}
2187 
2188 	assert(kr == KERN_SUCCESS);
2189 
2190 	/* adjust the map's "lowest_unnestable_start" */
2191 	lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
2192 	if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
2193 		vm_map_lock(sr_map);
2194 		vmlp_range_event_none(sr_map);
2195 		sr_map->lowest_unnestable_start = lowest_unnestable_addr;
2196 		vm_map_unlock(sr_map);
2197 	}
2198 
2199 	vm_shared_region_lock();
2200 	assert(shared_region->sr_ref_count > 0);
2201 	assert(shared_region->sr_mapping_in_progress == current_thread());
2202 
2203 	vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
2204 	vm_shared_region_unlock();
2205 
2206 done:
2207 
2208 #ifndef NO_NESTED_PMAP
2209 	/*
2210 	 * If we succeeded, we know the bounds of the shared region.
2211 	 * Trim our pmaps to only cover this range (if applicable to
2212 	 * this platform).
2213 	 */
2214 	if (kr == KERN_SUCCESS) {
2215 		pmap_trim(shared_region->sr_config_map->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
2216 	}
2217 #endif
2218 	if (map_switched) {
2219 		vm_map_switch_back(switch_ctx);
2220 	}
2221 
2222 	if (kr == KERN_SUCCESS) {
2223 		vm_map_deallocate(shared_region->sr_config_map);
2224 		shared_region->sr_config_map = VM_MAP_NULL;
2225 	}
2226 
2227 	if (kr == KERN_SUCCESS) {
2228 		vm_shared_region_seal(shared_region);
2229 	}
2230 	vm_shared_region_release(shared_region);
2231 
2232 	SHARED_REGION_TRACE_DEBUG(
2233 		("shared_region: map(%p) <- 0x%x \n",
2234 		(void *)VM_KERNEL_ADDRPERM(shared_region), kr));
2235 	if (vmsr_num_slides > 0) {
2236 		kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
2237 		kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
2238 		kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
2239 		    mappings_to_slide);
2240 	}
2241 	vmlp_api_end(VM_SHARED_REGION_MAP_FILE, kr);
2242 	return kr;
2243 }
2244 
2245 /*
2246  * Final part of vm_shared_region_map_file().
2247  * Kept in separate function to avoid blowing out the stack.
2248  */
2249 __attribute__((noinline))
2250 static void
2251 vm_shared_region_map_file_final(
2252 	vm_shared_region_t        shared_region,
2253 	vm_map_t                  sr_map __unused,
2254 	mach_vm_offset_t          sfm_min_address __unused,
2255 	mach_vm_offset_t          sfm_max_address __unused)
2256 {
2257 	struct _dyld_cache_header sr_cache_header;
2258 	int                       error;
2259 	size_t                    image_array_length;
2260 	struct _dyld_cache_image_text_info *sr_image_layout;
2261 	boolean_t                 locally_built = FALSE;
2262 
2263 
2264 	/*
2265 	 * copy in the shared region UUID to the shared region structure.
2266 	 * we do this indirectly by first copying in the shared cache header
2267 	 * and then copying the UUID from there because we'll need to look
2268 	 * at other content from the shared cache header.
2269 	 */
2270 	if (!shared_region->sr_uuid_copied) {
2271 		error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
2272 		    (char *)&sr_cache_header,
2273 		    sizeof(sr_cache_header));
2274 		if (error == 0) {
2275 			memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
2276 			shared_region->sr_uuid_copied = TRUE;
2277 			locally_built = sr_cache_header.locallyBuiltCache;
2278 		} else {
2279 #if DEVELOPMENT || DEBUG
2280 			panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2281 			    "offset:0 size:0x%016llx) failed with %d\n",
2282 			    (long long)shared_region->sr_base_address,
2283 			    (long long)shared_region->sr_first_mapping,
2284 			    (long long)sizeof(sr_cache_header),
2285 			    error);
2286 #endif /* DEVELOPMENT || DEBUG */
2287 			shared_region->sr_uuid_copied = FALSE;
2288 		}
2289 	}
2290 
2291 	/*
2292 	 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd.  This is used by
2293 	 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
2294 	 * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
2295 	 * region.  In that case, launchd re-exec's itself, so we may go through this path multiple times.  We
2296 	 * let the most recent one win.
2297 	 *
2298 	 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
2299 	 */
2300 	bool is_init_task = (task_pid(current_task()) == 1);
2301 	if (shared_region->sr_uuid_copied && is_init_task) {
2302 		/* Copy in the shared cache layout if we're running with a locally built shared cache */
2303 		if (locally_built) {
2304 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
2305 			image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
2306 			sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
2307 			error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
2308 			    sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
2309 			if (error == 0) {
2310 				if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
2311 					panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
2312 				}
2313 				shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
2314 				for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
2315 					memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
2316 					    sizeof(shared_region->sr_images[index].imageUUID));
2317 					shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
2318 				}
2319 
2320 				shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
2321 			} else {
2322 #if DEVELOPMENT || DEBUG
2323 				panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2324 				    "offset:0x%016llx size:0x%016llx) failed with %d\n",
2325 				    (long long)shared_region->sr_base_address,
2326 				    (long long)shared_region->sr_first_mapping,
2327 				    (long long)sr_cache_header.imagesTextOffset,
2328 				    (long long)image_array_length,
2329 				    error);
2330 #endif /* DEVELOPMENT || DEBUG */
2331 			}
2332 			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
2333 			kfree_data(sr_image_layout, image_array_length);
2334 			sr_image_layout = NULL;
2335 		}
2336 		primary_system_shared_region = shared_region;
2337 	}
2338 }
2339 
2340 /*
2341  * Insert the real shared region submap entry into a task's VM map over the placeholder
2342  * installed by vm_map_exec().  Note that this function can only be called once per vm_map,
2343  * and cannot be undone.  This is because it results in the shared region's pmap being nested
2344  * into [map]'s pmap; on some platforms the security model requires this nesting relationship
2345  * to be permanent, so the nested pmap cannot be "de-nested" from the top-level pmap or
2346  * "re-nested" again into the same top-level pmap.
2347  */
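/*
 * Schematically, the code below maps the region in up to three pieces
 * (most configurations only need the middle, pmap-nested one):
 *
 *	[sr_base_address, sr_pmap_nesting_start)           - not pmap-nested
 *	[sr_pmap_nesting_start,
 *	    sr_pmap_nesting_start + sr_pmap_nesting_size)  - pmap-nested
 *	[end of nested range, sr_base_address + sr_size)   - not pmap-nested
 */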
2348 kern_return_t
2349 vm_shared_region_insert_submap(vm_map_t map, vm_shared_region_t shared_region, bool overwrite)
2350 {
2351 	vm_map_offset_t         sr_address, sr_offset, target_address;
2352 	vm_map_size_t           sr_size, mapping_size;
2353 	vm_map_offset_t         sr_pmap_nesting_start;
2354 	vm_map_size_t           sr_pmap_nesting_size;
2355 	ipc_port_t              sr_handle;
2356 	vm_prot_t               cur_prot, max_prot;
2357 	vm_map_kernel_flags_t   vmk_flags;
2358 
2359 	kern_return_t kr = KERN_SUCCESS;
2360 	/* no need to lock since this data is never modified */
2361 	sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2362 	sr_size = (vm_map_size_t)shared_region->sr_size;
2363 	sr_handle = shared_region->sr_mem_entry;
2364 	sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2365 	sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2366 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
2367 	if (overwrite) {
2368 		vmk_flags.vmf_overwrite = true;
2369 		vmk_flags.vmkf_overwrite_immutable = true;
2370 	}
2371 
2372 	/*
2373 	 * vm_map_lookup_and_lock_object() expects the parent map entry
2374 	 * for a shared region submap to have protections r-- by default.
2375 	 */
2376 	cur_prot = VM_PROT_READ;
2377 	if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2378 		/*
2379 		 * XXX BINARY COMPATIBILITY
2380 		 * java6 apparently needs to modify some code in the
2381 		 * dyld shared cache and needs to be allowed to add
2382 		 * write access...
2383 		 */
2384 		max_prot = VM_PROT_ALL;
2385 	} else {
2386 		max_prot = VM_PROT_READ;
2387 		/* make it "permanent" to protect against re-mappings */
2388 		vmk_flags.vmf_permanent = true;
2389 	}
2390 
2391 	/*
2392 	 * Start mapping the shared region's VM sub map into the task's VM map.
2393 	 */
2394 	sr_offset = 0;
2395 
2396 	if (sr_pmap_nesting_start > sr_address) {
2397 		/* we need to map a range without pmap-nesting first */
2398 		target_address = sr_address;
2399 		mapping_size = sr_pmap_nesting_start - sr_address;
2400 		kr = mach_vm_map_kernel(
2401 			map,
2402 			vm_sanitize_wrap_addr_ref(&target_address),
2403 			mapping_size,
2404 			0,
2405 			vmk_flags,
2406 			sr_handle,
2407 			sr_offset,
2408 			TRUE,
2409 			cur_prot,
2410 			max_prot,
2411 			VM_INHERIT_SHARE);
2412 		if (kr != KERN_SUCCESS) {
2413 			SHARED_REGION_TRACE_ERROR(
2414 				("shared_region: insert_submap(%p,%p): "
2415 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2416 				(void *)VM_KERNEL_ADDRPERM(map),
2417 				(void *)VM_KERNEL_ADDRPERM(shared_region),
2418 				(long long)target_address,
2419 				(long long)mapping_size,
2420 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2421 			return kr;
2422 		}
2423 		SHARED_REGION_TRACE_DEBUG(
2424 			("shared_region: insert_submap(%p,%p): "
2425 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2426 			(void *)VM_KERNEL_ADDRPERM(map),
2427 			(void *)VM_KERNEL_ADDRPERM(shared_region),
2428 			(long long)target_address, (long long)mapping_size,
2429 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2430 		sr_offset += mapping_size;
2431 		sr_size -= mapping_size;
2432 	}
2433 
2434 	/* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2435 	vmk_flags.vmkf_nested_pmap = true;
2436 	vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
2437 
2438 	/*
2439 	 * Use pmap-nesting to map the majority of the shared region into the task's
2440 	 * VM space. Very rarely will architectures have a shared region that isn't
2441 	 * the same size as the pmap-nesting region, or start at a different address
2442 	 * than the pmap-nesting region, so this code will map the entirety of the
2443 	 * shared region for most architectures.
2444 	 */
2445 	assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2446 	target_address = sr_pmap_nesting_start;
2447 	kr = mach_vm_map_kernel(
2448 		map,
2449 		vm_sanitize_wrap_addr_ref(&target_address),
2450 		sr_pmap_nesting_size,
2451 		0,
2452 		vmk_flags,
2453 		sr_handle,
2454 		sr_offset,
2455 		TRUE,
2456 		cur_prot,
2457 		max_prot,
2458 		VM_INHERIT_SHARE);
2459 	if (kr != KERN_SUCCESS) {
2460 		SHARED_REGION_TRACE_ERROR(
2461 			("shared_region: insert_submap(%p,%p): "
2462 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2463 			(void *)VM_KERNEL_ADDRPERM(map),
2464 			(void *)VM_KERNEL_ADDRPERM(shared_region),
2465 			(long long)target_address,
2466 			(long long)sr_pmap_nesting_size,
2467 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2468 		return kr;
2469 	}
2470 	SHARED_REGION_TRACE_DEBUG(
2471 		("shared_region: insert_submap(%p,%p): "
2472 		"nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2473 		(void *)VM_KERNEL_ADDRPERM(map),
2474 		(void *)VM_KERNEL_ADDRPERM(shared_region),
2475 		(long long)target_address, (long long)sr_pmap_nesting_size,
2476 		(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2477 
2478 	sr_offset += sr_pmap_nesting_size;
2479 	sr_size -= sr_pmap_nesting_size;
2480 
2481 	if (sr_size > 0) {
2482 		/* and there's some left to be mapped without pmap-nesting */
2483 		vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
2484 		target_address = sr_address + sr_offset;
2485 		mapping_size = sr_size;
2486 		kr = mach_vm_map_kernel(
2487 			map,
2488 			vm_sanitize_wrap_addr_ref(&target_address),
2489 			mapping_size,
2490 			0,
2491 			VM_MAP_KERNEL_FLAGS_FIXED(),
2492 			sr_handle,
2493 			sr_offset,
2494 			TRUE,
2495 			cur_prot,
2496 			max_prot,
2497 			VM_INHERIT_SHARE);
2498 		if (kr != KERN_SUCCESS) {
2499 			SHARED_REGION_TRACE_ERROR(
2500 				("shared_region: insert_submap(%p,%p): "
2501 				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2502 				(void *)VM_KERNEL_ADDRPERM(map),
2503 				(void *)VM_KERNEL_ADDRPERM(shared_region),
2504 				(long long)target_address,
2505 				(long long)mapping_size,
2506 				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2507 			return kr;
2508 		}
2509 		SHARED_REGION_TRACE_DEBUG(
2510 			("shared_region: insert_submap(%p,%p): "
2511 			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2512 			(void *)VM_KERNEL_ADDRPERM(map),
2513 			(void *)VM_KERNEL_ADDRPERM(shared_region),
2514 			(long long)target_address, (long long)mapping_size,
2515 			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2516 		sr_offset += mapping_size;
2517 		sr_size -= mapping_size;
2518 	}
2519 	assert(sr_size == 0);
2520 
2521 	return kr;
2522 }
2523 
2524 /*
2525  * Inserts a VM_PROT_NONE placeholder covering the shared region into [map].
2526  * This is intended to be called when a new task is exec'ed and initially associated
2527  * with a shared region.  Once the userspace dyld initialization sequence successfully
2528  * queries the shared region start address via the shared_region_check_np syscall,
2529  * this placeholder will be replaced with the real shared region submap entry.
2530  */
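/*
 * Note that, before reserving the range, the helper below also tells the
 * pmap layer (via pmap_set_shared_region()) which nested pmap and address
 * range this map will eventually use; the reservation itself is a fixed,
 * permanent VM_PROT_NONE entry with no backing object.
 */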
2531 static kern_return_t
2532 vm_shared_region_insert_placeholder(vm_map_t map, vm_shared_region_t shared_region)
2533 {
2534 	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT();
2535 
2536 	vm_map_offset_t address = shared_region->sr_base_address;
2537 
2538 	pmap_set_shared_region(map->pmap, vm_shared_region_vm_map(shared_region)->pmap,
2539 	    address, shared_region->sr_size);
2540 
2541 	return vm_map_enter(
2542 		map,
2543 		&address,
2544 		shared_region->sr_size,
2545 		(vm_map_offset_t)0,
2546 		vmk_flags,
2547 		VM_OBJECT_NULL,
2548 		(vm_object_offset_t)0,
2549 		FALSE,
2550 		VM_PROT_NONE,
2551 		VM_PROT_NONE,
2552 		VM_INHERIT_COPY);
2553 }
2554 
2555 /*
2556  * Enter the appropriate shared region into "map" for "task".
2557  * This involves looking up the shared region (and possibly creating a new
2558  * one) for the desired environment, then entering a permanent placeholder
2559  * entry for the shared region.  If the task actually chooses to map a
2560  * shared region, this placeholder will later be overwritten by a submap
2561  * entry for the real shared region in vm_shared_region_insert_submap().
2562  */
2563 kern_return_t
2564 vm_shared_region_enter(
2565 	struct _vm_map          *map,
2566 	struct task             *task,
2567 	boolean_t               is_64bit,
2568 	void                    *fsroot,
2569 	cpu_type_t              cpu,
2570 	cpu_subtype_t           cpu_subtype,
2571 	boolean_t               reslide,
2572 	boolean_t               is_driverkit,
2573 	uint32_t                rsr_version)
2574 {
2575 	kern_return_t           kr;
2576 	vm_shared_region_t      shared_region;
2577 
2578 	SHARED_REGION_TRACE_DEBUG(
2579 		("shared_region: -> "
2580 		"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2581 		(void *)VM_KERNEL_ADDRPERM(map),
2582 		(void *)VM_KERNEL_ADDRPERM(task),
2583 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2584 		cpu, cpu_subtype, is_64bit, is_driverkit));
2585 
2586 	/* lookup (create if needed) the shared region for this environment */
2587 	shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
2588 	if (shared_region == NULL) {
2589 		/* this should not happen ! */
2590 		SHARED_REGION_TRACE_ERROR(
2591 			("shared_region: -> "
2592 			"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2593 			"lookup failed !\n",
2594 			(void *)VM_KERNEL_ADDRPERM(map),
2595 			(void *)VM_KERNEL_ADDRPERM(task),
2596 			(void *)VM_KERNEL_ADDRPERM(fsroot),
2597 			cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2598 		//panic("shared_region_enter: lookup failed");
2599 		return KERN_FAILURE;
2600 	}
2601 
2602 	kr = vm_shared_region_insert_placeholder(map, shared_region);
2603 
2604 	if (kr == KERN_SUCCESS) {
2605 		/* let the task use that shared region */
2606 		vm_shared_region_set(task, shared_region);
2607 	} else {
2608 		/* drop our reference since we're not using it */
2609 		vm_shared_region_deallocate(shared_region);
2610 		vm_shared_region_set(task, NULL);
2611 	}
2612 
2613 	SHARED_REGION_TRACE_DEBUG(
2614 		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2615 		(void *)VM_KERNEL_ADDRPERM(map),
2616 		(void *)VM_KERNEL_ADDRPERM(task),
2617 		(void *)VM_KERNEL_ADDRPERM(fsroot),
2618 		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2619 		kr));
2620 	return kr;
2621 }
2622 
2623 void
2624 vm_shared_region_remove(
2625 	task_t task,
2626 	vm_shared_region_t sr)
2627 {
2628 	vm_map_t map;
2629 	mach_vm_offset_t start;
2630 	mach_vm_size_t size;
2631 	vm_map_kernel_flags_t vmk_flags;
2632 	kern_return_t kr;
2633 
2634 	if (sr == NULL) {
2635 		return;
2636 	}
2637 	map = get_task_map(task);
2638 	start = sr->sr_base_address;
2639 	size = sr->sr_size;
2640 
2641 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2642 	vmk_flags.vmkf_overwrite_immutable = true;
2643 	vmk_flags.vm_tag = VM_MEMORY_DYLD;
2644 
2645 	/* range_id is set by mach_vm_map_kernel */
2646 	kr = mach_vm_map_kernel(map,
2647 	    vm_sanitize_wrap_addr_ref(&start),
2648 	    size,
2649 	    0,                     /* mask */
2650 	    vmk_flags,
2651 	    MACH_PORT_NULL,
2652 	    0,
2653 	    FALSE,                     /* copy */
2654 	    VM_PROT_NONE,
2655 	    VM_PROT_NONE,
2656 	    VM_INHERIT_DEFAULT);
2657 	if (kr != KERN_SUCCESS) {
2658 		printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2659 	}
2660 }
2661 
2662 #define SANE_SLIDE_INFO_SIZE            (2560*1024) /* Can be changed if needed */
2663 
2664 kern_return_t
2665 vm_shared_region_sliding_valid(uint32_t slide)
2666 {
2667 	kern_return_t kr = KERN_SUCCESS;
2668 	vm_shared_region_t sr = vm_shared_region_get(current_task());
2669 
2670 	/* No region yet? we're fine. */
2671 	if (sr == NULL) {
2672 		return kr;
2673 	}
2674 
2675 	if (sr->sr_slide != 0 && slide != 0) {
2676 		if (slide == sr->sr_slide) {
2677 			/*
2678 			 * A request for sliding when we've
2679 			 * already done it with exactly the
2680 			 * same slide value before.
2681 			 * This isn't technically wrong, but we
2682 			 * don't want to slide again, so we
2683 			 * return KERN_INVALID_ARGUMENT.
2684 			 */
2685 			kr = KERN_INVALID_ARGUMENT;
2686 		} else {
2687 			printf("Mismatched shared region slide\n");
2688 			kr = KERN_FAILURE;
2689 		}
2690 	}
2691 	vm_shared_region_deallocate(sr);
2692 	return kr;
2693 }
2694 
2695 /*
2696  * Actually create (really overwrite) the mapping to part of the shared cache which
2697  * undergoes relocation.  This routine reads in the relocation info from dyld and
2698  * verifies it. It then creates a (or finds a matching) shared region pager which
2699  * handles the actual modification of the page contents and installs the mapping
2700  * using that pager.
2701  */
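/*
 * In outline, the code below:
 *   - bounds-checks and copies in the slide info blob provided by dyld;
 *   - runs vm_shared_region_slide_sanity_check() on it;
 *   - builds a vm_shared_region_slide_info describing the slid range;
 *   - on arm64e, for authenticated sections, only records that info so that
 *     per-shared-region-id pagers can be created later;
 *   - otherwise sets up a shared region pager and overwrites the existing
 *     mapping with it.
 * This is only a summary of the code that follows.
 */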
2702 kern_return_t
2703 vm_shared_region_slide_mapping(
2704 	vm_shared_region_t      sr,
2705 	user_addr_t             slide_info_addr,
2706 	mach_vm_size_t          slide_info_size,
2707 	mach_vm_offset_t        start,
2708 	mach_vm_size_t          size,
2709 	mach_vm_offset_t        slid_mapping,
2710 	uint32_t                slide,
2711 	memory_object_control_t sr_file_control,
2712 	vm_prot_t               prot)
2713 {
2714 	kern_return_t           kr;
2715 	vm_object_t             object = VM_OBJECT_NULL;
2716 	vm_shared_region_slide_info_t si = NULL;
2717 	vm_map_entry_t          tmp_entry = VM_MAP_ENTRY_NULL;
2718 	struct vm_map_entry     tmp_entry_store;
2719 	memory_object_t         sr_pager = MEMORY_OBJECT_NULL;
2720 	vm_map_t                sr_map;
2721 	vm_map_kernel_flags_t   vmk_flags;
2722 	vm_map_offset_t         map_addr;
2723 	void                    *slide_info_entry = NULL;
2724 	int                     error;
2725 
2726 	assert(sr->sr_slide_in_progress);
2727 
2728 	if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2729 		return KERN_INVALID_ARGUMENT;
2730 	}
2731 
2732 	/*
2733 	 * Copy in and verify the relocation information.
2734 	 */
2735 	if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2736 		printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2737 		return KERN_FAILURE;
2738 	}
2739 	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2740 		printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2741 		return KERN_FAILURE;
2742 	}
2743 
2744 	slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2745 	if (slide_info_entry == NULL) {
2746 		return KERN_RESOURCE_SHORTAGE;
2747 	}
2748 	error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2749 	if (error) {
2750 		printf("copyin of slide_info (%p) failed\n", (void*)slide_info_addr);
2751 		kr = KERN_INVALID_ADDRESS;
2752 		goto done;
2753 	}
2754 
2755 	if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2756 		printf("Sanity Check failed for slide_info\n");
2757 		goto done;
2758 	}
2759 
2760 	/*
2761 	 * Allocate and fill in a vm_shared_region_slide_info.
2762 	 * This will either be used by a new pager, or used to find
2763 	 * a pre-existing matching pager.
2764 	 */
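	/*
	 * Roughly, "si" captures everything a pager needs to redo the
	 * relocations for this range: the copied-in slide info blob and its
	 * size, the backing file object, the [start, end) range, the slide
	 * amount and the slid address in the shared region.
	 */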
2765 	object = memory_object_control_to_vm_object(sr_file_control);
2766 	if (object == VM_OBJECT_NULL || object->internal) {
2767 		object = VM_OBJECT_NULL;
2768 		kr = KERN_INVALID_ADDRESS;
2769 		goto done;
2770 	}
2771 
2772 	si = kalloc_type(struct vm_shared_region_slide_info,
2773 	    Z_WAITOK | Z_NOFAIL);
2774 	vm_object_lock(object);
2775 
2776 	vm_object_reference_locked(object);     /* for si->slide_object */
2777 	object->object_is_shared_cache = TRUE;
2778 	vm_object_unlock(object);
2779 
2780 	si->si_slide_info_entry = slide_info_entry;
2781 	si->si_slide_info_size = slide_info_size;
2782 
2783 	assert(slid_mapping != (mach_vm_offset_t) -1);
2784 	si->si_slid_address = slid_mapping + sr->sr_base_address;
2785 	si->si_slide_object = object;
2786 	si->si_start = start;
2787 	si->si_end = si->si_start + size;
2788 	si->si_slide = slide;
2789 #if __has_feature(ptrauth_calls)
2790 	/*
2791 	 * If there is authenticated pointer data in this slid mapping,
2792 	 * then just add the information needed to create new pagers for
2793 	 * different shared_region_id's later.
2794 	 */
2795 	if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2796 	    sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2797 	    !(prot & VM_PROT_NOAUTH)) {
2798 		if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
2799 			printf("Too many auth/private sections for shared region!!\n");
2800 			kr = KERN_INVALID_ARGUMENT;
2801 			goto done;
2802 		}
2803 		si->si_ptrauth = TRUE;
2804 		sr->sr_auth_section[sr->sr_next_auth_section++] = si;
2805 		/*
2806 		 * Remember the shared region, since that's where we'll
2807 		 * stash this info for all auth pagers to share. Each pager
2808 		 * will need to take a reference to it.
2809 		 */
2810 		si->si_shared_region = sr;
2811 		kr = KERN_SUCCESS;
2812 		goto done;
2813 	}
2814 	si->si_shared_region = NULL;
2815 	si->si_ptrauth = FALSE;
2816 #endif /* __has_feature(ptrauth_calls) */
2817 
2818 	/*
2819 	 * find the pre-existing shared region's map entry to slide
2820 	 */
2821 	sr_map = vm_shared_region_vm_map(sr);
2822 	kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2823 	if (kr != KERN_SUCCESS) {
2824 		goto done;
2825 	}
2826 	tmp_entry = &tmp_entry_store;
2827 
2828 	/*
2829 	 * The object must exactly cover the region to slide.
2830 	 */
2831 	assert(VME_OFFSET(tmp_entry) == start);
2832 	assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2833 
2834 	/*
2835 	 * We trust that the contents of this object are not writable, so
2836 	 * we do not need to get a "copy" of it.
2837 	 */
2838 
2839 	/* create a "shared_region" sliding pager */
2840 	sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2841 	if (sr_pager == MEMORY_OBJECT_NULL) {
2842 		kr = KERN_RESOURCE_SHORTAGE;
2843 		goto done;
2844 	}
2845 
2846 #if CONFIG_SECLUDED_MEMORY
2847 	/*
2848 	 * Shared region pagers used by camera or DEXT should not have
2849 	 * their pages go on the secluded queue.
2850 	 */
2851 	if (primary_system_shared_region == NULL ||
2852 	    primary_system_shared_region == sr ||
2853 	    sr->sr_driverkit) {
2854 		memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2855 	}
2856 #endif /* CONFIG_SECLUDED_MEMORY */
2857 
2858 	/* map that pager over the portion of the mapping that needs sliding */
2859 	map_addr = tmp_entry->vme_start;
2860 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2861 	vmk_flags.vmkf_overwrite_immutable = true;
2862 	vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
2863 	    tmp_entry->max_protection);
2864 	vmk_flags.vmf_tpro = shared_region_tpro_protect(sr,
2865 	    prot);
2866 	kr = mach_vm_map_kernel(sr_map,
2867 	    vm_sanitize_wrap_addr_ref(&map_addr),
2868 	    tmp_entry->vme_end - tmp_entry->vme_start,
2869 	    0,
2870 	    vmk_flags,
2871 	    (ipc_port_t)(uintptr_t) sr_pager,
2872 	    0,
2873 	    TRUE, /* copy; to make sure this object stays "clean" */
2874 	    tmp_entry->protection,
2875 	    tmp_entry->max_protection,
2876 	    tmp_entry->inheritance);
2877 	assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2878 	assertf(map_addr == tmp_entry->vme_start,
2879 	    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2880 	    (uint64_t)map_addr,
2881 	    (uint64_t) tmp_entry->vme_start,
2882 	    tmp_entry);
2883 
2884 	/* success! */
2885 	kr = KERN_SUCCESS;
2886 
2887 done:
2888 	if (sr_pager != NULL) {
2889 		/*
2890 		 * Release the sr_pager reference obtained by shared_region_pager_setup().
2891 		 * The mapping, if it succeeded, is now holding a reference on the memory object.
2892 		 */
2893 		memory_object_deallocate(sr_pager);
2894 		sr_pager = MEMORY_OBJECT_NULL;
2895 	}
2896 	if (tmp_entry != NULL) {
2897 		/* release extra ref on tmp_entry's VM object */
2898 		vm_object_deallocate(VME_OBJECT(tmp_entry));
2899 		tmp_entry = VM_MAP_ENTRY_NULL;
2900 	}
2901 
2902 	if (kr != KERN_SUCCESS) {
2903 		/* cleanup */
2904 		if (si != NULL) {
2905 			if (si->si_slide_object) {
2906 				vm_object_deallocate(si->si_slide_object);
2907 				si->si_slide_object = VM_OBJECT_NULL;
2908 			}
2909 			kfree_type(struct vm_shared_region_slide_info, si);
2910 			si = NULL;
2911 		}
2912 		if (slide_info_entry != NULL) {
2913 			kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2914 			slide_info_entry = NULL;
2915 		}
2916 	}
2917 	return kr;
2918 }
2919 
2920 static kern_return_t
2921 vm_shared_region_slide_sanity_check_v1(
2922 	vm_shared_region_slide_info_entry_v1_t s_info)
2923 {
2924 	uint32_t pageIndex = 0;
2925 	uint16_t entryIndex = 0;
2926 	uint16_t *toc = NULL;
2927 
2928 	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2929 	for (; pageIndex < s_info->toc_count; pageIndex++) {
2930 		entryIndex =  (uint16_t)(toc[pageIndex]);
2931 
2932 		if (entryIndex >= s_info->entry_count) {
2933 			printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2934 			return KERN_FAILURE;
2935 		}
2936 	}
2937 	return KERN_SUCCESS;
2938 }
2939 
2940 static kern_return_t
2941 vm_shared_region_slide_sanity_check_v2(
2942 	vm_shared_region_slide_info_entry_v2_t s_info,
2943 	mach_vm_size_t slide_info_size)
2944 {
2945 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2946 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2947 		return KERN_FAILURE;
2948 	}
2949 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2950 		return KERN_FAILURE;
2951 	}
2952 
2953 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
2954 
2955 	uint32_t page_starts_count = s_info->page_starts_count;
2956 	uint32_t page_extras_count = s_info->page_extras_count;
2957 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2958 	if (num_trailing_entries < page_starts_count) {
2959 		return KERN_FAILURE;
2960 	}
2961 
2962 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2963 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2964 	if (trailing_size >> 1 != num_trailing_entries) {
2965 		return KERN_FAILURE;
2966 	}
2967 
2968 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2969 	if (required_size < sizeof(*s_info)) {
2970 		return KERN_FAILURE;
2971 	}
2972 
2973 	if (required_size > slide_info_size) {
2974 		return KERN_FAILURE;
2975 	}
2976 
2977 	return KERN_SUCCESS;
2978 }
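/*
 * Illustrative sketch (added for exposition; not part of the original source):
 * the overflow-safe bounds check used by the v2/v4 sanity checks above,
 * restated on plain integers.  The function and parameter names are
 * hypothetical; the logic only mirrors the checks performed above.
 */
#if 0 /* illustrative only, never compiled */
#include <stdbool.h>
#include <stdint.h>

static bool
hypothetical_slide_info_fits(
	uint32_t page_starts_count,
	uint32_t page_extras_count,
	uint64_t header_size,      /* sizeof(*s_info) in the code above */
	uint64_t blob_size)        /* slide_info_size in the code above */
{
	/* sum of the two counts, with the same defensive wrap check as above */
	uint64_t entries = (uint64_t)page_starts_count + page_extras_count;
	if (entries < page_starts_count) {
		return false;
	}
	/* scale by sizeof(uint16_t); "<< 1" keeps the overflow check trivial */
	uint64_t trailing = entries << 1;
	if ((trailing >> 1) != entries) {
		return false;
	}
	/* the header plus the trailing entries must fit in the supplied blob */
	uint64_t required = header_size + trailing;
	if (required < header_size) {
		return false;
	}
	return required <= blob_size;
}
#endif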
2979 
2980 static kern_return_t
2981 vm_shared_region_slide_sanity_check_v3(
2982 	vm_shared_region_slide_info_entry_v3_t s_info,
2983 	mach_vm_size_t slide_info_size)
2984 {
2985 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2986 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2987 		return KERN_FAILURE;
2988 	}
2989 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2990 		printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2991 		return KERN_FAILURE;
2992 	}
2993 
2994 	uint32_t page_starts_count = s_info->page_starts_count;
2995 	mach_vm_size_t num_trailing_entries = page_starts_count;
2996 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
2997 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2998 	if (required_size < sizeof(*s_info)) {
2999 		printf("vm_shared_region_slide_sanity_check_v3: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
3000 		return KERN_FAILURE;
3001 	}
3002 
3003 	if (required_size > slide_info_size) {
3004 		printf("vm_shared_region_slide_sanity_check_v3: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
3005 		return KERN_FAILURE;
3006 	}
3007 
3008 	return KERN_SUCCESS;
3009 }
3010 
3011 static kern_return_t
3012 vm_shared_region_slide_sanity_check_v4(
3013 	vm_shared_region_slide_info_entry_v4_t s_info,
3014 	mach_vm_size_t slide_info_size)
3015 {
3016 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
3017 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
3018 		return KERN_FAILURE;
3019 	}
3020 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
3021 		return KERN_FAILURE;
3022 	}
3023 
3024 	/* Ensure that the slide info doesn't reference any data outside of its bounds. */
3025 
3026 	uint32_t page_starts_count = s_info->page_starts_count;
3027 	uint32_t page_extras_count = s_info->page_extras_count;
3028 	mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
3029 	if (num_trailing_entries < page_starts_count) {
3030 		return KERN_FAILURE;
3031 	}
3032 
3033 	/* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
3034 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
3035 	if (trailing_size >> 1 != num_trailing_entries) {
3036 		return KERN_FAILURE;
3037 	}
3038 
3039 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
3040 	if (required_size < sizeof(*s_info)) {
3041 		return KERN_FAILURE;
3042 	}
3043 
3044 	if (required_size > slide_info_size) {
3045 		return KERN_FAILURE;
3046 	}
3047 
3048 	return KERN_SUCCESS;
3049 }
3050 
3051 static kern_return_t
3052 vm_shared_region_slide_sanity_check_v5(
3053 	vm_shared_region_slide_info_entry_v5_t s_info,
3054 	mach_vm_size_t slide_info_size)
3055 {
3056 	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v5)) {
3057 		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
3058 		return KERN_FAILURE;
3059 	}
3060 	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB) {
3061 		printf("vm_shared_region_slide_sanity_check_v5: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE_16KB);
3062 		return KERN_FAILURE;
3063 	}
3064 
3065 	uint32_t page_starts_count = s_info->page_starts_count;
3066 	mach_vm_size_t num_trailing_entries = page_starts_count;
3067 	mach_vm_size_t trailing_size = num_trailing_entries << 1;
3068 	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
3069 	if (required_size < sizeof(*s_info)) {
3070 		printf("vm_shared_region_slide_sanity_check_v5: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
3071 		return KERN_FAILURE;
3072 	}
3073 
3074 	if (required_size > slide_info_size) {
3075 		printf("vm_shared_region_slide_sanity_check_v5: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
3076 		return KERN_FAILURE;
3077 	}
3078 
3079 	return KERN_SUCCESS;
3080 }
3081 
3082 
3083 static kern_return_t
3084 vm_shared_region_slide_sanity_check(
3085 	vm_shared_region_slide_info_entry_t s_info,
3086 	mach_vm_size_t s_info_size)
3087 {
3088 	kern_return_t kr;
3089 
3090 	switch (s_info->version) {
3091 	case 1:
3092 		kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
3093 		break;
3094 	case 2:
3095 		kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
3096 		break;
3097 	case 3:
3098 		kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
3099 		break;
3100 	case 4:
3101 		kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
3102 		break;
3103 	case 5:
3104 		kr = vm_shared_region_slide_sanity_check_v5(&s_info->v5, s_info_size);
3105 		break;
3106 	default:
3107 		kr = KERN_FAILURE;
3108 	}
3109 	return kr;
3110 }
3111 
3112 static kern_return_t
3113 vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3114 {
3115 	uint16_t *toc = NULL;
3116 	slide_info_entry_toc_t bitmap = NULL;
3117 	uint32_t i = 0, j = 0;
3118 	uint8_t b = 0;
3119 	uint32_t slide = si->si_slide;
3120 	int is_64 = task_has_64Bit_addr(current_task());
3121 
3122 	vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
3123 	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
3124 
3125 	if (pageIndex >= s_info->toc_count) {
3126 		printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
3127 	} else {
3128 		uint16_t entryIndex =  (uint16_t)(toc[pageIndex]);
3129 		slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
3130 
3131 		if (entryIndex >= s_info->entry_count) {
3132 			printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
3133 		} else {
3134 			bitmap = &slide_info_entries[entryIndex];
3135 
3136 			for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
3137 				b = bitmap->entry[i];
3138 				if (b != 0) {
3139 					for (j = 0; j < 8; ++j) {
3140 						if (b & (1 << j)) {
3141 							uint32_t *ptr_to_slide;
3142 							uint32_t old_value;
3143 
3144 							ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
3145 							old_value = *ptr_to_slide;
3146 							*ptr_to_slide += slide;
3147 							if (is_64 && *ptr_to_slide < old_value) {
3148 								/*
3149 								 * We just slid the low 32 bits of a 64-bit pointer
3150 								 * and it looks like there should have been a carry-over
3151 								 * to the upper 32 bits.
3152 								 * The sliding failed...
3153 								 */
3154 								printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
3155 								    i, j, b, slide, old_value, *ptr_to_slide);
3156 								return KERN_FAILURE;
3157 							}
3158 						}
3159 					}
3160 				}
3161 			}
3162 		}
3163 	}
3164 
3165 	return KERN_SUCCESS;
3166 }
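/*
 * Illustrative sketch (added for exposition; not part of the original source).
 * In the v1 format, bit (i * 8 + j) of a page's bitmap marks the 32-bit word
 * at byte offset 4 * (i * 8 + j) as needing the slide added, which is what the
 * nested loops above implement.  The helper below applies a single bitmap byte
 * to a page; all names are hypothetical.  For example, a bitmap byte of 0x05
 * at index i slides the words at offsets 4 * (i * 8 + 0) and 4 * (i * 8 + 2).
 */
#if 0 /* illustrative only, never compiled */
#include <stdint.h>

static void
hypothetical_apply_v1_bitmap_byte(
	uint8_t  *page,
	uint32_t  byte_index,    /* "i" in the loop above */
	uint8_t   bitmap_byte,   /* "b" in the loop above */
	uint32_t  slide)
{
	for (uint32_t j = 0; j < 8; j++) {
		if (bitmap_byte & (1 << j)) {
			uint32_t *word = (uint32_t *)(page +
			    sizeof(uint32_t) * (byte_index * 8 + j));
			/* only the low 32 bits are slid; see the carry check above */
			*word += slide;
		}
	}
}
#endif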
3167 
3168 static kern_return_t
3169 rebase_chain_32(
3170 	uint8_t *page_content,
3171 	uint16_t start_offset,
3172 	uint32_t slide_amount,
3173 	vm_shared_region_slide_info_entry_v2_t s_info)
3174 {
3175 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3176 
3177 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3178 	const uint32_t value_mask = ~delta_mask;
3179 	const uint32_t value_add = (uint32_t)(s_info->value_add);
3180 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3181 
3182 	uint32_t page_offset = start_offset;
3183 	uint32_t delta = 1;
3184 
3185 	while (delta != 0 && page_offset <= last_page_offset) {
3186 		uint8_t *loc;
3187 		uint32_t value;
3188 
3189 		loc = page_content + page_offset;
3190 		memcpy(&value, loc, sizeof(value));
3191 		delta = (value & delta_mask) >> delta_shift;
3192 		value &= value_mask;
3193 
3194 		if (value != 0) {
3195 			value += value_add;
3196 			value += slide_amount;
3197 		}
3198 		memcpy(loc, &value, sizeof(value));
3199 		page_offset += delta;
3200 	}
3201 
3202 	/* If the offset went past the end of the page, then the slide data is invalid. */
3203 	if (page_offset > last_page_offset) {
3204 		return KERN_FAILURE;
3205 	}
3206 	return KERN_SUCCESS;
3207 }
3208 
3209 static kern_return_t
3210 rebase_chain_64(
3211 	uint8_t *page_content,
3212 	uint16_t start_offset,
3213 	uint32_t slide_amount,
3214 	vm_shared_region_slide_info_entry_v2_t s_info)
3215 {
3216 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
3217 
3218 	const uint64_t delta_mask = s_info->delta_mask;
3219 	const uint64_t value_mask = ~delta_mask;
3220 	const uint64_t value_add = s_info->value_add;
3221 	const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3222 
3223 	uint32_t page_offset = start_offset;
3224 	uint32_t delta = 1;
3225 
3226 	while (delta != 0 && page_offset <= last_page_offset) {
3227 		uint8_t *loc;
3228 		uint64_t value;
3229 
3230 		loc = page_content + page_offset;
3231 		memcpy(&value, loc, sizeof(value));
3232 		delta = (uint32_t)((value & delta_mask) >> delta_shift);
3233 		value &= value_mask;
3234 
3235 		if (value != 0) {
3236 			value += value_add;
3237 			value += slide_amount;
3238 		}
3239 		memcpy(loc, &value, sizeof(value));
3240 		page_offset += delta;
3241 	}
3242 
3243 	if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
3244 		/* If a pointer straddling the page boundary needs to be adjusted, then
3245 		 * add the slide to the lower half. The encoding guarantees that the upper
3246 		 * half on the next page will need no masking.
3247 		 *
3248 		 * This assumes a little-endian machine and that the region being slid
3249 		 * never crosses a 4 GB boundary. */
3250 
3251 		uint8_t *loc = page_content + page_offset;
3252 		uint32_t value;
3253 
3254 		memcpy(&value, loc, sizeof(value));
3255 		value += slide_amount;
3256 		memcpy(loc, &value, sizeof(value));
3257 	} else if (page_offset > last_page_offset) {
3258 		return KERN_FAILURE;
3259 	}
3260 
3261 	return KERN_SUCCESS;
3262 }
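/*
 * Worked example for the v2 chain decode above (added for exposition; the
 * numbers are hypothetical).  With delta_mask == 0x00FF000000000000ULL,
 * __builtin_ctzll(delta_mask) is 48, so delta_shift is 46: the encoded delta
 * counts 4-byte units and the "- 2" converts it to bytes.  For a stored value
 * of 0x0004000012345678:
 *   delta = (value & delta_mask) >> delta_shift = 16, so the next pointer in
 *           the chain lives 16 bytes further into the page;
 *   value &= value_mask gives 0x0000000012345678, to which value_add and the
 *           slide are added before it is written back.
 * A delta of 0 terminates the chain for the page.
 */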
3263 
3264 static kern_return_t
3265 rebase_chain(
3266 	boolean_t is_64,
3267 	uint32_t pageIndex,
3268 	uint8_t *page_content,
3269 	uint16_t start_offset,
3270 	uint32_t slide_amount,
3271 	vm_shared_region_slide_info_entry_v2_t s_info)
3272 {
3273 	kern_return_t kr;
3274 	if (is_64) {
3275 		kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3276 	} else {
3277 		kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3278 	}
3279 
3280 	if (kr != KERN_SUCCESS) {
3281 		printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3282 		    pageIndex, start_offset, slide_amount);
3283 	}
3284 	return kr;
3285 }
3286 
3287 static kern_return_t
3288 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3289 {
3290 	vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
3291 	const uint32_t slide_amount = si->si_slide;
3292 
3293 	/* The high bits of the delta_mask field are nonzero precisely when the shared
3294 	 * cache is 64-bit. */
3295 	const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
3296 
3297 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3298 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3299 
3300 	uint8_t *page_content = (uint8_t *)vaddr;
3301 	uint16_t page_entry;
3302 
3303 	if (pageIndex >= s_info->page_starts_count) {
3304 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3305 		    pageIndex, s_info->page_starts_count);
3306 		return KERN_FAILURE;
3307 	}
3308 	page_entry = page_starts[pageIndex];
3309 
3310 	if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
3311 		return KERN_SUCCESS;
3312 	}
3313 
3314 	if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
3315 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
3316 		uint16_t info;
3317 
3318 		do {
3319 			uint16_t page_start_offset;
3320 			kern_return_t kr;
3321 
3322 			if (chain_index >= s_info->page_extras_count) {
3323 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3324 				    chain_index, s_info->page_extras_count);
3325 				return KERN_FAILURE;
3326 			}
3327 			info = page_extras[chain_index];
3328 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3329 
3330 			kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3331 			if (kr != KERN_SUCCESS) {
3332 				return KERN_FAILURE;
3333 			}
3334 
3335 			chain_index++;
3336 		} while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
3337 	} else {
3338 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3339 		kern_return_t kr;
3340 
3341 		kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3342 		if (kr != KERN_SUCCESS) {
3343 			return KERN_FAILURE;
3344 		}
3345 	}
3346 
3347 	return KERN_SUCCESS;
3348 }
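/*
 * Worked example for the page_starts/page_extras dispatch above (added for
 * exposition; the layout is hypothetical).  A page_starts[] entry normally
 * encodes the byte offset of the page's single chain start directly (value
 * << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT).  When a page needs more than one
 * chain, the entry carries DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA and its low bits
 * index into page_extras[]; consecutive extras entries each start one chain
 * until an entry with DYLD_CACHE_SLIDE_PAGE_ATTR_END is reached.  So a page
 * with two independent chains uses one page_starts entry pointing at two
 * page_extras entries, the second of which carries the END attribute.
 */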
3349 
3350 
3351 static kern_return_t
3352 vm_shared_region_slide_page_v3(
3353 	vm_shared_region_slide_info_t si,
3354 	vm_offset_t vaddr,
3355 	__unused mach_vm_offset_t uservaddr,
3356 	uint32_t pageIndex,
3357 #if !__has_feature(ptrauth_calls)
3358 	__unused
3359 #endif /* !__has_feature(ptrauth_calls) */
3360 	uint64_t jop_key)
3361 {
3362 	vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
3363 	const uint32_t slide_amount = si->si_slide;
3364 
3365 	uint8_t *page_content = (uint8_t *)vaddr;
3366 	uint16_t page_entry;
3367 
3368 	if (pageIndex >= s_info->page_starts_count) {
3369 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3370 		    pageIndex, s_info->page_starts_count);
3371 		return KERN_FAILURE;
3372 	}
3373 	page_entry = s_info->page_starts[pageIndex];
3374 
3375 	if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
3376 		return KERN_SUCCESS;
3377 	}
3378 
3379 	uint8_t* rebaseLocation = page_content;
3380 	uint64_t delta = page_entry;
3381 	do {
3382 		rebaseLocation += delta;
3383 		uint64_t value;
3384 		memcpy(&value, rebaseLocation, sizeof(value));
3385 		delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
3386 
3387 		// A pointer is one of :
3388 		// {
3389 		//	 uint64_t pointerValue : 51;
3390 		//	 uint64_t offsetToNextPointer : 11;
3391 		//	 uint64_t isBind : 1 = 0;
3392 		//	 uint64_t authenticated : 1 = 0;
3393 		// }
3394 		// {
3395 		//	 uint32_t offsetFromSharedCacheBase;
3396 		//	 uint16_t diversityData;
3397 		//	 uint16_t hasAddressDiversity : 1;
3398 		//	 uint16_t hasDKey : 1;
3399 		//	 uint16_t hasBKey : 1;
3400 		//	 uint16_t offsetToNextPointer : 11;
3401 		//	 uint16_t isBind : 1;
3402 		//	 uint16_t authenticated : 1 = 1;
3403 		// }
3404 
3405 		bool isBind = (value & (1ULL << 62)) != 0;
3406 		if (isBind) {
3407 #if CONFIG_SPTM
3408 			pmap_batch_sign_user_ptr(NULL, NULL, 0, 0, 0);
3409 			assert(preemption_enabled());
3410 #endif /* CONFIG_SPTM */
3411 			return KERN_FAILURE;
3412 		}
3413 
3414 #if __has_feature(ptrauth_calls)
3415 		uint16_t diversity_data = (uint16_t)(value >> 32);
3416 		bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
3417 		ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
3418 #endif /* __has_feature(ptrauth_calls) */
3419 		bool isAuthenticated = (value & (1ULL << 63)) != 0;
3420 
3421 		if (isAuthenticated) {
3422 			// The new value for a rebase is the low 32-bits of the threaded value plus the slide.
3423 			value = (value & 0xFFFFFFFF) + slide_amount;
3424 			// Add in the offset from the mach_header
3425 			const uint64_t value_add = s_info->value_add;
3426 			value += value_add;
3427 
3428 #if __has_feature(ptrauth_calls)
3429 			uint64_t discriminator = diversity_data;
3430 			if (hasAddressDiversity) {
3431 				// First calculate a new discriminator using the address of where we are trying to store the value
3432 				uintptr_t pageOffset = rebaseLocation - page_content;
3433 				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3434 			}
3435 
3436 			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3437 #if CONFIG_SPTM
3438 				pmap_batch_sign_user_ptr(rebaseLocation, (void *)value, key, discriminator, jop_key);
3439 #else /* CONFIG_SPTM */
3440 				/*
3441 				 * these pointers are used in user mode. disable the kernel key diversification
3442 				 * so we can sign them for use in user mode.
3443 				 */
3444 				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3445 				memcpy(rebaseLocation, &value, sizeof(value));
3446 #endif /* CONFIG_SPTM */
3447 			} else {
3448 				memcpy(rebaseLocation, &value, sizeof(value));
3449 			}
3450 #endif /* __has_feature(ptrauth_calls) */
3451 		} else {
3452 			// The new value for a rebase is the low 51-bits of the threaded value plus the slide.
3453 			// Regular pointer which needs to fit in 51-bits of value.
3454 			// C++ RTTI uses the top bit, so we'll allow the whole top-byte
3455 			// and the bottom 43-bits to fit into the 51-bits.
3456 			uint64_t top8Bits = value & 0x0007F80000000000ULL;
3457 			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
3458 			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
3459 			value = targetValue + slide_amount;
3460 			memcpy(rebaseLocation, &value, sizeof(value));
3461 		}
3462 	} while (delta != 0);
3463 
3464 #if CONFIG_SPTM
3465 	/* Sign the leftovers, if there are any. */
3466 	pmap_batch_sign_user_ptr(NULL, NULL, 0, 0, 0);
3467 	assert(preemption_enabled());
3468 #endif /* CONFIG_SPTM */
3469 
3470 	return KERN_SUCCESS;
3471 }
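/*
 * Illustrative sketch of the non-authenticated v3 decode above (added for
 * exposition; not part of the original source).  Bits 51..61 of the on-disk
 * value hold the distance, in 8-byte strides, to the next pointer in the
 * chain; the target is rebuilt from the top 8 bits and the bottom 43 bits
 * before the slide is added.  All names are hypothetical.
 */
#if 0 /* illustrative only, never compiled */
#include <stdint.h>

struct hypothetical_v3_plain {
	uint64_t target;        /* rebased pointer value */
	uint64_t next_delta;    /* byte offset to the next chained pointer, 0 = end */
};

static struct hypothetical_v3_plain
hypothetical_decode_v3_plain(uint64_t value, uint32_t slide)
{
	struct hypothetical_v3_plain out;

	out.next_delta = ((value & 0x3FF8000000000000ULL) >> 51) * sizeof(uint64_t);

	uint64_t top8     = value & 0x0007F80000000000ULL;  /* bits 43..50 */
	uint64_t bottom43 = value & 0x000007FFFFFFFFFFULL;  /* bits 0..42  */
	out.target = ((top8 << 13) | bottom43) + slide;
	return out;
}
#endif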
3472 
3473 static kern_return_t
3474 rebase_chainv4(
3475 	uint8_t *page_content,
3476 	uint16_t start_offset,
3477 	uint32_t slide_amount,
3478 	vm_shared_region_slide_info_entry_v4_t s_info)
3479 {
3480 	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3481 
3482 	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3483 	const uint32_t value_mask = ~delta_mask;
3484 	const uint32_t value_add = (uint32_t)(s_info->value_add);
3485 	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3486 
3487 	uint32_t page_offset = start_offset;
3488 	uint32_t delta = 1;
3489 
3490 	while (delta != 0 && page_offset <= last_page_offset) {
3491 		uint8_t *loc;
3492 		uint32_t value;
3493 
3494 		loc = page_content + page_offset;
3495 		memcpy(&value, loc, sizeof(value));
3496 		delta = (value & delta_mask) >> delta_shift;
3497 		value &= value_mask;
3498 
3499 		if ((value & 0xFFFF8000) == 0) {
3500 			// small positive non-pointer, use as-is
3501 		} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
3502 			// small negative non-pointer
3503 			value |= 0xC0000000;
3504 		} else {
3505 			// pointer that needs rebasing
3506 			value += value_add;
3507 			value += slide_amount;
3508 		}
3509 		memcpy(loc, &value, sizeof(value));
3510 		page_offset += delta;
3511 	}
3512 
3513 	/* If the offset went past the end of the page, then the slide data is invalid. */
3514 	if (page_offset > last_page_offset) {
3515 		return KERN_FAILURE;
3516 	}
3517 	return KERN_SUCCESS;
3518 }
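/*
 * Worked example for the v4 value classification above (added for exposition;
 * the numbers are hypothetical).  After the delta bits are masked off:
 *   0x00003FFF -> bits 31..15 are clear: small positive non-pointer, kept as-is;
 *   0x3FFF8001 -> bits 29..15 (mask 0x3FFF8000) are all set: small negative
 *                 non-pointer, sign-extended by OR-ing in 0xC0000000, giving
 *                 0xFFFF8001 (-32767 as a 32-bit integer);
 *   0x00100000 -> anything else is a pointer: value_add and the slide are
 *                 added before the value is written back.
 */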
3519 
3520 static kern_return_t
3521 vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3522 {
3523 	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
3524 	const uint32_t slide_amount = si->si_slide;
3525 
3526 	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3527 	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3528 
3529 	uint8_t *page_content = (uint8_t *)vaddr;
3530 	uint16_t page_entry;
3531 
3532 	if (pageIndex >= s_info->page_starts_count) {
3533 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3534 		    pageIndex, s_info->page_starts_count);
3535 		return KERN_FAILURE;
3536 	}
3537 	page_entry = page_starts[pageIndex];
3538 
3539 	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
3540 		return KERN_SUCCESS;
3541 	}
3542 
3543 	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
3544 		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
3545 		uint16_t info;
3546 
3547 		do {
3548 			uint16_t page_start_offset;
3549 			kern_return_t kr;
3550 
3551 			if (chain_index >= s_info->page_extras_count) {
3552 				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3553 				    chain_index, s_info->page_extras_count);
3554 				return KERN_FAILURE;
3555 			}
3556 			info = page_extras[chain_index];
3557 			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3558 
3559 			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3560 			if (kr != KERN_SUCCESS) {
3561 				return KERN_FAILURE;
3562 			}
3563 
3564 			chain_index++;
3565 		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
3566 	} else {
3567 		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3568 		kern_return_t kr;
3569 
3570 		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3571 		if (kr != KERN_SUCCESS) {
3572 			return KERN_FAILURE;
3573 		}
3574 	}
3575 
3576 	return KERN_SUCCESS;
3577 }
3578 
3579 
3580 static kern_return_t
3581 vm_shared_region_slide_page_v5(
3582 	vm_shared_region_slide_info_t si,
3583 	vm_offset_t vaddr,
3584 	__unused mach_vm_offset_t uservaddr,
3585 	uint32_t pageIndex,
3586 #if !__has_feature(ptrauth_calls)
3587 	__unused
3588 #endif /* !__has_feature(ptrauth_calls) */
3589 	uint64_t jop_key)
3590 {
3591 	vm_shared_region_slide_info_entry_v5_t s_info = &si->si_slide_info_entry->v5;
3592 	const uint32_t slide_amount = si->si_slide;
3593 	const uint64_t value_add = s_info->value_add;
3594 
3595 	uint8_t *page_content = (uint8_t *)vaddr;
3596 	uint16_t page_entry;
3597 
3598 	if (pageIndex >= s_info->page_starts_count) {
3599 		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3600 		    pageIndex, s_info->page_starts_count);
3601 		return KERN_FAILURE;
3602 	}
3603 	page_entry = s_info->page_starts[pageIndex];
3604 
3605 	if (page_entry == DYLD_CACHE_SLIDE_V5_PAGE_ATTR_NO_REBASE) {
3606 		return KERN_SUCCESS;
3607 	}
3608 
3609 	uint8_t* rebaseLocation = page_content;
3610 	uint64_t delta = page_entry;
3611 	do {
3612 		rebaseLocation += delta;
3613 		uint64_t value;
3614 		memcpy(&value, rebaseLocation, sizeof(value));
3615 		delta = ((value & 0x7FF0000000000000ULL) >> 52) * sizeof(uint64_t);
3616 
3617 		// A pointer is one of :
3618 		// {
3619 		//   uint64_t    runtimeOffset   : 34,   // offset from the start of the shared cache
3620 		//               high8           :  8,
3621 		//               unused          : 10,
3622 		//               next            : 11,   // 8-byte stride
3623 		//               auth            :  1;   // == 0
3624 		// }
3625 		// {
3626 		//   uint64_t    runtimeOffset   : 34,   // offset from the start of the shared cache
3627 		//               diversity       : 16,
3628 		//               addrDiv         :  1,
3629 		//               keyIsData       :  1,   // implicitly always the 'A' key.  0 -> IA.  1 -> DA
3630 		//               next            : 11,   // 8-byte stride
3631 		//               auth            :  1;   // == 1
3632 		// }
3633 
3634 #if __has_feature(ptrauth_calls)
3635 		bool        addrDiv = ((value & (1ULL << 50)) != 0);
3636 		bool        keyIsData = ((value & (1ULL << 51)) != 0);
3637 		// the key is always the A key, and the bit tells us whether it's IA or DA
3638 		ptrauth_key key = keyIsData ? ptrauth_key_asda : ptrauth_key_asia;
3639 		uint16_t    diversity = (uint16_t)((value >> 34) & 0xFFFF);
3640 #endif /* __has_feature(ptrauth_calls) */
3641 		uint64_t    high8 = (value << 22) & 0xFF00000000000000ULL;
3642 		bool        isAuthenticated = (value & (1ULL << 63)) != 0;
3643 
3644 		// The new value for a rebase is the low 34-bits of the threaded value plus the base plus slide.
3645 		value = (value & 0x3FFFFFFFFULL) + value_add + slide_amount;
3646 		if (isAuthenticated) {
3647 #if __has_feature(ptrauth_calls)
3648 			uint64_t discriminator = diversity;
3649 			if (addrDiv) {
3650 				// First calculate a new discriminator using the address of where we are trying to store the value
3651 				uintptr_t pageOffset = rebaseLocation - page_content;
3652 				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3653 			}
3654 
3655 			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3656 #if CONFIG_SPTM
3657 				pmap_batch_sign_user_ptr(rebaseLocation, (void *)value, key, discriminator, jop_key);
3658 #else /* CONFIG_SPTM */
3659 				/*
3660 				 * these pointers are used in user mode. disable the kernel key diversification
3661 				 * so we can sign them for use in user mode.
3662 				 */
3663 				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3664 				memcpy(rebaseLocation, &value, sizeof(value));
3665 #endif /* CONFIG_SPTM */
3666 			} else {
3667 				memcpy(rebaseLocation, &value, sizeof(value));
3668 			}
3669 #endif /* __has_feature(ptrauth_calls) */
3670 		} else {
3671 			// the value already has the correct low bits, so just add in the high8 if it exists
3672 			value += high8;
3673 			memcpy(rebaseLocation, &value, sizeof(value));
3674 		}
3675 	} while (delta != 0);
3676 
3677 #if CONFIG_SPTM
3678 	/* Sign the leftovers, if there are any. */
3679 	pmap_batch_sign_user_ptr(NULL, NULL, 0, 0, 0);
3680 	assert(preemption_enabled());
3681 #endif /* CONFIG_SPTM */
3682 
3683 	return KERN_SUCCESS;
3684 }
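/*
 * Illustrative sketch of the non-authenticated v5 decode above (added for
 * exposition; not part of the original source).  Bits 52..62 hold the stride,
 * in 8-byte units, to the next pointer in the chain; bits 0..33 hold the
 * offset from the start of the shared cache; and bits 34..41 hold the "high8"
 * byte that is re-installed as the top byte of the rebased pointer.  All
 * names are hypothetical.
 */
#if 0 /* illustrative only, never compiled */
#include <stdint.h>

struct hypothetical_v5_plain {
	uint64_t target;        /* rebased pointer value */
	uint64_t next_delta;    /* byte offset to the next chained pointer, 0 = end */
};

static struct hypothetical_v5_plain
hypothetical_decode_v5_plain(uint64_t value, uint64_t value_add, uint32_t slide)
{
	struct hypothetical_v5_plain out;

	out.next_delta = ((value & 0x7FF0000000000000ULL) >> 52) * sizeof(uint64_t);

	/* move bits 34..41 of the encoding up to bits 56..63 of the pointer */
	uint64_t high8 = (value << 22) & 0xFF00000000000000ULL;
	out.target = (value & 0x3FFFFFFFFULL) + value_add + slide + high8;
	return out;
}
#endif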
3685 
3686 
3687 
3688 kern_return_t
3689 vm_shared_region_slide_page(
3690 	vm_shared_region_slide_info_t si,
3691 	vm_offset_t vaddr,
3692 	mach_vm_offset_t uservaddr,
3693 	uint32_t pageIndex,
3694 	uint64_t jop_key)
3695 {
3696 	switch (si->si_slide_info_entry->version) {
3697 	case 1:
3698 		return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
3699 	case 2:
3700 		return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3701 	case 3:
3702 		return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3703 	case 4:
3704 		return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3705 	case 5:
3706 		return vm_shared_region_slide_page_v5(si, vaddr, uservaddr, pageIndex, jop_key);
3707 	default:
3708 		return KERN_FAILURE;
3709 	}
3710 }
3711 
3712 /******************************************************************************/
3713 /* Comm page support                                                          */
3714 /******************************************************************************/
3715 
3716 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
3717 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
3718 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
3719 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
3720 SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
3721 SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;
3722 
3723 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
3724 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
3725 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
3726 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
3727 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
3728 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;
3729 
3730 SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
3731 SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3732 
3733 #if defined(__i386__) || defined(__x86_64__)
3734 /*
3735  * Create a memory entry, VM submap and pmap for one commpage.
3736  */
3737 static void
3738 _vm_commpage_init(
3739 	ipc_port_t      *handlep,
3740 	vm_map_size_t   size)
3741 {
3742 	vm_named_entry_t        mem_entry;
3743 	vm_map_t                new_map;
3744 
3745 	SHARED_REGION_TRACE_DEBUG(
3746 		("commpage: -> _init(0x%llx)\n",
3747 		(long long)size));
3748 
3749 	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
3750 	if (new_pmap == NULL) {
3751 		panic("_vm_commpage_init: could not allocate pmap");
3752 	}
3753 	new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);
3754 	new_map->vmmap_sealed = VM_MAP_WILL_BE_SEALED;
3755 
3756 	mem_entry = mach_memory_entry_allocate(handlep);
3757 	mem_entry->backing.map = new_map;
3758 	mem_entry->internal = TRUE;
3759 	mem_entry->is_sub_map = TRUE;
3760 	mem_entry->offset = 0;
3761 	mem_entry->protection = VM_PROT_ALL;
3762 	mem_entry->size = size;
3763 
3764 	SHARED_REGION_TRACE_DEBUG(
3765 		("commpage: _init(0x%llx) <- %p\n",
3766 		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
3767 }
3768 #endif
3769 
3770 
3771 /*
3772  * Initialize the comm text pages at boot time
3773  */
3774 void
3775 vm_commpage_text_init(void)
3776 {
3777 	SHARED_REGION_TRACE_DEBUG(
3778 		("commpage text: ->init()\n"));
3779 #if defined(__i386__) || defined(__x86_64__)
3780 	/* create the 32 bit comm text page */
3781 	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to 32-bit max minus 2 pages */
3782 	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3783 	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
3784 	commpage_text32_map = commpage_text32_entry->backing.map;
3785 	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
3786 	/* XXX if (cpu_is_64bit_capable()) ? */
3787 	/* create the 64-bit comm page */
3788 	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict sliding to at most a 2 MB range */
3789 	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3790 	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
3791 	commpage_text64_map = commpage_text64_entry->backing.map;
3792 	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
3793 #endif
3794 
3795 	commpage_text_populate();
3796 
3797 	/* populate the routines in here */
3798 	SHARED_REGION_TRACE_DEBUG(
3799 		("commpage text: init() <-\n"));
3800 }
3801 
3802 /*
3803  * Initialize the comm pages at boot time.
3804  */
3805 void
3806 vm_commpage_init(void)
3807 {
3808 	SHARED_REGION_TRACE_DEBUG(
3809 		("commpage: -> init()\n"));
3810 
3811 #if defined(__i386__) || defined(__x86_64__)
3812 	/* create the 32-bit comm page */
3813 	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
3814 	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
3815 	commpage32_map = commpage32_entry->backing.map;
3816 
3817 	/* XXX if (cpu_is_64bit_capable()) ? */
3818 	/* create the 64-bit comm page */
3819 	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
3820 	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
3821 	commpage64_map = commpage64_entry->backing.map;
3822 
3823 #endif /* __i386__ || __x86_64__ */
3824 
3825 	/* populate them according to this specific platform */
3826 	commpage_populate();
3827 	__commpage_setup = 1;
3828 #if XNU_TARGET_OS_OSX
3829 	if (__system_power_source == 0) {
3830 		post_sys_powersource_internal(0, 1);
3831 	}
3832 #endif /* XNU_TARGET_OS_OSX */
3833 
3834 	SHARED_REGION_TRACE_DEBUG(
3835 		("commpage: init() <-\n"));
3836 }
3837 
3838 /*
3839  * Enter the appropriate comm page into the task's address space.
3840  * This is called at exec() time via vm_map_exec().
3841  */
3842 kern_return_t
3843 vm_commpage_enter(
3844 	vm_map_t        map,
3845 	task_t          task,
3846 	boolean_t       is64bit)
3847 {
3848 #if   defined(__arm64__)
3849 #pragma unused(is64bit)
3850 	(void)task;
3851 	(void)map;
3852 	pmap_insert_commpage(vm_map_pmap(map));
3853 	return KERN_SUCCESS;
3854 #else
3855 	ipc_port_t              commpage_handle, commpage_text_handle;
3856 	vm_map_offset_t         commpage_address, objc_address, commpage_text_address;
3857 	vm_map_size_t           commpage_size, objc_size, commpage_text_size;
3858 	vm_map_kernel_flags_t   vmk_flags;
3859 	kern_return_t           kr;
3860 
3861 	SHARED_REGION_TRACE_DEBUG(
3862 		("commpage: -> enter(%p,%p)\n",
3863 		(void *)VM_KERNEL_ADDRPERM(map),
3864 		(void *)VM_KERNEL_ADDRPERM(task)));
3865 
3866 	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
3867 	/* the comm page is likely to be beyond the actual end of the VM map */
3868 	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
3869 	vmk_flags.vmkf_beyond_max = TRUE;
3870 
3871 	/* select the appropriate comm page for this task */
3872 	assert(!(is64bit ^ vm_map_is_64bit(map)));
3873 	if (is64bit) {
3874 		commpage_handle = commpage64_handle;
3875 		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
3876 		commpage_size = _COMM_PAGE64_AREA_LENGTH;
3877 		objc_size = _COMM_PAGE64_OBJC_SIZE;
3878 		objc_address = _COMM_PAGE64_OBJC_BASE;
3879 		commpage_text_handle = commpage_text64_handle;
3880 		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
3881 	} else {
3882 		commpage_handle = commpage32_handle;
3883 		commpage_address =
3884 		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
3885 		commpage_size = _COMM_PAGE32_AREA_LENGTH;
3886 		objc_size = _COMM_PAGE32_OBJC_SIZE;
3887 		objc_address = _COMM_PAGE32_OBJC_BASE;
3888 		commpage_text_handle = commpage_text32_handle;
3889 		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
3890 	}
3891 
3892 	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
3893 	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
3894 		/* the commpage is properly aligned and sized for pmap-nesting */
3895 		vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
3896 		vmk_flags.vmkf_nested_pmap = TRUE;
3897 	}
3898 
3899 	/* map the comm page in the task's address space */
3900 	assert(commpage_handle != IPC_PORT_NULL);
3901 	kr = mach_vm_map_kernel(
3902 		map,
3903 		vm_sanitize_wrap_addr_ref(&commpage_address),
3904 		commpage_size,
3905 		0,
3906 		vmk_flags,
3907 		commpage_handle,
3908 		0,
3909 		FALSE,
3910 		VM_PROT_READ,
3911 		VM_PROT_READ,
3912 		VM_INHERIT_SHARE);
3913 	if (kr != KERN_SUCCESS) {
3914 		SHARED_REGION_TRACE_ERROR(
3915 			("commpage: enter(%p,0x%llx,0x%llx) "
3916 			"commpage %p mapping failed 0x%x\n",
3917 			(void *)VM_KERNEL_ADDRPERM(map),
3918 			(long long)commpage_address,
3919 			(long long)commpage_size,
3920 			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
3921 	}
3922 
3923 	/* map the comm text page in the task's address space */
3924 	assert(commpage_text_handle != IPC_PORT_NULL);
3925 	kr = mach_vm_map_kernel(
3926 		map,
3927 		vm_sanitize_wrap_addr_ref(&commpage_text_address),
3928 		commpage_text_size,
3929 		0,
3930 		vmk_flags,
3931 		commpage_text_handle,
3932 		0,
3933 		FALSE,
3934 		VM_PROT_READ | VM_PROT_EXECUTE,
3935 		VM_PROT_READ | VM_PROT_EXECUTE,
3936 		VM_INHERIT_SHARE);
3937 	if (kr != KERN_SUCCESS) {
3938 		SHARED_REGION_TRACE_ERROR(
3939 			("commpage text: enter(%p,0x%llx,0x%llx) "
3940 			"commpage text %p mapping failed 0x%x\n",
3941 			(void *)VM_KERNEL_ADDRPERM(map),
3942 			(long long)commpage_text_address,
3943 			(long long)commpage_text_size,
3944 			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
3945 	}
3946 
3947 	/*
3948 	 * Since we're here, we also pre-allocate some virtual space for the
3949 	 * Objective-C run-time, if needed...
3950 	 */
3951 	if (objc_size != 0) {
3952 		kr = mach_vm_map_kernel(
3953 			map,
3954 			vm_sanitize_wrap_addr_ref(&objc_address),
3955 			objc_size,
3956 			0,
3957 			vmk_flags,
3958 			IPC_PORT_NULL,
3959 			0,
3960 			FALSE,
3961 			VM_PROT_ALL,
3962 			VM_PROT_ALL,
3963 			VM_INHERIT_DEFAULT);
3964 		if (kr != KERN_SUCCESS) {
3965 			SHARED_REGION_TRACE_ERROR(
3966 				("commpage: enter(%p,0x%llx,0x%llx) "
3967 				"objc mapping failed 0x%x\n",
3968 				(void *)VM_KERNEL_ADDRPERM(map),
3969 				(long long)objc_address,
3970 				(long long)objc_size, kr));
3971 		}
3972 	}
3973 
3974 	SHARED_REGION_TRACE_DEBUG(
3975 		("commpage: enter(%p,%p) <- 0x%x\n",
3976 		(void *)VM_KERNEL_ADDRPERM(map),
3977 		(void *)VM_KERNEL_ADDRPERM(task), kr));
3978 	return kr;
3979 #endif
3980 }
3981 
3982 int
3983 vm_shared_region_slide(
3984 	uint32_t slide,
3985 	mach_vm_offset_t        entry_start_address,
3986 	mach_vm_size_t          entry_size,
3987 	mach_vm_offset_t        slide_start,
3988 	mach_vm_size_t          slide_size,
3989 	mach_vm_offset_t        slid_mapping,
3990 	memory_object_control_t sr_file_control,
3991 	vm_prot_t               prot)
3992 {
3993 	vm_shared_region_t      sr;
3994 	kern_return_t           error;
3995 
3996 	SHARED_REGION_TRACE_DEBUG(
3997 		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
3998 		slide, entry_start_address, entry_size, slide_start, slide_size));
3999 
4000 	sr = vm_shared_region_get(current_task());
4001 	if (sr == NULL) {
4002 		printf("%s: no shared region?\n", __FUNCTION__);
4003 		SHARED_REGION_TRACE_DEBUG(
4004 			("vm_shared_region_slide: <- %d (no shared region)\n",
4005 			KERN_FAILURE));
4006 		return KERN_FAILURE;
4007 	}
4008 
4009 	/*
4010 	 * Protect from concurrent access.
4011 	 */
4012 	vm_shared_region_lock();
4013 	while (sr->sr_slide_in_progress) {
4014 		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
4015 	}
4016 
4017 	sr->sr_slide_in_progress = current_thread();
4018 	vm_shared_region_unlock();
4019 
4020 	error = vm_shared_region_slide_mapping(sr,
4021 	    (user_addr_t)slide_start,
4022 	    slide_size,
4023 	    entry_start_address,
4024 	    entry_size,
4025 	    slid_mapping,
4026 	    slide,
4027 	    sr_file_control,
4028 	    prot);
4029 	if (error) {
4030 		printf("slide_info initialization failed with kr=%d\n", error);
4031 	}
4032 
4033 	vm_shared_region_lock();
4034 
4035 	assert(sr->sr_slide_in_progress == current_thread());
4036 	sr->sr_slide_in_progress = THREAD_NULL;
4037 	vm_shared_region_wakeup(&sr->sr_slide_in_progress);
4038 
4039 #if XNU_TARGET_OS_OSX
4040 	if (error == KERN_SUCCESS) {
4041 		shared_region_completed_slide = TRUE;
4042 	}
4043 #endif /* XNU_TARGET_OS_OSX */
4044 	vm_shared_region_unlock();
4045 
4046 	vm_shared_region_deallocate(sr);
4047 
4048 	SHARED_REGION_TRACE_DEBUG(
4049 		("vm_shared_region_slide: <- %d\n",
4050 		error));
4051 
4052 	return error;
4053 }
4054 
4055 /*
4056  * Used during Authenticated Root Volume macOS boot.
4057  * Launchd re-execs itself and wants the new launchd to use
4058  * the shared cache from the new root volume. This call
4059  * makes all the existing shared caches stale to allow
4060  * that to happen.
4061  */
4062 void
4063 vm_shared_region_pivot(void)
4064 {
4065 	vm_shared_region_t      shared_region = NULL;
4066 
4067 	vm_shared_region_lock();
4068 
4069 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
4070 		assert(shared_region->sr_ref_count > 0);
4071 		shared_region->sr_stale = TRUE;
4072 		if (shared_region->sr_timer_call) {
4073 			/*
4074 			 * We have a shared region ready to be destroyed
4075 			 * and just waiting for a delayed timer to fire.
4076 			 * Marking it stale cements its ineligibility to
4077 			 * be used ever again. So let's shorten the timer
4078 			 * aggressively down to 10 milliseconds and get rid of it.
4079 			 * This is a single quantum and we don't need to go
4080 			 * shorter than this duration. We want it to be short
4081 			 * enough, however, because we could have an unmount
4082 			 * of the volume hosting this shared region just behind
4083 			 * us.
4084 			 */
4085 			uint64_t deadline;
4086 			assert(shared_region->sr_ref_count == 1);
4087 
4088 			/*
4089 			 * Free the old timer call. Returns with a reference held.
4090 			 * If the old timer has fired and is waiting for the vm_shared_region_lock
4091 			 * lock, we will just return with an additional ref_count i.e. 2.
4092 			 * The old timer will then fire and just drop the ref count down to 1
4093 			 * with no other modifications.
4094 			 */
4095 			vm_shared_region_reference_locked(shared_region);
4096 
4097 			/* set up the timer. Keep the reference from above for this timer.*/
4098 			shared_region->sr_timer_call = thread_call_allocate(
4099 				(thread_call_func_t) vm_shared_region_timeout,
4100 				(thread_call_param_t) shared_region);
4101 
4102 			/* schedule the timer */
4103 			clock_interval_to_deadline(10, /* 10 milliseconds */
4104 			    NSEC_PER_MSEC,
4105 			    &deadline);
4106 			thread_call_enter_delayed(shared_region->sr_timer_call,
4107 			    deadline);
4108 
4109 			SHARED_REGION_TRACE_DEBUG(
4110 				("shared_region: pivot(%p): armed timer\n",
4111 				(void *)VM_KERNEL_ADDRPERM(shared_region)));
4112 		}
4113 	}
4114 
4115 	vm_shared_region_unlock();
4116 }
4117 
4118 /*
4119  * Routine to mark any non-standard slide shared cache region as stale.
4120  * This causes the next "reslide" spawn to create a new shared region.
4121  */
4122 void
4123 vm_shared_region_reslide_stale(boolean_t driverkit)
4124 {
4125 #if __has_feature(ptrauth_calls)
4126 	vm_shared_region_t      shared_region = NULL;
4127 
4128 	vm_shared_region_lock();
4129 
4130 	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
4131 		assert(shared_region->sr_ref_count > 0);
4132 		if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
4133 			shared_region->sr_stale = TRUE;
4134 			vm_shared_region_reslide_count++;
4135 		}
4136 	}
4137 
4138 	vm_shared_region_unlock();
4139 #else
4140 	(void)driverkit;
4141 #endif /* __has_feature(ptrauth_calls) */
4142 }
4143 
4144 /*
4145  * Report whether the task is using a reslide shared cache region.
4146  */
4147 bool
4148 vm_shared_region_is_reslide(__unused struct task *task)
4149 {
4150 	bool is_reslide = FALSE;
4151 #if __has_feature(ptrauth_calls)
4152 	vm_shared_region_t sr = vm_shared_region_get(task);
4153 
4154 	if (sr != NULL) {
4155 		is_reslide = sr->sr_reslide;
4156 		vm_shared_region_deallocate(sr);
4157 	}
4158 #endif /* __has_feature(ptrauth_calls) */
4159 	return is_reslide;
4160 }
4161 
4162 /*
4163  * This is called from the power management code to let the kernel know the current source of power:
4164  * 0 if it is an external source (connected to power),
4165  * 1 if it is an internal power source, i.e. the battery.
4166  */
4167 void
4168 #if XNU_TARGET_OS_OSX
4169 post_sys_powersource(int i)
4170 #else /* XNU_TARGET_OS_OSX */
4171 post_sys_powersource(__unused int i)
4172 #endif /* XNU_TARGET_OS_OSX */
4173 {
4174 #if XNU_TARGET_OS_OSX
4175 	post_sys_powersource_internal(i, 0);
4176 #endif /* XNU_TARGET_OS_OSX */
4177 }
4178 
4179 
4180 #if XNU_TARGET_OS_OSX
4181 static void
4182 post_sys_powersource_internal(int i, int internal)
4183 {
4184 	if (internal == 0) {
4185 		__system_power_source = i;
4186 	}
4187 }
4188 #endif /* XNU_TARGET_OS_OSX */
4189 
4190 void *
4191 vm_shared_region_root_dir(
4192 	struct vm_shared_region *sr)
4193 {
4194 	void *vnode;
4195 
4196 	vm_shared_region_lock();
4197 	vnode = sr->sr_root_dir;
4198 	vm_shared_region_unlock();
4199 	return vnode;
4200 }
4201