1 /*
2 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * Shared region (... and comm page)
26 *
27 * This file handles the VM shared region and comm page.
28 *
29 */
30 /*
31 * SHARED REGIONS
32 * --------------
33 *
34 * A shared region is a submap that contains the most common system shared
35 * libraries for a given environment which is defined by:
36 * - cpu-type
37 * - 64-bitness
38 * - root directory
39 * - Team ID - when we have pointer authentication.
40 *
41 * The point of a shared region is to reduce the setup overhead when exec'ing
42 * a new process. A shared region uses a shared VM submap that gets mapped
43 * automatically at exec() time, see vm_map_exec(). The first process of a given
44 * environment sets up the shared region and all further processes in that
45 * environment can re-use that shared region without having to re-create
46 * the same mappings in their VM map. All they need is contained in the shared
47 * region.
48 *
49 * The region can also share a pmap (mostly for read-only parts but also for the
50 * initial version of some writable parts), which gets "nested" into the
51 * process's pmap. This reduces the number of soft faults: once one process
52 * brings in a page in the shared region, all the other processes can access
53 * it without having to enter it in their own pmap.
54 *
55 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56 * to map the appropriate shared region in the process's address space.
57 * We look up the appropriate shared region for the process's environment.
58 * If we can't find one, we create a new (empty) one and add it to the list.
59 * Otherwise, we just take an extra reference on the shared region we found.
60 *
61 * The "dyld" runtime, mapped into the process's address space at exec() time,
62 * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
63 * system calls to validate and/or populate the shared region with the
64 * appropriate dyld_shared_cache file.
65 *
66 * The shared region is inherited on fork() and the child simply takes an
67 * extra reference on its parent's shared region.
68 *
69 * When the task terminates, we release the reference on its shared region.
70 * When the last reference is released, we destroy the shared region.
71 *
72 * After a chroot(), the calling process keeps using its original shared region,
73 * since that's what was mapped when it was started. But its children
74 * will use a different shared region, because they need to use the shared
75 * cache that's relative to the new root directory.
76 */
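/*
 * Rough lifecycle sketch (illustrative only; the real call sites are in
 * vm_map_exec() and the task lifecycle code):
 *
 *	// exec(): find or create the region for this environment.
 *	// vm_shared_region_lookup() returns with an extra reference,
 *	// which stays with the task once vm_shared_region_set() installs
 *	// the region on it.
 *	sr = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit,
 *	    page_shift, reslide, is_driverkit, rsr_version);
 *	vm_shared_region_set(task, sr);
 *
 *	// fork(): the child just takes another reference on the parent's
 *	// region (vm_shared_region_reference()).
 *
 *	// task termination: passing NULL releases the task's reference;
 *	// dropping the last reference arms the delayed-destroy timer
 *	// (see vm_shared_region_deallocate()).
 *	vm_shared_region_set(task, NULL);
 */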
77
78 /*
79 * COMM PAGE
80 *
81 * A "comm page" is an area of memory that is populated by the kernel with
82 * the appropriate platform-specific version of some commonly used code.
83 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84 * for the native cpu-type. No need to overly optimize translated code
85 * for hardware that is not really there !
86 *
87 * The comm pages are created and populated at boot time.
88 *
89 * The appropriate comm page is mapped into a process's address space
90 * at exec() time, in vm_map_exec(). It is then inherited on fork().
91 *
92 * The comm page is shared between the kernel and all applications of
93 * a given platform. Only the kernel can modify it.
94 *
95 * Applications just branch to fixed addresses in the comm page and find
96 * the right version of the code for the platform. There is also some
97 * data provided and updated by the kernel for processes to retrieve easily
98 * without having to do a system call.
99 */
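/*
 * Illustrative user-space view (sketch only): commonly needed values can
 * be read with a plain load from a fixed address instead of a system call.
 * The _COMM_PAGE_* offsets come from <machine/cpu_capabilities.h>; the
 * field below is just an example and differs per architecture.
 *
 *	// hypothetical example, assuming a _COMM_PAGE_NCPUS-style byte
 *	// field exported by the kernel:
 *	uint8_t ncpus = *(volatile uint8_t *)_COMM_PAGE_NCPUS;
 */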
100
101 #include <debug.h>
102
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106
107 #include <mach/mach_vm.h>
108 #include <mach/machine.h>
109
110 #include <vm/vm_map.h>
111 #include <vm/vm_map_internal.h>
112 #include <vm/vm_shared_region.h>
113
114 #include <vm/vm_protos.h>
115
116 #include <machine/commpage.h>
117 #include <machine/cpu_capabilities.h>
118 #include <sys/random.h>
119 #include <sys/errno.h>
120
121 #if defined(__arm64__)
122 #include <arm/cpu_data_internal.h>
123 #include <arm/misc_protos.h>
124 #endif
125
126 /*
127 * the following codes are used in the subclass
128 * of the DBG_MACH_SHAREDREGION class
129 */
130 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
131
132 #if __has_feature(ptrauth_calls)
133 #include <ptrauth.h>
134 #endif /* __has_feature(ptrauth_calls) */
135
136 /* "dyld" uses this to figure out what the kernel supports */
137 int shared_region_version = 3;
138
139 /* trace level, output is sent to the system log file */
140 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
141
142 /* should local (non-chroot) shared regions persist when no task uses them ? */
143 int shared_region_persistence = 0; /* no by default */
144
145
146 /* delay in seconds before reclaiming an unused shared region */
147 TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
148
149 /*
150 * Cached pointer to the most recently mapped shared region from PID 1, which should
151 * be the most commonly mapped shared region in the system. There are many processes
152 * which do not use this, for a variety of reasons.
153 *
154 * The main consumer of this is stackshot.
155 */
156 struct vm_shared_region *primary_system_shared_region = NULL;
157
158 #if XNU_TARGET_OS_OSX
159 /*
160 * Only one cache gets to slide on Desktop, since we can't
161 * tear down slide info properly today and the desktop actually
162 * produces lots of shared caches.
163 */
164 boolean_t shared_region_completed_slide = FALSE;
165 #endif /* XNU_TARGET_OS_OSX */
166
167 /* this lock protects all the shared region data structures */
168 static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
169 static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
170
171 #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
172 #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
173 #define vm_shared_region_sleep(event, interruptible) \
174 lck_mtx_sleep(&vm_shared_region_lock, \
175 LCK_SLEEP_DEFAULT, \
176 (event_t) (event), \
177 (interruptible))
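/*
 * Typical usage of the lock/sleep pair, as used throughout this file
 * (sketch of the idiom, not a complete function):
 *
 *	vm_shared_region_lock();
 *	while (shared_region->sr_mapping_in_progress) {
 *		// wait for any in-flight operation on this region
 *		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
 *		    THREAD_UNINT);
 *	}
 *	shared_region->sr_mapping_in_progress = TRUE;
 *	vm_shared_region_unlock();
 *
 *	// ... do the work without holding the lock ...
 *
 *	vm_shared_region_lock();
 *	shared_region->sr_mapping_in_progress = FALSE;
 *	thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
 *	vm_shared_region_unlock();
 */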
178
179 /* the list of currently available shared regions (one per environment) */
180 queue_head_t vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
181 int vm_shared_region_count = 0;
182 int vm_shared_region_peak = 0;
183 static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */
184
185 /*
186 * the number of times an event has forced the recalculation of the reslide
187 * shared region slide.
188 */
189 #if __has_feature(ptrauth_calls)
190 int vm_shared_region_reslide_count = 0;
191 #endif /* __has_feature(ptrauth_calls) */
192
193 static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
194 static vm_shared_region_t vm_shared_region_create(
195 void *root_dir,
196 cpu_type_t cputype,
197 cpu_subtype_t cpu_subtype,
198 boolean_t is_64bit,
199 int target_page_shift,
200 boolean_t reslide,
201 boolean_t is_driverkit,
202 uint32_t rsr_version);
203 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
204
205 static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
206 static void vm_shared_region_timeout(thread_call_param_t param0,
207 thread_call_param_t param1);
208 static kern_return_t vm_shared_region_slide_mapping(
209 vm_shared_region_t sr,
210 user_addr_t slide_info_addr,
211 mach_vm_size_t slide_info_size,
212 mach_vm_offset_t start,
213 mach_vm_size_t size,
214 mach_vm_offset_t slid_mapping,
215 uint32_t slide,
216 memory_object_control_t,
217 vm_prot_t prot); /* forward */
218
219 static int __commpage_setup = 0;
220 #if XNU_TARGET_OS_OSX
221 static int __system_power_source = 1; /* init to external power source */
222 static void post_sys_powersource_internal(int i, int internal);
223 #endif /* XNU_TARGET_OS_OSX */
224
225 extern u_int32_t random(void);
226
227 /*
228 * Retrieve a task's shared region and grab an extra reference to
229 * make sure it doesn't disappear while the caller is using it.
230 * The caller is responsible for consuming that extra reference if
231 * necessary.
232 */
233 vm_shared_region_t
234 vm_shared_region_get(
235 task_t task)
236 {
237 vm_shared_region_t shared_region;
238
239 SHARED_REGION_TRACE_DEBUG(
240 ("shared_region: -> get(%p)\n",
241 (void *)VM_KERNEL_ADDRPERM(task)));
242
243 task_lock(task);
244 vm_shared_region_lock();
245 shared_region = task->shared_region;
246 if (shared_region) {
247 assert(shared_region->sr_ref_count > 0);
248 vm_shared_region_reference_locked(shared_region);
249 }
250 vm_shared_region_unlock();
251 task_unlock(task);
252
253 SHARED_REGION_TRACE_DEBUG(
254 ("shared_region: get(%p) <- %p\n",
255 (void *)VM_KERNEL_ADDRPERM(task),
256 (void *)VM_KERNEL_ADDRPERM(shared_region)));
257
258 return shared_region;
259 }
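/*
 * Example pairing (sketch): a caller that only needs a temporary look at
 * a task's region must balance the reference taken by
 * vm_shared_region_get() with a vm_shared_region_deallocate():
 *
 *	vm_shared_region_t sr = vm_shared_region_get(task);
 *	if (sr != NULL) {
 *		vm_map_t sr_map = vm_shared_region_vm_map(sr);
 *		// ... inspect sr / sr_map ...
 *		vm_shared_region_deallocate(sr);
 *	}
 */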
260
261 vm_map_t
262 vm_shared_region_vm_map(
263 vm_shared_region_t shared_region)
264 {
265 ipc_port_t sr_handle;
266 vm_named_entry_t sr_mem_entry;
267 vm_map_t sr_map;
268
269 SHARED_REGION_TRACE_DEBUG(
270 ("shared_region: -> vm_map(%p)\n",
271 (void *)VM_KERNEL_ADDRPERM(shared_region)));
272 assert(shared_region->sr_ref_count > 0);
273
274 sr_handle = shared_region->sr_mem_entry;
275 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
276 sr_map = sr_mem_entry->backing.map;
277 assert(sr_mem_entry->is_sub_map);
278
279 SHARED_REGION_TRACE_DEBUG(
280 ("shared_region: vm_map(%p) <- %p\n",
281 (void *)VM_KERNEL_ADDRPERM(shared_region),
282 (void *)VM_KERNEL_ADDRPERM(sr_map)));
283 return sr_map;
284 }
285
286 /*
287 * Set the shared region the process should use.
288 * A NULL new shared region means that we just want to release the old
289 * shared region.
290 * The caller should already have an extra reference on the new shared region
291 * (if any). We release a reference on the old shared region (if any).
292 */
293 void
294 vm_shared_region_set(
295 task_t task,
296 vm_shared_region_t new_shared_region)
297 {
298 vm_shared_region_t old_shared_region;
299
300 SHARED_REGION_TRACE_DEBUG(
301 ("shared_region: -> set(%p, %p)\n",
302 (void *)VM_KERNEL_ADDRPERM(task),
303 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
304
305 task_lock(task);
306 vm_shared_region_lock();
307
308 old_shared_region = task->shared_region;
309 if (new_shared_region) {
310 assert(new_shared_region->sr_ref_count > 0);
311 }
312
313 task->shared_region = new_shared_region;
314
315 vm_shared_region_unlock();
316 task_unlock(task);
317
318 if (old_shared_region) {
319 assert(old_shared_region->sr_ref_count > 0);
320 vm_shared_region_deallocate(old_shared_region);
321 }
322
323 SHARED_REGION_TRACE_DEBUG(
324 ("shared_region: set(%p) <- old=%p new=%p\n",
325 (void *)VM_KERNEL_ADDRPERM(task),
326 (void *)VM_KERNEL_ADDRPERM(old_shared_region),
327 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
328 }
329
330 /*
331 * New arm64 shared regions match with an existing arm64e region.
332 * They just get a private non-authenticating pager.
333 */
334 static inline bool
335 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
336 {
337 if (exist == new) {
338 return true;
339 }
340 if (cputype == CPU_TYPE_ARM64 &&
341 exist == CPU_SUBTYPE_ARM64E &&
342 new == CPU_SUBTYPE_ARM64_ALL) {
343 return true;
344 }
345 return false;
346 }
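/*
 * For example, with cputype == CPU_TYPE_ARM64:
 *	exist == ARM64E,    new == ARM64E    -> match (exact)
 *	exist == ARM64E,    new == ARM64_ALL -> match (arm64 process reuses
 *	                                        an existing arm64e region)
 *	exist == ARM64_ALL, new == ARM64E    -> no match
 */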
347
348
349 /*
350 * Look up the shared region for the desired environment.
351 * If none is found, create a new (empty) one.
352 * Grab an extra reference on the returned shared region, to make sure
353 * it doesn't get destroyed before the caller is done with it. The caller
354 * is responsible for consuming that extra reference if necessary.
355 */
356 vm_shared_region_t
357 vm_shared_region_lookup(
358 void *root_dir,
359 cpu_type_t cputype,
360 cpu_subtype_t cpu_subtype,
361 boolean_t is_64bit,
362 int target_page_shift,
363 boolean_t reslide,
364 boolean_t is_driverkit,
365 uint32_t rsr_version)
366 {
367 vm_shared_region_t shared_region;
368 vm_shared_region_t new_shared_region;
369
370 SHARED_REGION_TRACE_DEBUG(
371 ("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
372 (void *)VM_KERNEL_ADDRPERM(root_dir),
373 cputype, cpu_subtype, is_64bit, target_page_shift,
374 reslide, is_driverkit));
375
376 shared_region = NULL;
377 new_shared_region = NULL;
378
379 vm_shared_region_lock();
380 for (;;) {
381 queue_iterate(&vm_shared_region_queue,
382 shared_region,
383 vm_shared_region_t,
384 sr_q) {
385 assert(shared_region->sr_ref_count > 0);
386 if (shared_region->sr_cpu_type == cputype &&
387 match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
388 shared_region->sr_root_dir == root_dir &&
389 shared_region->sr_64bit == is_64bit &&
390 #if __ARM_MIXED_PAGE_SIZE__
391 shared_region->sr_page_shift == target_page_shift &&
392 #endif /* __ARM_MIXED_PAGE_SIZE__ */
393 #if __has_feature(ptrauth_calls)
394 shared_region->sr_reslide == reslide &&
395 #endif /* __has_feature(ptrauth_calls) */
396 shared_region->sr_driverkit == is_driverkit &&
397 shared_region->sr_rsr_version == rsr_version &&
398 !shared_region->sr_stale) {
399 /* found a match ! */
400 vm_shared_region_reference_locked(shared_region);
401 goto done;
402 }
403 }
404 if (new_shared_region == NULL) {
405 /* no match: create a new one */
406 vm_shared_region_unlock();
407 new_shared_region = vm_shared_region_create(root_dir,
408 cputype,
409 cpu_subtype,
410 is_64bit,
411 target_page_shift,
412 reslide,
413 is_driverkit,
414 rsr_version);
415 /* do the lookup again, in case we lost a race */
416 vm_shared_region_lock();
417 continue;
418 }
419 /* still no match: use our new one */
420 shared_region = new_shared_region;
421 new_shared_region = NULL;
422 uint32_t newid = ++vm_shared_region_lastid;
423 if (newid == 0) {
424 panic("shared_region: vm_shared_region_lastid wrapped");
425 }
426 shared_region->sr_id = newid;
427 shared_region->sr_install_time = mach_absolute_time();
428 queue_enter(&vm_shared_region_queue,
429 shared_region,
430 vm_shared_region_t,
431 sr_q);
432 vm_shared_region_count++;
433 if (vm_shared_region_count > vm_shared_region_peak) {
434 vm_shared_region_peak = vm_shared_region_count;
435 }
436 break;
437 }
438
439 done:
440 vm_shared_region_unlock();
441
442 if (new_shared_region) {
443 /*
444 * We lost a race with someone else to create a new shared
445 * region for that environment. Get rid of our unused one.
446 */
447 assert(new_shared_region->sr_ref_count == 1);
448 new_shared_region->sr_ref_count--;
449 vm_shared_region_destroy(new_shared_region);
450 new_shared_region = NULL;
451 }
452
453 SHARED_REGION_TRACE_DEBUG(
454 ("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
455 (void *)VM_KERNEL_ADDRPERM(root_dir),
456 cputype, cpu_subtype, is_64bit, target_page_shift,
457 reslide, is_driverkit,
458 (void *)VM_KERNEL_ADDRPERM(shared_region)));
459
460 assert(shared_region->sr_ref_count > 0);
461 return shared_region;
462 }
463
464 /*
465 * Take an extra reference on a shared region.
466 * The vm_shared_region_lock should already be held by the caller.
467 */
468 static void
469 vm_shared_region_reference_locked(
470 vm_shared_region_t shared_region)
471 {
472 LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
473
474 SHARED_REGION_TRACE_DEBUG(
475 ("shared_region: -> reference_locked(%p)\n",
476 (void *)VM_KERNEL_ADDRPERM(shared_region)));
477 assert(shared_region->sr_ref_count > 0);
478 shared_region->sr_ref_count++;
479 assert(shared_region->sr_ref_count != 0);
480
481 if (shared_region->sr_timer_call != NULL) {
482 boolean_t cancelled;
483
484 /* cancel and free any pending timeout */
485 cancelled = thread_call_cancel(shared_region->sr_timer_call);
486 if (cancelled) {
487 thread_call_free(shared_region->sr_timer_call);
488 shared_region->sr_timer_call = NULL;
489 /* release the reference held by the cancelled timer */
490 shared_region->sr_ref_count--;
491 } else {
492 /* the timer will drop the reference and free itself */
493 }
494 }
495
496 SHARED_REGION_TRACE_DEBUG(
497 ("shared_region: reference_locked(%p) <- %d\n",
498 (void *)VM_KERNEL_ADDRPERM(shared_region),
499 shared_region->sr_ref_count));
500 }
501
502 /*
503 * Take a reference on a shared region.
504 */
505 void
506 vm_shared_region_reference(vm_shared_region_t shared_region)
507 {
508 SHARED_REGION_TRACE_DEBUG(
509 ("shared_region: -> reference(%p)\n",
510 (void *)VM_KERNEL_ADDRPERM(shared_region)));
511
512 vm_shared_region_lock();
513 vm_shared_region_reference_locked(shared_region);
514 vm_shared_region_unlock();
515
516 SHARED_REGION_TRACE_DEBUG(
517 ("shared_region: reference(%p) <- %d\n",
518 (void *)VM_KERNEL_ADDRPERM(shared_region),
519 shared_region->sr_ref_count));
520 }
521
522 /*
523 * Release a reference on the shared region.
524 * Destroy it if there are no references left.
525 */
526 void
527 vm_shared_region_deallocate(
528 vm_shared_region_t shared_region)
529 {
530 SHARED_REGION_TRACE_DEBUG(
531 ("shared_region: -> deallocate(%p)\n",
532 (void *)VM_KERNEL_ADDRPERM(shared_region)));
533
534 vm_shared_region_lock();
535
536 assert(shared_region->sr_ref_count > 0);
537
538 if (shared_region->sr_root_dir == NULL) {
539 /*
540 * Local (i.e. based on the boot volume) shared regions
541 * can persist or not based on the "shared_region_persistence"
542 * sysctl.
543 * Make sure that this one complies.
544 *
545 * See comments in vm_shared_region_slide() for notes about
546 * shared regions we have slid (which are not torn down currently).
547 */
548 if (shared_region_persistence &&
549 !shared_region->sr_persists) {
550 /* make this one persistent */
551 shared_region->sr_ref_count++;
552 shared_region->sr_persists = TRUE;
553 } else if (!shared_region_persistence &&
554 shared_region->sr_persists) {
555 /* make this one no longer persistent */
556 assert(shared_region->sr_ref_count > 1);
557 shared_region->sr_ref_count--;
558 shared_region->sr_persists = FALSE;
559 }
560 }
561
562 assert(shared_region->sr_ref_count > 0);
563 shared_region->sr_ref_count--;
564 SHARED_REGION_TRACE_DEBUG(
565 ("shared_region: deallocate(%p): ref now %d\n",
566 (void *)VM_KERNEL_ADDRPERM(shared_region),
567 shared_region->sr_ref_count));
568
569 if (shared_region->sr_ref_count == 0) {
570 uint64_t deadline;
571
572 /*
573 * Even though a shared region is unused, delay a while before
574 * tearing it down, in case a new app launch can use it.
575 * We don't keep around stale shared regions, nor older RSR ones.
576 */
577 if (shared_region->sr_timer_call == NULL &&
578 shared_region_destroy_delay != 0 &&
579 !shared_region->sr_stale &&
580 !(shared_region->sr_rsr_version != 0 &&
581 shared_region->sr_rsr_version != rsr_get_version())) {
582 /* hold one reference for the timer */
583 assert(!shared_region->sr_mapping_in_progress);
584 shared_region->sr_ref_count++;
585
586 /* set up the timer */
587 shared_region->sr_timer_call = thread_call_allocate(
588 (thread_call_func_t) vm_shared_region_timeout,
589 (thread_call_param_t) shared_region);
590
591 /* schedule the timer */
592 clock_interval_to_deadline(shared_region_destroy_delay,
593 NSEC_PER_SEC,
594 &deadline);
595 thread_call_enter_delayed(shared_region->sr_timer_call,
596 deadline);
597
598 SHARED_REGION_TRACE_DEBUG(
599 ("shared_region: deallocate(%p): armed timer\n",
600 (void *)VM_KERNEL_ADDRPERM(shared_region)));
601
602 vm_shared_region_unlock();
603 } else {
604 /* timer expired: let go of this shared region */
605
606 /* Make sure there's no cached pointer to the region. */
607 if (primary_system_shared_region == shared_region) {
608 primary_system_shared_region = NULL;
609 }
610
611 /*
612 * Remove it from the queue first, so no one can find
613 * it...
614 */
615 queue_remove(&vm_shared_region_queue,
616 shared_region,
617 vm_shared_region_t,
618 sr_q);
619 vm_shared_region_count--;
620 vm_shared_region_unlock();
621
622 /* ... and destroy it */
623 vm_shared_region_destroy(shared_region);
624 shared_region = NULL;
625 }
626 } else {
627 vm_shared_region_unlock();
628 }
629
630 SHARED_REGION_TRACE_DEBUG(
631 ("shared_region: deallocate(%p) <-\n",
632 (void *)VM_KERNEL_ADDRPERM(shared_region)));
633 }
634
635 void
636 vm_shared_region_timeout(
637 thread_call_param_t param0,
638 __unused thread_call_param_t param1)
639 {
640 vm_shared_region_t shared_region;
641
642 shared_region = (vm_shared_region_t) param0;
643
644 vm_shared_region_deallocate(shared_region);
645 }
646
647
648 /*
649 * Create a new (empty) shared region for a new environment.
650 */
651 static vm_shared_region_t
652 vm_shared_region_create(
653 void *root_dir,
654 cpu_type_t cputype,
655 cpu_subtype_t cpu_subtype,
656 boolean_t is_64bit,
657 int target_page_shift,
658 #if !__has_feature(ptrauth_calls)
659 __unused
660 #endif /* !__has_feature(ptrauth_calls) */
661 boolean_t reslide,
662 boolean_t is_driverkit,
663 uint32_t rsr_version)
664 {
665 vm_named_entry_t mem_entry;
666 ipc_port_t mem_entry_port;
667 vm_shared_region_t shared_region;
668 vm_map_t sub_map;
669 mach_vm_offset_t base_address, pmap_nesting_start;
670 mach_vm_size_t size, pmap_nesting_size;
671
672 SHARED_REGION_TRACE_INFO(
673 ("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
674 (void *)VM_KERNEL_ADDRPERM(root_dir),
675 cputype, cpu_subtype, is_64bit, target_page_shift,
676 reslide, is_driverkit));
677
678 base_address = 0;
679 size = 0;
680 mem_entry = NULL;
681 mem_entry_port = IPC_PORT_NULL;
682 sub_map = VM_MAP_NULL;
683
684 /* create a new shared region structure... */
685 shared_region = kalloc_type(struct vm_shared_region,
686 Z_WAITOK | Z_NOFAIL);
687
688 /* figure out the correct settings for the desired environment */
689 if (is_64bit) {
690 switch (cputype) {
691 #if defined(__arm64__)
692 case CPU_TYPE_ARM64:
693 base_address = SHARED_REGION_BASE_ARM64;
694 size = SHARED_REGION_SIZE_ARM64;
695 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
696 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
697 break;
698 #else
699 case CPU_TYPE_I386:
700 base_address = SHARED_REGION_BASE_X86_64;
701 size = SHARED_REGION_SIZE_X86_64;
702 pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
703 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
704 break;
705 case CPU_TYPE_POWERPC:
706 base_address = SHARED_REGION_BASE_PPC64;
707 size = SHARED_REGION_SIZE_PPC64;
708 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
709 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
710 break;
711 #endif
712 default:
713 SHARED_REGION_TRACE_ERROR(
714 ("shared_region: create: unknown cpu type %d\n",
715 cputype));
716 kfree_type(struct vm_shared_region, shared_region);
717 shared_region = NULL;
718 goto done;
719 }
720 } else {
721 switch (cputype) {
722 #if defined(__arm64__)
723 case CPU_TYPE_ARM:
724 base_address = SHARED_REGION_BASE_ARM;
725 size = SHARED_REGION_SIZE_ARM;
726 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
727 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
728 break;
729 #else
730 case CPU_TYPE_I386:
731 base_address = SHARED_REGION_BASE_I386;
732 size = SHARED_REGION_SIZE_I386;
733 pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
734 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
735 break;
736 case CPU_TYPE_POWERPC:
737 base_address = SHARED_REGION_BASE_PPC;
738 size = SHARED_REGION_SIZE_PPC;
739 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
740 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
741 break;
742 #endif
743 default:
744 SHARED_REGION_TRACE_ERROR(
745 ("shared_region: create: unknown cpu type %d\n",
746 cputype));
747 kfree_type(struct vm_shared_region, shared_region);
748 shared_region = NULL;
749 goto done;
750 }
751 }
752
753 /* create a memory entry structure and a Mach port handle */
754 mem_entry = mach_memory_entry_allocate(&mem_entry_port);
755
756 #if defined(__arm64__)
757 {
758 struct pmap *pmap_nested;
759 int pmap_flags = 0;
760 pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
761
762
763 #if __ARM_MIXED_PAGE_SIZE__
764 if (cputype == CPU_TYPE_ARM64 &&
765 target_page_shift == FOURK_PAGE_SHIFT) {
766 /* arm64/4k address space */
767 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
768 }
769 #endif /* __ARM_MIXED_PAGE_SIZE__ */
770
771 pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
772 if (pmap_nested != PMAP_NULL) {
773 pmap_set_nested(pmap_nested);
774 sub_map = vm_map_create_options(pmap_nested, 0,
775 (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
776
777 if (is_64bit ||
778 page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
779 /* enforce 16KB alignment of VM map entries */
780 vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
781 }
782 #if __ARM_MIXED_PAGE_SIZE__
783 if (cputype == CPU_TYPE_ARM64 &&
784 target_page_shift == FOURK_PAGE_SHIFT) {
785 /* arm64/4k address space */
786 vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
787 }
788 #endif /* __ARM_MIXED_PAGE_SIZE__ */
789 } else {
790 sub_map = VM_MAP_NULL;
791 }
792 }
793 #else /* defined(__arm64__) */
794 {
795 /* create a VM sub map and its pmap */
796 pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
797 if (pmap != NULL) {
798 sub_map = vm_map_create_options(pmap, 0,
799 (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
800 } else {
801 sub_map = VM_MAP_NULL;
802 }
803 }
804 #endif /* defined(__arm64__) */
805 if (sub_map == VM_MAP_NULL) {
806 ipc_port_release_send(mem_entry_port);
807 kfree_type(struct vm_shared_region, shared_region);
808 shared_region = NULL;
809 SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
810 goto done;
811 }
812
813 /* shared regions should always enforce code-signing */
814 vm_map_cs_enforcement_set(sub_map, true);
815 assert(vm_map_cs_enforcement(sub_map));
816 assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
817
818 assert(!sub_map->disable_vmentry_reuse);
819 sub_map->is_nested_map = TRUE;
820
821 /* make the memory entry point to the VM sub map */
822 mem_entry->is_sub_map = TRUE;
823 mem_entry->backing.map = sub_map;
824 mem_entry->size = size;
825 mem_entry->protection = VM_PROT_ALL;
826
827 /* make the shared region point at the memory entry */
828 shared_region->sr_mem_entry = mem_entry_port;
829
830 /* fill in the shared region's environment and settings */
831 shared_region->sr_base_address = base_address;
832 shared_region->sr_size = size;
833 shared_region->sr_pmap_nesting_start = pmap_nesting_start;
834 shared_region->sr_pmap_nesting_size = pmap_nesting_size;
835 shared_region->sr_cpu_type = cputype;
836 shared_region->sr_cpu_subtype = cpu_subtype;
837 shared_region->sr_64bit = (uint8_t)is_64bit;
838 #if __ARM_MIXED_PAGE_SIZE__
839 shared_region->sr_page_shift = (uint8_t)target_page_shift;
840 #endif /* __ARM_MIXED_PAGE_SIZE__ */
841 shared_region->sr_driverkit = (uint8_t)is_driverkit;
842 shared_region->sr_rsr_version = rsr_version;
843 shared_region->sr_root_dir = root_dir;
844
845 queue_init(&shared_region->sr_q);
846 shared_region->sr_mapping_in_progress = FALSE;
847 shared_region->sr_slide_in_progress = FALSE;
848 shared_region->sr_persists = FALSE;
849 shared_region->sr_stale = FALSE;
850 shared_region->sr_timer_call = NULL;
851 shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
852
853 /* grab a reference for the caller */
854 shared_region->sr_ref_count = 1;
855
856 shared_region->sr_slide = 0; /* not slid yet */
857
858 /* Initialize UUID and other metadata */
859 memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
860 shared_region->sr_uuid_copied = FALSE;
861 shared_region->sr_images_count = 0;
862 shared_region->sr_images = NULL;
863 #if __has_feature(ptrauth_calls)
864 shared_region->sr_reslide = reslide;
865 shared_region->sr_num_auth_section = 0;
866 shared_region->sr_next_auth_section = 0;
867 shared_region->sr_auth_section = NULL;
868 #endif /* __has_feature(ptrauth_calls) */
869
870 done:
871 if (shared_region) {
872 SHARED_REGION_TRACE_INFO(
873 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
874 "base=0x%llx,size=0x%llx) <- "
875 "%p mem=(%p,%p) map=%p pmap=%p\n",
876 (void *)VM_KERNEL_ADDRPERM(root_dir),
877 cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
878 (long long)base_address,
879 (long long)size,
880 (void *)VM_KERNEL_ADDRPERM(shared_region),
881 (void *)VM_KERNEL_ADDRPERM(mem_entry_port),
882 (void *)VM_KERNEL_ADDRPERM(mem_entry),
883 (void *)VM_KERNEL_ADDRPERM(sub_map),
884 (void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
885 } else {
886 SHARED_REGION_TRACE_INFO(
887 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
888 "base=0x%llx,size=0x%llx) <- NULL",
889 (void *)VM_KERNEL_ADDRPERM(root_dir),
890 cputype, cpu_subtype, is_64bit, is_driverkit,
891 (long long)base_address,
892 (long long)size));
893 }
894 return shared_region;
895 }
896
897 /*
898 * Destroy a now-unused shared region.
899 * The shared region is no longer in the queue and can not be looked up.
900 */
901 static void
902 vm_shared_region_destroy(
903 vm_shared_region_t shared_region)
904 {
905 vm_named_entry_t mem_entry;
906 vm_map_t map;
907
908 SHARED_REGION_TRACE_INFO(
909 ("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
910 (void *)VM_KERNEL_ADDRPERM(shared_region),
911 (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
912 shared_region->sr_cpu_type,
913 shared_region->sr_cpu_subtype,
914 shared_region->sr_64bit,
915 shared_region->sr_driverkit));
916
917 assert(shared_region->sr_ref_count == 0);
918 assert(!shared_region->sr_persists);
919
920 mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
921 assert(mem_entry->is_sub_map);
922 assert(!mem_entry->internal);
923 assert(!mem_entry->is_copy);
924 map = mem_entry->backing.map;
925
926 /*
927 * Clean up the pmap first. The virtual addresses that were
928 * entered in this possibly "nested" pmap may have different values
929 * than the VM map's min and max offsets, if the VM sub map was
930 * mapped at a non-zero offset in the processes' main VM maps, which
931 * is usually the case, so the clean-up we do in vm_map_destroy() would
932 * not be enough.
933 */
934 if (map->pmap) {
935 pmap_remove(map->pmap,
936 (vm_map_offset_t)shared_region->sr_base_address,
937 (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
938 }
939
940 /*
941 * Release our (one and only) handle on the memory entry.
942 * This will generate a no-senders notification, which will be processed
943 * by ipc_kobject_notify_no_senders(), which will release the one and only
944 * reference on the memory entry and cause it to be destroyed, along
945 * with the VM sub map and its pmap.
946 */
947 mach_memory_entry_port_release(shared_region->sr_mem_entry);
948 mem_entry = NULL;
949 shared_region->sr_mem_entry = IPC_PORT_NULL;
950
951 if (shared_region->sr_timer_call) {
952 thread_call_free(shared_region->sr_timer_call);
953 }
954
955 #if __has_feature(ptrauth_calls)
956 /*
957 * Free the cached copies of slide_info for the AUTH regions.
958 */
959 for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
960 vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
961 if (si != NULL) {
962 vm_object_deallocate(si->si_slide_object);
963 kfree_data(si->si_slide_info_entry,
964 si->si_slide_info_size);
965 kfree_type(struct vm_shared_region_slide_info, si);
966 shared_region->sr_auth_section[i] = NULL;
967 }
968 }
969 if (shared_region->sr_auth_section != NULL) {
970 assert(shared_region->sr_num_auth_section > 0);
971 kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
972 shared_region->sr_auth_section = NULL;
973 shared_region->sr_num_auth_section = 0;
974 }
975 #endif /* __has_feature(ptrauth_calls) */
976
977 /* release the shared region structure... */
978 kfree_type(struct vm_shared_region, shared_region);
979
980 SHARED_REGION_TRACE_DEBUG(
981 ("shared_region: destroy(%p) <-\n",
982 (void *)VM_KERNEL_ADDRPERM(shared_region)));
983 shared_region = NULL;
984 }
985
986 /*
987 * Gets the address of the first (in time) mapping in the shared region.
988 * If used during initial task setup by dyld, task should be non-NULL.
989 */
990 kern_return_t
991 vm_shared_region_start_address(
992 vm_shared_region_t shared_region,
993 mach_vm_offset_t *start_address,
994 task_t task)
995 {
996 kern_return_t kr;
997 mach_vm_offset_t sr_base_address;
998 mach_vm_offset_t sr_first_mapping;
999
1000 SHARED_REGION_TRACE_DEBUG(
1001 ("shared_region: -> start_address(%p)\n",
1002 (void *)VM_KERNEL_ADDRPERM(shared_region)));
1003
1004 vm_shared_region_lock();
1005
1006 /*
1007 * Wait if there's another thread establishing a mapping
1008 * in this shared region right when we're looking at it.
1009 * We want a consistent view of the map...
1010 */
1011 while (shared_region->sr_mapping_in_progress) {
1012 /* wait for our turn... */
1013 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1014 THREAD_UNINT);
1015 }
1016 assert(!shared_region->sr_mapping_in_progress);
1017 assert(shared_region->sr_ref_count > 0);
1018
1019 sr_base_address = shared_region->sr_base_address;
1020 sr_first_mapping = shared_region->sr_first_mapping;
1021
1022 if (sr_first_mapping == (mach_vm_offset_t) -1) {
1023 /* shared region is empty */
1024 kr = KERN_INVALID_ADDRESS;
1025 } else {
1026 kr = KERN_SUCCESS;
1027 *start_address = sr_base_address + sr_first_mapping;
1028 }
1029
1030
1031 uint32_t slide = shared_region->sr_slide;
1032
1033 vm_shared_region_unlock();
1034
1035 /*
1036 * Cache shared region info in the task for telemetry gathering, if we're
1037 * passed in the task. No task lock here as we're still in initial task setup.
1038 */
1039 if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
1040 uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1041 if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
1042 (char *)&task->task_shared_region_uuid,
1043 sizeof(task->task_shared_region_uuid)) == 0) {
1044 task->task_shared_region_slide = slide;
1045 }
1046 }
1047
1048 SHARED_REGION_TRACE_DEBUG(
1049 ("shared_region: start_address(%p) <- 0x%llx\n",
1050 (void *)VM_KERNEL_ADDRPERM(shared_region),
1051 (long long)shared_region->sr_base_address));
1052
1053 return kr;
1054 }
1055
1056 /*
1057 * Look up a pre-existing mapping in shared region, for replacement.
1058 * Takes an extra object reference if found.
1059 */
1060 static kern_return_t
1061 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1062 {
1063 vm_map_entry_t found;
1064
1065 /* find the shared region's map entry to slide */
1066 vm_map_lock_read(map);
1067 if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
1068 /* no mapping there */
1069 vm_map_unlock(map);
1070 return KERN_INVALID_ARGUMENT;
1071 }
1072
1073 *entry = *found;
1074 /* extra ref to keep object alive while map is unlocked */
1075 vm_object_reference(VME_OBJECT(found));
1076 vm_map_unlock_read(map);
1077 return KERN_SUCCESS;
1078 }
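/*
 * Caller-side sketch: find_mapping_to_slide() copies the entry out and
 * takes an extra reference on its VM object, so the caller must drop
 * that reference when done (see vm_shared_region_auth_remap() below):
 *
 *	struct vm_map_entry entry_store = {0};
 *	if (find_mapping_to_slide(sr_map, addr, &entry_store) == KERN_SUCCESS) {
 *		// ... use entry_store while the map is unlocked ...
 *		vm_object_deallocate(VME_OBJECT(&entry_store));
 *	}
 */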
1079
1080 static bool
1081 shared_region_make_permanent(
1082 vm_shared_region_t sr,
1083 vm_prot_t max_prot)
1084 {
1085 if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1086 return false;
1087 }
1088 if (max_prot & VM_PROT_WRITE) {
1089 /*
1090 * Potentially writable mapping: no major issue with allowing
1091 * it to be replaced since its contents could be modified
1092 * anyway.
1093 */
1094 return false;
1095 }
1096 if (max_prot & VM_PROT_EXECUTE) {
1097 /*
1098 * Potentially executable mapping: some software might want
1099 * to try and replace it to interpose their own code when a
1100 * given routine is called or returns, for example.
1101 * So let's not make it "permanent".
1102 */
1103 return false;
1104 }
1105 /*
1106 * Make this mapping "permanent" to prevent it from being deleted
1107 * and/or replaced with another mapping.
1108 */
1109 return true;
1110 }
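/*
 * Decision summary (derived from the checks above):
 *	x86_64 shared region      -> not permanent
 *	max_prot includes WRITE   -> not permanent
 *	max_prot includes EXECUTE -> not permanent
 *	read-only data mapping    -> permanent (cannot be unmapped or replaced)
 */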
1111
1112 static bool
1113 shared_region_tpro_protect(
1114 vm_shared_region_t sr,
1115 vm_prot_t max_prot __unused)
1116 {
1117 if (sr->sr_cpu_type != CPU_TYPE_ARM64 ||
1118 (sr->sr_cpu_subtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E) {
1119 return false;
1120 }
1121
1122
1123 /*
1124 * Unless otherwise explicitly requested all other mappings do not get
1125 * TPRO protection.
1126 */
1127 return false;
1128 }
1129
1130 #if __has_feature(ptrauth_calls)
1131
1132 /*
1133 * Determine if this task is actually using pointer signing.
1134 */
1135 static boolean_t
1136 task_sign_pointers(task_t task)
1137 {
1138 if (task->map &&
1139 task->map->pmap &&
1140 !task->map->pmap->disable_jop) {
1141 return TRUE;
1142 }
1143 return FALSE;
1144 }
1145
1146 /*
1147 * If the shared region contains mappings that are authenticated, then
1148 * remap them into the task private map.
1149 *
1150 * Failures are possible in this routine when jetsam kills a process
1151 * just as dyld is trying to set it up. The vm_map and task shared region
1152 * info get torn down w/o waiting for this thread to finish up.
1153 */
1154 __attribute__((noinline))
1155 kern_return_t
1156 vm_shared_region_auth_remap(vm_shared_region_t sr)
1157 {
1158 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
1159 task_t task = current_task();
1160 vm_shared_region_slide_info_t si;
1161 uint_t i;
1162 vm_object_t object;
1163 vm_map_t sr_map;
1164 struct vm_map_entry tmp_entry_store = {0};
1165 vm_map_entry_t tmp_entry = NULL;
1166 vm_map_kernel_flags_t vmk_flags;
1167 vm_map_offset_t map_addr;
1168 kern_return_t kr = KERN_SUCCESS;
1169 boolean_t use_ptr_auth = task_sign_pointers(task);
1170
1171 /*
1172 * Don't do this more than once and avoid any race conditions in finishing it.
1173 */
1174 vm_shared_region_lock();
1175 while (sr->sr_mapping_in_progress) {
1176 /* wait for our turn... */
1177 vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
1178 }
1179 assert(!sr->sr_mapping_in_progress);
1180 assert(sr->sr_ref_count > 0);
1181
1182 /* Just return if already done. */
1183 if (task->shared_region_auth_remapped) {
1184 vm_shared_region_unlock();
1185 return KERN_SUCCESS;
1186 }
1187
1188 /* let others know to wait while we're working in this shared region */
1189 sr->sr_mapping_in_progress = TRUE;
1190 vm_shared_region_unlock();
1191
1192 /*
1193 * Remap any sections with pointer authentications into the private map.
1194 */
1195 for (i = 0; i < sr->sr_num_auth_section; ++i) {
1196 si = sr->sr_auth_section[i];
1197 assert(si != NULL);
1198 assert(si->si_ptrauth);
1199
1200 /*
1201 * We have a mapping that needs to be private.
1202 * Look for an existing slid mapping's pager with matching
1203 * object, offset, slide info and shared_region_id to reuse.
1204 */
1205 object = si->si_slide_object;
1206 sr_pager = shared_region_pager_match(object, si->si_start, si,
1207 use_ptr_auth ? task->jop_pid : 0);
1208 if (sr_pager == MEMORY_OBJECT_NULL) {
1209 printf("%s(): shared_region_pager_match() failed\n", __func__);
1210 kr = KERN_FAILURE;
1211 goto done;
1212 }
1213
1214 /*
1215 * verify matching jop_pid for this task and this pager
1216 */
1217 if (use_ptr_auth) {
1218 shared_region_pager_match_task_key(sr_pager, task);
1219 }
1220
1221 sr_map = vm_shared_region_vm_map(sr);
1222 tmp_entry = NULL;
1223
1224 kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1225 if (kr != KERN_SUCCESS) {
1226 printf("%s(): find_mapping_to_slide() failed\n", __func__);
1227 goto done;
1228 }
1229 tmp_entry = &tmp_entry_store;
1230
1231 /*
1232 * Check that the object exactly covers the region to slide.
1233 */
1234 if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1235 printf("%s(): doesn't fully cover\n", __func__);
1236 kr = KERN_FAILURE;
1237 goto done;
1238 }
1239
1240 /*
1241 * map the pager over the portion of the mapping that needs sliding
1242 */
1243 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
1244 vmk_flags.vmkf_overwrite_immutable = true;
1245 vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
1246 tmp_entry->max_protection);
1247
1248 map_addr = si->si_slid_address;
1249 kr = vm_map_enter_mem_object(task->map,
1250 &map_addr,
1251 si->si_end - si->si_start,
1252 (mach_vm_offset_t) 0,
1253 vmk_flags,
1254 (ipc_port_t)(uintptr_t) sr_pager,
1255 0,
1256 TRUE,
1257 tmp_entry->protection,
1258 tmp_entry->max_protection,
1259 tmp_entry->inheritance);
1260 memory_object_deallocate(sr_pager);
1261 sr_pager = MEMORY_OBJECT_NULL;
1262 if (kr != KERN_SUCCESS) {
1263 printf("%s(): vm_map_enter_mem_object() failed\n", __func__);
1264 goto done;
1265 }
1266 assertf(map_addr == si->si_slid_address,
1267 "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1268 (uint64_t)map_addr,
1269 (uint64_t)si->si_slid_address,
1270 tmp_entry);
1271
1272 /* Drop the ref count grabbed by find_mapping_to_slide */
1273 vm_object_deallocate(VME_OBJECT(tmp_entry));
1274 tmp_entry = NULL;
1275 }
1276
1277 done:
1278 if (tmp_entry) {
1279 /* Drop the ref count grabbed by find_mapping_to_slide */
1280 vm_object_deallocate(VME_OBJECT(tmp_entry));
1281 tmp_entry = NULL;
1282 }
1283
1284 /*
1285 * Drop any extra reference to the pager in case we're quitting due to an error above.
1286 */
1287 if (sr_pager != MEMORY_OBJECT_NULL) {
1288 memory_object_deallocate(sr_pager);
1289 }
1290
1291 /*
1292 * Mark the region as having its auth sections remapped.
1293 */
1294 vm_shared_region_lock();
1295 task->shared_region_auth_remapped = TRUE;
1296 sr->sr_mapping_in_progress = FALSE;
1297 thread_wakeup((event_t)&sr->sr_mapping_in_progress);
1298 vm_shared_region_unlock();
1299 return kr;
1300 }
1301 #endif /* __has_feature(ptrauth_calls) */
1302
1303 void
1304 vm_shared_region_undo_mappings(
1305 vm_map_t sr_map,
1306 mach_vm_offset_t sr_base_address,
1307 struct _sr_file_mappings *srf_mappings,
1308 struct _sr_file_mappings *srf_mappings_current,
1309 unsigned int srf_current_mappings_count)
1310 {
1311 unsigned int j = 0;
1312 vm_shared_region_t shared_region = NULL;
1313 boolean_t reset_shared_region_state = FALSE;
1314 struct _sr_file_mappings *srfmp;
1315 unsigned int mappings_count;
1316 struct shared_file_mapping_slide_np *mappings;
1317
1318 shared_region = vm_shared_region_get(current_task());
1319 if (shared_region == NULL) {
1320 printf("Failed to undo mappings because of NULL shared region.\n");
1321 return;
1322 }
1323
1324 shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
1325
1326 if (sr_map == NULL) {
1327 ipc_port_t sr_handle;
1328 vm_named_entry_t sr_mem_entry;
1329
1330 vm_shared_region_lock();
1331 assert(shared_region->sr_ref_count > 0);
1332
1333 while (shared_region->sr_mapping_in_progress) {
1334 /* wait for our turn... */
1335 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1336 THREAD_UNINT);
1337 }
1338 assert(!shared_region->sr_mapping_in_progress);
1339 assert(shared_region->sr_ref_count > 0);
1340 /* let others know we're working in this shared region */
1341 shared_region->sr_mapping_in_progress = TRUE;
1342
1343 vm_shared_region_unlock();
1344
1345 reset_shared_region_state = TRUE;
1346
1347 /* no need to lock because this data is never modified... */
1348 sr_handle = shared_region->sr_mem_entry;
1349 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1350 sr_map = sr_mem_entry->backing.map;
1351 sr_base_address = shared_region->sr_base_address;
1352 }
1353 /*
1354 * Undo the mappings we've established so far.
1355 */
1356 for (srfmp = &srf_mappings[0];
1357 srfmp <= srf_mappings_current;
1358 srfmp++) {
1359 mappings = srfmp->mappings;
1360 mappings_count = srfmp->mappings_count;
1361 if (srfmp == srf_mappings_current) {
1362 mappings_count = srf_current_mappings_count;
1363 }
1364
1365 for (j = 0; j < mappings_count; j++) {
1366 kern_return_t kr2;
1367 mach_vm_offset_t start, end;
1368
1369 if (mappings[j].sms_size == 0) {
1370 /*
1371 * We didn't establish this
1372 * mapping, so nothing to undo.
1373 */
1374 continue;
1375 }
1376 SHARED_REGION_TRACE_INFO(
1377 ("shared_region: mapping[%d]: "
1378 "address:0x%016llx "
1379 "size:0x%016llx "
1380 "offset:0x%016llx "
1381 "maxprot:0x%x prot:0x%x: "
1382 "undoing...\n",
1383 j,
1384 (long long)mappings[j].sms_address,
1385 (long long)mappings[j].sms_size,
1386 (long long)mappings[j].sms_file_offset,
1387 mappings[j].sms_max_prot,
1388 mappings[j].sms_init_prot));
1389 start = (mappings[j].sms_address - sr_base_address);
1390 end = start + mappings[j].sms_size;
1391 start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
1392 end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
1393 kr2 = vm_map_remove_guard(sr_map,
1394 start,
1395 end,
1396 VM_MAP_REMOVE_IMMUTABLE,
1397 KMEM_GUARD_NONE).kmr_return;
1398 assert(kr2 == KERN_SUCCESS);
1399 }
1400 }
1401
1402 if (reset_shared_region_state) {
1403 vm_shared_region_lock();
1404 assert(shared_region->sr_ref_count > 0);
1405 assert(shared_region->sr_mapping_in_progress);
1406 /* we're done working on that shared region */
1407 shared_region->sr_mapping_in_progress = FALSE;
1408 thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1409 vm_shared_region_unlock();
1410 reset_shared_region_state = FALSE;
1411 }
1412
1413 vm_shared_region_deallocate(shared_region);
1414 }
1415
1416 /*
1417 * First part of vm_shared_region_map_file(). Split out to
1418 * avoid kernel stack overflow.
1419 */
1420 __attribute__((noinline))
1421 static kern_return_t
1422 vm_shared_region_map_file_setup(
1423 vm_shared_region_t shared_region,
1424 int sr_file_mappings_count,
1425 struct _sr_file_mappings *sr_file_mappings,
1426 unsigned int *mappings_to_slide_cnt,
1427 struct shared_file_mapping_slide_np **mappings_to_slide,
1428 mach_vm_offset_t *slid_mappings,
1429 memory_object_control_t *slid_file_controls,
1430 mach_vm_offset_t *sfm_min_address,
1431 mach_vm_offset_t *sfm_max_address,
1432 vm_map_t *sr_map_ptr,
1433 vm_map_offset_t *lowest_unnestable_addr_ptr,
1434 unsigned int vmsr_num_slides)
1435 {
1436 kern_return_t kr = KERN_SUCCESS;
1437 memory_object_control_t file_control;
1438 vm_object_t file_object;
1439 ipc_port_t sr_handle;
1440 vm_named_entry_t sr_mem_entry;
1441 vm_map_t sr_map;
1442 mach_vm_offset_t sr_base_address;
1443 unsigned int i = 0;
1444 mach_port_t map_port;
1445 vm_map_offset_t target_address;
1446 vm_object_t object;
1447 vm_object_size_t obj_size;
1448 vm_map_offset_t lowest_unnestable_addr = 0;
1449 vm_map_kernel_flags_t vmk_flags;
1450 mach_vm_offset_t sfm_end;
1451 uint32_t mappings_count;
1452 struct shared_file_mapping_slide_np *mappings;
1453 struct _sr_file_mappings *srfmp;
1454
1455 vm_shared_region_lock();
1456 assert(shared_region->sr_ref_count > 0);
1457
1458 /*
1459 * Make sure we handle only one mapping at a time in a given
1460 * shared region, to avoid race conditions. This should not
1461 * happen frequently...
1462 */
1463 while (shared_region->sr_mapping_in_progress) {
1464 /* wait for our turn... */
1465 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1466 THREAD_UNINT);
1467 }
1468 assert(!shared_region->sr_mapping_in_progress);
1469 assert(shared_region->sr_ref_count > 0);
1470
1471
1472 /* let others know we're working in this shared region */
1473 shared_region->sr_mapping_in_progress = TRUE;
1474
1475 /*
1476 * Did someone race in and map this shared region already?
1477 */
1478 if (shared_region->sr_first_mapping != -1) {
1479 vm_shared_region_unlock();
1480 #if DEVELOPMENT || DEBUG
1481 printf("shared_region: caught race in map and slide\n");
1482 #endif /* DEVELOPMENT || DEBUG */
1483 return KERN_FAILURE;
1484 }
1485
1486 vm_shared_region_unlock();
1487
1488 /* no need to lock because this data is never modified... */
1489 sr_handle = shared_region->sr_mem_entry;
1490 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1491 sr_map = sr_mem_entry->backing.map;
1492 sr_base_address = shared_region->sr_base_address;
1493
1494 SHARED_REGION_TRACE_DEBUG(
1495 ("shared_region: -> map(%p)\n",
1496 (void *)VM_KERNEL_ADDRPERM(shared_region)));
1497
1498 mappings_count = 0;
1499 mappings = NULL;
1500 srfmp = NULL;
1501
1502 /* process all the files to be mapped */
1503 for (srfmp = &sr_file_mappings[0];
1504 srfmp < &sr_file_mappings[sr_file_mappings_count];
1505 srfmp++) {
1506 mappings_count = srfmp->mappings_count;
1507 mappings = srfmp->mappings;
1508 file_control = srfmp->file_control;
1509
1510 if (mappings_count == 0) {
1511 /* no mappings here... */
1512 continue;
1513 }
1514
1515 /*
1516 * The code below can only correctly "slide" (perform relocations) for one
1517 * value of the slide amount. So if a file has a non-zero slide, it has to
1518 * match any previous value. A zero slide value is ok for things that are
1519 * just directly mapped.
1520 */
1521 if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1522 shared_region->sr_slide = srfmp->slide;
1523 } else if (shared_region->sr_slide != 0 &&
1524 srfmp->slide != 0 &&
1525 shared_region->sr_slide != srfmp->slide) {
1526 SHARED_REGION_TRACE_ERROR(
1527 ("shared_region: more than 1 non-zero slide value amount "
1528 "slide 1:0x%x slide 2:0x%x\n ",
1529 shared_region->sr_slide, srfmp->slide));
1530 kr = KERN_INVALID_ARGUMENT;
1531 break;
1532 }
1533
1534 #if __arm64__
1535 if ((shared_region->sr_64bit ||
1536 page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
1537 ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
1538 printf("FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
1539 __FUNCTION__, srfmp->slide);
1540 kr = KERN_INVALID_ARGUMENT;
1541 break;
1542 }
1543 #endif /* __arm64__ */
1544
1545 /*
1546 * An FD of -1 means we need to copyin the data to an anonymous object.
1547 */
1548 if (srfmp->fd == -1) {
1549 assert(mappings_count == 1);
1550 SHARED_REGION_TRACE_INFO(
1551 ("shared_region: mapping[0]: "
1552 "address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
1553 "maxprot:0x%x prot:0x%x fd==-1\n",
1554 (long long)mappings[0].sms_address,
1555 (long long)mappings[0].sms_size,
1556 (long long)mappings[0].sms_file_offset,
1557 mappings[0].sms_max_prot,
1558 mappings[0].sms_init_prot));
1559
1560 /*
1561 * We need an anon object to hold the data in the shared region.
1562 * The size needs to be suitable to map into the kernel.
1563 */
1564 obj_size = vm_object_round_page(mappings->sms_size);
1565 object = vm_object_allocate(obj_size);
1566 if (object == VM_OBJECT_NULL) {
1567 printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
1568 kr = KERN_RESOURCE_SHORTAGE;
1569 break;
1570 }
1571
1572 /*
1573 * map the object into the kernel
1574 */
1575 vm_map_offset_t kaddr = 0;
1576 vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
1577 vmk_flags.vmkf_no_copy_on_read = 1;
1578 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1579
1580 kr = vm_map_enter(kernel_map,
1581 &kaddr,
1582 obj_size,
1583 0,
1584 vmk_flags,
1585 object,
1586 0,
1587 FALSE,
1588 (VM_PROT_READ | VM_PROT_WRITE),
1589 (VM_PROT_READ | VM_PROT_WRITE),
1590 VM_INHERIT_NONE);
1591 if (kr != KERN_SUCCESS) {
1592 printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
1593 vm_object_deallocate(object);
1594 object = VM_OBJECT_NULL;
1595 break;
1596 }
1597
1598 /*
1599 * We'll need another reference to keep the object alive after
1600 * we vm_map_remove() it from the kernel.
1601 */
1602 vm_object_reference(object);
1603
1604 /*
1605 * Zero out the object's pages, so we can't leak data.
1606 */
1607 bzero((void *)kaddr, obj_size);
1608
1609 /*
1610 * Copyin the data from dyld to the new object.
1611 * Then remove the kernel mapping.
1612 */
1613 int copyin_err =
1614 copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
1615 vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
1616 if (copyin_err) {
1617 printf("%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
1618 switch (copyin_err) {
1619 case EPERM:
1620 case EACCES:
1621 kr = KERN_PROTECTION_FAILURE;
1622 break;
1623 case EFAULT:
1624 kr = KERN_INVALID_ADDRESS;
1625 break;
1626 default:
1627 kr = KERN_FAILURE;
1628 break;
1629 }
1630 vm_object_deallocate(object);
1631 object = VM_OBJECT_NULL;
1632 break;
1633 }
1634
1635 /*
1636 * Finally map the object into the shared region.
1637 */
1638 target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
1639 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1640 vmk_flags.vmkf_already = TRUE;
1641 vmk_flags.vmkf_no_copy_on_read = 1;
1642 vmk_flags.vmf_permanent = shared_region_make_permanent(shared_region,
1643 mappings[0].sms_max_prot);
1644
1645 kr = vm_map_enter(
1646 sr_map,
1647 &target_address,
1648 vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1649 0,
1650 vmk_flags,
1651 object,
1652 0,
1653 TRUE,
1654 mappings[0].sms_init_prot & VM_PROT_ALL,
1655 mappings[0].sms_max_prot & VM_PROT_ALL,
1656 VM_INHERIT_DEFAULT);
1657 if (kr != KERN_SUCCESS) {
1658 printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
1659 vm_object_deallocate(object);
1660 break;
1661 }
1662
1663 if (mappings[0].sms_address < *sfm_min_address) {
1664 *sfm_min_address = mappings[0].sms_address;
1665 }
1666
1667 if (os_add_overflow(mappings[0].sms_address,
1668 mappings[0].sms_size,
1669 &sfm_end) ||
1670 (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1671 mappings[0].sms_address)) {
1672 /* overflow */
1673 kr = KERN_INVALID_ARGUMENT;
1674 break;
1675 }
1676
1677 if (sfm_end > *sfm_max_address) {
1678 *sfm_max_address = sfm_end;
1679 }
1680
1681 continue;
1682 }
1683
1684 /* get the VM object associated with the file to be mapped */
1685 file_object = memory_object_control_to_vm_object(file_control);
1686 assert(file_object);
1687
1688 if (!file_object->object_is_shared_cache) {
1689 vm_object_lock(file_object);
1690 file_object->object_is_shared_cache = true;
1691 vm_object_unlock(file_object);
1692 }
1693
1694 #if CONFIG_SECLUDED_MEMORY
1695 /*
1696 * Camera will need the shared cache, so don't put its pages on the
1697 * secluded queue when this is (or is assumed to be) the primary system
1698 * shared region. Also keep DEXT shared cache pages off the secluded queue.
1699 */
1700 if (primary_system_shared_region == NULL ||
1701 primary_system_shared_region == shared_region ||
1702 shared_region->sr_driverkit) {
1703 memory_object_mark_eligible_for_secluded(file_control, FALSE);
1704 }
1705 #endif /* CONFIG_SECLUDED_MEMORY */
1706
1707 /* establish the mappings for that file */
1708 for (i = 0; i < mappings_count; i++) {
1709 SHARED_REGION_TRACE_INFO(
1710 ("shared_region: mapping[%d]: "
1711 "address:0x%016llx size:0x%016llx offset:0x%016llx "
1712 "maxprot:0x%x prot:0x%x\n",
1713 i,
1714 (long long)mappings[i].sms_address,
1715 (long long)mappings[i].sms_size,
1716 (long long)mappings[i].sms_file_offset,
1717 mappings[i].sms_max_prot,
1718 mappings[i].sms_init_prot));
1719
1720 if (mappings[i].sms_address < *sfm_min_address) {
1721 *sfm_min_address = mappings[i].sms_address;
1722 }
1723
1724 if (os_add_overflow(mappings[i].sms_address,
1725 mappings[i].sms_size,
1726 &sfm_end) ||
1727 (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1728 mappings[i].sms_address)) {
1729 /* overflow */
1730 kr = KERN_INVALID_ARGUMENT;
1731 break;
1732 }
1733
1734 if (sfm_end > *sfm_max_address) {
1735 *sfm_max_address = sfm_end;
1736 }
1737
1738 if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1739 /* zero-filled memory */
1740 map_port = MACH_PORT_NULL;
1741 } else {
1742 /* file-backed memory */
1743 __IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1744 }
1745
1746 /*
1747 * Remember which mappings need sliding.
1748 */
1749 if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1750 if (*mappings_to_slide_cnt == vmsr_num_slides) {
1751 SHARED_REGION_TRACE_INFO(
1752 ("shared_region: mapping[%d]: "
1753 "address:0x%016llx size:0x%016llx "
1754 "offset:0x%016llx "
1755 "maxprot:0x%x prot:0x%x "
1756 "too many mappings to slide...\n",
1757 i,
1758 (long long)mappings[i].sms_address,
1759 (long long)mappings[i].sms_size,
1760 (long long)mappings[i].sms_file_offset,
1761 mappings[i].sms_max_prot,
1762 mappings[i].sms_init_prot));
1763 } else {
1764 mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1765 *mappings_to_slide_cnt += 1;
1766 }
1767 }
1768
1769 /* mapping's address is relative to the shared region base */
1770 target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);
1771
1772 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1773 vmk_flags.vmkf_already = TRUE;
1774 /* no copy-on-read for mapped binaries */
1775 vmk_flags.vmkf_no_copy_on_read = 1;
1776 vmk_flags.vmf_permanent = shared_region_make_permanent(
1777 shared_region,
1778 mappings[i].sms_max_prot);
1779 vmk_flags.vmf_tpro = shared_region_tpro_protect(
1780 shared_region,
1781 mappings[i].sms_max_prot);
1782
1783 /* establish that mapping, OK if it's "already" there */
1784 if (map_port == MACH_PORT_NULL) {
1785 /*
1786 * We want to map some anonymous memory in a shared region.
1787 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1788 */
1789 obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1790 object = vm_object_allocate(obj_size);
1791 if (object == VM_OBJECT_NULL) {
1792 kr = KERN_RESOURCE_SHORTAGE;
1793 } else {
1794 kr = vm_map_enter(
1795 sr_map,
1796 &target_address,
1797 vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1798 0,
1799 vmk_flags,
1800 object,
1801 0,
1802 TRUE,
1803 mappings[i].sms_init_prot & VM_PROT_ALL,
1804 mappings[i].sms_max_prot & VM_PROT_ALL,
1805 VM_INHERIT_DEFAULT);
1806 }
1807 } else {
1808 object = VM_OBJECT_NULL; /* no anonymous memory here */
1809 kr = vm_map_enter_mem_object(
1810 sr_map,
1811 &target_address,
1812 vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1813 0,
1814 vmk_flags,
1815 map_port,
1816 mappings[i].sms_file_offset,
1817 TRUE,
1818 mappings[i].sms_init_prot & VM_PROT_ALL,
1819 mappings[i].sms_max_prot & VM_PROT_ALL,
1820 VM_INHERIT_DEFAULT);
1821 }
1822
1823 if (kr == KERN_SUCCESS) {
1824 /*
1825 * Record the first successful mapping(s) in the shared
1826 * region by file. We're protected by "sr_mapping_in_progress"
1827 * here, so no need to lock "shared_region".
1828 *
1829 * Note that if we have an AOT shared cache (ARM) for a
1830 * translated task, then it's always the first file.
1831 * The original "native" (i.e. x86) shared cache is the
1832 * second file.
1833 */
1834
1835 if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1836 shared_region->sr_first_mapping = target_address;
1837 }
1838
1839 if (*mappings_to_slide_cnt > 0 &&
1840 mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1841 slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1842 slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1843 }
1844
1845 /*
1846 * Record the lowest writable address in this
1847 * sub map, to log any unexpected unnesting below
1848 * that address (see log_unnest_badness()).
1849 */
1850 if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1851 sr_map->is_nested_map &&
1852 (lowest_unnestable_addr == 0 ||
1853 (target_address < lowest_unnestable_addr))) {
1854 lowest_unnestable_addr = target_address;
1855 }
1856 } else {
1857 if (map_port == MACH_PORT_NULL) {
1858 /*
1859 * Get rid of the VM object we just created
1860 * but failed to map.
1861 */
1862 vm_object_deallocate(object);
1863 object = VM_OBJECT_NULL;
1864 }
1865 if (kr == KERN_MEMORY_PRESENT) {
1866 /*
1867 * This exact mapping was already there:
1868 * that's fine.
1869 */
1870 SHARED_REGION_TRACE_INFO(
1871 ("shared_region: mapping[%d]: "
1872 "address:0x%016llx size:0x%016llx "
1873 "offset:0x%016llx "
1874 "maxprot:0x%x prot:0x%x "
1875 "already mapped...\n",
1876 i,
1877 (long long)mappings[i].sms_address,
1878 (long long)mappings[i].sms_size,
1879 (long long)mappings[i].sms_file_offset,
1880 mappings[i].sms_max_prot,
1881 mappings[i].sms_init_prot));
1882 /*
1883 * We didn't establish this mapping ourselves;
1884 * let's reset its size, so that we do not
1885 * attempt to undo it if an error occurs later.
1886 */
1887 mappings[i].sms_size = 0;
1888 kr = KERN_SUCCESS;
1889 } else {
1890 break;
1891 }
1892 }
1893 }
1894
1895 if (kr != KERN_SUCCESS) {
1896 break;
1897 }
1898 }
1899
1900 if (kr != KERN_SUCCESS) {
1901 /* the last mapping we tried (mappings[i]) failed ! */
1902 assert(i < mappings_count);
1903 SHARED_REGION_TRACE_ERROR(
1904 ("shared_region: mapping[%d]: "
1905 "address:0x%016llx size:0x%016llx "
1906 "offset:0x%016llx "
1907 "maxprot:0x%x prot:0x%x failed 0x%x\n",
1908 i,
1909 (long long)mappings[i].sms_address,
1910 (long long)mappings[i].sms_size,
1911 (long long)mappings[i].sms_file_offset,
1912 mappings[i].sms_max_prot,
1913 mappings[i].sms_init_prot,
1914 kr));
1915
1916 /*
1917 * Respect the design of vm_shared_region_undo_mappings():
1918 * we are holding sr_mapping_in_progress == true here, so sr_map must
1919 * not be NULL; otherwise vm_shared_region_undo_mappings() would block,
1920 * waiting for sr_mapping_in_progress to become false.
1921 */
1922 assert(sr_map != NULL);
1923 /* undo all the previous mappings */
1924 vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
1925 return kr;
1926 }
1927
1928 *lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1929 *sr_map_ptr = sr_map;
1930 return KERN_SUCCESS;
1931 }
1932
1933 /* forward declaration */
1934 __attribute__((noinline))
1935 static void
1936 vm_shared_region_map_file_final(
1937 vm_shared_region_t shared_region,
1938 vm_map_t sr_map,
1939 mach_vm_offset_t sfm_min_address,
1940 mach_vm_offset_t sfm_max_address);
1941
1942 /*
1943 * Establish some mappings of a file in the shared region.
1944 * This is used by "dyld" via the shared_region_map_np() system call
1945 * to populate the shared region with the appropriate shared cache.
1946 *
1947 * One could also call it several times to incrementally load several
1948 * libraries, as long as they do not overlap.
1949 * It will return KERN_SUCCESS if the mappings were successfully established
1950 * or if they were already established identically by another process.
1951 */
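/*
 * A purely illustrative sketch of the input (the field names are the ones
 * used below; the layout and numbers are hypothetical, not from a real
 * dyld_shared_cache):
 *
 *   sr_file_mappings[k].mappings[j].sms_address     = sr_base_address + <offset>
 *   sr_file_mappings[k].mappings[j].sms_size        = <segment size>
 *   sr_file_mappings[k].mappings[j].sms_file_offset = <offset in cache file>
 *   sr_file_mappings[k].mappings[j].sms_init_prot / sms_max_prot
 *       may include VM_PROT_ZF (zero-fill) or VM_PROT_SLIDE (needs rebasing)
 *
 * Entries with VM_PROT_SLIDE also carry sms_slide_start / sms_slide_size,
 * which describe the slide info that the second pass below feeds to
 * vm_shared_region_slide().
 */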
1952 __attribute__((noinline))
1953 kern_return_t
1954 vm_shared_region_map_file(
1955 vm_shared_region_t shared_region,
1956 int sr_file_mappings_count,
1957 struct _sr_file_mappings *sr_file_mappings)
1958 {
1959 kern_return_t kr = KERN_SUCCESS;
1960 unsigned int i;
1961 unsigned int mappings_to_slide_cnt = 0;
1962 mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1;
1963 mach_vm_offset_t sfm_max_address = 0;
1964 vm_map_t sr_map = NULL;
1965 vm_map_offset_t lowest_unnestable_addr = 0;
1966 unsigned int vmsr_num_slides = 0;
1967 typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
1968 slid_mappings_t *slid_mappings = NULL; /* [0..vmsr_num_slides] */
1969 memory_object_control_t *slid_file_controls = NULL; /* [0..vmsr_num_slides] */
1970 struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
1971 struct _sr_file_mappings *srfmp;
1972
1973 /*
1974 * Figure out how many of the mappings have slides.
1975 */
1976 for (srfmp = &sr_file_mappings[0];
1977 srfmp < &sr_file_mappings[sr_file_mappings_count];
1978 srfmp++) {
1979 for (i = 0; i < srfmp->mappings_count; ++i) {
1980 if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1981 ++vmsr_num_slides;
1982 }
1983 }
1984 }
1985
1986 /* Allocate per slide data structures */
1987 if (vmsr_num_slides > 0) {
1988 slid_mappings =
1989 kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
1990 slid_file_controls =
1991 kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
1992 mappings_to_slide =
1993 kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
1994 }
1995
1996 kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
1997 &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
1998 &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
1999 if (kr != KERN_SUCCESS) {
2000 vm_shared_region_lock();
2001 goto done;
2002 }
2003 assert(vmsr_num_slides == mappings_to_slide_cnt);
2004
2005 /*
2006 * The call above installed direct mappings to the shared cache file.
2007 * Now we go back and overwrite the mappings that need relocation
2008 * with a special shared region pager.
2009 *
2010 * Note that this does copyin() of data, needed by the pager, which
2011 * the previous code just established mappings for. This is why we
2012 * do it in a separate pass.
2013 */
2014 #if __has_feature(ptrauth_calls)
2015 /*
2016 * need to allocate storage needed for any sr_auth_sections
2017 */
2018 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2019 if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
2020 shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2021 !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
2022 ++shared_region->sr_num_auth_section;
2023 }
2024 }
2025 if (shared_region->sr_num_auth_section > 0) {
2026 shared_region->sr_auth_section =
2027 kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
2028 Z_WAITOK | Z_ZERO);
2029 }
2030 #endif /* __has_feature(ptrauth_calls) */
2031 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2032 kr = vm_shared_region_slide(shared_region->sr_slide,
2033 mappings_to_slide[i]->sms_file_offset,
2034 mappings_to_slide[i]->sms_size,
2035 mappings_to_slide[i]->sms_slide_start,
2036 mappings_to_slide[i]->sms_slide_size,
2037 slid_mappings[i],
2038 slid_file_controls[i],
2039 mappings_to_slide[i]->sms_max_prot);
2040 if (kr != KERN_SUCCESS) {
2041 SHARED_REGION_TRACE_ERROR(
2042 ("shared_region: region_slide("
2043 "slide:0x%x start:0x%016llx "
2044 "size:0x%016llx) failed 0x%x\n",
2045 shared_region->sr_slide,
2046 (long long)mappings_to_slide[i]->sms_slide_start,
2047 (long long)mappings_to_slide[i]->sms_slide_size,
2048 kr));
2049 vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
2050 &sr_file_mappings[0],
2051 &sr_file_mappings[sr_file_mappings_count - 1],
2052 sr_file_mappings_count);
2053 vm_shared_region_lock();
2054 goto done;
2055 }
2056 }
2057
2058 assert(kr == KERN_SUCCESS);
2059
2060 /* adjust the map's "lowest_unnestable_start" */
2061 lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
2062 if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
2063 vm_map_lock(sr_map);
2064 sr_map->lowest_unnestable_start = lowest_unnestable_addr;
2065 vm_map_unlock(sr_map);
2066 }
2067
2068 vm_shared_region_lock();
2069 assert(shared_region->sr_ref_count > 0);
2070 assert(shared_region->sr_mapping_in_progress);
2071
2072 vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
2073
2074 done:
2075 /*
2076 * We're done working on that shared region.
2077 * Wake up any waiting threads.
2078 */
2079 shared_region->sr_mapping_in_progress = FALSE;
2080 thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
2081 vm_shared_region_unlock();
2082
2083 #if __has_feature(ptrauth_calls)
2084 if (kr == KERN_SUCCESS) {
2085 /*
2086 * Since authenticated mappings were just added to the shared region,
2087 * go back and remap them into private mappings for this task.
2088 */
2089 kr = vm_shared_region_auth_remap(shared_region);
2090 }
2091 #endif /* __has_feature(ptrauth_calls) */
2092
2093 /* Cache shared region info needed for telemetry in the task */
2094 task_t task;
2095 if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
2096 mach_vm_offset_t start_address;
2097 (void)vm_shared_region_start_address(shared_region, &start_address, task);
2098 }
2099
2100 SHARED_REGION_TRACE_DEBUG(
2101 ("shared_region: map(%p) <- 0x%x \n",
2102 (void *)VM_KERNEL_ADDRPERM(shared_region), kr));
2103 if (vmsr_num_slides > 0) {
2104 kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
2105 kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
2106 kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
2107 mappings_to_slide);
2108 }
2109 return kr;
2110 }
2111
2112 /*
2113 * Final part of vm_shared_region_map_file().
2114 * Kept in a separate function to avoid blowing out the stack.
2115 */
2116 __attribute__((noinline))
2117 static void
2118 vm_shared_region_map_file_final(
2119 vm_shared_region_t shared_region,
2120 vm_map_t sr_map __unused,
2121 mach_vm_offset_t sfm_min_address __unused,
2122 mach_vm_offset_t sfm_max_address __unused)
2123 {
2124 struct _dyld_cache_header sr_cache_header;
2125 int error;
2126 size_t image_array_length;
2127 struct _dyld_cache_image_text_info *sr_image_layout;
2128 boolean_t locally_built = FALSE;
2129
2130
2131 /*
2132 * Copy in the shared region UUID to the shared region structure.
2133 * We do this indirectly by first copying in the shared cache header
2134 * and then copying the UUID from there, because we'll need to look
2135 * at other content from the shared cache header.
2136 */
2137 if (!shared_region->sr_uuid_copied) {
2138 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
2139 (char *)&sr_cache_header,
2140 sizeof(sr_cache_header));
2141 if (error == 0) {
2142 memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
2143 shared_region->sr_uuid_copied = TRUE;
2144 locally_built = sr_cache_header.locallyBuiltCache;
2145 } else {
2146 #if DEVELOPMENT || DEBUG
2147 panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2148 "offset:0 size:0x%016llx) failed with %d\n",
2149 (long long)shared_region->sr_base_address,
2150 (long long)shared_region->sr_first_mapping,
2151 (long long)sizeof(sr_cache_header),
2152 error);
2153 #endif /* DEVELOPMENT || DEBUG */
2154 shared_region->sr_uuid_copied = FALSE;
2155 }
2156 }
2157
2158 /*
2159 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by
2160 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
2161 * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
2162 * region. In that case, launchd re-exec's itself, so we may go through this path multiple times. We
2163 * let the most recent one win.
2164 *
2165 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
2166 */
2167 bool is_init_task = (task_pid(current_task()) == 1);
2168 if (shared_region->sr_uuid_copied && is_init_task) {
2169 /* Copy in the shared cache layout if we're running with a locally built shared cache */
2170 if (locally_built) {
2171 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
2172 image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
2173 sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
2174 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
2175 sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
2176 if (error == 0) {
2177 if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
2178 panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
2179 }
2180 shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
2181 for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
2182 memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
2183 sizeof(shared_region->sr_images[index].imageUUID));
2184 shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
2185 }
2186
2187 shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
2188 } else {
2189 #if DEVELOPMENT || DEBUG
2190 panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2191 "offset:0x%016llx size:0x%016llx) failed with %d\n",
2192 (long long)shared_region->sr_base_address,
2193 (long long)shared_region->sr_first_mapping,
2194 (long long)sr_cache_header.imagesTextOffset,
2195 (long long)image_array_length,
2196 error);
2197 #endif /* DEVELOPMENT || DEBUG */
2198 }
2199 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
2200 kfree_data(sr_image_layout, image_array_length);
2201 sr_image_layout = NULL;
2202 }
2203 primary_system_shared_region = shared_region;
2204 }
2205
2206 /*
2207 * If we succeeded, we know the bounds of the shared region.
2208 * Trim our pmaps to only cover this range (if applicable to
2209 * this platform).
2210 */
2211 if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
2212 pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
2213 }
2214 }
2215
2216 /*
2217 * Retrieve a task's shared region and grab an extra reference to
2218 * make sure it doesn't disappear while the caller is using it.
2219 * The caller is responsible for consuming that extra reference if
2220 * necessary.
2221 *
2222 * This also tries to trim the pmap for the shared region.
2223 */
2224 vm_shared_region_t
2225 vm_shared_region_trim_and_get(task_t task)
2226 {
2227 vm_shared_region_t shared_region;
2228 ipc_port_t sr_handle;
2229 vm_named_entry_t sr_mem_entry;
2230 vm_map_t sr_map;
2231
2232 /* Get the shared region and the map. */
2233 shared_region = vm_shared_region_get(task);
2234 if (shared_region == NULL) {
2235 return NULL;
2236 }
2237
2238 sr_handle = shared_region->sr_mem_entry;
2239 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
2240 sr_map = sr_mem_entry->backing.map;
2241
2242 /* Trim the pmap if possible. */
2243 if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
2244 pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
2245 }
2246
2247 return shared_region;
2248 }
2249
2250 /*
2251 * Enter the appropriate shared region into "map" for "task".
2252 * This involves looking up the shared region (and possibly creating a new
2253 * one) for the desired environment, then mapping the VM sub map into the
2254 * task's VM "map", with the appropriate level of pmap-nesting.
2255 */
2256 kern_return_t
2257 vm_shared_region_enter(
2258 struct _vm_map *map,
2259 struct task *task,
2260 boolean_t is_64bit,
2261 void *fsroot,
2262 cpu_type_t cpu,
2263 cpu_subtype_t cpu_subtype,
2264 boolean_t reslide,
2265 boolean_t is_driverkit,
2266 uint32_t rsr_version)
2267 {
2268 kern_return_t kr;
2269 vm_shared_region_t shared_region;
2270 vm_map_offset_t sr_address, sr_offset, target_address;
2271 vm_map_size_t sr_size, mapping_size;
2272 vm_map_offset_t sr_pmap_nesting_start;
2273 vm_map_size_t sr_pmap_nesting_size;
2274 ipc_port_t sr_handle;
2275 vm_prot_t cur_prot, max_prot;
2276 vm_map_kernel_flags_t vmk_flags;
2277
2278 SHARED_REGION_TRACE_DEBUG(
2279 ("shared_region: -> "
2280 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2281 (void *)VM_KERNEL_ADDRPERM(map),
2282 (void *)VM_KERNEL_ADDRPERM(task),
2283 (void *)VM_KERNEL_ADDRPERM(fsroot),
2284 cpu, cpu_subtype, is_64bit, is_driverkit));
2285
2286 /* lookup (create if needed) the shared region for this environment */
2287 shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
2288 if (shared_region == NULL) {
2289 /* this should not happen ! */
2290 SHARED_REGION_TRACE_ERROR(
2291 ("shared_region: -> "
2292 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2293 "lookup failed !\n",
2294 (void *)VM_KERNEL_ADDRPERM(map),
2295 (void *)VM_KERNEL_ADDRPERM(task),
2296 (void *)VM_KERNEL_ADDRPERM(fsroot),
2297 cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2298 //panic("shared_region_enter: lookup failed");
2299 return KERN_FAILURE;
2300 }
2301
2302 kr = KERN_SUCCESS;
2303 /* no need to lock since this data is never modified */
2304 sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2305 sr_size = (vm_map_size_t)shared_region->sr_size;
2306 sr_handle = shared_region->sr_mem_entry;
2307 sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2308 sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2309 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
2310
2311 cur_prot = VM_PROT_READ;
2312 if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2313 /*
2314 * XXX BINARY COMPATIBILITY
2315 * java6 apparently needs to modify some code in the
2316 * dyld shared cache and needs to be allowed to add
2317 * write access...
2318 */
2319 max_prot = VM_PROT_ALL;
2320 } else {
2321 max_prot = VM_PROT_READ;
2322 /* make it "permanent" to protect against re-mappings */
2323 vmk_flags.vmf_permanent = true;
2324 }
2325
2326 /*
2327 * Start mapping the shared region's VM sub map into the task's VM map.
2328 */
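/*
 * Layout sketch: the region is mapped in up to three slices:
 *   [sr_address, sr_pmap_nesting_start)                     unnested
 *   [sr_pmap_nesting_start, .. + sr_pmap_nesting_size)      nested pmap
 *   [end of nesting range, end of region)                   unnested remainder
 * On most architectures the nesting range covers the whole shared region,
 * so only the middle mapping is established (see the comment further below).
 */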
2329 sr_offset = 0;
2330
2331 if (sr_pmap_nesting_start > sr_address) {
2332 /* we need to map a range without pmap-nesting first */
2333 target_address = sr_address;
2334 mapping_size = sr_pmap_nesting_start - sr_address;
2335 kr = vm_map_enter_mem_object(
2336 map,
2337 &target_address,
2338 mapping_size,
2339 0,
2340 vmk_flags,
2341 sr_handle,
2342 sr_offset,
2343 TRUE,
2344 cur_prot,
2345 max_prot,
2346 VM_INHERIT_SHARE);
2347 if (kr != KERN_SUCCESS) {
2348 SHARED_REGION_TRACE_ERROR(
2349 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2350 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2351 (void *)VM_KERNEL_ADDRPERM(map),
2352 (void *)VM_KERNEL_ADDRPERM(task),
2353 (void *)VM_KERNEL_ADDRPERM(fsroot),
2354 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2355 (long long)target_address,
2356 (long long)mapping_size,
2357 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2358 goto done;
2359 }
2360 SHARED_REGION_TRACE_DEBUG(
2361 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2362 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2363 (void *)VM_KERNEL_ADDRPERM(map),
2364 (void *)VM_KERNEL_ADDRPERM(task),
2365 (void *)VM_KERNEL_ADDRPERM(fsroot),
2366 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2367 (long long)target_address, (long long)mapping_size,
2368 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2369 sr_offset += mapping_size;
2370 sr_size -= mapping_size;
2371 }
2372
2373 /* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2374 vmk_flags.vmkf_nested_pmap = true;
2375 vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
2376
2377 /*
2378 * Use pmap-nesting to map the majority of the shared region into the task's
2379 * VM space. Very rarely will architectures have a shared region that isn't
2380 * the same size as the pmap-nesting region, or start at a different address
2381 * than the pmap-nesting region, so this code will map the entirety of the
2382 * shared region for most architectures.
2383 */
2384 assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2385 target_address = sr_pmap_nesting_start;
2386 kr = vm_map_enter_mem_object(
2387 map,
2388 &target_address,
2389 sr_pmap_nesting_size,
2390 0,
2391 vmk_flags,
2392 sr_handle,
2393 sr_offset,
2394 TRUE,
2395 cur_prot,
2396 max_prot,
2397 VM_INHERIT_SHARE);
2398 if (kr != KERN_SUCCESS) {
2399 SHARED_REGION_TRACE_ERROR(
2400 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2401 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2402 (void *)VM_KERNEL_ADDRPERM(map),
2403 (void *)VM_KERNEL_ADDRPERM(task),
2404 (void *)VM_KERNEL_ADDRPERM(fsroot),
2405 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2406 (long long)target_address,
2407 (long long)sr_pmap_nesting_size,
2408 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2409 goto done;
2410 }
2411 SHARED_REGION_TRACE_DEBUG(
2412 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2413 "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2414 (void *)VM_KERNEL_ADDRPERM(map),
2415 (void *)VM_KERNEL_ADDRPERM(task),
2416 (void *)VM_KERNEL_ADDRPERM(fsroot),
2417 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2418 (long long)target_address, (long long)sr_pmap_nesting_size,
2419 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2420
2421 sr_offset += sr_pmap_nesting_size;
2422 sr_size -= sr_pmap_nesting_size;
2423
2424 if (sr_size > 0) {
2425 /* and there's some left to be mapped without pmap-nesting */
2426 vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
2427 target_address = sr_address + sr_offset;
2428 mapping_size = sr_size;
2429 kr = vm_map_enter_mem_object(
2430 map,
2431 &target_address,
2432 mapping_size,
2433 0,
2434 VM_MAP_KERNEL_FLAGS_FIXED(),
2435 sr_handle,
2436 sr_offset,
2437 TRUE,
2438 cur_prot,
2439 max_prot,
2440 VM_INHERIT_SHARE);
2441 if (kr != KERN_SUCCESS) {
2442 SHARED_REGION_TRACE_ERROR(
2443 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2444 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2445 (void *)VM_KERNEL_ADDRPERM(map),
2446 (void *)VM_KERNEL_ADDRPERM(task),
2447 (void *)VM_KERNEL_ADDRPERM(fsroot),
2448 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2449 (long long)target_address,
2450 (long long)mapping_size,
2451 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2452 goto done;
2453 }
2454 SHARED_REGION_TRACE_DEBUG(
2455 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2456 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2457 (void *)VM_KERNEL_ADDRPERM(map),
2458 (void *)VM_KERNEL_ADDRPERM(task),
2459 (void *)VM_KERNEL_ADDRPERM(fsroot),
2460 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2461 (long long)target_address, (long long)mapping_size,
2462 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2463 sr_offset += mapping_size;
2464 sr_size -= mapping_size;
2465 }
2466 assert(sr_size == 0);
2467
2468 done:
2469 if (kr == KERN_SUCCESS) {
2470 /* let the task use that shared region */
2471 vm_shared_region_set(task, shared_region);
2472 } else {
2473 /* drop our reference since we're not using it */
2474 vm_shared_region_deallocate(shared_region);
2475 vm_shared_region_set(task, NULL);
2476 }
2477
2478 SHARED_REGION_TRACE_DEBUG(
2479 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2480 (void *)VM_KERNEL_ADDRPERM(map),
2481 (void *)VM_KERNEL_ADDRPERM(task),
2482 (void *)VM_KERNEL_ADDRPERM(fsroot),
2483 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2484 kr));
2485 return kr;
2486 }
2487
2488 void
2489 vm_shared_region_remove(
2490 task_t task,
2491 vm_shared_region_t sr)
2492 {
2493 vm_map_t map;
2494 mach_vm_offset_t start;
2495 mach_vm_size_t size;
2496 vm_map_kernel_flags_t vmk_flags;
2497 kern_return_t kr;
2498
2499 if (sr == NULL) {
2500 return;
2501 }
2502 map = get_task_map(task);
2503 start = sr->sr_base_address;
2504 size = sr->sr_size;
2505
2506 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2507 vmk_flags.vmkf_overwrite_immutable = true;
2508 vmk_flags.vm_tag = VM_MEMORY_DYLD;
2509
2510 /* range_id is set by mach_vm_map_kernel */
2511 kr = mach_vm_map_kernel(map,
2512 &start,
2513 size,
2514 0, /* mask */
2515 vmk_flags,
2516 MACH_PORT_NULL,
2517 0,
2518 FALSE, /* copy */
2519 VM_PROT_NONE,
2520 VM_PROT_NONE,
2521 VM_INHERIT_DEFAULT);
2522 if (kr != KERN_SUCCESS) {
2523 printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2524 }
2525 }
2526
2527 #define SANE_SLIDE_INFO_SIZE (2560*1024) /*Can be changed if needed*/
2528
2529 kern_return_t
2530 vm_shared_region_sliding_valid(uint32_t slide)
2531 {
2532 kern_return_t kr = KERN_SUCCESS;
2533 vm_shared_region_t sr = vm_shared_region_get(current_task());
2534
2535 /* No region yet? we're fine. */
2536 if (sr == NULL) {
2537 return kr;
2538 }
2539
2540 if (sr->sr_slide != 0 && slide != 0) {
2541 if (slide == sr->sr_slide) {
2542 /*
2543 * A request to slide when we've
2544 * already slid with exactly the
2545 * same slide value before.
2546 * That isn't technically wrong, but
2547 * we don't want to slide again, so
2548 * we return this error code.
2549 */
2550 kr = KERN_INVALID_ARGUMENT;
2551 } else {
2552 printf("Mismatched shared region slide\n");
2553 kr = KERN_FAILURE;
2554 }
2555 }
2556 vm_shared_region_deallocate(sr);
2557 return kr;
2558 }
2559
2560 /*
2561 * Actually create (really overwrite) the mapping to part of the shared cache which
2562 * undergoes relocation. This routine reads in the relocation info from dyld and
2563 * verifies it. It then creates a (or finds a matching) shared region pager which
2564 * handles the actual modification of the page contents and installs the mapping
2565 * using that pager.
2566 */
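/*
 * Rough flow, matching the code below:
 *   1. copyin() and sanity-check the slide info provided by dyld;
 *   2. allocate and fill in a vm_shared_region_slide_info (on arm64e,
 *      authenticated sections are just recorded so that per-shared_region_id
 *      pagers can be created later);
 *   3. find the existing map entry covering "slid_mapping" in the sub map;
 *   4. set up a shared_region pager for that object/offset/slide info;
 *   5. overwrite the original mapping with one backed by that pager.
 */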
2567 kern_return_t
2568 vm_shared_region_slide_mapping(
2569 vm_shared_region_t sr,
2570 user_addr_t slide_info_addr,
2571 mach_vm_size_t slide_info_size,
2572 mach_vm_offset_t start,
2573 mach_vm_size_t size,
2574 mach_vm_offset_t slid_mapping,
2575 uint32_t slide,
2576 memory_object_control_t sr_file_control,
2577 vm_prot_t prot)
2578 {
2579 kern_return_t kr;
2580 vm_object_t object = VM_OBJECT_NULL;
2581 vm_shared_region_slide_info_t si = NULL;
2582 vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL;
2583 struct vm_map_entry tmp_entry_store;
2584 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
2585 vm_map_t sr_map;
2586 vm_map_kernel_flags_t vmk_flags;
2587 vm_map_offset_t map_addr;
2588 void *slide_info_entry = NULL;
2589 int error;
2590
2591 assert(sr->sr_slide_in_progress);
2592
2593 if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2594 return KERN_INVALID_ARGUMENT;
2595 }
2596
2597 /*
2598 * Copy in and verify the relocation information.
2599 */
2600 if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2601 printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2602 return KERN_FAILURE;
2603 }
2604 if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2605 printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2606 return KERN_FAILURE;
2607 }
2608
2609 slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2610 if (slide_info_entry == NULL) {
2611 return KERN_RESOURCE_SHORTAGE;
2612 }
2613 error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2614 if (error) {
2615 printf("copyin of slide_info failed\n");
2616 kr = KERN_INVALID_ADDRESS;
2617 goto done;
2618 }
2619
2620 if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2621 printf("Sanity Check failed for slide_info\n");
2622 goto done;
2623 }
2624
2625 /*
2626 * Allocate and fill in a vm_shared_region_slide_info.
2627 * This will either be used by a new pager, or used to find
2628 * a pre-existing matching pager.
2629 */
2630 object = memory_object_control_to_vm_object(sr_file_control);
2631 if (object == VM_OBJECT_NULL || object->internal) {
2632 object = VM_OBJECT_NULL;
2633 kr = KERN_INVALID_ADDRESS;
2634 goto done;
2635 }
2636
2637 si = kalloc_type(struct vm_shared_region_slide_info,
2638 Z_WAITOK | Z_NOFAIL);
2639 vm_object_lock(object);
2640
2641 vm_object_reference_locked(object); /* for si->slide_object */
2642 object->object_is_shared_cache = TRUE;
2643 vm_object_unlock(object);
2644
2645 si->si_slide_info_entry = slide_info_entry;
2646 si->si_slide_info_size = slide_info_size;
2647
2648 assert(slid_mapping != (mach_vm_offset_t) -1);
2649 si->si_slid_address = slid_mapping + sr->sr_base_address;
2650 si->si_slide_object = object;
2651 si->si_start = start;
2652 si->si_end = si->si_start + size;
2653 si->si_slide = slide;
2654 #if __has_feature(ptrauth_calls)
2655 /*
2656 * If there is authenticated pointer data in this slid mapping,
2657 * then just add the information needed to create new pagers for
2658 * different shared_region_id's later.
2659 */
2660 if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2661 sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2662 !(prot & VM_PROT_NOAUTH)) {
2663 if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
2664 printf("Too many auth/private sections for shared region!!\n");
2665 kr = KERN_INVALID_ARGUMENT;
2666 goto done;
2667 }
2668 si->si_ptrauth = TRUE;
2669 sr->sr_auth_section[sr->sr_next_auth_section++] = si;
2670 /*
2671 * Remember the shared region, since that's where we'll
2672 * stash this info for all auth pagers to share. Each pager
2673 * will need to take a reference to it.
2674 */
2675 si->si_shared_region = sr;
2676 kr = KERN_SUCCESS;
2677 goto done;
2678 }
2679 si->si_shared_region = NULL;
2680 si->si_ptrauth = FALSE;
2681 #endif /* __has_feature(ptrauth_calls) */
2682
2683 /*
2684 * find the pre-existing shared region's map entry to slide
2685 */
2686 sr_map = vm_shared_region_vm_map(sr);
2687 kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2688 if (kr != KERN_SUCCESS) {
2689 goto done;
2690 }
2691 tmp_entry = &tmp_entry_store;
2692
2693 /*
2694 * The object must exactly cover the region to slide.
2695 */
2696 assert(VME_OFFSET(tmp_entry) == start);
2697 assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2698
2699 /* create a "shared_region" sliding pager */
2700 sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2701 if (sr_pager == MEMORY_OBJECT_NULL) {
2702 kr = KERN_RESOURCE_SHORTAGE;
2703 goto done;
2704 }
2705
2706 #if CONFIG_SECLUDED_MEMORY
2707 /*
2708 * Shared region pagers used by camera or DEXT should not have
2709 * their pages go on the secluded queue.
2710 */
2711 if (primary_system_shared_region == NULL ||
2712 primary_system_shared_region == sr ||
2713 sr->sr_driverkit) {
2714 memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2715 }
2716 #endif /* CONFIG_SECLUDED_MEMORY */
2717
2718 /* map that pager over the portion of the mapping that needs sliding */
2719 map_addr = tmp_entry->vme_start;
2720 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2721 vmk_flags.vmkf_overwrite_immutable = true;
2722 vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
2723 tmp_entry->max_protection);
2724 vmk_flags.vmf_tpro = shared_region_tpro_protect(sr,
2725 prot);
2726 kr = vm_map_enter_mem_object(sr_map,
2727 &map_addr,
2728 (tmp_entry->vme_end - tmp_entry->vme_start),
2729 (mach_vm_offset_t) 0,
2730 vmk_flags,
2731 (ipc_port_t)(uintptr_t) sr_pager,
2732 0,
2733 TRUE,
2734 tmp_entry->protection,
2735 tmp_entry->max_protection,
2736 tmp_entry->inheritance);
2737 assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2738 assertf(map_addr == tmp_entry->vme_start,
2739 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2740 (uint64_t)map_addr,
2741 (uint64_t) tmp_entry->vme_start,
2742 tmp_entry);
2743
2744 /* success! */
2745 kr = KERN_SUCCESS;
2746
2747 done:
2748 if (sr_pager != NULL) {
2749 /*
2750 * Release the sr_pager reference obtained by shared_region_pager_setup().
2751 * The mapping, if it succeeded, is now holding a reference on the memory object.
2752 */
2753 memory_object_deallocate(sr_pager);
2754 sr_pager = MEMORY_OBJECT_NULL;
2755 }
2756 if (tmp_entry != NULL) {
2757 /* release extra ref on tmp_entry's VM object */
2758 vm_object_deallocate(VME_OBJECT(tmp_entry));
2759 tmp_entry = VM_MAP_ENTRY_NULL;
2760 }
2761
2762 if (kr != KERN_SUCCESS) {
2763 /* cleanup */
2764 if (si != NULL) {
2765 if (si->si_slide_object) {
2766 vm_object_deallocate(si->si_slide_object);
2767 si->si_slide_object = VM_OBJECT_NULL;
2768 }
2769 kfree_type(struct vm_shared_region_slide_info, si);
2770 si = NULL;
2771 }
2772 if (slide_info_entry != NULL) {
2773 kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2774 slide_info_entry = NULL;
2775 }
2776 }
2777 return kr;
2778 }
2779
2780 static kern_return_t
2781 vm_shared_region_slide_sanity_check_v1(
2782 vm_shared_region_slide_info_entry_v1_t s_info)
2783 {
2784 uint32_t pageIndex = 0;
2785 uint16_t entryIndex = 0;
2786 uint16_t *toc = NULL;
2787
2788 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2789 for (; pageIndex < s_info->toc_count; pageIndex++) {
2790 entryIndex = (uint16_t)(toc[pageIndex]);
2791
2792 if (entryIndex >= s_info->entry_count) {
2793 printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2794 return KERN_FAILURE;
2795 }
2796 }
2797 return KERN_SUCCESS;
2798 }
2799
2800 static kern_return_t
2801 vm_shared_region_slide_sanity_check_v2(
2802 vm_shared_region_slide_info_entry_v2_t s_info,
2803 mach_vm_size_t slide_info_size)
2804 {
2805 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2806 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2807 return KERN_FAILURE;
2808 }
2809 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2810 return KERN_FAILURE;
2811 }
2812
2813 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2814
2815 uint32_t page_starts_count = s_info->page_starts_count;
2816 uint32_t page_extras_count = s_info->page_extras_count;
2817 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2818 if (num_trailing_entries < page_starts_count) {
2819 return KERN_FAILURE;
2820 }
2821
2822 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2823 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2824 if (trailing_size >> 1 != num_trailing_entries) {
2825 return KERN_FAILURE;
2826 }
2827
2828 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2829 if (required_size < sizeof(*s_info)) {
2830 return KERN_FAILURE;
2831 }
2832
2833 if (required_size > slide_info_size) {
2834 return KERN_FAILURE;
2835 }
2836
2837 return KERN_SUCCESS;
2838 }
2839
2840 static kern_return_t
2841 vm_shared_region_slide_sanity_check_v3(
2842 vm_shared_region_slide_info_entry_v3_t s_info,
2843 mach_vm_size_t slide_info_size)
2844 {
2845 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2846 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2847 return KERN_FAILURE;
2848 }
2849 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2850 printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SL 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2851 return KERN_FAILURE;
2852 }
2853
2854 uint32_t page_starts_count = s_info->page_starts_count;
2855 mach_vm_size_t num_trailing_entries = page_starts_count;
2856 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2857 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2858 if (required_size < sizeof(*s_info)) {
2859 printf("vm_shared_region_slide_sanity_check_v3: required_size != sizeof(*s_info) 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2860 return KERN_FAILURE;
2861 }
2862
2863 if (required_size > slide_info_size) {
2864 printf("vm_shared_region_slide_sanity_check_v3: required_size != slide_info_size 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2865 return KERN_FAILURE;
2866 }
2867
2868 return KERN_SUCCESS;
2869 }
2870
2871 static kern_return_t
2872 vm_shared_region_slide_sanity_check_v4(
2873 vm_shared_region_slide_info_entry_v4_t s_info,
2874 mach_vm_size_t slide_info_size)
2875 {
2876 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2877 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2878 return KERN_FAILURE;
2879 }
2880 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2881 return KERN_FAILURE;
2882 }
2883
2884 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2885
2886 uint32_t page_starts_count = s_info->page_starts_count;
2887 uint32_t page_extras_count = s_info->page_extras_count;
2888 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2889 if (num_trailing_entries < page_starts_count) {
2890 return KERN_FAILURE;
2891 }
2892
2893 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2894 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2895 if (trailing_size >> 1 != num_trailing_entries) {
2896 return KERN_FAILURE;
2897 }
2898
2899 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2900 if (required_size < sizeof(*s_info)) {
2901 return KERN_FAILURE;
2902 }
2903
2904 if (required_size > slide_info_size) {
2905 return KERN_FAILURE;
2906 }
2907
2908 return KERN_SUCCESS;
2909 }
2910
2911
2912 static kern_return_t
2913 vm_shared_region_slide_sanity_check(
2914 vm_shared_region_slide_info_entry_t s_info,
2915 mach_vm_size_t s_info_size)
2916 {
2917 kern_return_t kr;
2918
2919 switch (s_info->version) {
2920 case 1:
2921 kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
2922 break;
2923 case 2:
2924 kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2925 break;
2926 case 3:
2927 kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2928 break;
2929 case 4:
2930 kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2931 break;
2932 default:
2933 kr = KERN_FAILURE;
2934 }
2935 return kr;
2936 }
2937
2938 static kern_return_t
2939 vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2940 {
2941 uint16_t *toc = NULL;
2942 slide_info_entry_toc_t bitmap = NULL;
2943 uint32_t i = 0, j = 0;
2944 uint8_t b = 0;
2945 uint32_t slide = si->si_slide;
2946 int is_64 = task_has_64Bit_addr(current_task());
2947
2948 vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
2949 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2950
2951 if (pageIndex >= s_info->toc_count) {
2952 printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
2953 } else {
2954 uint16_t entryIndex = (uint16_t)(toc[pageIndex]);
2955 slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
2956
2957 if (entryIndex >= s_info->entry_count) {
2958 printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
2959 } else {
2960 bitmap = &slide_info_entries[entryIndex];
2961
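/*
 * Each byte of the bitmap covers 8 consecutive 32-bit words of the page:
 * bit j of bitmap->entry[i] set means the word at page offset
 * 4 * (i * 8 + j) gets the slide added. Hypothetical example:
 * entry[1] == 0x04 (only bit j=2 set) slides the 32-bit value at
 * offset 4 * (1 * 8 + 2) = 40 within the page.
 */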
2962 for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
2963 b = bitmap->entry[i];
2964 if (b != 0) {
2965 for (j = 0; j < 8; ++j) {
2966 if (b & (1 << j)) {
2967 uint32_t *ptr_to_slide;
2968 uint32_t old_value;
2969
2970 ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
2971 old_value = *ptr_to_slide;
2972 *ptr_to_slide += slide;
2973 if (is_64 && *ptr_to_slide < old_value) {
2974 /*
2975 * We just slid the low 32 bits of a 64-bit pointer
2976 * and it looks like there should have been a carry-over
2977 * to the upper 32 bits.
2978 * The sliding failed...
2979 */
2980 printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
2981 i, j, b, slide, old_value, *ptr_to_slide);
2982 return KERN_FAILURE;
2983 }
2984 }
2985 }
2986 }
2987 }
2988 }
2989 }
2990
2991 return KERN_SUCCESS;
2992 }
2993
2994 static kern_return_t
2995 rebase_chain_32(
2996 uint8_t *page_content,
2997 uint16_t start_offset,
2998 uint32_t slide_amount,
2999 vm_shared_region_slide_info_entry_v2_t s_info)
3000 {
3001 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3002
3003 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3004 const uint32_t value_mask = ~delta_mask;
3005 const uint32_t value_add = (uint32_t)(s_info->value_add);
3006 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3007
3008 uint32_t page_offset = start_offset;
3009 uint32_t delta = 1;
3010
3011 while (delta != 0 && page_offset <= last_page_offset) {
3012 uint8_t *loc;
3013 uint32_t value;
3014
3015 loc = page_content + page_offset;
3016 memcpy(&value, loc, sizeof(value));
3017 delta = (value & delta_mask) >> delta_shift;
3018 value &= value_mask;
3019
3020 if (value != 0) {
3021 value += value_add;
3022 value += slide_amount;
3023 }
3024 memcpy(loc, &value, sizeof(value));
3025 page_offset += delta;
3026 }
3027
3028 /* If the offset went past the end of the page, then the slide data is invalid. */
3029 if (page_offset > last_page_offset) {
3030 return KERN_FAILURE;
3031 }
3032 return KERN_SUCCESS;
3033 }
3034
3035 static kern_return_t
3036 rebase_chain_64(
3037 uint8_t *page_content,
3038 uint16_t start_offset,
3039 uint32_t slide_amount,
3040 vm_shared_region_slide_info_entry_v2_t s_info)
3041 {
3042 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
3043
3044 const uint64_t delta_mask = s_info->delta_mask;
3045 const uint64_t value_mask = ~delta_mask;
3046 const uint64_t value_add = s_info->value_add;
3047 const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3048
3049 uint32_t page_offset = start_offset;
3050 uint32_t delta = 1;
3051
3052 while (delta != 0 && page_offset <= last_page_offset) {
3053 uint8_t *loc;
3054 uint64_t value;
3055
3056 loc = page_content + page_offset;
3057 memcpy(&value, loc, sizeof(value));
3058 delta = (uint32_t)((value & delta_mask) >> delta_shift);
3059 value &= value_mask;
3060
3061 if (value != 0) {
3062 value += value_add;
3063 value += slide_amount;
3064 }
3065 memcpy(loc, &value, sizeof(value));
3066 page_offset += delta;
3067 }
3068
3069 if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
3070 /* If a pointer straddling the page boundary needs to be adjusted, then
3071 * add the slide to the lower half. The encoding guarantees that the upper
3072 * half on the next page will need no masking.
3073 *
3074 * This assumes a little-endian machine and that the region being slid
3075 * never crosses a 4 GB boundary. */
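/*
 * Worked example (assuming PAGE_SIZE_FOR_SR_SLIDE == 4096): if
 * page_offset == 4092, only the low 4 bytes of the 8-byte pointer are on
 * this page, so only that 32-bit half gets slide_amount added; the upper
 * half, on the next page, needs no adjustment since the slid value stays
 * within the same 4 GB window.
 */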
3076
3077 uint8_t *loc = page_content + page_offset;
3078 uint32_t value;
3079
3080 memcpy(&value, loc, sizeof(value));
3081 value += slide_amount;
3082 memcpy(loc, &value, sizeof(value));
3083 } else if (page_offset > last_page_offset) {
3084 return KERN_FAILURE;
3085 }
3086
3087 return KERN_SUCCESS;
3088 }
3089
3090 static kern_return_t
3091 rebase_chain(
3092 boolean_t is_64,
3093 uint32_t pageIndex,
3094 uint8_t *page_content,
3095 uint16_t start_offset,
3096 uint32_t slide_amount,
3097 vm_shared_region_slide_info_entry_v2_t s_info)
3098 {
3099 kern_return_t kr;
3100 if (is_64) {
3101 kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3102 } else {
3103 kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3104 }
3105
3106 if (kr != KERN_SUCCESS) {
3107 printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3108 pageIndex, start_offset, slide_amount);
3109 }
3110 return kr;
3111 }
3112
3113 static kern_return_t
3114 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3115 {
3116 vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
3117 const uint32_t slide_amount = si->si_slide;
3118
3119 /* The high bits of the delta_mask field are nonzero precisely when the shared
3120 * cache is 64-bit. */
3121 const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
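/*
 * Example (hypothetical mask, not read from a real cache): a 64-bit cache
 * could use delta_mask == 0x00FFFF0000000000, so (delta_mask >> 32) ==
 * 0x00FFFF00 != 0 and is_64 is TRUE; a 32-bit cache keeps all of its delta
 * bits within the low 32 bits, making the shifted value 0.
 */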
3122
3123 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3124 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3125
3126 uint8_t *page_content = (uint8_t *)vaddr;
3127 uint16_t page_entry;
3128
3129 if (pageIndex >= s_info->page_starts_count) {
3130 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3131 pageIndex, s_info->page_starts_count);
3132 return KERN_FAILURE;
3133 }
3134 page_entry = page_starts[pageIndex];
3135
3136 if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
3137 return KERN_SUCCESS;
3138 }
3139
3140 if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
3141 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
3142 uint16_t info;
3143
3144 do {
3145 uint16_t page_start_offset;
3146 kern_return_t kr;
3147
3148 if (chain_index >= s_info->page_extras_count) {
3149 printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3150 chain_index, s_info->page_extras_count);
3151 return KERN_FAILURE;
3152 }
3153 info = page_extras[chain_index];
3154 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3155
3156 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3157 if (kr != KERN_SUCCESS) {
3158 return KERN_FAILURE;
3159 }
3160
3161 chain_index++;
3162 } while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
3163 } else {
3164 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3165 kern_return_t kr;
3166
3167 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3168 if (kr != KERN_SUCCESS) {
3169 return KERN_FAILURE;
3170 }
3171 }
3172
3173 return KERN_SUCCESS;
3174 }
3175
3176
3177 static kern_return_t
3178 vm_shared_region_slide_page_v3(
3179 vm_shared_region_slide_info_t si,
3180 vm_offset_t vaddr,
3181 __unused mach_vm_offset_t uservaddr,
3182 uint32_t pageIndex,
3183 #if !__has_feature(ptrauth_calls)
3184 __unused
3185 #endif /* !__has_feature(ptrauth_calls) */
3186 uint64_t jop_key)
3187 {
3188 vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
3189 const uint32_t slide_amount = si->si_slide;
3190
3191 uint8_t *page_content = (uint8_t *)vaddr;
3192 uint16_t page_entry;
3193
3194 if (pageIndex >= s_info->page_starts_count) {
3195 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3196 pageIndex, s_info->page_starts_count);
3197 return KERN_FAILURE;
3198 }
3199 page_entry = s_info->page_starts[pageIndex];
3200
3201 if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
3202 return KERN_SUCCESS;
3203 }
3204
3205 uint8_t* rebaseLocation = page_content;
3206 uint64_t delta = page_entry;
3207 do {
3208 rebaseLocation += delta;
3209 uint64_t value;
3210 memcpy(&value, rebaseLocation, sizeof(value));
3211 delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
3212
3213 // A pointer is one of :
3214 // {
3215 // uint64_t pointerValue : 51;
3216 // uint64_t offsetToNextPointer : 11;
3217 // uint64_t isBind : 1 = 0;
3218 // uint64_t authenticated : 1 = 0;
3219 // }
3220 // {
3221 // uint32_t offsetFromSharedCacheBase;
3222 // uint16_t diversityData;
3223 // uint16_t hasAddressDiversity : 1;
3224 // uint16_t hasDKey : 1;
3225 // uint16_t hasBKey : 1;
3226 // uint16_t offsetToNextPointer : 11;
3227 // uint16_t isBind : 1;
3228 // uint16_t authenticated : 1 = 1;
3229 // }
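// Hypothetical non-bind, non-authenticated example: if the 11-bit
// offsetToNextPointer field (bits 51..61) holds 2, then
// delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t) = 16,
// i.e. the next pointer to rebase is 16 bytes further into the page;
// delta == 0 ends the chain (see the loop condition below).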
3230
3231 bool isBind = (value & (1ULL << 62)) != 0;
3232 if (isBind) {
3233 return KERN_FAILURE;
3234 }
3235
3236 #if __has_feature(ptrauth_calls)
3237 uint16_t diversity_data = (uint16_t)(value >> 32);
3238 bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
3239 ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
3240 #endif /* __has_feature(ptrauth_calls) */
3241 bool isAuthenticated = (value & (1ULL << 63)) != 0;
3242
3243 if (isAuthenticated) {
3244 // The new value for a rebase is the low 32-bits of the threaded value plus the slide.
3245 value = (value & 0xFFFFFFFF) + slide_amount;
3246 // Add in the offset from the mach_header
3247 const uint64_t value_add = s_info->value_add;
3248 value += value_add;
3249
3250 #if __has_feature(ptrauth_calls)
3251 uint64_t discriminator = diversity_data;
3252 if (hasAddressDiversity) {
3253 // First calculate a new discriminator using the address of where we are trying to store the value
3254 uintptr_t pageOffset = rebaseLocation - page_content;
3255 discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3256 }
3257
3258 if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3259 /*
3260 * these pointers are used in user mode. disable the kernel key diversification
3261 * so we can sign them for use in user mode.
3262 */
3263 value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3264 }
3265 #endif /* __has_feature(ptrauth_calls) */
3266 } else {
3267 // The new value for a rebase is the low 51-bits of the threaded value plus the slide.
3268 // Regular pointer which needs to fit in 51-bits of value.
3269 // C++ RTTI uses the top bit, so we'll allow the whole top-byte
3270 // and the bottom 43-bits to be fit in to 51-bits.
			uint64_t top8Bits = value & 0x0007F80000000000ULL;
			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
			value = targetValue + slide_amount;
		}

		memcpy(rebaseLocation, &value, sizeof(value));
	} while (delta != 0);

	return KERN_SUCCESS;
}

static kern_return_t
rebase_chainv4(
	uint8_t *page_content,
	uint16_t start_offset,
	uint32_t slide_amount,
	vm_shared_region_slide_info_entry_v4_t s_info)
{
	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);

	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
	const uint32_t value_mask = ~delta_mask;
	const uint32_t value_add = (uint32_t)(s_info->value_add);
	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
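	/*
	 * The delta field selected by delta_mask counts 4-byte units. Shifting
	 * right by two bits fewer than the field's position leaves the extracted
	 * count multiplied by 4, so "delta" below is already a byte offset.
	 */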

	uint32_t page_offset = start_offset;
	uint32_t delta = 1;

	while (delta != 0 && page_offset <= last_page_offset) {
		uint8_t *loc;
		uint32_t value;

		loc = page_content + page_offset;
		memcpy(&value, loc, sizeof(value));
		delta = (value & delta_mask) >> delta_shift;
		value &= value_mask;

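		/*
		 * Sketch of the value encoding handled below, assuming the usual
		 * v4 delta_mask of 0xC0000000: after the delta bits are masked
		 * off, a value with bits 15..31 clear is a small positive literal
		 * kept as-is, a value with bits 15..29 all set is a small negative
		 * literal whose top two bits are restored, and anything else is a
		 * pointer that gets value_add and the slide applied.
		 */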
		if ((value & 0xFFFF8000) == 0) {
			// small positive non-pointer, use as-is
		} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
			// small negative non-pointer
			value |= 0xC0000000;
		} else {
			// pointer that needs rebasing
			value += value_add;
			value += slide_amount;
		}
		memcpy(loc, &value, sizeof(value));
		page_offset += delta;
	}

	/* If the offset went past the end of the page, then the slide data is invalid. */
	if (page_offset > last_page_offset) {
		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}

static kern_return_t
vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
	const uint32_t slide_amount = si->si_slide;

	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = page_starts[pageIndex];

	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
		return KERN_SUCCESS;
	}

	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
		uint16_t info;

		do {
			uint16_t page_start_offset;
			kern_return_t kr;

			if (chain_index >= s_info->page_extras_count) {
				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
				    chain_index, s_info->page_extras_count);
				return KERN_FAILURE;
			}
			info = page_extras[chain_index];
			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);

			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
			if (kr != KERN_SUCCESS) {
				return KERN_FAILURE;
			}

			chain_index++;
		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
	} else {
		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
		kern_return_t kr;

		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
		if (kr != KERN_SUCCESS) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}


kern_return_t
vm_shared_region_slide_page(
	vm_shared_region_slide_info_t si,
	vm_offset_t vaddr,
	mach_vm_offset_t uservaddr,
	uint32_t pageIndex,
	uint64_t jop_key)
{
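	/*
	 * Dispatch on the slide-info version recorded in the slide info entry;
	 * each version corresponds to a different on-disk slide-info layout
	 * carried by the dyld shared cache.
	 */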
	switch (si->si_slide_info_entry->version) {
	case 1:
		return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
	case 2:
		return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
	case 3:
		return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
	case 4:
		return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
	default:
		return KERN_FAILURE;
	}
}

/******************************************************************************/
/* Comm page support                                                          */
/******************************************************************************/

SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;

SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;

SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;

#if defined(__i386__) || defined(__x86_64__)
/*
 * Create a memory entry, VM submap and pmap for one commpage.
 */
static void
_vm_commpage_init(
	ipc_port_t *handlep,
	vm_map_size_t size)
{
	vm_named_entry_t mem_entry;
	vm_map_t new_map;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> _init(0x%llx)\n",
		(long long)size));

	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
	if (new_pmap == NULL) {
		panic("_vm_commpage_init: could not allocate pmap");
	}
	new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);

	mem_entry = mach_memory_entry_allocate(handlep);
	mem_entry->backing.map = new_map;
	mem_entry->internal = TRUE;
	mem_entry->is_sub_map = TRUE;
	mem_entry->offset = 0;
	mem_entry->protection = VM_PROT_ALL;
	mem_entry->size = size;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: _init(0x%llx) <- %p\n",
		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
}
#endif


/*
 * Initialize the comm text pages at boot time.
 */
void
vm_commpage_text_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage text: ->init()\n"));
#if defined(__i386__) || defined(__x86_64__)
	/* create the 32-bit comm text page */
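	/*
	 * The text commpage is slid by a random, page-aligned offset within the
	 * platform's slide range, so its user-visible address varies from boot
	 * to boot.
	 */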
	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */
	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
	commpage_text32_map = commpage_text32_entry->backing.map;
	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm text page */
	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding up to a 2MB range */
	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
	commpage_text64_map = commpage_text64_entry->backing.map;
	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
#endif

	/* populate the routines in here */
	commpage_text_populate();

	SHARED_REGION_TRACE_DEBUG(
		("commpage text: init() <-\n"));
}

/*
 * Initialize the comm pages at boot time.
 */
void
vm_commpage_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> init()\n"));

#if defined(__i386__) || defined(__x86_64__)
	/* create the 32-bit comm page */
	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
	commpage32_map = commpage32_entry->backing.map;

	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm page */
	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
	commpage64_map = commpage64_entry->backing.map;

#endif /* __i386__ || __x86_64__ */

	/* populate them according to this specific platform */
	commpage_populate();
	__commpage_setup = 1;
#if XNU_TARGET_OS_OSX
	if (__system_power_source == 0) {
		post_sys_powersource_internal(0, 1);
	}
#endif /* XNU_TARGET_OS_OSX */

	SHARED_REGION_TRACE_DEBUG(
		("commpage: init() <-\n"));
}

/*
 * Enter the appropriate comm page into the task's address space.
 * This is called at exec() time via vm_map_exec().
 */
kern_return_t
vm_commpage_enter(
	vm_map_t map,
	task_t task,
	boolean_t is64bit)
{
#if defined(__arm64__)
#pragma unused(is64bit)
	(void)task;
	(void)map;
	pmap_insert_commpage(vm_map_pmap(map));
	return KERN_SUCCESS;
#else
	ipc_port_t commpage_handle, commpage_text_handle;
	vm_map_offset_t commpage_address, objc_address, commpage_text_address;
	vm_map_size_t commpage_size, objc_size, commpage_text_size;
	vm_map_kernel_flags_t vmk_flags;
	kern_return_t kr;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> enter(%p,%p)\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task)));

	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
	/* the comm page is likely to be beyond the actual end of the VM map */
	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
	vmk_flags.vmkf_beyond_max = TRUE;

	/* select the appropriate comm page for this task */
	assert(!(is64bit ^ vm_map_is_64bit(map)));
	if (is64bit) {
		commpage_handle = commpage64_handle;
		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
		commpage_size = _COMM_PAGE64_AREA_LENGTH;
		objc_size = _COMM_PAGE64_OBJC_SIZE;
		objc_address = _COMM_PAGE64_OBJC_BASE;
		commpage_text_handle = commpage_text64_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
	} else {
		commpage_handle = commpage32_handle;
		commpage_address =
		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
		commpage_size = _COMM_PAGE32_AREA_LENGTH;
		objc_size = _COMM_PAGE32_OBJC_SIZE;
		objc_address = _COMM_PAGE32_OBJC_BASE;
		commpage_text_handle = commpage_text32_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
	}

	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
		/* the commpage is properly aligned and sized for pmap-nesting */
		vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
		vmk_flags.vmkf_nested_pmap = TRUE;
	}
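	/*
	 * Nesting lets tasks share the commpage's translation tables instead of
	 * each pmap building its own mappings; that only works when the mapped
	 * range is aligned and sized in whole units of the pmap's minimum
	 * nesting granularity, hence the check above.
	 */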

	/* map the comm page in the task's address space */
	assert(commpage_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_address,
		commpage_size,
		0,
		vmk_flags,
		commpage_handle,
		0,
		FALSE,
		VM_PROT_READ,
		VM_PROT_READ,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("commpage: enter(%p,0x%llx,0x%llx) "
			"commpage %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_address,
			(long long)commpage_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
	}

	/* map the comm text page in the task's address space */
	assert(commpage_text_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_text_address,
		commpage_text_size,
		0,
		vmk_flags,
		commpage_text_handle,
		0,
		FALSE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("commpage text: enter(%p,0x%llx,0x%llx) "
			"commpage text %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_text_address,
			(long long)commpage_text_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
	}

	/*
	 * Since we're here, we also pre-allocate some virtual space for the
	 * Objective-C run-time, if needed...
	 */
	if (objc_size != 0) {
		kr = vm_map_enter_mem_object(
			map,
			&objc_address,
			objc_size,
			0,
			vmk_flags,
			IPC_PORT_NULL,
			0,
			FALSE,
			VM_PROT_ALL,
			VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("commpage: enter(%p,0x%llx,0x%llx) "
				"objc mapping failed 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(long long)objc_address,
				(long long)objc_size, kr));
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("commpage: enter(%p,%p) <- 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task), kr));
	return kr;
#endif
}

int
vm_shared_region_slide(
	uint32_t slide,
	mach_vm_offset_t entry_start_address,
	mach_vm_size_t entry_size,
	mach_vm_offset_t slide_start,
	mach_vm_size_t slide_size,
	mach_vm_offset_t slid_mapping,
	memory_object_control_t sr_file_control,
	vm_prot_t prot)
{
	vm_shared_region_t sr;
	kern_return_t error;

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
		slide, entry_start_address, entry_size, slide_start, slide_size));

	sr = vm_shared_region_get(current_task());
	if (sr == NULL) {
		printf("%s: no shared region?\n", __FUNCTION__);
		SHARED_REGION_TRACE_DEBUG(
			("vm_shared_region_slide: <- %d (no shared region)\n",
			KERN_FAILURE));
		return KERN_FAILURE;
	}

	/*
	 * Protect from concurrent access.
	 */
	vm_shared_region_lock();
	while (sr->sr_slide_in_progress) {
		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
	}
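	/*
	 * Only one slide operation can be in flight per shared region:
	 * sr_slide_in_progress serializes callers, and the waiters above are
	 * woken by the thread_wakeup() below once the current slide completes.
	 */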

	sr->sr_slide_in_progress = TRUE;
	vm_shared_region_unlock();

	error = vm_shared_region_slide_mapping(sr,
	    (user_addr_t)slide_start,
	    slide_size,
	    entry_start_address,
	    entry_size,
	    slid_mapping,
	    slide,
	    sr_file_control,
	    prot);
	if (error) {
		printf("slide_info initialization failed with kr=%d\n", error);
	}

	vm_shared_region_lock();

	assert(sr->sr_slide_in_progress);
	sr->sr_slide_in_progress = FALSE;
	thread_wakeup(&sr->sr_slide_in_progress);

#if XNU_TARGET_OS_OSX
	if (error == KERN_SUCCESS) {
		shared_region_completed_slide = TRUE;
	}
#endif /* XNU_TARGET_OS_OSX */
	vm_shared_region_unlock();

	vm_shared_region_deallocate(sr);

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: <- %d\n",
		error));

	return error;
}

/*
 * Used during Authenticated Root Volume macOS boot.
 * Launchd re-execs itself and wants the new launchd to use
 * the shared cache from the new root volume. This call
 * makes all the existing shared caches stale to allow
 * that to happen.
 */
void
vm_shared_region_pivot(void)
{
	vm_shared_region_t shared_region = NULL;

	vm_shared_region_lock();

	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
		assert(shared_region->sr_ref_count > 0);
		shared_region->sr_stale = TRUE;
		if (shared_region->sr_timer_call) {
			/*
			 * We have a shared region ready to be destroyed
			 * and just waiting for a delayed timer to fire.
			 * Marking it stale cements its ineligibility to
			 * be used ever again. So let's shorten the timer
			 * aggressively down to 10 milliseconds and get rid of it.
			 * This is a single quantum and we don't need to go
			 * shorter than this duration. We want it to be short
			 * enough, however, because we could have an unmount
			 * of the volume hosting this shared region just behind
			 * us.
			 */
			uint64_t deadline;
			assert(shared_region->sr_ref_count == 1);

			/*
			 * Free the old timer call. Returns with a reference held.
			 * If the old timer has fired and is waiting for the vm_shared_region_lock
			 * lock, we will just return with an additional ref_count i.e. 2.
			 * The old timer will then fire and just drop the ref count down to 1
			 * with no other modifications.
			 */
			vm_shared_region_reference_locked(shared_region);

			/* set up the timer. Keep the reference from above for this timer. */
			shared_region->sr_timer_call = thread_call_allocate(
				(thread_call_func_t) vm_shared_region_timeout,
				(thread_call_param_t) shared_region);

			/* schedule the timer */
			clock_interval_to_deadline(10, /* 10 milliseconds */
			    NSEC_PER_MSEC,
			    &deadline);
			thread_call_enter_delayed(shared_region->sr_timer_call,
			    deadline);

			SHARED_REGION_TRACE_DEBUG(
				("shared_region: pivot(%p): armed timer\n",
				(void *)VM_KERNEL_ADDRPERM(shared_region)));
		}
	}

	vm_shared_region_unlock();
}

/*
 * Routine to mark any non-standard slide shared cache region as stale.
 * This causes the next "reslide" spawn to create a new shared region.
 */
void
vm_shared_region_reslide_stale(boolean_t driverkit)
{
#if __has_feature(ptrauth_calls)
	vm_shared_region_t shared_region = NULL;

	vm_shared_region_lock();

	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
		assert(shared_region->sr_ref_count > 0);
		if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
			shared_region->sr_stale = TRUE;
			vm_shared_region_reslide_count++;
		}
	}

	vm_shared_region_unlock();
#else
	(void)driverkit;
#endif /* __has_feature(ptrauth_calls) */
}

/*
 * Report whether the task is using a reslide shared cache region.
 */
bool
vm_shared_region_is_reslide(__unused struct task *task)
{
	bool is_reslide = FALSE;
#if __has_feature(ptrauth_calls)
	vm_shared_region_t sr = vm_shared_region_get(task);

	if (sr != NULL) {
		is_reslide = sr->sr_reslide;
		vm_shared_region_deallocate(sr);
	}
#endif /* __has_feature(ptrauth_calls) */
	return is_reslide;
}

/*
 * This is called from power-management code to let the kernel know
 * the current source of power:
 * 0 if it is an external source (connected to power),
 * 1 if it is an internal power source, i.e. the battery.
 */
void
#if XNU_TARGET_OS_OSX
post_sys_powersource(int i)
#else /* XNU_TARGET_OS_OSX */
post_sys_powersource(__unused int i)
#endif /* XNU_TARGET_OS_OSX */
{
#if XNU_TARGET_OS_OSX
	post_sys_powersource_internal(i, 0);
#endif /* XNU_TARGET_OS_OSX */
}


#if XNU_TARGET_OS_OSX
static void
post_sys_powersource_internal(int i, int internal)
{
	if (internal == 0) {
		__system_power_source = i;
	}
}
#endif /* XNU_TARGET_OS_OSX */

void *
vm_shared_region_root_dir(
	struct vm_shared_region *sr)
{
	void *vnode;

	vm_shared_region_lock();
	vnode = sr->sr_root_dir;
	vm_shared_region_unlock();
	return vnode;
}