1 /*
2 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * Shared region (... and comm page)
26 *
27 * This file handles the VM shared region and comm page.
28 *
29 */
30 /*
31 * SHARED REGIONS
32 * --------------
33 *
34 * A shared region is a submap that contains the most common system shared
35 * libraries for a given environment which is defined by:
36 * - cpu-type
37 * - 64-bitness
38 * - root directory
39 * - Team ID - when we have pointer authentication.
40 *
41 * The point of a shared region is to reduce the setup overhead when exec'ing
42 * a new process. A shared region uses a shared VM submap that gets mapped
43 * automatically at exec() time, see vm_map_exec(). The first process of a given
44 * environment sets up the shared region and all further processes in that
45 * environment can re-use that shared region without having to re-create
46 * the same mappings in their VM map. All they need is contained in the shared
47 * region.
48 *
49 * The region can also share a pmap (mostly for read-only parts but also for the
50 * initial version of some writable parts), which gets "nested" into the
51 * process's pmap. This reduces the number of soft faults: once one process
52 * brings in a page in the shared region, all the other processes can access
53 * it without having to enter it in their own pmap.
54 *
55 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56 * to map the appropriate shared region in the process's address space.
57 * We look up the appropriate shared region for the process's environment.
58 * If we can't find one, we create a new (empty) one and add it to the list.
59 * Otherwise, we just take an extra reference on the shared region we found.
60 *
61 * The "dyld" runtime, mapped into the process's address space at exec() time,
62 * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
63 * system calls to validate and/or populate the shared region with the
64 * appropriate dyld_shared_cache file.
65 *
66 * The shared region is inherited on fork() and the child simply takes an
67 * extra reference on its parent's shared region.
68 *
69 * When the task terminates, we release the reference on its shared region.
70 * When the last reference is released, we destroy the shared region.
71 *
72 * After a chroot(), the calling process keeps using its original shared region,
73 * since that's what was mapped when it was started. But its children
74 * will use a different shared region, because they need to use the shared
75 * cache that's relative to the new root directory.
76 */
77
78 /*
79 * COMM PAGE
80 *
81 * A "comm page" is an area of memory that is populated by the kernel with
82 * the appropriate platform-specific version of some commonly used code.
83 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
 * for the native cpu-type. No need to overly optimize translated code
 * for hardware that is not really there!
86 *
87 * The comm pages are created and populated at boot time.
88 *
89 * The appropriate comm page is mapped into a process's address space
90 * at exec() time, in vm_map_exec(). It is then inherited on fork().
91 *
92 * The comm page is shared between the kernel and all applications of
93 * a given platform. Only the kernel can modify it.
94 *
95 * Applications just branch to fixed addresses in the comm page and find
96 * the right version of the code for the platform. There is also some
97 * data provided and updated by the kernel for processes to retrieve easily
98 * without having to do a system call.
99 */
100
101 #include <debug.h>
102
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106
107 #include <mach/mach_vm.h>
108 #include <mach/machine.h>
109
110 #include <vm/vm_map.h>
111 #include <vm/vm_map_internal.h>
112 #include <vm/vm_shared_region.h>
113
114 #include <vm/vm_protos.h>
115
116 #include <machine/commpage.h>
117 #include <machine/cpu_capabilities.h>
118 #include <sys/random.h>
119 #include <sys/errno.h>
120
121 #if defined(__arm64__)
122 #include <arm/cpu_data_internal.h>
123 #include <arm/misc_protos.h>
124 #endif
125
126 /*
127 * the following codes are used in the subclass
128 * of the DBG_MACH_SHAREDREGION class
129 */
130 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
131
132 #if __has_feature(ptrauth_calls)
133 #include <ptrauth.h>
134 #endif /* __has_feature(ptrauth_calls) */
135
/* "dyld" uses this to figure out what the kernel supports */
int shared_region_version = 3;

/* trace level, output is sent to the system log file */
int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;

/* should local (non-chroot) shared regions persist when no task uses them ? */
int shared_region_persistence = 0; /* no by default */


/* delay in seconds before reclaiming an unused shared region */
TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);

/*
 * Cached pointer to the most recently mapped shared region from PID 1, which should
 * be the most commonly mapped shared region in the system. There are many processes
 * which do not use this, for a variety of reasons.
 *
 * The main consumer of this is stackshot.
 */
struct vm_shared_region *primary_system_shared_region = NULL;

#if XNU_TARGET_OS_OSX
/*
 * Only one cache gets to slide on Desktop, since we can't
 * tear down slide info properly today and the desktop actually
 * produces lots of shared caches.
 */
boolean_t shared_region_completed_slide = FALSE;
#endif /* XNU_TARGET_OS_OSX */

/* this lock protects all the shared region data structures */
static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);

/* convenience wrappers around the global shared region mutex */
#define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
#define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
/* sleep on "event", atomically dropping (and retaking) the shared region lock */
#define vm_shared_region_sleep(event, interruptible) \
	lck_mtx_sleep(&vm_shared_region_lock, \
	LCK_SLEEP_DEFAULT, \
	(event_t) (event), \
	(interruptible))

/* the list of currently available shared regions (one per environment) */
queue_head_t vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
int vm_shared_region_count = 0; /* number of regions currently on the queue */
int vm_shared_region_peak = 0;  /* high-water mark of vm_shared_region_count */
static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */

/*
 * the number of times an event has forced the recalculation of the reslide
 * shared region slide.
 */
#if __has_feature(ptrauth_calls)
int vm_shared_region_reslide_count = 0;
#endif /* __has_feature(ptrauth_calls) */

/* forward declarations for the routines implemented below */
static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
static vm_shared_region_t vm_shared_region_create(
	void *root_dir,
	cpu_type_t cputype,
	cpu_subtype_t cpu_subtype,
	boolean_t is_64bit,
	int target_page_shift,
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version);
static void vm_shared_region_destroy(vm_shared_region_t shared_region);

static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
static void vm_shared_region_timeout(thread_call_param_t param0,
	thread_call_param_t param1);
static kern_return_t vm_shared_region_slide_mapping(
	vm_shared_region_t sr,
	user_addr_t slide_info_addr,
	mach_vm_size_t slide_info_size,
	mach_vm_offset_t start,
	mach_vm_size_t size,
	mach_vm_offset_t slid_mapping,
	uint32_t slide,
	memory_object_control_t,
	vm_prot_t prot); /* forward */

/* NOTE(review): appears to record whether commpage setup has completed — confirm against the commpage code later in this file */
static int __commpage_setup = 0;
#if XNU_TARGET_OS_OSX
static int __system_power_source = 1; /* init to external power source */
static void post_sys_powersource_internal(int i, int internal);
#endif /* XNU_TARGET_OS_OSX */

extern u_int32_t random(void);
226
227 /*
228 * Retrieve a task's shared region and grab an extra reference to
229 * make sure it doesn't disappear while the caller is using it.
230 * The caller is responsible for consuming that extra reference if
231 * necessary.
232 */
233 vm_shared_region_t
vm_shared_region_get(task_t task)234 vm_shared_region_get(
235 task_t task)
236 {
237 vm_shared_region_t shared_region;
238
239 SHARED_REGION_TRACE_DEBUG(
240 ("shared_region: -> get(%p)\n",
241 (void *)VM_KERNEL_ADDRPERM(task)));
242
243 task_lock(task);
244 vm_shared_region_lock();
245 shared_region = task->shared_region;
246 if (shared_region) {
247 assert(shared_region->sr_ref_count > 0);
248 vm_shared_region_reference_locked(shared_region);
249 }
250 vm_shared_region_unlock();
251 task_unlock(task);
252
253 SHARED_REGION_TRACE_DEBUG(
254 ("shared_region: get(%p) <- %p\n",
255 (void *)VM_KERNEL_ADDRPERM(task),
256 (void *)VM_KERNEL_ADDRPERM(shared_region)));
257
258 return shared_region;
259 }
260
261 vm_map_t
vm_shared_region_vm_map(vm_shared_region_t shared_region)262 vm_shared_region_vm_map(
263 vm_shared_region_t shared_region)
264 {
265 ipc_port_t sr_handle;
266 vm_named_entry_t sr_mem_entry;
267 vm_map_t sr_map;
268
269 SHARED_REGION_TRACE_DEBUG(
270 ("shared_region: -> vm_map(%p)\n",
271 (void *)VM_KERNEL_ADDRPERM(shared_region)));
272 assert(shared_region->sr_ref_count > 0);
273
274 sr_handle = shared_region->sr_mem_entry;
275 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
276 sr_map = sr_mem_entry->backing.map;
277 assert(sr_mem_entry->is_sub_map);
278
279 SHARED_REGION_TRACE_DEBUG(
280 ("shared_region: vm_map(%p) <- %p\n",
281 (void *)VM_KERNEL_ADDRPERM(shared_region),
282 (void *)VM_KERNEL_ADDRPERM(sr_map)));
283 return sr_map;
284 }
285
286 /*
287 * Set the shared region the process should use.
288 * A NULL new shared region means that we just want to release the old
289 * shared region.
290 * The caller should already have an extra reference on the new shared region
291 * (if any). We release a reference on the old shared region (if any).
292 */
293 void
vm_shared_region_set(task_t task,vm_shared_region_t new_shared_region)294 vm_shared_region_set(
295 task_t task,
296 vm_shared_region_t new_shared_region)
297 {
298 vm_shared_region_t old_shared_region;
299
300 SHARED_REGION_TRACE_DEBUG(
301 ("shared_region: -> set(%p, %p)\n",
302 (void *)VM_KERNEL_ADDRPERM(task),
303 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
304
305 task_lock(task);
306 vm_shared_region_lock();
307
308 old_shared_region = task->shared_region;
309 if (new_shared_region) {
310 assert(new_shared_region->sr_ref_count > 0);
311 }
312
313 task->shared_region = new_shared_region;
314
315 vm_shared_region_unlock();
316 task_unlock(task);
317
318 if (old_shared_region) {
319 assert(old_shared_region->sr_ref_count > 0);
320 vm_shared_region_deallocate(old_shared_region);
321 }
322
323 SHARED_REGION_TRACE_DEBUG(
324 ("shared_region: set(%p) <- old=%p new=%p\n",
325 (void *)VM_KERNEL_ADDRPERM(task),
326 (void *)VM_KERNEL_ADDRPERM(old_shared_region),
327 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
328 }
329
330 /*
331 * New arm64 shared regions match with an existing arm64e region.
332 * They just get a private non-authenticating pager.
333 */
334 static inline bool
match_subtype(cpu_type_t cputype,cpu_subtype_t exist,cpu_subtype_t new)335 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
336 {
337 if (exist == new) {
338 return true;
339 }
340 if (cputype == CPU_TYPE_ARM64 &&
341 exist == CPU_SUBTYPE_ARM64E &&
342 new == CPU_SUBTYPE_ARM64_ALL) {
343 return true;
344 }
345 return false;
346 }
347
348
/*
 * Look up the shared region for the desired environment.
 * If none is found, create a new (empty) one.
 * Grab an extra reference on the returned shared region, to make sure
 * it doesn't get destroyed before the caller is done with it. The caller
 * is responsible for consuming that extra reference if necessary.
 *
 * The matching key is the tuple (cpu type/subtype, 64-bitness, root
 * directory, page shift, reslide, driverkit, RSR version); stale
 * regions are never matched.
 */
vm_shared_region_t
vm_shared_region_lookup(
	void *root_dir,
	cpu_type_t cputype,
	cpu_subtype_t cpu_subtype,
	boolean_t is_64bit,
	int target_page_shift,
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version)
{
	vm_shared_region_t shared_region;
	vm_shared_region_t new_shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit));

	shared_region = NULL;
	new_shared_region = NULL;

	vm_shared_region_lock();
	for (;;) {
		/* scan the existing regions for one matching this environment */
		queue_iterate(&vm_shared_region_queue,
		    shared_region,
		    vm_shared_region_t,
		    sr_q) {
			assert(shared_region->sr_ref_count > 0);
			if (shared_region->sr_cpu_type == cputype &&
			    match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
			    shared_region->sr_root_dir == root_dir &&
			    shared_region->sr_64bit == is_64bit &&
#if __ARM_MIXED_PAGE_SIZE__
			    shared_region->sr_page_shift == target_page_shift &&
#endif /* __ARM_MIXED_PAGE_SIZE__ */
#if __has_feature(ptrauth_calls)
			    shared_region->sr_reslide == reslide &&
#endif /* __has_feature(ptrauth_calls) */
			    shared_region->sr_driverkit == is_driverkit &&
			    shared_region->sr_rsr_version == rsr_version &&
			    !shared_region->sr_stale) {
				/* found a match ! */
				vm_shared_region_reference_locked(shared_region);
				goto done;
			}
		}
		if (new_shared_region == NULL) {
			/*
			 * no match: create a new one.
			 * We must drop the lock to allocate, so another thread
			 * may create and register a matching region in the
			 * meantime — hence the retry of the whole scan.
			 */
			vm_shared_region_unlock();
			new_shared_region = vm_shared_region_create(root_dir,
			    cputype,
			    cpu_subtype,
			    is_64bit,
			    target_page_shift,
			    reslide,
			    is_driverkit,
			    rsr_version);
			/* do the lookup again, in case we lost a race */
			vm_shared_region_lock();
			continue;
		}
		/* still no match: use our new one */
		shared_region = new_shared_region;
		new_shared_region = NULL;
		/* assign a unique non-zero id; ids are never reused */
		uint32_t newid = ++vm_shared_region_lastid;
		if (newid == 0) {
			panic("shared_region: vm_shared_region_lastid wrapped");
		}
		shared_region->sr_id = newid;
		shared_region->sr_install_time = mach_absolute_time();
		queue_enter(&vm_shared_region_queue,
		    shared_region,
		    vm_shared_region_t,
		    sr_q);
		vm_shared_region_count++;
		if (vm_shared_region_count > vm_shared_region_peak) {
			vm_shared_region_peak = vm_shared_region_count;
		}
		break;
	}

done:
	vm_shared_region_unlock();

	if (new_shared_region) {
		/*
		 * We lost a race with someone else to create a new shared
		 * region for that environment. Get rid of our unused one.
		 */
		assert(new_shared_region->sr_ref_count == 1);
		new_shared_region->sr_ref_count--;
		vm_shared_region_destroy(new_shared_region);
		new_shared_region = NULL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit,
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	assert(shared_region->sr_ref_count > 0);
	return shared_region;
}
463
/*
 * Take an extra reference on a shared region.
 * The vm_shared_region_lock should already be held by the caller.
 *
 * If a delayed-teardown timer is pending on the region (armed when its
 * refcount last dropped to zero — see vm_shared_region_deallocate()),
 * referencing the region rescues it: the timer is cancelled and the
 * reference the timer was holding is released.
 */
static void
vm_shared_region_reference_locked(
	vm_shared_region_t shared_region)
{
	LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> reference_locked(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
	assert(shared_region->sr_ref_count > 0);
	shared_region->sr_ref_count++;
	assert(shared_region->sr_ref_count != 0); /* overflow check */

	if (shared_region->sr_timer_call != NULL) {
		boolean_t cancelled;

		/* cancel and free any pending timeout */
		cancelled = thread_call_cancel(shared_region->sr_timer_call);
		if (cancelled) {
			thread_call_free(shared_region->sr_timer_call);
			shared_region->sr_timer_call = NULL;
			/* release the reference held by the cancelled timer */
			shared_region->sr_ref_count--;
		} else {
			/*
			 * too late to cancel: the callout is already running;
			 * the timer will drop the reference and free itself
			 */
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: reference_locked(%p) <- %d\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		shared_region->sr_ref_count));
}
501
502 /*
503 * Take a reference on a shared region.
504 */
505 void
vm_shared_region_reference(vm_shared_region_t shared_region)506 vm_shared_region_reference(vm_shared_region_t shared_region)
507 {
508 SHARED_REGION_TRACE_DEBUG(
509 ("shared_region: -> reference(%p)\n",
510 (void *)VM_KERNEL_ADDRPERM(shared_region)));
511
512 vm_shared_region_lock();
513 vm_shared_region_reference_locked(shared_region);
514 vm_shared_region_unlock();
515
516 SHARED_REGION_TRACE_DEBUG(
517 ("shared_region: reference(%p) <- %d\n",
518 (void *)VM_KERNEL_ADDRPERM(shared_region),
519 shared_region->sr_ref_count));
520 }
521
/*
 * Release a reference on the shared region.
 * Destroy it if there are no references left.
 *
 * When the last reference goes away, the region is not usually torn
 * down immediately: a timer is armed (holding one reference) so that a
 * quick relaunch of an app in the same environment can reuse the
 * region.  Stale regions and mismatched-RSR regions are torn down
 * right away.
 */
void
vm_shared_region_deallocate(
	vm_shared_region_t shared_region)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> deallocate(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();

	assert(shared_region->sr_ref_count > 0);

	if (shared_region->sr_root_dir == NULL) {
		/*
		 * Local (i.e. based on the boot volume) shared regions
		 * can persist or not based on the "shared_region_persistence"
		 * sysctl.
		 * Make sure that this one complies.
		 *
		 * See comments in vm_shared_region_slide() for notes about
		 * shared regions we have slid (which are not torn down currently).
		 */
		if (shared_region_persistence &&
		    !shared_region->sr_persists) {
			/* make this one persistent: the extra reference keeps
			 * the region alive even with no tasks using it */
			shared_region->sr_ref_count++;
			shared_region->sr_persists = TRUE;
		} else if (!shared_region_persistence &&
		    shared_region->sr_persists) {
			/* make this one no longer persistent */
			assert(shared_region->sr_ref_count > 1);
			shared_region->sr_ref_count--;
			shared_region->sr_persists = FALSE;
		}
	}

	/* drop the caller's reference */
	assert(shared_region->sr_ref_count > 0);
	shared_region->sr_ref_count--;
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: deallocate(%p): ref now %d\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		shared_region->sr_ref_count));

	if (shared_region->sr_ref_count == 0) {
		uint64_t deadline;

		/*
		 * Even though a shared region is unused, delay a while before
		 * tearing it down, in case a new app launch can use it.
		 * We don't keep around stale shared regions, nor older RSR ones.
		 */
		if (shared_region->sr_timer_call == NULL &&
		    shared_region_destroy_delay != 0 &&
		    !shared_region->sr_stale &&
		    !(shared_region->sr_rsr_version != 0 &&
		    shared_region->sr_rsr_version != rsr_get_version())) {
			/* hold one reference for the timer */
			assert(!shared_region->sr_mapping_in_progress);
			shared_region->sr_ref_count++;

			/* set up the timer */
			shared_region->sr_timer_call = thread_call_allocate(
				(thread_call_func_t) vm_shared_region_timeout,
				(thread_call_param_t) shared_region);

			/* schedule the timer */
			clock_interval_to_deadline(shared_region_destroy_delay,
			    NSEC_PER_SEC,
			    &deadline);
			thread_call_enter_delayed(shared_region->sr_timer_call,
			    deadline);

			SHARED_REGION_TRACE_DEBUG(
				("shared_region: deallocate(%p): armed timer\n",
				(void *)VM_KERNEL_ADDRPERM(shared_region)));

			vm_shared_region_unlock();
		} else {
			/*
			 * Not (re-)arming a timer — either the delayed-teardown
			 * timer already fired (we are its callout), delayed
			 * destruction is disabled, or the region is stale /
			 * RSR-mismatched: let go of this shared region now.
			 */

			/* Make sure there's no cached pointer to the region. */
			if (primary_system_shared_region == shared_region) {
				primary_system_shared_region = NULL;
			}

			/*
			 * Remove it from the queue first, so no one can find
			 * it...
			 */
			queue_remove(&vm_shared_region_queue,
			    shared_region,
			    vm_shared_region_t,
			    sr_q);
			vm_shared_region_count--;
			vm_shared_region_unlock();

			/* ... and destroy it */
			vm_shared_region_destroy(shared_region);
			shared_region = NULL;
		}
	} else {
		vm_shared_region_unlock();
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: deallocate(%p) <-\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
}
634
635 void
vm_shared_region_timeout(thread_call_param_t param0,__unused thread_call_param_t param1)636 vm_shared_region_timeout(
637 thread_call_param_t param0,
638 __unused thread_call_param_t param1)
639 {
640 vm_shared_region_t shared_region;
641
642 shared_region = (vm_shared_region_t) param0;
643
644 vm_shared_region_deallocate(shared_region);
645 }
646
647
/*
 * Create a new (empty) shared region for a new environment.
 *
 * Allocates the vm_shared_region structure, picks the base address /
 * size / nesting window for the requested cpu type and bitness,
 * creates a nested pmap and a pageable VM submap on top of it, and
 * wraps the submap in a Mach memory entry.  Returns NULL on failure
 * (unknown cpu type or pmap/map allocation failure).
 *
 * The returned region carries one reference for the caller and is NOT
 * yet on the global queue (see vm_shared_region_lookup()).
 */
static vm_shared_region_t
vm_shared_region_create(
	void *root_dir,
	cpu_type_t cputype,
	cpu_subtype_t cpu_subtype,
	boolean_t is_64bit,
	int target_page_shift,
#if !__has_feature(ptrauth_calls)
	__unused
#endif /* !__has_feature(ptrauth_calls) */
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version)
{
	vm_named_entry_t mem_entry;
	ipc_port_t mem_entry_port;
	vm_shared_region_t shared_region;
	vm_map_t sub_map;
	mach_vm_offset_t base_address, pmap_nesting_start;
	mach_vm_size_t size, pmap_nesting_size;

	SHARED_REGION_TRACE_INFO(
		("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit));

	base_address = 0;
	size = 0;
	mem_entry = NULL;
	mem_entry_port = IPC_PORT_NULL;
	sub_map = VM_MAP_NULL;

	/* create a new shared region structure... */
	shared_region = kalloc_type(struct vm_shared_region,
	    Z_WAITOK | Z_NOFAIL);

	/* figure out the correct settings for the desired environment */
	if (is_64bit) {
		switch (cputype) {
#if defined(__arm64__)
		case CPU_TYPE_ARM64:
			base_address = SHARED_REGION_BASE_ARM64;
			size = SHARED_REGION_SIZE_ARM64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
			break;
#else
		case CPU_TYPE_I386:
			base_address = SHARED_REGION_BASE_X86_64;
			size = SHARED_REGION_SIZE_X86_64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
			break;
		case CPU_TYPE_POWERPC:
			base_address = SHARED_REGION_BASE_PPC64;
			size = SHARED_REGION_SIZE_PPC64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
			break;
#endif
		default:
			SHARED_REGION_TRACE_ERROR(
				("shared_region: create: unknown cpu type %d\n",
				cputype));
			kfree_type(struct vm_shared_region, shared_region);
			shared_region = NULL;
			goto done;
		}
	} else {
		switch (cputype) {
#if defined(__arm64__)
		case CPU_TYPE_ARM:
			base_address = SHARED_REGION_BASE_ARM;
			size = SHARED_REGION_SIZE_ARM;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
			break;
#else
		case CPU_TYPE_I386:
			base_address = SHARED_REGION_BASE_I386;
			size = SHARED_REGION_SIZE_I386;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
			break;
		case CPU_TYPE_POWERPC:
			base_address = SHARED_REGION_BASE_PPC;
			size = SHARED_REGION_SIZE_PPC;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
			break;
#endif
		default:
			SHARED_REGION_TRACE_ERROR(
				("shared_region: create: unknown cpu type %d\n",
				cputype));
			kfree_type(struct vm_shared_region, shared_region);
			shared_region = NULL;
			goto done;
		}
	}

	/* create a memory entry structure and a Mach port handle */
	mem_entry = mach_memory_entry_allocate(&mem_entry_port);

#if defined(__arm64__)
	{
		struct pmap *pmap_nested;
		int pmap_flags = 0;
		pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;


#if __ARM_MIXED_PAGE_SIZE__
		if (cputype == CPU_TYPE_ARM64 &&
		    target_page_shift == FOURK_PAGE_SHIFT) {
			/* arm64/4k address space */
			pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
		}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

		/* create the nested pmap that will be shared across tasks */
		pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
		if (pmap_nested != PMAP_NULL) {
			pmap_set_nested(pmap_nested);
			sub_map = vm_map_create_options(pmap_nested, 0,
			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);

			if (is_64bit ||
			    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
				/* enforce 16KB alignment of VM map entries */
				vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
			}
#if __ARM_MIXED_PAGE_SIZE__
			if (cputype == CPU_TYPE_ARM64 &&
			    target_page_shift == FOURK_PAGE_SHIFT) {
				/* arm64/4k address space */
				vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
			}
#endif /* __ARM_MIXED_PAGE_SIZE__ */
		} else {
			sub_map = VM_MAP_NULL;
		}
	}
#else /* defined(__arm64__) */
	{
		/* create a VM sub map and its pmap */
		pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
		if (pmap != NULL) {
			sub_map = vm_map_create_options(pmap, 0,
			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
		} else {
			sub_map = VM_MAP_NULL;
		}
	}
#endif /* defined(__arm64__) */
	if (sub_map == VM_MAP_NULL) {
		/* pmap or map creation failed: undo the allocations above */
		ipc_port_release_send(mem_entry_port);
		kfree_type(struct vm_shared_region, shared_region);
		shared_region = NULL;
		SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
		goto done;
	}

	/* shared regions should always enforce code-signing */
	vm_map_cs_enforcement_set(sub_map, true);
	assert(vm_map_cs_enforcement(sub_map));
	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));

	assert(!sub_map->disable_vmentry_reuse);
	sub_map->is_nested_map = TRUE;

	/* make the memory entry point to the VM sub map */
	mem_entry->is_sub_map = TRUE;
	mem_entry->backing.map = sub_map;
	mem_entry->size = size;
	mem_entry->protection = VM_PROT_ALL;

	/* make the shared region point at the memory entry */
	shared_region->sr_mem_entry = mem_entry_port;

	/* fill in the shared region's environment and settings */
	shared_region->sr_base_address = base_address;
	shared_region->sr_size = size;
	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
	shared_region->sr_cpu_type = cputype;
	shared_region->sr_cpu_subtype = cpu_subtype;
	shared_region->sr_64bit = (uint8_t)is_64bit;
#if __ARM_MIXED_PAGE_SIZE__
	shared_region->sr_page_shift = (uint8_t)target_page_shift;
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	shared_region->sr_driverkit = (uint8_t)is_driverkit;
	shared_region->sr_rsr_version = rsr_version;
	shared_region->sr_root_dir = root_dir;

	queue_init(&shared_region->sr_q);
	shared_region->sr_mapping_in_progress = FALSE;
	shared_region->sr_slide_in_progress = FALSE;
	shared_region->sr_persists = FALSE;
	shared_region->sr_stale = FALSE;
	shared_region->sr_timer_call = NULL;
	/* -1 means "no mapping established yet" */
	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;

	/* grab a reference for the caller */
	shared_region->sr_ref_count = 1;

	shared_region->sr_slide = 0; /* not slid yet */

	/* Initialize UUID and other metadata */
	memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
	shared_region->sr_uuid_copied = FALSE;
	shared_region->sr_images_count = 0;
	shared_region->sr_images = NULL;
#if __has_feature(ptrauth_calls)
	shared_region->sr_reslide = reslide;
	shared_region->sr_num_auth_section = 0;
	shared_region->sr_next_auth_section = 0;
	shared_region->sr_auth_section = NULL;
#endif /* __has_feature(ptrauth_calls) */

done:
	if (shared_region) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
			"base=0x%llx,size=0x%llx) <- "
			"%p mem=(%p,%p) map=%p pmap=%p\n",
			(void *)VM_KERNEL_ADDRPERM(root_dir),
			cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)base_address,
			(long long)size,
			(void *)VM_KERNEL_ADDRPERM(shared_region),
			(void *)VM_KERNEL_ADDRPERM(mem_entry_port),
			(void *)VM_KERNEL_ADDRPERM(mem_entry),
			(void *)VM_KERNEL_ADDRPERM(sub_map),
			(void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
	} else {
		SHARED_REGION_TRACE_INFO(
			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
			"base=0x%llx,size=0x%llx) <- NULL",
			(void *)VM_KERNEL_ADDRPERM(root_dir),
			cputype, cpu_subtype, is_64bit, is_driverkit,
			(long long)base_address,
			(long long)size));
	}
	return shared_region;
}
896
897 /*
898 * Destroy a now-unused shared region.
899 * The shared region is no longer in the queue and can not be looked up.
900 */
static void
vm_shared_region_destroy(
	vm_shared_region_t shared_region)
{
	vm_named_entry_t mem_entry;
	vm_map_t map;

	SHARED_REGION_TRACE_INFO(
		("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		(void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
		shared_region->sr_cpu_type,
		shared_region->sr_cpu_subtype,
		shared_region->sr_64bit,
		shared_region->sr_driverkit));

	/* caller holds the last reference; region is already unreachable */
	assert(shared_region->sr_ref_count == 0);
	assert(!shared_region->sr_persists);

	/* the memory entry must be the sub-map flavor created at setup time */
	mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
	assert(mem_entry->is_sub_map);
	assert(!mem_entry->internal);
	assert(!mem_entry->is_copy);
	map = mem_entry->backing.map;

	/*
	 * Clean up the pmap first. The virtual addresses that were
	 * entered in this possibly "nested" pmap may have different values
	 * than the VM map's min and max offsets, if the VM sub map was
	 * mapped at a non-zero offset in the processes' main VM maps, which
	 * is usually the case, so the clean-up we do in vm_map_destroy() would
	 * not be enough.
	 */
	if (map->pmap) {
		pmap_remove(map->pmap,
		    (vm_map_offset_t)shared_region->sr_base_address,
		    (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
	}

	/*
	 * Release our (one and only) handle on the memory entry.
	 * This will generate a no-senders notification, which will be processed
	 * by ipc_kobject_notify_no_senders(), which will release the one and only
	 * reference on the memory entry and cause it to be destroyed, along
	 * with the VM sub map and its pmap.
	 */
	mach_memory_entry_port_release(shared_region->sr_mem_entry);
	mem_entry = NULL;
	shared_region->sr_mem_entry = IPC_PORT_NULL;

	/* free the timer (if one was ever armed for delayed cleanup) */
	if (shared_region->sr_timer_call) {
		thread_call_free(shared_region->sr_timer_call);
	}

#if __has_feature(ptrauth_calls)
	/*
	 * Free the cached copies of slide_info for the AUTH regions.
	 */
	for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
		vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
		if (si != NULL) {
			vm_object_deallocate(si->si_slide_object);
			kfree_data(si->si_slide_info_entry,
			    si->si_slide_info_size);
			kfree_type(struct vm_shared_region_slide_info, si);
			shared_region->sr_auth_section[i] = NULL;
		}
	}
	/* then free the array of slide-info pointers itself */
	if (shared_region->sr_auth_section != NULL) {
		assert(shared_region->sr_num_auth_section > 0);
		kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
		shared_region->sr_auth_section = NULL;
		shared_region->sr_num_auth_section = 0;
	}
#endif /* __has_feature(ptrauth_calls) */

	/* release the shared region structure... */
	kfree_type(struct vm_shared_region, shared_region);

	/*
	 * NOTE(review): shared_region was freed just above; the trace below only
	 * formats the stale pointer value (obfuscated by VM_KERNEL_ADDRPERM),
	 * it does not dereference it.
	 */
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: destroy(%p) <-\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
	shared_region = NULL;
}
985
986 /*
987 * Gets the address of the first (in time) mapping in the shared region.
988 * If used during initial task setup by dyld, task should non-NULL.
989 */
kern_return_t
vm_shared_region_start_address(
	vm_shared_region_t shared_region,
	mach_vm_offset_t *start_address,
	task_t task)
{
	kern_return_t kr;
	mach_vm_offset_t sr_base_address;
	mach_vm_offset_t sr_first_mapping;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> start_address(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();

	/*
	 * Wait if there's another thread establishing a mapping
	 * in this shared region right when we're looking at it.
	 * We want a consistent view of the map...
	 */
	while (shared_region->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
		    THREAD_UNINT);
	}
	assert(!shared_region->sr_mapping_in_progress);
	assert(shared_region->sr_ref_count > 0);

	/* snapshot under the lock; used after unlock below */
	sr_base_address = shared_region->sr_base_address;
	sr_first_mapping = shared_region->sr_first_mapping;

	if (sr_first_mapping == (mach_vm_offset_t) -1) {
		/* shared region is empty: no mapping was ever established */
		kr = KERN_INVALID_ADDRESS;
	} else {
		kr = KERN_SUCCESS;
		/* first mapping is stored relative to the region base */
		*start_address = sr_base_address + sr_first_mapping;
	}


	uint32_t slide = shared_region->sr_slide;

	vm_shared_region_unlock();

	/*
	 * Cache shared region info in the task for telemetry gathering, if we're
	 * passed in the task. No task lock here as we're still in intial task set up.
	 * task_shared_region_slide == -1 means it hasn't been cached yet.
	 */
	if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
		uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
		/* read the dyld cache UUID from user space; only record the
		 * slide if the copyin succeeded */
		if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
		    (char *)&task->task_shared_region_uuid,
		    sizeof(task->task_shared_region_uuid)) == 0) {
			task->task_shared_region_slide = slide;
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: start_address(%p) <- 0x%llx\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		(long long)shared_region->sr_base_address));

	return kr;
}
1055
1056 /*
1057 * Look up a pre-existing mapping in shared region, for replacement.
1058 * Takes an extra object reference if found.
1059 */
1060 static kern_return_t
find_mapping_to_slide(vm_map_t map,vm_map_address_t addr,vm_map_entry_t entry)1061 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1062 {
1063 vm_map_entry_t found;
1064
1065 /* find the shared region's map entry to slide */
1066 vm_map_lock_read(map);
1067 if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
1068 /* no mapping there */
1069 vm_map_unlock(map);
1070 return KERN_INVALID_ARGUMENT;
1071 }
1072
1073 *entry = *found;
1074 /* extra ref to keep object alive while map is unlocked */
1075 vm_object_reference(VME_OBJECT(found));
1076 vm_map_unlock_read(map);
1077 return KERN_SUCCESS;
1078 }
1079
1080 static bool
shared_region_make_permanent(vm_shared_region_t sr,vm_prot_t max_prot)1081 shared_region_make_permanent(
1082 vm_shared_region_t sr,
1083 vm_prot_t max_prot)
1084 {
1085 if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1086 return false;
1087 }
1088 if (max_prot & VM_PROT_WRITE) {
1089 /*
1090 * Potentially writable mapping: no major issue with allowing
1091 * it to be replaced since its contents could be modified
1092 * anyway.
1093 */
1094 return false;
1095 }
1096 if (max_prot & VM_PROT_EXECUTE) {
1097 /*
1098 * Potentially executable mapping: some software might want
1099 * to try and replace it to interpose their own code when a
1100 * given routine is called or returns, for example.
1101 * So let's not make it "permanent".
1102 */
1103 return false;
1104 }
1105 /*
1106 * Make this mapping "permanent" to prevent it from being deleted
1107 * and/or replaced with another mapping.
1108 */
1109 return true;
1110 }
1111
1112 #if __has_feature(ptrauth_calls)
1113
1114 /*
1115 * Determine if this task is actually using pointer signing.
1116 */
1117 static boolean_t
task_sign_pointers(task_t task)1118 task_sign_pointers(task_t task)
1119 {
1120 if (task->map &&
1121 task->map->pmap &&
1122 !task->map->pmap->disable_jop) {
1123 return TRUE;
1124 }
1125 return FALSE;
1126 }
1127
1128 /*
1129 * If the shared region contains mappings that are authenticated, then
1130 * remap them into the task private map.
1131 *
1132 * Failures are possible in this routine when jetsam kills a process
1133 * just as dyld is trying to set it up. The vm_map and task shared region
1134 * info get torn down w/o waiting for this thread to finish up.
1135 */
__attribute__((noinline))
kern_return_t
vm_shared_region_auth_remap(vm_shared_region_t sr)
{
	memory_object_t sr_pager = MEMORY_OBJECT_NULL;
	task_t task = current_task();
	vm_shared_region_slide_info_t si;
	uint_t i;
	vm_object_t object;
	vm_map_t sr_map;
	struct vm_map_entry tmp_entry_store = {0};
	vm_map_entry_t tmp_entry = NULL;
	vm_map_kernel_flags_t vmk_flags;
	vm_map_offset_t map_addr;
	kern_return_t kr = KERN_SUCCESS;
	boolean_t use_ptr_auth = task_sign_pointers(task);

	/*
	 * Don't do this more than once and avoid any race conditions in finishing it.
	 * We serialize on sr_mapping_in_progress like every other operation that
	 * modifies the shared region's mappings.
	 */
	vm_shared_region_lock();
	while (sr->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
	}
	assert(!sr->sr_mapping_in_progress);
	assert(sr->sr_ref_count > 0);

	/* Just return if already done for this task. */
	if (task->shared_region_auth_remapped) {
		vm_shared_region_unlock();
		return KERN_SUCCESS;
	}

	/* let others know to wait while we're working in this shared region */
	sr->sr_mapping_in_progress = TRUE;
	vm_shared_region_unlock();

	/*
	 * Remap any sections with pointer authentications into the private map.
	 * Each AUTH section gets its own pager, keyed (when signing is in use)
	 * by this task's jop_pid, so signed pointers are task-private.
	 */
	for (i = 0; i < sr->sr_num_auth_section; ++i) {
		si = sr->sr_auth_section[i];
		assert(si != NULL);
		assert(si->si_ptrauth);

		/*
		 * We have mapping that needs to be private.
		 * Look for an existing slid mapping's pager with matching
		 * object, offset, slide info and shared_region_id to reuse.
		 */
		object = si->si_slide_object;
		sr_pager = shared_region_pager_match(object, si->si_start, si,
		    use_ptr_auth ? task->jop_pid : 0);
		if (sr_pager == MEMORY_OBJECT_NULL) {
			printf("%s(): shared_region_pager_match() failed\n", __func__);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * verify matching jop_pid for this task and this pager
		 */
		if (use_ptr_auth) {
			shared_region_pager_match_task_key(sr_pager, task);
		}

		sr_map = vm_shared_region_vm_map(sr);
		tmp_entry = NULL;

		/* copy of the entry to replace; takes an object reference */
		kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
		if (kr != KERN_SUCCESS) {
			printf("%s(): find_mapping_to_slide() failed\n", __func__);
			goto done;
		}
		tmp_entry = &tmp_entry_store;

		/*
		 * Check that the object exactly covers the region to slide.
		 */
		if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
			printf("%s(): doesn't fully cover\n", __func__);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * map the pager over the portion of the mapping that needs sliding,
		 * overwriting the existing (immutable) mapping in place
		 */
		vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
		vmk_flags.vmkf_overwrite_immutable = true;
		vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
		    tmp_entry->max_protection);

		map_addr = si->si_slid_address;
		kr = vm_map_enter_mem_object(task->map,
		    &map_addr,
		    si->si_end - si->si_start,
		    (mach_vm_offset_t) 0,
		    vmk_flags,
		    (ipc_port_t)(uintptr_t) sr_pager,
		    0,
		    TRUE,
		    tmp_entry->protection,
		    tmp_entry->max_protection,
		    tmp_entry->inheritance);
		/* vm_map_enter_mem_object took its own reference on the pager */
		memory_object_deallocate(sr_pager);
		sr_pager = MEMORY_OBJECT_NULL;
		if (kr != KERN_SUCCESS) {
			printf("%s(): vm_map_enter_mem_object() failed\n", __func__);
			goto done;
		}
		assertf(map_addr == si->si_slid_address,
		    "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t)si->si_slid_address,
		    tmp_entry);

		/* Drop the ref count grabbed by find_mapping_to_slide */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = NULL;
	}

done:
	if (tmp_entry) {
		/* Drop the ref count grabbed by find_mapping_to_slide */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = NULL;
	}

	/*
	 * Drop any extra reference to the pager in case we're quitting due to an error above.
	 */
	if (sr_pager != MEMORY_OBJECT_NULL) {
		memory_object_deallocate(sr_pager);
	}

	/*
	 * Mark the region as having its auth sections remapped and wake any
	 * thread waiting on sr_mapping_in_progress.
	 */
	vm_shared_region_lock();
	task->shared_region_auth_remapped = TRUE;
	sr->sr_mapping_in_progress = FALSE;
	thread_wakeup((event_t)&sr->sr_mapping_in_progress);
	vm_shared_region_unlock();
	return kr;
}
1283 #endif /* __has_feature(ptrauth_calls) */
1284
void
vm_shared_region_undo_mappings(
	vm_map_t sr_map,
	mach_vm_offset_t sr_base_address,
	struct _sr_file_mappings *srf_mappings,
	struct _sr_file_mappings *srf_mappings_current,
	unsigned int srf_current_mappings_count)
{
	unsigned int j = 0;
	vm_shared_region_t shared_region = NULL;
	boolean_t reset_shared_region_state = FALSE;
	struct _sr_file_mappings *srfmp;
	unsigned int mappings_count;
	struct shared_file_mapping_slide_np *mappings;

	/* takes a reference on the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		printf("Failed to undo mappings because of NULL shared region.\n");
		return;
	}

	/* mark the region as having no first mapping again */
	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;

	/*
	 * If the caller doesn't already hold sr_mapping_in_progress (signalled
	 * by passing sr_map == NULL), acquire it here and derive the sub map
	 * and base address from the shared region itself.
	 */
	if (sr_map == NULL) {
		ipc_port_t sr_handle;
		vm_named_entry_t sr_mem_entry;

		vm_shared_region_lock();
		assert(shared_region->sr_ref_count > 0);

		while (shared_region->sr_mapping_in_progress) {
			/* wait for our turn... */
			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
			    THREAD_UNINT);
		}
		assert(!shared_region->sr_mapping_in_progress);
		assert(shared_region->sr_ref_count > 0);
		/* let others know we're working in this shared region */
		shared_region->sr_mapping_in_progress = TRUE;

		vm_shared_region_unlock();

		/* remember to drop the in-progress flag before returning */
		reset_shared_region_state = TRUE;

		/* no need to lock because this data is never modified... */
		sr_handle = shared_region->sr_mem_entry;
		sr_mem_entry = mach_memory_entry_from_port(sr_handle);
		sr_map = sr_mem_entry->backing.map;
		sr_base_address = shared_region->sr_base_address;
	}
	/*
	 * Undo the mappings we've established so far.
	 * Walk every file up to and including srf_mappings_current; for the
	 * current file, only the first srf_current_mappings_count mappings
	 * were established.
	 */
	for (srfmp = &srf_mappings[0];
	    srfmp <= srf_mappings_current;
	    srfmp++) {
		mappings = srfmp->mappings;
		mappings_count = srfmp->mappings_count;
		if (srfmp == srf_mappings_current) {
			mappings_count = srf_current_mappings_count;
		}

		for (j = 0; j < mappings_count; j++) {
			kern_return_t kr2;
			mach_vm_offset_t start, end;

			if (mappings[j].sms_size == 0) {
				/*
				 * We didn't establish this
				 * mapping, so nothing to undo.
				 */
				continue;
			}
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[%d]: "
				"address:0x%016llx "
				"size:0x%016llx "
				"offset:0x%016llx "
				"maxprot:0x%x prot:0x%x: "
				"undoing...\n",
				j,
				(long long)mappings[j].sms_address,
				(long long)mappings[j].sms_size,
				(long long)mappings[j].sms_file_offset,
				mappings[j].sms_max_prot,
				mappings[j].sms_init_prot));
			/* mapping addresses are relative to the region base */
			start = (mappings[j].sms_address - sr_base_address);
			end = start + mappings[j].sms_size;
			start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
			end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
			kr2 = vm_map_remove_guard(sr_map,
			    start,
			    end,
			    VM_MAP_REMOVE_IMMUTABLE,
			    KMEM_GUARD_NONE).kmr_return;
			assert(kr2 == KERN_SUCCESS);
		}
	}

	if (reset_shared_region_state) {
		vm_shared_region_lock();
		assert(shared_region->sr_ref_count > 0);
		assert(shared_region->sr_mapping_in_progress);
		/* we're done working on that shared region */
		shared_region->sr_mapping_in_progress = FALSE;
		thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
		vm_shared_region_unlock();
		reset_shared_region_state = FALSE;
	}

	/* drop the reference taken by vm_shared_region_get() above */
	vm_shared_region_deallocate(shared_region);
}
1397
1398 /*
1399 * First part of vm_shared_region_map_file(). Split out to
1400 * avoid kernel stack overflow.
1401 */
/*
 * Acquire the shared region's "mapping in progress" state, then establish
 * all the requested file (and fd==-1 anonymous) mappings in the region's
 * sub map. Outputs: which mappings need sliding (mappings_to_slide /
 * slid_mappings / slid_file_controls), the min/max addresses mapped,
 * the sub map and the lowest unnestable address.
 *
 * On success, returns with sr_mapping_in_progress still TRUE; the caller
 * is responsible for finishing up and clearing it. On failure, all
 * mappings established so far are undone (which also clears the flag).
 */
__attribute__((noinline))
static kern_return_t
vm_shared_region_map_file_setup(
	vm_shared_region_t shared_region,
	int sr_file_mappings_count,
	struct _sr_file_mappings *sr_file_mappings,
	unsigned int *mappings_to_slide_cnt,
	struct shared_file_mapping_slide_np **mappings_to_slide,
	mach_vm_offset_t *slid_mappings,
	memory_object_control_t *slid_file_controls,
	mach_vm_offset_t *sfm_min_address,
	mach_vm_offset_t *sfm_max_address,
	vm_map_t *sr_map_ptr,
	vm_map_offset_t *lowest_unnestable_addr_ptr,
	unsigned int vmsr_num_slides)
{
	kern_return_t kr = KERN_SUCCESS;
	memory_object_control_t file_control;
	vm_object_t file_object;
	ipc_port_t sr_handle;
	vm_named_entry_t sr_mem_entry;
	vm_map_t sr_map;
	mach_vm_offset_t sr_base_address;
	unsigned int i = 0;
	mach_port_t map_port;
	vm_map_offset_t target_address;
	vm_object_t object;
	vm_object_size_t obj_size;
	vm_map_offset_t lowest_unnestable_addr = 0;
	vm_map_kernel_flags_t vmk_flags;
	mach_vm_offset_t sfm_end;
	uint32_t mappings_count;
	struct shared_file_mapping_slide_np *mappings;
	struct _sr_file_mappings *srfmp;

	vm_shared_region_lock();
	assert(shared_region->sr_ref_count > 0);

	/*
	 * Make sure we handle only one mapping at a time in a given
	 * shared region, to avoid race conditions. This should not
	 * happen frequently...
	 */
	while (shared_region->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
		    THREAD_UNINT);
	}
	assert(!shared_region->sr_mapping_in_progress);
	assert(shared_region->sr_ref_count > 0);


	/* let others know we're working in this shared region */
	shared_region->sr_mapping_in_progress = TRUE;

	/*
	 * Did someone race in and map this shared region already?
	 * (sr_first_mapping != -1 means a first mapping was recorded.)
	 */
	if (shared_region->sr_first_mapping != -1) {
		vm_shared_region_unlock();
#if DEVELOPMENT || DEBUG
		printf("shared_region: caught race in map and slide\n");
#endif /* DEVELOPMENT || DEBUG */
		return KERN_FAILURE;
	}

	vm_shared_region_unlock();

	/* no need to lock because this data is never modified... */
	sr_handle = shared_region->sr_mem_entry;
	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
	sr_map = sr_mem_entry->backing.map;
	sr_base_address = shared_region->sr_base_address;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> map(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	mappings_count = 0;
	mappings = NULL;
	srfmp = NULL;

	/* process all the files to be mapped */
	for (srfmp = &sr_file_mappings[0];
	    srfmp < &sr_file_mappings[sr_file_mappings_count];
	    srfmp++) {
		mappings_count = srfmp->mappings_count;
		mappings = srfmp->mappings;
		file_control = srfmp->file_control;

		if (mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * The code below can only correctly "slide" (perform relocations) for one
		 * value of the slide amount. So if a file has a non-zero slide, it has to
		 * match any previous value. A zero slide value is ok for things that are
		 * just directly mapped.
		 */
		if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
			shared_region->sr_slide = srfmp->slide;
		} else if (shared_region->sr_slide != 0 &&
		    srfmp->slide != 0 &&
		    shared_region->sr_slide != srfmp->slide) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: more than 1 non-zero slide value amount "
				"slide 1:0x%x slide 2:0x%x\n ",
				shared_region->sr_slide, srfmp->slide));
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

#if __arm64__
		/* 16K-page environments require a 16K-aligned slide */
		if ((shared_region->sr_64bit ||
		    page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
		    ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
			printf("FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
			    __FUNCTION__, srfmp->slide);
			kr = KERN_INVALID_ARGUMENT;
			break;
		}
#endif /* __arm64__ */

		/*
		 * An FD of -1 means we need to copyin the data to an anonymous object.
		 * In that case sms_file_offset is actually the user address to copy from.
		 */
		if (srfmp->fd == -1) {
			assert(mappings_count == 1);
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[0]: "
				"address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
				"maxprot:0x%x prot:0x%x fd==-1\n",
				(long long)mappings[0].sms_address,
				(long long)mappings[0].sms_size,
				(long long)mappings[0].sms_file_offset,
				mappings[0].sms_max_prot,
				mappings[0].sms_init_prot));

			/*
			 * We need an anon object to hold the data in the shared region.
			 * The size needs to be suitable to map into kernel.
			 */
			obj_size = vm_object_round_page(mappings->sms_size);
			object = vm_object_allocate(obj_size);
			if (object == VM_OBJECT_NULL) {
				printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
				kr = KERN_RESOURCE_SHORTAGE;
				break;
			}

			/*
			 * map the object into the kernel so we can copyin into it
			 */
			vm_map_offset_t kaddr = 0;
			vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;

			kr = vm_map_enter(kernel_map,
			    &kaddr,
			    obj_size,
			    0,
			    vmk_flags,
			    object,
			    0,
			    FALSE,
			    (VM_PROT_READ | VM_PROT_WRITE),
			    (VM_PROT_READ | VM_PROT_WRITE),
			    VM_INHERIT_NONE);
			if (kr != KERN_SUCCESS) {
				printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
				vm_object_deallocate(object);
				object = VM_OBJECT_NULL;
				break;
			}

			/*
			 * We'll need another reference to keep the object alive after
			 * we vm_map_remove() it from the kernel.
			 */
			vm_object_reference(object);

			/*
			 * Zero out the object's pages, so we can't leak data.
			 */
			bzero((void *)kaddr, obj_size);

			/*
			 * Copyin the data from dyld to the new object.
			 * Then remove the kernel mapping.
			 */
			int copyin_err =
			    copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
			vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
			if (copyin_err) {
				printf("%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
				/* translate the BSD errno to a Mach return code */
				switch (copyin_err) {
				case EPERM:
				case EACCES:
					kr = KERN_PROTECTION_FAILURE;
					break;
				case EFAULT:
					kr = KERN_INVALID_ADDRESS;
					break;
				default:
					kr = KERN_FAILURE;
					break;
				}
				vm_object_deallocate(object);
				object = VM_OBJECT_NULL;
				break;
			}

			/*
			 * Finally map the object into the shared region.
			 */
			target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
			vmk_flags.vmkf_already = TRUE;
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmf_permanent = shared_region_make_permanent(shared_region,
			    mappings[0].sms_max_prot);

			kr = vm_map_enter(
				sr_map,
				&target_address,
				vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
				0,
				vmk_flags,
				object,
				0,
				TRUE,
				mappings[0].sms_init_prot & VM_PROT_ALL,
				mappings[0].sms_max_prot & VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
			if (kr != KERN_SUCCESS) {
				printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
				vm_object_deallocate(object);
				break;
			}

			/* track the lowest and highest addresses mapped so far */
			if (mappings[0].sms_address < *sfm_min_address) {
				*sfm_min_address = mappings[0].sms_address;
			}

			if (os_add_overflow(mappings[0].sms_address,
			    mappings[0].sms_size,
			    &sfm_end) ||
			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
			    mappings[0].sms_address)) {
				/* overflow */
				kr = KERN_INVALID_ARGUMENT;
				break;
			}

			if (sfm_end > *sfm_max_address) {
				*sfm_max_address = sfm_end;
			}

			continue;
		}

		/* get the VM object associated with the file to be mapped */
		file_object = memory_object_control_to_vm_object(file_control);
		assert(file_object);

		/* tag the backing object as a shared cache (first mapper wins) */
		if (!file_object->object_is_shared_cache) {
			vm_object_lock(file_object);
			file_object->object_is_shared_cache = true;
			vm_object_unlock(file_object);
		}

#if CONFIG_SECLUDED_MEMORY
		/*
		 * Camera will need the shared cache, so don't put the pages
		 * on the secluded queue, assume that's the primary region.
		 * Also keep DEXT shared cache pages off secluded.
		 */
		if (primary_system_shared_region == NULL ||
		    primary_system_shared_region == shared_region ||
		    shared_region->sr_driverkit) {
			memory_object_mark_eligible_for_secluded(file_control, FALSE);
		}
#endif /* CONFIG_SECLUDED_MEMORY */

		/* establish the mappings for that file */
		for (i = 0; i < mappings_count; i++) {
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[%d]: "
				"address:0x%016llx size:0x%016llx offset:0x%016llx "
				"maxprot:0x%x prot:0x%x\n",
				i,
				(long long)mappings[i].sms_address,
				(long long)mappings[i].sms_size,
				(long long)mappings[i].sms_file_offset,
				mappings[i].sms_max_prot,
				mappings[i].sms_init_prot));

			if (mappings[i].sms_address < *sfm_min_address) {
				*sfm_min_address = mappings[i].sms_address;
			}

			if (os_add_overflow(mappings[i].sms_address,
			    mappings[i].sms_size,
			    &sfm_end) ||
			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
			    mappings[i].sms_address)) {
				/* overflow */
				kr = KERN_INVALID_ARGUMENT;
				break;
			}

			if (sfm_end > *sfm_max_address) {
				*sfm_max_address = sfm_end;
			}

			if (mappings[i].sms_init_prot & VM_PROT_ZF) {
				/* zero-filled memory */
				map_port = MACH_PORT_NULL;
			} else {
				/* file-backed memory */
				__IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
			}

			/*
			 * Remember which mappings need sliding.
			 */
			if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
				if (*mappings_to_slide_cnt == vmsr_num_slides) {
					/* caller-provided arrays are full */
					SHARED_REGION_TRACE_INFO(
						("shared_region: mapping[%d]: "
						"address:0x%016llx size:0x%016llx "
						"offset:0x%016llx "
						"maxprot:0x%x prot:0x%x "
						"too many mappings to slide...\n",
						i,
						(long long)mappings[i].sms_address,
						(long long)mappings[i].sms_size,
						(long long)mappings[i].sms_file_offset,
						mappings[i].sms_max_prot,
						mappings[i].sms_init_prot));
				} else {
					mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
					*mappings_to_slide_cnt += 1;
				}
			}

			/* mapping's address is relative to the shared region base */
			target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);

			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
			vmk_flags.vmkf_already = TRUE;
			/* no copy-on-read for mapped binaries */
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmf_permanent = shared_region_make_permanent(
				shared_region,
				mappings[i].sms_max_prot);


			/* establish that mapping, OK if it's "already" there */
			if (map_port == MACH_PORT_NULL) {
				/*
				 * We want to map some anonymous memory in a shared region.
				 * We have to create the VM object now, so that it can be mapped "copy-on-write".
				 */
				obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
				object = vm_object_allocate(obj_size);
				if (object == VM_OBJECT_NULL) {
					kr = KERN_RESOURCE_SHORTAGE;
				} else {
					kr = vm_map_enter(
						sr_map,
						&target_address,
						vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
						0,
						vmk_flags,
						object,
						0,
						TRUE,
						mappings[i].sms_init_prot & VM_PROT_ALL,
						mappings[i].sms_max_prot & VM_PROT_ALL,
						VM_INHERIT_DEFAULT);
				}
			} else {
				object = VM_OBJECT_NULL; /* no anonymous memory here */
				kr = vm_map_enter_mem_object(
					sr_map,
					&target_address,
					vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
					0,
					vmk_flags,
					map_port,
					mappings[i].sms_file_offset,
					TRUE,
					mappings[i].sms_init_prot & VM_PROT_ALL,
					mappings[i].sms_max_prot & VM_PROT_ALL,
					VM_INHERIT_DEFAULT);
			}

			if (kr == KERN_SUCCESS) {
				/*
				 * Record the first successful mapping(s) in the shared
				 * region by file. We're protected by "sr_mapping_in_progress"
				 * here, so no need to lock "shared_region".
				 *
				 * Note that if we have an AOT shared cache (ARM) for a
				 * translated task, then it's always the first file.
				 * The original "native" (i.e. x86) shared cache is the
				 * second file.
				 */

				if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
					shared_region->sr_first_mapping = target_address;
				}

				/* if this mapping was just queued for sliding,
				 * record where it actually landed and its file */
				if (*mappings_to_slide_cnt > 0 &&
				    mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
					slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
					slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
				}

				/*
				 * Record the lowest writable address in this
				 * sub map, to log any unexpected unnesting below
				 * that address (see log_unnest_badness()).
				 */
				if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
				    sr_map->is_nested_map &&
				    (lowest_unnestable_addr == 0 ||
				    (target_address < lowest_unnestable_addr))) {
					lowest_unnestable_addr = target_address;
				}
			} else {
				if (map_port == MACH_PORT_NULL) {
					/*
					 * Get rid of the VM object we just created
					 * but failed to map.
					 */
					vm_object_deallocate(object);
					object = VM_OBJECT_NULL;
				}
				if (kr == KERN_MEMORY_PRESENT) {
					/*
					 * This exact mapping was already there:
					 * that's fine.
					 */
					SHARED_REGION_TRACE_INFO(
						("shared_region: mapping[%d]: "
						"address:0x%016llx size:0x%016llx "
						"offset:0x%016llx "
						"maxprot:0x%x prot:0x%x "
						"already mapped...\n",
						i,
						(long long)mappings[i].sms_address,
						(long long)mappings[i].sms_size,
						(long long)mappings[i].sms_file_offset,
						mappings[i].sms_max_prot,
						mappings[i].sms_init_prot));
					/*
					 * We didn't establish this mapping ourselves;
					 * let's reset its size, so that we do not
					 * attempt to undo it if an error occurs later.
					 */
					mappings[i].sms_size = 0;
					kr = KERN_SUCCESS;
				} else {
					break;
				}
			}
		}

		if (kr != KERN_SUCCESS) {
			break;
		}
	}

	if (kr != KERN_SUCCESS) {
		/* the last mapping we tried (mappings[i]) failed ! */
		assert(i < mappings_count);
		SHARED_REGION_TRACE_ERROR(
			("shared_region: mapping[%d]: "
			"address:0x%016llx size:0x%016llx "
			"offset:0x%016llx "
			"maxprot:0x%x prot:0x%x failed 0x%x\n",
			i,
			(long long)mappings[i].sms_address,
			(long long)mappings[i].sms_size,
			(long long)mappings[i].sms_file_offset,
			mappings[i].sms_max_prot,
			mappings[i].sms_init_prot,
			kr));

		/*
		 * Respect the design of vm_shared_region_undo_mappings
		 * as we are holding the sr_mapping_in_progress == true here.
		 * So don't allow sr_map == NULL otherwise vm_shared_region_undo_mappings
		 * will be blocked at waiting sr_mapping_in_progress to be false.
		 */
		assert(sr_map != NULL);
		/* undo all the previous mappings */
		vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
		return kr;
	}

	*lowest_unnestable_addr_ptr = lowest_unnestable_addr;
	*sr_map_ptr = sr_map;
	return KERN_SUCCESS;
}
1912
/* forward declaration */
1914 __attribute__((noinline))
1915 static void
1916 vm_shared_region_map_file_final(
1917 vm_shared_region_t shared_region,
1918 vm_map_t sr_map,
1919 mach_vm_offset_t sfm_min_address,
1920 mach_vm_offset_t sfm_max_address);
1921
/*
 * Establish some mappings of a file in the shared region.
 * This is used by "dyld" via the shared_region_map_np() system call
 * to populate the shared region with the appropriate shared cache.
 *
 * One could also call it several times to incrementally load several
 * libraries, as long as they do not overlap.
 * It will return KERN_SUCCESS if the mappings were successfully established
 * or if they were already established identically by another process.
 *
 * On failure, any mappings established by the setup pass are undone
 * before returning.
 */
__attribute__((noinline))
kern_return_t
vm_shared_region_map_file(
	vm_shared_region_t shared_region,
	int sr_file_mappings_count,
	struct _sr_file_mappings *sr_file_mappings)
{
	kern_return_t kr = KERN_SUCCESS;
	unsigned int i;
	unsigned int mappings_to_slide_cnt = 0;
	mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1;
	mach_vm_offset_t sfm_max_address = 0;
	vm_map_t sr_map = NULL;
	vm_map_offset_t lowest_unnestable_addr = 0;
	unsigned int vmsr_num_slides = 0;
	typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
	slid_mappings_t *slid_mappings = NULL; /* [0..vmsr_num_slides] */
	memory_object_control_t *slid_file_controls = NULL; /* [0..vmsr_num_slides] */
	struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
	struct _sr_file_mappings *srfmp;

	/*
	 * Figure out how many of the mappings have slides, so the
	 * per-slide arrays below can be sized exactly.
	 */
	for (srfmp = &sr_file_mappings[0];
	    srfmp < &sr_file_mappings[sr_file_mappings_count];
	    srfmp++) {
		for (i = 0; i < srfmp->mappings_count; ++i) {
			if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
				++vmsr_num_slides;
			}
		}
	}

	/* Allocate per slide data structures */
	if (vmsr_num_slides > 0) {
		slid_mappings =
		    kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
		slid_file_controls =
		    kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
		mappings_to_slide =
		    kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
	}

	/*
	 * First pass: install direct mappings of the shared cache file and
	 * collect the information (slid address, file control, mapping
	 * descriptor) needed to redo the slid sub-ranges below.
	 */
	kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
	    &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
	    &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
	if (kr != KERN_SUCCESS) {
		/*
		 * The "done:" label expects the shared region lock to be
		 * held: it clears sr_mapping_in_progress and then unlocks.
		 */
		vm_shared_region_lock();
		goto done;
	}
	assert(vmsr_num_slides == mappings_to_slide_cnt);

	/*
	 * The call above installed direct mappings to the shared cache file.
	 * Now we go back and overwrite the mappings that need relocation
	 * with a special shared region pager.
	 *
	 * Note that this does copyin() of data, needed by the pager, which
	 * the previous code just established mappings for. This is why we
	 * do it in a separate pass.
	 */
#if __has_feature(ptrauth_calls)
	/*
	 * need to allocate storage needed for any sr_auth_sections
	 */
	for (i = 0; i < mappings_to_slide_cnt; ++i) {
		if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
		    shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
		    !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
			++shared_region->sr_num_auth_section;
		}
	}
	if (shared_region->sr_num_auth_section > 0) {
		shared_region->sr_auth_section =
		    kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
		    Z_WAITOK | Z_ZERO);
	}
#endif /* __has_feature(ptrauth_calls) */
	/* Second pass: overwrite each slid sub-range with a sliding pager. */
	for (i = 0; i < mappings_to_slide_cnt; ++i) {
		kr = vm_shared_region_slide(shared_region->sr_slide,
		    mappings_to_slide[i]->sms_file_offset,
		    mappings_to_slide[i]->sms_size,
		    mappings_to_slide[i]->sms_slide_start,
		    mappings_to_slide[i]->sms_slide_size,
		    slid_mappings[i],
		    slid_file_controls[i],
		    mappings_to_slide[i]->sms_max_prot);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: region_slide("
				"slide:0x%x start:0x%016llx "
				"size:0x%016llx) failed 0x%x\n",
				shared_region->sr_slide,
				(long long)mappings_to_slide[i]->sms_slide_start,
				(long long)mappings_to_slide[i]->sms_slide_size,
				kr));
			/* tear down everything the setup pass established */
			vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
			    &sr_file_mappings[0],
			    &sr_file_mappings[sr_file_mappings_count - 1],
			    sr_file_mappings_count);
			/* "done:" expects the shared region lock held */
			vm_shared_region_lock();
			goto done;
		}
	}

	assert(kr == KERN_SUCCESS);

	/* adjust the map's "lowest_unnestable_start" */
	lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
	if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
		vm_map_lock(sr_map);
		sr_map->lowest_unnestable_start = lowest_unnestable_addr;
		vm_map_unlock(sr_map);
	}

	vm_shared_region_lock();
	assert(shared_region->sr_ref_count > 0);
	assert(shared_region->sr_mapping_in_progress);

	vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);

done:
	/*
	 * We're done working on that shared region.
	 * Wake up any waiting threads.
	 * (The shared region lock is held on every path reaching here.)
	 */
	shared_region->sr_mapping_in_progress = FALSE;
	thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
	vm_shared_region_unlock();

#if __has_feature(ptrauth_calls)
	if (kr == KERN_SUCCESS) {
		/*
		 * Since authenticated mappings were just added to the shared region,
		 * go back and remap them into private mappings for this task.
		 */
		kr = vm_shared_region_auth_remap(shared_region);
	}
#endif /* __has_feature(ptrauth_calls) */

	/* Cache shared region info needed for telemetry in the task */
	task_t task;
	if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
		mach_vm_offset_t start_address;
		(void)vm_shared_region_start_address(shared_region, &start_address, task);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: map(%p) <- 0x%x \n",
		(void *)VM_KERNEL_ADDRPERM(shared_region), kr));
	/* release the per-slide scratch arrays allocated above */
	if (vmsr_num_slides > 0) {
		kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
		kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
		kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
		    mappings_to_slide);
	}
	return kr;
}
2091
/*
 * Final part of vm_shared_region_map_file().
 * Kept in separate function to avoid blowing out the stack.
 *
 * Copies the shared cache UUID (and, for launchd with a locally built
 * cache, the image text layout) into the shared region structure, and
 * trims the pmap to the mapped range where applicable.
 *
 * Called with the shared region lock held by the caller.
 */
__attribute__((noinline))
static void
vm_shared_region_map_file_final(
	vm_shared_region_t shared_region,
	vm_map_t sr_map __unused,
	mach_vm_offset_t sfm_min_address __unused,
	mach_vm_offset_t sfm_max_address __unused)
{
	struct _dyld_cache_header sr_cache_header;
	int error;
	size_t image_array_length;
	struct _dyld_cache_image_text_info *sr_image_layout;
	boolean_t locally_built = FALSE;


	/*
	 * copy in the shared region UUID to the shared region structure.
	 * we do this indirectly by first copying in the shared cache header
	 * and then copying the UUID from there because we'll need to look
	 * at other content from the shared cache header.
	 */
	if (!shared_region->sr_uuid_copied) {
		error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
		    (char *)&sr_cache_header,
		    sizeof(sr_cache_header));
		if (error == 0) {
			memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
			shared_region->sr_uuid_copied = TRUE;
			locally_built = sr_cache_header.locallyBuiltCache;
		} else {
#if DEVELOPMENT || DEBUG
			panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
			    "offset:0 size:0x%016llx) failed with %d\n",
			    (long long)shared_region->sr_base_address,
			    (long long)shared_region->sr_first_mapping,
			    (long long)sizeof(sr_cache_header),
			    error);
#endif /* DEVELOPMENT || DEBUG */
			/* on release kernels, just proceed without a UUID */
			shared_region->sr_uuid_copied = FALSE;
		}
	}

	/*
	 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by
	 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
	 * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
	 * region. In that case, launchd re-exec's itself, so we may go through this path multiple times. We
	 * let the most recent one win.
	 *
	 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
	 */
	bool is_init_task = (task_pid(current_task()) == 1);
	if (shared_region->sr_uuid_copied && is_init_task) {
		/* Copy in the shared cache layout if we're running with a locally built shared cache */
		if (locally_built) {
			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
			/* imagesTextCount comes from the user-mapped cache header */
			image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
			/*
			 * NOTE(review): the kalloc_data() result is not checked
			 * before being handed to copyin() — presumably Z_WAITOK
			 * cannot fail for these sizes; verify.
			 */
			sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
			error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
			    sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
			if (error == 0) {
				if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
					panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
				}
				shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
				/* convert each layout entry into a (UUID, load address) pair */
				for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
					memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
					    sizeof(shared_region->sr_images[index].imageUUID));
					shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
				}

				shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
			} else {
#if DEVELOPMENT || DEBUG
				panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
				    "offset:0x%016llx size:0x%016llx) failed with %d\n",
				    (long long)shared_region->sr_base_address,
				    (long long)shared_region->sr_first_mapping,
				    (long long)sr_cache_header.imagesTextOffset,
				    (long long)image_array_length,
				    error);
#endif /* DEVELOPMENT || DEBUG */
			}
			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
			/* the raw layout is only needed to build sr_images */
			kfree_data(sr_image_layout, image_array_length);
			sr_image_layout = NULL;
		}
		/* most recent init-task shared region wins (userspace reboot) */
		primary_system_shared_region = shared_region;
	}

	/*
	 * If we succeeded, we know the bounds of the shared region.
	 * Trim our pmaps to only cover this range (if applicable to
	 * this platform).
	 */
	if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
		pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
	}
}
2195
2196 /*
2197 * Retrieve a task's shared region and grab an extra reference to
2198 * make sure it doesn't disappear while the caller is using it.
2199 * The caller is responsible for consuming that extra reference if
2200 * necessary.
2201 *
2202 * This also tries to trim the pmap for the shared region.
2203 */
2204 vm_shared_region_t
vm_shared_region_trim_and_get(task_t task)2205 vm_shared_region_trim_and_get(task_t task)
2206 {
2207 vm_shared_region_t shared_region;
2208 ipc_port_t sr_handle;
2209 vm_named_entry_t sr_mem_entry;
2210 vm_map_t sr_map;
2211
2212 /* Get the shared region and the map. */
2213 shared_region = vm_shared_region_get(task);
2214 if (shared_region == NULL) {
2215 return NULL;
2216 }
2217
2218 sr_handle = shared_region->sr_mem_entry;
2219 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
2220 sr_map = sr_mem_entry->backing.map;
2221
2222 /* Trim the pmap if possible. */
2223 if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
2224 pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
2225 }
2226
2227 return shared_region;
2228 }
2229
/*
 * Enter the appropriate shared region into "map" for "task".
 * This involves looking up the shared region (and possibly creating a new
 * one) for the desired environment, then mapping the VM sub map into the
 * task's VM "map", with the appropriate level of pmap-nesting.
 *
 * The mapping is done in up to three segments: an optional un-nested
 * prefix below the pmap-nesting range, the pmap-nested bulk of the
 * region, and an optional un-nested suffix.
 *
 * Returns KERN_SUCCESS and gives the task a reference on the shared
 * region, or an error code (the reference is dropped on failure).
 */
kern_return_t
vm_shared_region_enter(
	struct _vm_map *map,
	struct task *task,
	boolean_t is_64bit,
	void *fsroot,
	cpu_type_t cpu,
	cpu_subtype_t cpu_subtype,
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version)
{
	kern_return_t kr;
	vm_shared_region_t shared_region;
	vm_map_offset_t sr_address, sr_offset, target_address;
	vm_map_size_t sr_size, mapping_size;
	vm_map_offset_t sr_pmap_nesting_start;
	vm_map_size_t sr_pmap_nesting_size;
	ipc_port_t sr_handle;
	vm_prot_t cur_prot, max_prot;
	vm_map_kernel_flags_t vmk_flags;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> "
		"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu, cpu_subtype, is_64bit, is_driverkit));

	/* lookup (create if needed) the shared region for this environment */
	shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
	if (shared_region == NULL) {
		/* this should not happen ! */
		SHARED_REGION_TRACE_ERROR(
			("shared_region: -> "
			"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
			"lookup failed !\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
		//panic("shared_region_enter: lookup failed");
		return KERN_FAILURE;
	}

	kr = KERN_SUCCESS;
	/* no need to lock since this data is never modified */
	sr_address = (vm_map_offset_t)shared_region->sr_base_address;
	sr_size = (vm_map_size_t)shared_region->sr_size;
	sr_handle = shared_region->sr_mem_entry;
	sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
	sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();

	cur_prot = VM_PROT_READ;
	if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
		/*
		 * XXX BINARY COMPATIBILITY
		 * java6 apparently needs to modify some code in the
		 * dyld shared cache and needs to be allowed to add
		 * write access...
		 */
		max_prot = VM_PROT_ALL;
	} else {
		max_prot = VM_PROT_READ;
		/* make it "permanent" to protect against re-mappings */
		vmk_flags.vmf_permanent = true;
	}

	/*
	 * Start mapping the shared region's VM sub map into the task's VM map.
	 * sr_offset tracks how much of the region has been mapped so far.
	 */
	sr_offset = 0;

	if (sr_pmap_nesting_start > sr_address) {
		/* we need to map a range without pmap-nesting first */
		target_address = sr_address;
		mapping_size = sr_pmap_nesting_start - sr_address;
		kr = vm_map_enter_mem_object(
			map,
			&target_address,
			mapping_size,
			0,
			vmk_flags,
			sr_handle,
			sr_offset,
			TRUE,
			cur_prot,
			max_prot,
			VM_INHERIT_SHARE);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(void *)VM_KERNEL_ADDRPERM(task),
				(void *)VM_KERNEL_ADDRPERM(fsroot),
				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
				(long long)target_address,
				(long long)mapping_size,
				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
			goto done;
		}
		/* success path; the "error 0x%x" here traces kr == KERN_SUCCESS */
		SHARED_REGION_TRACE_DEBUG(
			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)target_address, (long long)mapping_size,
			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
		sr_offset += mapping_size;
		sr_size -= mapping_size;
	}

	/* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
	vmk_flags.vmkf_nested_pmap = true;
	vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;

	/*
	 * Use pmap-nesting to map the majority of the shared region into the task's
	 * VM space. Very rarely will architectures have a shared region that isn't
	 * the same size as the pmap-nesting region, or start at a different address
	 * than the pmap-nesting region, so this code will map the entirety of the
	 * shared region for most architectures.
	 */
	assert((sr_address + sr_offset) == sr_pmap_nesting_start);
	target_address = sr_pmap_nesting_start;
	kr = vm_map_enter_mem_object(
		map,
		&target_address,
		sr_pmap_nesting_size,
		0,
		vmk_flags,
		sr_handle,
		sr_offset,
		TRUE,
		cur_prot,
		max_prot,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)target_address,
			(long long)sr_pmap_nesting_size,
			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
		goto done;
	}
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
		"nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
		(long long)target_address, (long long)sr_pmap_nesting_size,
		(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));

	sr_offset += sr_pmap_nesting_size;
	sr_size -= sr_pmap_nesting_size;

	if (sr_size > 0) {
		/* and there's some left to be mapped without pmap-nesting */
		vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
		target_address = sr_address + sr_offset;
		mapping_size = sr_size;
		kr = vm_map_enter_mem_object(
			map,
			&target_address,
			mapping_size,
			0,
			VM_MAP_KERNEL_FLAGS_FIXED(),
			sr_handle,
			sr_offset,
			TRUE,
			cur_prot,
			max_prot,
			VM_INHERIT_SHARE);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(void *)VM_KERNEL_ADDRPERM(task),
				(void *)VM_KERNEL_ADDRPERM(fsroot),
				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
				(long long)target_address,
				(long long)mapping_size,
				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
			goto done;
		}
		SHARED_REGION_TRACE_DEBUG(
			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)target_address, (long long)mapping_size,
			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
		sr_offset += mapping_size;
		sr_size -= mapping_size;
	}
	/* the three segments must exactly cover the region */
	assert(sr_size == 0);

done:
	if (kr == KERN_SUCCESS) {
		/* let the task use that shared region */
		vm_shared_region_set(task, shared_region);
	} else {
		/* drop our reference since we're not using it */
		vm_shared_region_deallocate(shared_region);
		vm_shared_region_set(task, NULL);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
		kr));
	return kr;
}
2467
2468 void
vm_shared_region_remove(task_t task,vm_shared_region_t sr)2469 vm_shared_region_remove(
2470 task_t task,
2471 vm_shared_region_t sr)
2472 {
2473 vm_map_t map;
2474 mach_vm_offset_t start;
2475 mach_vm_size_t size;
2476 vm_map_kernel_flags_t vmk_flags;
2477 kern_return_t kr;
2478
2479 if (sr == NULL) {
2480 return;
2481 }
2482 map = get_task_map(task);
2483 start = sr->sr_base_address;
2484 size = sr->sr_size;
2485
2486 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2487 vmk_flags.vmkf_overwrite_immutable = true;
2488 vmk_flags.vm_tag = VM_MEMORY_DYLD;
2489
2490 /* range_id is set by mach_vm_map_kernel */
2491 kr = mach_vm_map_kernel(map,
2492 &start,
2493 size,
2494 0, /* mask */
2495 vmk_flags,
2496 MACH_PORT_NULL,
2497 0,
2498 FALSE, /* copy */
2499 VM_PROT_NONE,
2500 VM_PROT_NONE,
2501 VM_INHERIT_DEFAULT);
2502 if (kr != KERN_SUCCESS) {
2503 printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2504 }
2505 }
2506
2507 #define SANE_SLIDE_INFO_SIZE (2560*1024) /*Can be changed if needed*/
2508
2509 kern_return_t
vm_shared_region_sliding_valid(uint32_t slide)2510 vm_shared_region_sliding_valid(uint32_t slide)
2511 {
2512 kern_return_t kr = KERN_SUCCESS;
2513 vm_shared_region_t sr = vm_shared_region_get(current_task());
2514
2515 /* No region yet? we're fine. */
2516 if (sr == NULL) {
2517 return kr;
2518 }
2519
2520 if (sr->sr_slide != 0 && slide != 0) {
2521 if (slide == sr->sr_slide) {
2522 /*
2523 * Request for sliding when we've
2524 * already done it with exactly the
2525 * same slide value before.
2526 * This isn't wrong technically but
2527 * we don't want to slide again and
2528 * so we return this value.
2529 */
2530 kr = KERN_INVALID_ARGUMENT;
2531 } else {
2532 printf("Mismatched shared region slide\n");
2533 kr = KERN_FAILURE;
2534 }
2535 }
2536 vm_shared_region_deallocate(sr);
2537 return kr;
2538 }
2539
/*
 * Actually create (really overwrite) the mapping to part of the shared cache which
 * undergoes relocation. This routine reads in the relocation info from dyld and
 * verifies it. It then creates a (or finds a matching) shared region pager which
 * handles the actual modification of the page contents and installs the mapping
 * using that pager.
 *
 * On success, ownership of "slide_info_entry" and a reference on the
 * file's VM object are transferred into the vm_shared_region_slide_info;
 * on failure both are released here.
 */
kern_return_t
vm_shared_region_slide_mapping(
	vm_shared_region_t sr,
	user_addr_t slide_info_addr,
	mach_vm_size_t slide_info_size,
	mach_vm_offset_t start,
	mach_vm_size_t size,
	mach_vm_offset_t slid_mapping,
	uint32_t slide,
	memory_object_control_t sr_file_control,
	vm_prot_t prot)
{
	kern_return_t kr;
	vm_object_t object = VM_OBJECT_NULL;
	vm_shared_region_slide_info_t si = NULL;
	vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL;
	struct vm_map_entry tmp_entry_store;
	memory_object_t sr_pager = MEMORY_OBJECT_NULL;
	vm_map_t sr_map;
	vm_map_kernel_flags_t vmk_flags;
	vm_map_offset_t map_addr;
	void *slide_info_entry = NULL;
	int error;

	assert(sr->sr_slide_in_progress);

	if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Copy in and verify the relocation information.
	 * The size is bounded before any allocation.
	 */
	if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
		printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
		return KERN_FAILURE;
	}
	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
		printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
		return KERN_FAILURE;
	}

	slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
	if (slide_info_entry == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
	if (error) {
		printf("copyin of slide_info failed\n");
		kr = KERN_INVALID_ADDRESS;
		goto done;
	}

	/* reject malformed or out-of-bounds relocation info from dyld */
	if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
		printf("Sanity Check failed for slide_info\n");
		goto done;
	}

	/*
	 * Allocate and fill in a vm_shared_region_slide_info.
	 * This will either be used by a new pager, or used to find
	 * a pre-existing matching pager.
	 */
	object = memory_object_control_to_vm_object(sr_file_control);
	if (object == VM_OBJECT_NULL || object->internal) {
		/* must be a file-backed (external) object */
		object = VM_OBJECT_NULL;
		kr = KERN_INVALID_ADDRESS;
		goto done;
	}

	si = kalloc_type(struct vm_shared_region_slide_info,
	    Z_WAITOK | Z_NOFAIL);
	vm_object_lock(object);

	vm_object_reference_locked(object);     /* for si->slide_object */
	object->object_is_shared_cache = TRUE;
	vm_object_unlock(object);

	/* si now owns slide_info_entry */
	si->si_slide_info_entry = slide_info_entry;
	si->si_slide_info_size = slide_info_size;

	assert(slid_mapping != (mach_vm_offset_t) -1);
	si->si_slid_address = slid_mapping + sr->sr_base_address;
	si->si_slide_object = object;
	si->si_start = start;
	si->si_end = si->si_start + size;
	si->si_slide = slide;
#if __has_feature(ptrauth_calls)
	/*
	 * If there is authenticated pointer data in this slid mapping,
	 * then just add the information needed to create new pagers for
	 * different shared_region_id's later.
	 */
	if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
	    sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
	    !(prot & VM_PROT_NOAUTH)) {
		if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
			printf("Too many auth/private sections for shared region!!\n");
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}
		si->si_ptrauth = TRUE;
		sr->sr_auth_section[sr->sr_next_auth_section++] = si;
		/*
		 * Remember the shared region, since that's where we'll
		 * stash this info for all auth pagers to share. Each pager
		 * will need to take a reference to it.
		 */
		si->si_shared_region = sr;
		kr = KERN_SUCCESS;
		goto done;
	}
	si->si_shared_region = NULL;
	si->si_ptrauth = FALSE;
#else /* __has_feature(ptrauth_calls) */
	(void)prot; /* silence unused warning */
#endif /* __has_feature(ptrauth_calls) */

	/*
	 * find the pre-existing shared region's map entry to slide
	 */
	sr_map = vm_shared_region_vm_map(sr);
	kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
	if (kr != KERN_SUCCESS) {
		goto done;
	}
	tmp_entry = &tmp_entry_store;

	/*
	 * The object must exactly cover the region to slide.
	 */
	assert(VME_OFFSET(tmp_entry) == start);
	assert(tmp_entry->vme_end - tmp_entry->vme_start == size);

	/* create a "shared_region" sliding pager */
	sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
	if (sr_pager == MEMORY_OBJECT_NULL) {
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}

#if CONFIG_SECLUDED_MEMORY
	/*
	 * The shared region pagers used by camera or DEXT should have
	 * pagers that won't go on the secluded queue.
	 */
	if (primary_system_shared_region == NULL ||
	    primary_system_shared_region == sr ||
	    sr->sr_driverkit) {
		memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	/* map that pager over the portion of the mapping that needs sliding */
	map_addr = tmp_entry->vme_start;
	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
	vmk_flags.vmkf_overwrite_immutable = true;
	vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
	    tmp_entry->max_protection);

	kr = vm_map_enter_mem_object(sr_map,
	    &map_addr,
	    (tmp_entry->vme_end - tmp_entry->vme_start),
	    (mach_vm_offset_t) 0,
	    vmk_flags,
	    (ipc_port_t)(uintptr_t) sr_pager,
	    0,
	    TRUE,
	    tmp_entry->protection,
	    tmp_entry->max_protection,
	    tmp_entry->inheritance);
	assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
	assertf(map_addr == tmp_entry->vme_start,
	    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
	    (uint64_t)map_addr,
	    (uint64_t) tmp_entry->vme_start,
	    tmp_entry);

	/* success! */
	kr = KERN_SUCCESS;

done:
	if (sr_pager != NULL) {
		/*
		 * Release the sr_pager reference obtained by shared_region_pager_setup().
		 * The mapping, if it succeeded, is now holding a reference on the memory object.
		 */
		memory_object_deallocate(sr_pager);
		sr_pager = MEMORY_OBJECT_NULL;
	}
	if (tmp_entry != NULL) {
		/* release extra ref on tmp_entry's VM object */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = VM_MAP_ENTRY_NULL;
	}

	if (kr != KERN_SUCCESS) {
		/* cleanup: release everything si would have owned */
		if (si != NULL) {
			if (si->si_slide_object) {
				vm_object_deallocate(si->si_slide_object);
				si->si_slide_object = VM_OBJECT_NULL;
			}
			kfree_type(struct vm_shared_region_slide_info, si);
			si = NULL;
		}
		if (slide_info_entry != NULL) {
			kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
			slide_info_entry = NULL;
		}
	}
	return kr;
}
2760
2761 static kern_return_t
vm_shared_region_slide_sanity_check_v1(vm_shared_region_slide_info_entry_v1_t s_info)2762 vm_shared_region_slide_sanity_check_v1(
2763 vm_shared_region_slide_info_entry_v1_t s_info)
2764 {
2765 uint32_t pageIndex = 0;
2766 uint16_t entryIndex = 0;
2767 uint16_t *toc = NULL;
2768
2769 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2770 for (; pageIndex < s_info->toc_count; pageIndex++) {
2771 entryIndex = (uint16_t)(toc[pageIndex]);
2772
2773 if (entryIndex >= s_info->entry_count) {
2774 printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2775 return KERN_FAILURE;
2776 }
2777 }
2778 return KERN_SUCCESS;
2779 }
2780
2781 static kern_return_t
vm_shared_region_slide_sanity_check_v2(vm_shared_region_slide_info_entry_v2_t s_info,mach_vm_size_t slide_info_size)2782 vm_shared_region_slide_sanity_check_v2(
2783 vm_shared_region_slide_info_entry_v2_t s_info,
2784 mach_vm_size_t slide_info_size)
2785 {
2786 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2787 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2788 return KERN_FAILURE;
2789 }
2790 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2791 return KERN_FAILURE;
2792 }
2793
2794 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2795
2796 uint32_t page_starts_count = s_info->page_starts_count;
2797 uint32_t page_extras_count = s_info->page_extras_count;
2798 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2799 if (num_trailing_entries < page_starts_count) {
2800 return KERN_FAILURE;
2801 }
2802
2803 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2804 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2805 if (trailing_size >> 1 != num_trailing_entries) {
2806 return KERN_FAILURE;
2807 }
2808
2809 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2810 if (required_size < sizeof(*s_info)) {
2811 return KERN_FAILURE;
2812 }
2813
2814 if (required_size > slide_info_size) {
2815 return KERN_FAILURE;
2816 }
2817
2818 return KERN_SUCCESS;
2819 }
2820
2821 static kern_return_t
vm_shared_region_slide_sanity_check_v3(vm_shared_region_slide_info_entry_v3_t s_info,mach_vm_size_t slide_info_size)2822 vm_shared_region_slide_sanity_check_v3(
2823 vm_shared_region_slide_info_entry_v3_t s_info,
2824 mach_vm_size_t slide_info_size)
2825 {
2826 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2827 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2828 return KERN_FAILURE;
2829 }
2830 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2831 printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SL 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2832 return KERN_FAILURE;
2833 }
2834
2835 uint32_t page_starts_count = s_info->page_starts_count;
2836 mach_vm_size_t num_trailing_entries = page_starts_count;
2837 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2838 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2839 if (required_size < sizeof(*s_info)) {
2840 printf("vm_shared_region_slide_sanity_check_v3: required_size != sizeof(*s_info) 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2841 return KERN_FAILURE;
2842 }
2843
2844 if (required_size > slide_info_size) {
2845 printf("vm_shared_region_slide_sanity_check_v3: required_size != slide_info_size 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2846 return KERN_FAILURE;
2847 }
2848
2849 return KERN_SUCCESS;
2850 }
2851
2852 static kern_return_t
vm_shared_region_slide_sanity_check_v4(vm_shared_region_slide_info_entry_v4_t s_info,mach_vm_size_t slide_info_size)2853 vm_shared_region_slide_sanity_check_v4(
2854 vm_shared_region_slide_info_entry_v4_t s_info,
2855 mach_vm_size_t slide_info_size)
2856 {
2857 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2858 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2859 return KERN_FAILURE;
2860 }
2861 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2862 return KERN_FAILURE;
2863 }
2864
2865 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2866
2867 uint32_t page_starts_count = s_info->page_starts_count;
2868 uint32_t page_extras_count = s_info->page_extras_count;
2869 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2870 if (num_trailing_entries < page_starts_count) {
2871 return KERN_FAILURE;
2872 }
2873
2874 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2875 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2876 if (trailing_size >> 1 != num_trailing_entries) {
2877 return KERN_FAILURE;
2878 }
2879
2880 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2881 if (required_size < sizeof(*s_info)) {
2882 return KERN_FAILURE;
2883 }
2884
2885 if (required_size > slide_info_size) {
2886 return KERN_FAILURE;
2887 }
2888
2889 return KERN_SUCCESS;
2890 }
2891
2892
2893 static kern_return_t
vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t s_info,mach_vm_size_t s_info_size)2894 vm_shared_region_slide_sanity_check(
2895 vm_shared_region_slide_info_entry_t s_info,
2896 mach_vm_size_t s_info_size)
2897 {
2898 kern_return_t kr;
2899
2900 switch (s_info->version) {
2901 case 1:
2902 kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
2903 break;
2904 case 2:
2905 kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2906 break;
2907 case 3:
2908 kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2909 break;
2910 case 4:
2911 kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2912 break;
2913 default:
2914 kr = KERN_FAILURE;
2915 }
2916 return kr;
2917 }
2918
/*
 * Apply v1 sliding to one page at kernel address "vaddr".
 * Each page has a TOC entry selecting a bitmap; every set bit marks a
 * 32-bit word in the page whose value gets the slide added.
 *
 * NOTE(review): an out-of-range pageIndex or entryIndex is only logged
 * and the function still returns KERN_SUCCESS (the page is left unslid).
 */
static kern_return_t
vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	uint16_t *toc = NULL;
	slide_info_entry_toc_t bitmap = NULL;
	uint32_t i = 0, j = 0;
	uint8_t b = 0;
	uint32_t slide = si->si_slide;
	/* 64-bit tasks need the carry-over check below. */
	int is_64 = task_has_64Bit_addr(current_task());

	vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);

	if (pageIndex >= s_info->toc_count) {
		printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
	} else {
		uint16_t entryIndex = (uint16_t)(toc[pageIndex]);
		slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);

		if (entryIndex >= s_info->entry_count) {
			printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
		} else {
			bitmap = &slide_info_entries[entryIndex];

			/* One bit per 32-bit word in the page; bit set => word is slid. */
			for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
				b = bitmap->entry[i];
				if (b != 0) {
					for (j = 0; j < 8; ++j) {
						if (b & (1 << j)) {
							uint32_t *ptr_to_slide;
							uint32_t old_value;

							/* Word index within the page is (i * 8 + j). */
							ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
							old_value = *ptr_to_slide;
							*ptr_to_slide += slide;
							if (is_64 && *ptr_to_slide < old_value) {
								/*
								 * We just slid the low 32 bits of a 64-bit pointer
								 * and it looks like there should have been a carry-over
								 * to the upper 32 bits.
								 * The sliding failed...
								 */
								printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
								    i, j, b, slide, old_value, *ptr_to_slide);
								return KERN_FAILURE;
							}
						}
					}
				}
			}
		}
	}

	return KERN_SUCCESS;
}
2974
/*
 * Walk a v2 rebase chain of 32-bit pointers within one page, adding the
 * slide to each non-zero (pointer) value.  Each location's delta_mask
 * bits encode the offset to the next location in the chain; a delta of
 * zero terminates the chain.
 *
 * Returns KERN_FAILURE if the chain walks past the end of the page
 * (invalid slide data), KERN_SUCCESS otherwise.
 */
static kern_return_t
rebase_chain_32(
	uint8_t *page_content,
	uint16_t start_offset,
	uint32_t slide_amount,
	vm_shared_region_slide_info_entry_v2_t s_info)
{
	/* Last offset at which a full 32-bit value still fits in the page. */
	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);

	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
	const uint32_t value_mask = ~delta_mask;
	const uint32_t value_add = (uint32_t)(s_info->value_add);
	/* -2 scales the extracted delta into bytes (units of 4 bytes) -- TODO confirm against dyld format. */
	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;

	uint32_t page_offset = start_offset;
	uint32_t delta = 1;

	while (delta != 0 && page_offset <= last_page_offset) {
		uint8_t *loc;
		uint32_t value;

		loc = page_content + page_offset;
		/* memcpy: loc may not be 4-byte aligned. */
		memcpy(&value, loc, sizeof(value));
		delta = (value & delta_mask) >> delta_shift;
		value &= value_mask;

		/* Zero means "no pointer here"; leave it alone. */
		if (value != 0) {
			value += value_add;
			value += slide_amount;
		}
		memcpy(loc, &value, sizeof(value));
		page_offset += delta;
	}

	/* If the offset went past the end of the page, then the slide data is invalid. */
	if (page_offset > last_page_offset) {
		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}
3015
/*
 * Walk a v2 rebase chain of 64-bit pointers within one page, adding the
 * slide to each non-zero (pointer) value.  Chain encoding matches
 * rebase_chain_32 but with 64-bit values, plus a special case for a
 * pointer straddling the page boundary.
 *
 * Returns KERN_FAILURE if the chain walks out of bounds, otherwise
 * KERN_SUCCESS.
 */
static kern_return_t
rebase_chain_64(
	uint8_t *page_content,
	uint16_t start_offset,
	uint32_t slide_amount,
	vm_shared_region_slide_info_entry_v2_t s_info)
{
	/* Last offset at which a full 64-bit value still fits in the page. */
	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);

	const uint64_t delta_mask = s_info->delta_mask;
	const uint64_t value_mask = ~delta_mask;
	const uint64_t value_add = s_info->value_add;
	/* -2 scales the extracted delta into bytes (units of 4 bytes) -- TODO confirm against dyld format. */
	const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;

	uint32_t page_offset = start_offset;
	uint32_t delta = 1;

	while (delta != 0 && page_offset <= last_page_offset) {
		uint8_t *loc;
		uint64_t value;

		loc = page_content + page_offset;
		/* memcpy: loc may not be 8-byte aligned. */
		memcpy(&value, loc, sizeof(value));
		delta = (uint32_t)((value & delta_mask) >> delta_shift);
		value &= value_mask;

		/* Zero means "no pointer here"; leave it alone. */
		if (value != 0) {
			value += value_add;
			value += slide_amount;
		}
		memcpy(loc, &value, sizeof(value));
		page_offset += delta;
	}

	if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
		/* If a pointer straddling the page boundary needs to be adjusted, then
		 * add the slide to the lower half. The encoding guarantees that the upper
		 * half on the next page will need no masking.
		 *
		 * This assumes a little-endian machine and that the region being slid
		 * never crosses a 4 GB boundary. */

		uint8_t *loc = page_content + page_offset;
		uint32_t value;

		memcpy(&value, loc, sizeof(value));
		value += slide_amount;
		memcpy(loc, &value, sizeof(value));
	} else if (page_offset > last_page_offset) {
		/* Chain ran past the page without hitting the straddle case: bad data. */
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
3070
3071 static kern_return_t
rebase_chain(boolean_t is_64,uint32_t pageIndex,uint8_t * page_content,uint16_t start_offset,uint32_t slide_amount,vm_shared_region_slide_info_entry_v2_t s_info)3072 rebase_chain(
3073 boolean_t is_64,
3074 uint32_t pageIndex,
3075 uint8_t *page_content,
3076 uint16_t start_offset,
3077 uint32_t slide_amount,
3078 vm_shared_region_slide_info_entry_v2_t s_info)
3079 {
3080 kern_return_t kr;
3081 if (is_64) {
3082 kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3083 } else {
3084 kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3085 }
3086
3087 if (kr != KERN_SUCCESS) {
3088 printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3089 pageIndex, start_offset, slide_amount);
3090 }
3091 return kr;
3092 }
3093
/*
 * Apply v2 sliding to one page at kernel address "vaddr".
 * page_starts[pageIndex] either marks the page as needing no rebase,
 * encodes the start of a single rebase chain, or (EXTRA flag) indexes
 * into page_extras[] for a list of chains terminated by the END flag.
 */
static kern_return_t
vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
	const uint32_t slide_amount = si->si_slide;

	/* The high bits of the delta_mask field are nonzero precisely when the shared
	 * cache is 64-bit. */
	const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;

	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = page_starts[pageIndex];

	/* Nothing to rebase in this page. */
	if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
		return KERN_SUCCESS;
	}

	if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
		/* Multiple chains: follow the page_extras[] list until END. */
		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
		uint16_t info;

		do {
			uint16_t page_start_offset;
			kern_return_t kr;

			/* Bounds-check each extras index before use. */
			if (chain_index >= s_info->page_extras_count) {
				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
				    chain_index, s_info->page_extras_count);
				return KERN_FAILURE;
			}
			info = page_extras[chain_index];
			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);

			kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
			if (kr != KERN_SUCCESS) {
				return KERN_FAILURE;
			}

			chain_index++;
		} while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
	} else {
		/* Single chain starting at the encoded offset. */
		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
		kern_return_t kr;

		kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
		if (kr != KERN_SUCCESS) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}
3156
3157
/*
 * Apply v3 sliding to one page at kernel address "vaddr".
 * v3 encodes a single chain of 64-bit "threaded" pointers per page;
 * each pointer embeds the offset to the next one.  On ptrauth-capable
 * builds, authenticated pointers are re-signed for user mode using
 * "uservaddr" (the user-visible address of the page) and "jop_key".
 */
static kern_return_t
vm_shared_region_slide_page_v3(
	vm_shared_region_slide_info_t si,
	vm_offset_t vaddr,
	__unused mach_vm_offset_t uservaddr,
	uint32_t pageIndex,
#if !__has_feature(ptrauth_calls)
	__unused
#endif /* !__has_feature(ptrauth_calls) */
	uint64_t jop_key)
{
	vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
	const uint32_t slide_amount = si->si_slide;

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = s_info->page_starts[pageIndex];

	/* Nothing to rebase in this page. */
	if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
		return KERN_SUCCESS;
	}

	/* page_entry is the byte offset of the first pointer in the chain. */
	uint8_t* rebaseLocation = page_content;
	uint64_t delta = page_entry;
	do {
		rebaseLocation += delta;
		uint64_t value;
		memcpy(&value, rebaseLocation, sizeof(value));
		/* Bits 51..61 hold the offset (in 8-byte units) to the next pointer. */
		delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);

		// A pointer is one of :
		// {
		//	 uint64_t pointerValue : 51;
		//	 uint64_t offsetToNextPointer : 11;
		//	 uint64_t isBind : 1 = 0;
		//	 uint64_t authenticated : 1 = 0;
		// }
		// {
		//	 uint32_t offsetFromSharedCacheBase;
		//	 uint16_t diversityData;
		//	 uint16_t hasAddressDiversity : 1;
		//	 uint16_t hasDKey : 1;
		//	 uint16_t hasBKey : 1;
		//	 uint16_t offsetToNextPointer : 11;
		//	 uint16_t isBind : 1;
		//	 uint16_t authenticated : 1 = 1;
		// }

		/* Binds are not expected in the shared cache; reject the page. */
		bool isBind = (value & (1ULL << 62)) != 0;
		if (isBind) {
			return KERN_FAILURE;
		}

#if __has_feature(ptrauth_calls)
		uint16_t diversity_data = (uint16_t)(value >> 32);
		bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
		ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
#endif /* __has_feature(ptrauth_calls) */
		bool isAuthenticated = (value & (1ULL << 63)) != 0;

		if (isAuthenticated) {
			// The new value for a rebase is the low 32-bits of the threaded value plus the slide.
			value = (value & 0xFFFFFFFF) + slide_amount;
			// Add in the offset from the mach_header
			const uint64_t value_add = s_info->value_add;
			value += value_add;

#if __has_feature(ptrauth_calls)
			uint64_t discriminator = diversity_data;
			if (hasAddressDiversity) {
				// First calculate a new discriminator using the address of where we are trying to store the value
				uintptr_t pageOffset = rebaseLocation - page_content;
				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
			}

			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
				/*
				 * these pointers are used in user mode. disable the kernel key diversification
				 * so we can sign them for use in user mode.
				 */
				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
			}
#endif /* __has_feature(ptrauth_calls) */
		} else {
			// The new value for a rebase is the low 51-bits of the threaded value plus the slide.
			// Regular pointer which needs to fit in 51-bits of value.
			// C++ RTTI uses the top bit, so we'll allow the whole top-byte
			// and the bottom 43-bits to be fit in to 51-bits.
			uint64_t top8Bits = value & 0x0007F80000000000ULL;
			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
			value = targetValue + slide_amount;
		}

		memcpy(rebaseLocation, &value, sizeof(value));
	} while (delta != 0); /* delta == 0 terminates the chain */

	return KERN_SUCCESS;
}
3263
/*
 * Walk a v4 rebase chain of 32-bit values within one page.  Like the
 * v2 32-bit chain, but v4 additionally encodes small non-pointer
 * integers in-band (positive as-is, negative sign-extended) which are
 * not slid.
 *
 * Returns KERN_FAILURE if the chain walks past the end of the page.
 */
static kern_return_t
rebase_chainv4(
	uint8_t *page_content,
	uint16_t start_offset,
	uint32_t slide_amount,
	vm_shared_region_slide_info_entry_v4_t s_info)
{
	/* Last offset at which a full 32-bit value still fits in the page. */
	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);

	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
	const uint32_t value_mask = ~delta_mask;
	const uint32_t value_add = (uint32_t)(s_info->value_add);
	/* -2 scales the extracted delta into bytes (units of 4 bytes) -- TODO confirm against dyld format. */
	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;

	uint32_t page_offset = start_offset;
	uint32_t delta = 1;

	while (delta != 0 && page_offset <= last_page_offset) {
		uint8_t *loc;
		uint32_t value;

		loc = page_content + page_offset;
		/* memcpy: loc may not be 4-byte aligned. */
		memcpy(&value, loc, sizeof(value));
		delta = (value & delta_mask) >> delta_shift;
		value &= value_mask;

		if ((value & 0xFFFF8000) == 0) {
			// small positive non-pointer, use as-is
		} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
			// small negative non-pointer
			value |= 0xC0000000;
		} else {
			// pointer that needs rebasing
			value += value_add;
			value += slide_amount;
		}
		memcpy(loc, &value, sizeof(value));
		page_offset += delta;
	}

	/* If the offset went past the end of the page, then the slide data is invalid. */
	if (page_offset > last_page_offset) {
		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}
3310
/*
 * Apply v4 sliding to one page at kernel address "vaddr".
 * Structure mirrors v2: page_starts[pageIndex] is either NO_REBASE,
 * a single chain start, or (USE_EXTRA) an index into page_extras[]
 * holding multiple chains terminated by EXTRA_END.
 */
static kern_return_t
vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
	const uint32_t slide_amount = si->si_slide;

	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = page_starts[pageIndex];

	/* Nothing to rebase in this page. */
	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
		return KERN_SUCCESS;
	}

	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
		/* Multiple chains: follow the page_extras[] list until EXTRA_END. */
		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
		uint16_t info;

		do {
			uint16_t page_start_offset;
			kern_return_t kr;

			/* Bounds-check each extras index before use. */
			if (chain_index >= s_info->page_extras_count) {
				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
				    chain_index, s_info->page_extras_count);
				return KERN_FAILURE;
			}
			info = page_extras[chain_index];
			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);

			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
			if (kr != KERN_SUCCESS) {
				return KERN_FAILURE;
			}

			chain_index++;
		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
	} else {
		/* Single chain starting at the encoded offset. */
		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
		kern_return_t kr;

		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
		if (kr != KERN_SUCCESS) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}
3369
3370
3371
3372 kern_return_t
vm_shared_region_slide_page(vm_shared_region_slide_info_t si,vm_offset_t vaddr,mach_vm_offset_t uservaddr,uint32_t pageIndex,uint64_t jop_key)3373 vm_shared_region_slide_page(
3374 vm_shared_region_slide_info_t si,
3375 vm_offset_t vaddr,
3376 mach_vm_offset_t uservaddr,
3377 uint32_t pageIndex,
3378 uint64_t jop_key)
3379 {
3380 switch (si->si_slide_info_entry->version) {
3381 case 1:
3382 return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
3383 case 2:
3384 return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3385 case 3:
3386 return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3387 case 4:
3388 return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3389 default:
3390 return KERN_FAILURE;
3391 }
3392 }
3393
3394 /******************************************************************************/
3395 /* Comm page support */
3396 /******************************************************************************/
3397
/* Data comm pages: memory-entry port, named entry and submap per width. */
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;

/* Text (executable) comm pages: same trio per width. */
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;

/* Slid user addresses of the text comm pages, chosen at boot. */
SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3414
3415 #if defined(__i386__) || defined(__x86_64__)
3416 /*
3417 * Create a memory entry, VM submap and pmap for one commpage.
3418 */
/*
 * Create the backing for one commpage: a fresh pmap, a submap of the
 * given size on top of it, and a named memory entry (returned through
 * *handlep) wrapping the submap.  Boot-time only; panics on pmap
 * allocation failure.
 */
static void
_vm_commpage_init(
	ipc_port_t *handlep,
	vm_map_size_t size)
{
	vm_named_entry_t mem_entry;
	vm_map_t new_map;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> _init(0x%llx)\n",
		(long long)size));

	/* Dedicated pmap so the commpage can be shared across tasks. */
	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
	if (new_pmap == NULL) {
		panic("_vm_commpage_init: could not allocate pmap");
	}
	new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);

	/* Wrap the submap in a named entry so it can be mapped by port. */
	mem_entry = mach_memory_entry_allocate(handlep);
	mem_entry->backing.map = new_map;
	mem_entry->internal = TRUE;
	mem_entry->is_sub_map = TRUE;
	mem_entry->offset = 0;
	mem_entry->protection = VM_PROT_ALL;
	mem_entry->size = size;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: _init(0x%llx) <- %p\n",
		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
}
3449 #endif
3450
3451
3452 /*
3453 * Initialize the comm text pages at boot time
3454 */
/*
 * Boot-time setup of the executable text comm pages.  On Intel, the
 * 32- and 64-bit text pages are created and placed at a randomized
 * (PFZ-slid) location; on all platforms the text contents are then
 * populated via commpage_text_populate().
 */
void
vm_commpage_text_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage text: ->init()\n"));
#if defined(__i386__) || defined(__x86_64__)
	/* create the 32 bit comm text page */
	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */
	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
	commpage_text32_map = commpage_text32_entry->backing.map;
	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm page */
	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding upto 2Mb range */
	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
	commpage_text64_map = commpage_text64_entry->backing.map;
	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
#endif

	commpage_text_populate();

	/* populate the routines in here */
	SHARED_REGION_TRACE_DEBUG(
		("commpage text: init() <-\n"));
}
3482
3483 /*
3484 * Initialize the comm pages at boot time.
3485 */
/*
 * Boot-time setup of the data comm pages.  On Intel, the 32- and
 * 64-bit comm pages are created here; all platforms then populate
 * their contents via commpage_populate() and record that setup is
 * complete.
 */
void
vm_commpage_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> init()\n"));

#if defined(__i386__) || defined(__x86_64__)
	/* create the 32-bit comm page */
	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
	commpage32_map = commpage32_entry->backing.map;

	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm page */
	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
	commpage64_map = commpage64_entry->backing.map;

#endif /* __i386__ || __x86_64__ */

	/* populate them according to this specific platform */
	commpage_populate();
	__commpage_setup = 1;
#if XNU_TARGET_OS_OSX
	/* Flush any power-source update that arrived before setup finished. */
	if (__system_power_source == 0) {
		post_sys_powersource_internal(0, 1);
	}
#endif /* XNU_TARGET_OS_OSX */

	SHARED_REGION_TRACE_DEBUG(
		("commpage: init() <-\n"));
}
3518
3519 /*
3520 * Enter the appropriate comm page into the task's address space.
3521 * This is called at exec() time via vm_map_exec().
3522 */
/*
 * Enter the appropriate comm page into the task's address space.
 * This is called at exec() time via vm_map_exec().
 *
 * On arm64 the commpage lives in the pmap and is simply inserted.
 * Otherwise, maps (1) the read-only data comm page, (2) the
 * read/execute text comm page, and (3) a reserved range for the
 * Objective-C runtime, selecting 32- or 64-bit variants by "is64bit".
 *
 * NOTE(review): mapping failures are only logged; kr is overwritten by
 * each subsequent mapping attempt, so the returned value reflects the
 * last attempted mapping rather than the first failure.
 */
kern_return_t
vm_commpage_enter(
	vm_map_t map,
	task_t task,
	boolean_t is64bit)
{
#if defined(__arm64__)
#pragma unused(is64bit)
	(void)task;
	(void)map;
	/* The commpage is shared through the pmap on arm64. */
	pmap_insert_commpage(vm_map_pmap(map));
	return KERN_SUCCESS;
#else
	ipc_port_t commpage_handle, commpage_text_handle;
	vm_map_offset_t commpage_address, objc_address, commpage_text_address;
	vm_map_size_t commpage_size, objc_size, commpage_text_size;
	vm_map_kernel_flags_t vmk_flags;
	kern_return_t kr;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> enter(%p,%p)\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task)));

	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
	/* the comm page is likely to be beyond the actual end of the VM map */
	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
	vmk_flags.vmkf_beyond_max = TRUE;

	/* select the appropriate comm page for this task */
	assert(!(is64bit ^ vm_map_is_64bit(map)));
	if (is64bit) {
		commpage_handle = commpage64_handle;
		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
		commpage_size = _COMM_PAGE64_AREA_LENGTH;
		objc_size = _COMM_PAGE64_OBJC_SIZE;
		objc_address = _COMM_PAGE64_OBJC_BASE;
		commpage_text_handle = commpage_text64_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
	} else {
		commpage_handle = commpage32_handle;
		commpage_address =
		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
		commpage_size = _COMM_PAGE32_AREA_LENGTH;
		objc_size = _COMM_PAGE32_OBJC_SIZE;
		objc_address = _COMM_PAGE32_OBJC_BASE;
		commpage_text_handle = commpage_text32_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
	}

	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
		/* the commpage is properly aligned or sized for pmap-nesting */
		vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
		vmk_flags.vmkf_nested_pmap = TRUE;
	}

	/* map the comm page in the task's address space */
	assert(commpage_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_address,
		commpage_size,
		0,
		vmk_flags,
		commpage_handle,
		0,
		FALSE,
		VM_PROT_READ,
		VM_PROT_READ,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("commpage: enter(%p,0x%llx,0x%llx) "
			"commpage %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_address,
			(long long)commpage_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
	}

	/* map the comm text page in the task's address space */
	assert(commpage_text_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_text_address,
		commpage_text_size,
		0,
		vmk_flags,
		commpage_text_handle,
		0,
		FALSE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("commpage text: enter(%p,0x%llx,0x%llx) "
			"commpage text %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_text_address,
			(long long)commpage_text_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
	}

	/*
	 * Since we're here, we also pre-allocate some virtual space for the
	 * Objective-C run-time, if needed...
	 */
	if (objc_size != 0) {
		/* IPC_PORT_NULL object: reserves address space without backing. */
		kr = vm_map_enter_mem_object(
			map,
			&objc_address,
			objc_size,
			0,
			vmk_flags,
			IPC_PORT_NULL,
			0,
			FALSE,
			VM_PROT_ALL,
			VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("commpage: enter(%p,0x%llx,0x%llx) "
				"objc mapping failed 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(long long)objc_address,
				(long long)objc_size, kr));
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("commpage: enter(%p,%p) <- 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task), kr));
	return kr;
#endif
}
3662
/*
 * Slide the current task's shared region.
 * Serializes against concurrent sliders via sr_slide_in_progress
 * (sleep while set, set it, do the work, clear and wake waiters),
 * then delegates the actual work to vm_shared_region_slide_mapping().
 *
 * Returns a kern_return_t value (declared int for the caller's ABI):
 * KERN_FAILURE if the task has no shared region, otherwise the result
 * of vm_shared_region_slide_mapping().
 */
int
vm_shared_region_slide(
	uint32_t slide,
	mach_vm_offset_t entry_start_address,
	mach_vm_size_t entry_size,
	mach_vm_offset_t slide_start,
	mach_vm_size_t slide_size,
	mach_vm_offset_t slid_mapping,
	memory_object_control_t sr_file_control,
	vm_prot_t prot)
{
	vm_shared_region_t sr;
	kern_return_t error;

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
		slide, entry_start_address, entry_size, slide_start, slide_size));

	/* Takes a reference on the shared region; dropped below. */
	sr = vm_shared_region_get(current_task());
	if (sr == NULL) {
		printf("%s: no shared region?\n", __FUNCTION__);
		SHARED_REGION_TRACE_DEBUG(
			("vm_shared_region_slide: <- %d (no shared region)\n",
			KERN_FAILURE));
		return KERN_FAILURE;
	}

	/*
	 * Protect from concurrent access.
	 */
	vm_shared_region_lock();
	while (sr->sr_slide_in_progress) {
		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
	}

	sr->sr_slide_in_progress = TRUE;
	vm_shared_region_unlock();

	/* Do the sliding work without holding the shared region lock. */
	error = vm_shared_region_slide_mapping(sr,
	    (user_addr_t)slide_start,
	    slide_size,
	    entry_start_address,
	    entry_size,
	    slid_mapping,
	    slide,
	    sr_file_control,
	    prot);
	if (error) {
		printf("slide_info initialization failed with kr=%d\n", error);
	}

	vm_shared_region_lock();

	assert(sr->sr_slide_in_progress);
	sr->sr_slide_in_progress = FALSE;
	/* Wake any sliders blocked in the sleep loop above. */
	thread_wakeup(&sr->sr_slide_in_progress);

#if XNU_TARGET_OS_OSX
	if (error == KERN_SUCCESS) {
		shared_region_completed_slide = TRUE;
	}
#endif /* XNU_TARGET_OS_OSX */
	vm_shared_region_unlock();

	vm_shared_region_deallocate(sr);

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: <- %d\n",
		error));

	return error;
}
3735
3736 /*
3737 * Used during Authenticated Root Volume macOS boot.
3738 * Launchd re-execs itself and wants the new launchd to use
3739 * the shared cache from the new root volume. This call
3740 * makes all the existing shared caches stale to allow
3741 * that to happen.
3742 */
3743 void
vm_shared_region_pivot(void)3744 vm_shared_region_pivot(void)
3745 {
3746 vm_shared_region_t shared_region = NULL;
3747
3748 vm_shared_region_lock();
3749
3750 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3751 assert(shared_region->sr_ref_count > 0);
3752 shared_region->sr_stale = TRUE;
3753 if (shared_region->sr_timer_call) {
3754 /*
3755 * We have a shared region ready to be destroyed
3756 * and just waiting for a delayed timer to fire.
3757 * Marking it stale cements its ineligibility to
3758 * be used ever again. So let's shorten the timer
3759 * aggressively down to 10 milliseconds and get rid of it.
3760 * This is a single quantum and we don't need to go
3761 * shorter than this duration. We want it to be short
3762 * enough, however, because we could have an unmount
3763 * of the volume hosting this shared region just behind
3764 * us.
3765 */
3766 uint64_t deadline;
3767 assert(shared_region->sr_ref_count == 1);
3768
3769 /*
3770 * Free the old timer call. Returns with a reference held.
3771 * If the old timer has fired and is waiting for the vm_shared_region_lock
3772 * lock, we will just return with an additional ref_count i.e. 2.
3773 * The old timer will then fire and just drop the ref count down to 1
3774 * with no other modifications.
3775 */
3776 vm_shared_region_reference_locked(shared_region);
3777
3778 /* set up the timer. Keep the reference from above for this timer.*/
3779 shared_region->sr_timer_call = thread_call_allocate(
3780 (thread_call_func_t) vm_shared_region_timeout,
3781 (thread_call_param_t) shared_region);
3782
3783 /* schedule the timer */
3784 clock_interval_to_deadline(10, /* 10 milliseconds */
3785 NSEC_PER_MSEC,
3786 &deadline);
3787 thread_call_enter_delayed(shared_region->sr_timer_call,
3788 deadline);
3789
3790 SHARED_REGION_TRACE_DEBUG(
3791 ("shared_region: pivot(%p): armed timer\n",
3792 (void *)VM_KERNEL_ADDRPERM(shared_region)));
3793 }
3794 }
3795
3796 vm_shared_region_unlock();
3797 }
3798
3799 /*
3800 * Routine to mark any non-standard slide shared cache region as stale.
3801 * This causes the next "reslide" spawn to create a new shared region.
3802 */
3803 void
vm_shared_region_reslide_stale(boolean_t driverkit)3804 vm_shared_region_reslide_stale(boolean_t driverkit)
3805 {
3806 #if __has_feature(ptrauth_calls)
3807 vm_shared_region_t shared_region = NULL;
3808
3809 vm_shared_region_lock();
3810
3811 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3812 assert(shared_region->sr_ref_count > 0);
3813 if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
3814 shared_region->sr_stale = TRUE;
3815 vm_shared_region_reslide_count++;
3816 }
3817 }
3818
3819 vm_shared_region_unlock();
3820 #else
3821 (void)driverkit;
3822 #endif /* __has_feature(ptrauth_calls) */
3823 }
3824
3825 /*
3826 * report if the task is using a reslide shared cache region.
3827 */
3828 bool
vm_shared_region_is_reslide(__unused struct task * task)3829 vm_shared_region_is_reslide(__unused struct task *task)
3830 {
3831 bool is_reslide = FALSE;
3832 #if __has_feature(ptrauth_calls)
3833 vm_shared_region_t sr = vm_shared_region_get(task);
3834
3835 if (sr != NULL) {
3836 is_reslide = sr->sr_reslide;
3837 vm_shared_region_deallocate(sr);
3838 }
3839 #endif /* __has_feature(ptrauth_calls) */
3840 return is_reslide;
3841 }
3842
3843 /*
3844 * This is called from powermanagement code to let kernel know the current source of power.
3845 * 0 if it is external source (connected to power )
3846 * 1 if it is internal power source ie battery
3847 */
3848 void
3849 #if XNU_TARGET_OS_OSX
post_sys_powersource(int i)3850 post_sys_powersource(int i)
3851 #else /* XNU_TARGET_OS_OSX */
3852 post_sys_powersource(__unused int i)
3853 #endif /* XNU_TARGET_OS_OSX */
3854 {
3855 #if XNU_TARGET_OS_OSX
3856 post_sys_powersource_internal(i, 0);
3857 #endif /* XNU_TARGET_OS_OSX */
3858 }
3859
3860
#if XNU_TARGET_OS_OSX
/*
 * Record the current power source. Only acts when called with
 * internal == 0; any other value is ignored.
 */
static void
post_sys_powersource_internal(int i, int internal)
{
	if (internal != 0) {
		return;
	}
	__system_power_source = i;
}
#endif /* XNU_TARGET_OS_OSX */
3870
3871 void *
vm_shared_region_root_dir(struct vm_shared_region * sr)3872 vm_shared_region_root_dir(
3873 struct vm_shared_region *sr)
3874 {
3875 void *vnode;
3876
3877 vm_shared_region_lock();
3878 vnode = sr->sr_root_dir;
3879 vm_shared_region_unlock();
3880 return vnode;
3881 }
3882