1 /*
2 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * Shared region (... and comm page)
26 *
27 * This file handles the VM shared region and comm page.
28 *
29 */
30 /*
31 * SHARED REGIONS
32 * --------------
33 *
34 * A shared region is a submap that contains the most common system shared
35 * libraries for a given environment which is defined by:
36 * - cpu-type
37 * - 64-bitness
38 * - root directory
39 * - Team ID - when we have pointer authentication.
40 *
41 * The point of a shared region is to reduce the setup overhead when exec'ing
42 * a new process. A shared region uses a shared VM submap that gets mapped
43 * automatically at exec() time, see vm_map_exec(). The first process of a given
44 * environment sets up the shared region and all further processes in that
45 * environment can re-use that shared region without having to re-create
46 * the same mappings in their VM map. All they need is contained in the shared
47 * region.
48 *
49 * The region can also share a pmap (mostly for read-only parts but also for the
50 * initial version of some writable parts), which gets "nested" into the
51 * process's pmap. This reduces the number of soft faults: once one process
52 * brings in a page in the shared region, all the other processes can access
53 * it without having to enter it in their own pmap.
54 *
55 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56 * to map the appropriate shared region in the process's address space.
57 * We look up the appropriate shared region for the process's environment.
58 * If we can't find one, we create a new (empty) one and add it to the list.
59 * Otherwise, we just take an extra reference on the shared region we found.
60 *
61 * The "dyld" runtime, mapped into the process's address space at exec() time,
62 * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
63 * system calls to validate and/or populate the shared region with the
64 * appropriate dyld_shared_cache file.
65 *
66 * The shared region is inherited on fork() and the child simply takes an
67 * extra reference on its parent's shared region.
68 *
69 * When the task terminates, we release the reference on its shared region.
70 * When the last reference is released, we destroy the shared region.
71 *
72 * After a chroot(), the calling process keeps using its original shared region,
73 * since that's what was mapped when it was started. But its children
74 * will use a different shared region, because they need to use the shared
75 * cache that's relative to the new root directory.
76 */
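/*
 * Condensed sketch of the exec-time setup described above (not the exact
 * call graph, just the sequence of operations; the functions named here are
 * the ones declared in this file or referenced above):
 *
 *	vm_shared_region_t sr;
 *
 *	sr = vm_shared_region_lookup(root_dir, cputype, cpu_subtype, is_64bit,
 *	    target_page_shift, reslide, is_driverkit, rsr_version);
 *	                                 // find a match or create an empty
 *	                                 // region; returns an extra reference
 *	vm_shared_region_set(task, sr);  // the task now owns that reference
 *	... vm_shared_region_enter() ... // nest the submap into the task's map
 *
 * dyld then validates and/or populates the region from user space via
 * shared_region_check_np() and shared_region_map_and_slide_2_np().
 */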
77
78 /*
79 * COMM PAGE
80 *
81 * A "comm page" is an area of memory that is populated by the kernel with
82 * the appropriate platform-specific version of some commonly used code.
83 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84 * for the native cpu-type. No need to overly optimize translated code
85 * for hardware that is not really there!
86 *
87 * The comm pages are created and populated at boot time.
88 *
89 * The appropriate comm page is mapped into a process's address space
90 * at exec() time, in vm_map_exec(). It is then inherited on fork().
91 *
92 * The comm page is shared between the kernel and all applications of
93 * a given platform. Only the kernel can modify it.
94 *
95 * Applications just branch to fixed addresses in the comm page and find
96 * the right version of the code for the platform. There is also some
97 * data provided and updated by the kernel for processes to retrieve easily
98 * without having to do a system call.
99 */
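/*
 * Illustrative sketch only: a process reads kernel-maintained data with a
 * plain load from a fixed comm page address. _COMM_PAGE_NCPUS is one of the
 * offsets published in <machine/cpu_capabilities.h>, used here purely as an
 * example:
 *
 *	uint8_t ncpus = *(volatile uint8_t *)_COMM_PAGE_NCPUS;
 *
 * No system call is involved; only the kernel ever writes the page.
 */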
100
101 #include <debug.h>
102
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106
107 #include <mach/mach_vm.h>
108 #include <mach/machine.h>
109
110 #include <vm/vm_map.h>
111 #include <vm/vm_map_internal.h>
112 #include <vm/vm_shared_region.h>
113
114 #include <vm/vm_protos.h>
115
116 #include <machine/commpage.h>
117 #include <machine/cpu_capabilities.h>
118 #include <sys/random.h>
119 #include <sys/errno.h>
120
121 #if defined(__arm64__)
122 #include <arm/cpu_data_internal.h>
123 #include <arm/misc_protos.h>
124 #endif
125
126 /*
127 * the following codes are used in the subclass
128 * of the DBG_MACH_SHAREDREGION class
129 */
130 #define PROCESS_SHARED_CACHE_LAYOUT 0x00
131
132 #if __has_feature(ptrauth_calls)
133 #include <ptrauth.h>
134 #endif /* __has_feature(ptrauth_calls) */
135
136 /* "dyld" uses this to figure out what the kernel supports */
137 int shared_region_version = 3;
138
139 /* trace level, output is sent to the system log file */
140 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
141
142 /* should local (non-chroot) shared regions persist when no task uses them? */
143 int shared_region_persistence = 0; /* no by default */
144
145
146 /* delay in seconds before reclaiming an unused shared region */
147 TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
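/* (the TUNABLE above also makes this settable via the "vm_shared_region_destroy_delay" boot-arg) */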
148
149 /*
150 * Cached pointer to the most recently mapped shared region from PID 1, which should
151 * be the most commonly mapped shared region in the system. There are many processes
152 * which do not use this, for a variety of reasons.
153 *
154 * The main consumer of this is stackshot.
155 */
156 struct vm_shared_region *primary_system_shared_region = NULL;
157
158 #if XNU_TARGET_OS_OSX
159 /*
160 * Only one cache gets to slide on Desktop, since we can't
161 * tear down slide info properly today and the desktop actually
162 * produces lots of shared caches.
163 */
164 boolean_t shared_region_completed_slide = FALSE;
165 #endif /* XNU_TARGET_OS_OSX */
166
167 /* this lock protects all the shared region data structures */
168 static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
169 static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
170
171 #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
172 #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
173 #define vm_shared_region_sleep(event, interruptible) \
174 lck_mtx_sleep(&vm_shared_region_lock, \
175 LCK_SLEEP_DEFAULT, \
176 (event_t) (event), \
177 (interruptible))
178
179 /* the list of currently available shared regions (one per environment) */
180 queue_head_t vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
181 int vm_shared_region_count = 0;
182 int vm_shared_region_peak = 0;
183 static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */
184
185 /*
186 * the number of times an event has forced the recalculation of the reslide
187 * shared region slide.
188 */
189 #if __has_feature(ptrauth_calls)
190 int vm_shared_region_reslide_count = 0;
191 #endif /* __has_feature(ptrauth_calls) */
192
193 static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
194 static vm_shared_region_t vm_shared_region_create(
195 void *root_dir,
196 cpu_type_t cputype,
197 cpu_subtype_t cpu_subtype,
198 boolean_t is_64bit,
199 int target_page_shift,
200 boolean_t reslide,
201 boolean_t is_driverkit,
202 uint32_t rsr_version);
203 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
204
205 static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
206 static void vm_shared_region_timeout(thread_call_param_t param0,
207 thread_call_param_t param1);
208 static kern_return_t vm_shared_region_slide_mapping(
209 vm_shared_region_t sr,
210 user_addr_t slide_info_addr,
211 mach_vm_size_t slide_info_size,
212 mach_vm_offset_t start,
213 mach_vm_size_t size,
214 mach_vm_offset_t slid_mapping,
215 uint32_t slide,
216 memory_object_control_t,
217 vm_prot_t prot); /* forward */
218
219 static int __commpage_setup = 0;
220 #if XNU_TARGET_OS_OSX
221 static int __system_power_source = 1; /* init to external power source */
222 static void post_sys_powersource_internal(int i, int internal);
223 #endif /* XNU_TARGET_OS_OSX */
224
225 extern u_int32_t random(void);
226
227 /*
228 * Retrieve a task's shared region and grab an extra reference to
229 * make sure it doesn't disappear while the caller is using it.
230 * The caller is responsible for consuming that extra reference if
231 * necessary.
232 */
233 vm_shared_region_t
234 vm_shared_region_get(
235 task_t task)
236 {
237 vm_shared_region_t shared_region;
238
239 SHARED_REGION_TRACE_DEBUG(
240 ("shared_region: -> get(%p)\n",
241 (void *)VM_KERNEL_ADDRPERM(task)));
242
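/*
 * Hold both locks: task_lock() keeps task->shared_region stable while
 * vm_shared_region_lock() protects the refcount we are about to bump.
 */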
243 task_lock(task);
244 vm_shared_region_lock();
245 shared_region = task->shared_region;
246 if (shared_region) {
247 assert(shared_region->sr_ref_count > 0);
248 vm_shared_region_reference_locked(shared_region);
249 }
250 vm_shared_region_unlock();
251 task_unlock(task);
252
253 SHARED_REGION_TRACE_DEBUG(
254 ("shared_region: get(%p) <- %p\n",
255 (void *)VM_KERNEL_ADDRPERM(task),
256 (void *)VM_KERNEL_ADDRPERM(shared_region)));
257
258 return shared_region;
259 }
260
261 vm_map_t
262 vm_shared_region_vm_map(
263 vm_shared_region_t shared_region)
264 {
265 ipc_port_t sr_handle;
266 vm_named_entry_t sr_mem_entry;
267 vm_map_t sr_map;
268
269 SHARED_REGION_TRACE_DEBUG(
270 ("shared_region: -> vm_map(%p)\n",
271 (void *)VM_KERNEL_ADDRPERM(shared_region)));
272 assert(shared_region->sr_ref_count > 0);
273
274 sr_handle = shared_region->sr_mem_entry;
275 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
276 sr_map = sr_mem_entry->backing.map;
277 assert(sr_mem_entry->is_sub_map);
278
279 SHARED_REGION_TRACE_DEBUG(
280 ("shared_region: vm_map(%p) <- %p\n",
281 (void *)VM_KERNEL_ADDRPERM(shared_region),
282 (void *)VM_KERNEL_ADDRPERM(sr_map)));
283 return sr_map;
284 }
285
286 /*
287 * Set the shared region the process should use.
288 * A NULL new shared region means that we just want to release the old
289 * shared region.
290 * The caller should already have an extra reference on the new shared region
291 * (if any). We release a reference on the old shared region (if any).
292 */
293 void
294 vm_shared_region_set(
295 task_t task,
296 vm_shared_region_t new_shared_region)
297 {
298 vm_shared_region_t old_shared_region;
299
300 SHARED_REGION_TRACE_DEBUG(
301 ("shared_region: -> set(%p, %p)\n",
302 (void *)VM_KERNEL_ADDRPERM(task),
303 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
304
305 task_lock(task);
306 vm_shared_region_lock();
307
308 old_shared_region = task->shared_region;
309 if (new_shared_region) {
310 assert(new_shared_region->sr_ref_count > 0);
311 }
312
313 task->shared_region = new_shared_region;
314
315 vm_shared_region_unlock();
316 task_unlock(task);
317
318 if (old_shared_region) {
319 assert(old_shared_region->sr_ref_count > 0);
320 vm_shared_region_deallocate(old_shared_region);
321 }
322
323 SHARED_REGION_TRACE_DEBUG(
324 ("shared_region: set(%p) <- old=%p new=%p\n",
325 (void *)VM_KERNEL_ADDRPERM(task),
326 (void *)VM_KERNEL_ADDRPERM(old_shared_region),
327 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
328 }
329
330 /*
331 * New arm64 shared regions match with an existing arm64e region.
332 * They just get a private non-authenticating pager.
333 */
334 static inline bool
335 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
336 {
337 if (exist == new) {
338 return true;
339 }
340 if (cputype == CPU_TYPE_ARM64 &&
341 exist == CPU_SUBTYPE_ARM64E &&
342 new == CPU_SUBTYPE_ARM64_ALL) {
343 return true;
344 }
345 return false;
346 }
347
348
349 /*
350 * Look up the shared region for the desired environment.
351 * If none is found, create a new (empty) one.
352 * Grab an extra reference on the returned shared region, to make sure
353 * it doesn't get destroyed before the caller is done with it. The caller
354 * is responsible for consuming that extra reference if necessary.
355 */
356 vm_shared_region_t
357 vm_shared_region_lookup(
358 void *root_dir,
359 cpu_type_t cputype,
360 cpu_subtype_t cpu_subtype,
361 boolean_t is_64bit,
362 int target_page_shift,
363 boolean_t reslide,
364 boolean_t is_driverkit,
365 uint32_t rsr_version)
366 {
367 vm_shared_region_t shared_region;
368 vm_shared_region_t new_shared_region;
369
370 SHARED_REGION_TRACE_DEBUG(
371 ("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
372 (void *)VM_KERNEL_ADDRPERM(root_dir),
373 cputype, cpu_subtype, is_64bit, target_page_shift,
374 reslide, is_driverkit));
375
376 shared_region = NULL;
377 new_shared_region = NULL;
378
379 vm_shared_region_lock();
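/*
 * Search the queue under the lock. If nothing matches, drop the lock to
 * create a candidate region, then loop and search again in case another
 * thread created one in the meantime; on that second pass we either adopt
 * the winner or insert our own candidate.
 */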
380 for (;;) {
381 queue_iterate(&vm_shared_region_queue,
382 shared_region,
383 vm_shared_region_t,
384 sr_q) {
385 assert(shared_region->sr_ref_count > 0);
386 if (shared_region->sr_cpu_type == cputype &&
387 match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
388 shared_region->sr_root_dir == root_dir &&
389 shared_region->sr_64bit == is_64bit &&
390 #if __ARM_MIXED_PAGE_SIZE__
391 shared_region->sr_page_shift == target_page_shift &&
392 #endif /* __ARM_MIXED_PAGE_SIZE__ */
393 #if __has_feature(ptrauth_calls)
394 shared_region->sr_reslide == reslide &&
395 #endif /* __has_feature(ptrauth_calls) */
396 shared_region->sr_driverkit == is_driverkit &&
397 shared_region->sr_rsr_version == rsr_version &&
398 !shared_region->sr_stale) {
399 /* found a match ! */
400 vm_shared_region_reference_locked(shared_region);
401 goto done;
402 }
403 }
404 if (new_shared_region == NULL) {
405 /* no match: create a new one */
406 vm_shared_region_unlock();
407 new_shared_region = vm_shared_region_create(root_dir,
408 cputype,
409 cpu_subtype,
410 is_64bit,
411 target_page_shift,
412 reslide,
413 is_driverkit,
414 rsr_version);
415 /* do the lookup again, in case we lost a race */
416 vm_shared_region_lock();
417 continue;
418 }
419 /* still no match: use our new one */
420 shared_region = new_shared_region;
421 new_shared_region = NULL;
422 uint32_t newid = ++vm_shared_region_lastid;
423 if (newid == 0) {
424 panic("shared_region: vm_shared_region_lastid wrapped");
425 }
426 shared_region->sr_id = newid;
427 shared_region->sr_install_time = mach_absolute_time();
428 queue_enter(&vm_shared_region_queue,
429 shared_region,
430 vm_shared_region_t,
431 sr_q);
432 vm_shared_region_count++;
433 if (vm_shared_region_count > vm_shared_region_peak) {
434 vm_shared_region_peak = vm_shared_region_count;
435 }
436 break;
437 }
438
439 done:
440 vm_shared_region_unlock();
441
442 if (new_shared_region) {
443 /*
444 * We lost a race with someone else to create a new shared
445 * region for that environment. Get rid of our unused one.
446 */
447 assert(new_shared_region->sr_ref_count == 1);
448 new_shared_region->sr_ref_count--;
449 vm_shared_region_destroy(new_shared_region);
450 new_shared_region = NULL;
451 }
452
453 SHARED_REGION_TRACE_DEBUG(
454 ("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
455 (void *)VM_KERNEL_ADDRPERM(root_dir),
456 cputype, cpu_subtype, is_64bit, target_page_shift,
457 reslide, is_driverkit,
458 (void *)VM_KERNEL_ADDRPERM(shared_region)));
459
460 assert(shared_region->sr_ref_count > 0);
461 return shared_region;
462 }
463
464 /*
465 * Take an extra reference on a shared region.
466 * The vm_shared_region_lock should already be held by the caller.
467 */
468 static void
469 vm_shared_region_reference_locked(
470 vm_shared_region_t shared_region)
471 {
472 LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
473
474 SHARED_REGION_TRACE_DEBUG(
475 ("shared_region: -> reference_locked(%p)\n",
476 (void *)VM_KERNEL_ADDRPERM(shared_region)));
477 assert(shared_region->sr_ref_count > 0);
478 shared_region->sr_ref_count++;
479 assert(shared_region->sr_ref_count != 0);
480
481 if (shared_region->sr_timer_call != NULL) {
482 boolean_t cancelled;
483
484 /* cancel and free any pending timeout */
485 cancelled = thread_call_cancel(shared_region->sr_timer_call);
486 if (cancelled) {
487 thread_call_free(shared_region->sr_timer_call);
488 shared_region->sr_timer_call = NULL;
489 /* release the reference held by the cancelled timer */
490 shared_region->sr_ref_count--;
491 } else {
492 /* the timer will drop the reference and free itself */
493 }
494 }
495
496 SHARED_REGION_TRACE_DEBUG(
497 ("shared_region: reference_locked(%p) <- %d\n",
498 (void *)VM_KERNEL_ADDRPERM(shared_region),
499 shared_region->sr_ref_count));
500 }
501
502 /*
503 * Take a reference on a shared region.
504 */
505 void
506 vm_shared_region_reference(vm_shared_region_t shared_region)
507 {
508 SHARED_REGION_TRACE_DEBUG(
509 ("shared_region: -> reference(%p)\n",
510 (void *)VM_KERNEL_ADDRPERM(shared_region)));
511
512 vm_shared_region_lock();
513 vm_shared_region_reference_locked(shared_region);
514 vm_shared_region_unlock();
515
516 SHARED_REGION_TRACE_DEBUG(
517 ("shared_region: reference(%p) <- %d\n",
518 (void *)VM_KERNEL_ADDRPERM(shared_region),
519 shared_region->sr_ref_count));
520 }
521
522 /*
523 * Release a reference on the shared region.
524 * Destroy it if there are no references left.
525 */
526 void
527 vm_shared_region_deallocate(
528 vm_shared_region_t shared_region)
529 {
530 SHARED_REGION_TRACE_DEBUG(
531 ("shared_region: -> deallocate(%p)\n",
532 (void *)VM_KERNEL_ADDRPERM(shared_region)));
533
534 vm_shared_region_lock();
535
536 assert(shared_region->sr_ref_count > 0);
537
538 if (shared_region->sr_root_dir == NULL) {
539 /*
540 * Local (i.e. based on the boot volume) shared regions
541 * can persist or not based on the "shared_region_persistence"
542 * sysctl.
543 * Make sure that this one complies.
544 *
545 * See comments in vm_shared_region_slide() for notes about
546 * shared regions we have slid (which are not torn down currently).
547 */
548 if (shared_region_persistence &&
549 !shared_region->sr_persists) {
550 /* make this one persistent */
551 shared_region->sr_ref_count++;
552 shared_region->sr_persists = TRUE;
553 } else if (!shared_region_persistence &&
554 shared_region->sr_persists) {
555 /* make this one no longer persistent */
556 assert(shared_region->sr_ref_count > 1);
557 shared_region->sr_ref_count--;
558 shared_region->sr_persists = FALSE;
559 }
560 }
561
562 assert(shared_region->sr_ref_count > 0);
563 shared_region->sr_ref_count--;
564 SHARED_REGION_TRACE_DEBUG(
565 ("shared_region: deallocate(%p): ref now %d\n",
566 (void *)VM_KERNEL_ADDRPERM(shared_region),
567 shared_region->sr_ref_count));
568
569 if (shared_region->sr_ref_count == 0) {
570 uint64_t deadline;
571
572 /*
573 * Even though a shared region is unused, delay a while before
574 * tearing it down, in case a new app launch can use it.
575 * We don't keep around stale shared regions, nor older RSR ones.
576 */
577 if (shared_region->sr_timer_call == NULL &&
578 shared_region_destroy_delay != 0 &&
579 !shared_region->sr_stale &&
580 !(shared_region->sr_rsr_version != 0 &&
581 shared_region->sr_rsr_version != rsr_get_version())) {
582 /* hold one reference for the timer */
583 assert(!shared_region->sr_mapping_in_progress);
584 shared_region->sr_ref_count++;
585
586 /* set up the timer */
587 shared_region->sr_timer_call = thread_call_allocate(
588 (thread_call_func_t) vm_shared_region_timeout,
589 (thread_call_param_t) shared_region);
590
591 /* schedule the timer */
592 clock_interval_to_deadline(shared_region_destroy_delay,
593 NSEC_PER_SEC,
594 &deadline);
595 thread_call_enter_delayed(shared_region->sr_timer_call,
596 deadline);
597
598 SHARED_REGION_TRACE_DEBUG(
599 ("shared_region: deallocate(%p): armed timer\n",
600 (void *)VM_KERNEL_ADDRPERM(shared_region)));
601
602 vm_shared_region_unlock();
603 } else {
604 /* no delay wanted, or the delay timer already fired: let go of this shared region */
605
606 /* Make sure there's no cached pointer to the region. */
607 if (primary_system_shared_region == shared_region) {
608 primary_system_shared_region = NULL;
609 }
610
611 /*
612 * Remove it from the queue first, so no one can find
613 * it...
614 */
615 queue_remove(&vm_shared_region_queue,
616 shared_region,
617 vm_shared_region_t,
618 sr_q);
619 vm_shared_region_count--;
620 vm_shared_region_unlock();
621
622 /* ... and destroy it */
623 vm_shared_region_destroy(shared_region);
624 shared_region = NULL;
625 }
626 } else {
627 vm_shared_region_unlock();
628 }
629
630 SHARED_REGION_TRACE_DEBUG(
631 ("shared_region: deallocate(%p) <-\n",
632 (void *)VM_KERNEL_ADDRPERM(shared_region)));
633 }
634
635 void
636 vm_shared_region_timeout(
637 thread_call_param_t param0,
638 __unused thread_call_param_t param1)
639 {
640 vm_shared_region_t shared_region;
641
642 shared_region = (vm_shared_region_t) param0;
643
644 vm_shared_region_deallocate(shared_region);
645 }
646
647
648 /*
649 * Create a new (empty) shared region for a new environment.
650 */
651 static vm_shared_region_t
652 vm_shared_region_create(
653 void *root_dir,
654 cpu_type_t cputype,
655 cpu_subtype_t cpu_subtype,
656 boolean_t is_64bit,
657 int target_page_shift,
658 #if !__has_feature(ptrauth_calls)
659 __unused
660 #endif /* __has_feature(ptrauth_calls) */
661 boolean_t reslide,
662 boolean_t is_driverkit,
663 uint32_t rsr_version)
664 {
665 vm_named_entry_t mem_entry;
666 ipc_port_t mem_entry_port;
667 vm_shared_region_t shared_region;
668 vm_map_t sub_map;
669 mach_vm_offset_t base_address, pmap_nesting_start;
670 mach_vm_size_t size, pmap_nesting_size;
671
672 SHARED_REGION_TRACE_INFO(
673 ("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
674 (void *)VM_KERNEL_ADDRPERM(root_dir),
675 cputype, cpu_subtype, is_64bit, target_page_shift,
676 reslide, is_driverkit));
677
678 base_address = 0;
679 size = 0;
680 mem_entry = NULL;
681 mem_entry_port = IPC_PORT_NULL;
682 sub_map = VM_MAP_NULL;
683
684 /* create a new shared region structure... */
685 shared_region = kalloc_type(struct vm_shared_region,
686 Z_WAITOK | Z_NOFAIL);
687
688 /* figure out the correct settings for the desired environment */
689 if (is_64bit) {
690 switch (cputype) {
691 #if defined(__arm64__)
692 case CPU_TYPE_ARM64:
693 base_address = SHARED_REGION_BASE_ARM64;
694 size = SHARED_REGION_SIZE_ARM64;
695 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
696 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
697 break;
698 #else
699 case CPU_TYPE_I386:
700 base_address = SHARED_REGION_BASE_X86_64;
701 size = SHARED_REGION_SIZE_X86_64;
702 pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
703 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
704 break;
705 case CPU_TYPE_POWERPC:
706 base_address = SHARED_REGION_BASE_PPC64;
707 size = SHARED_REGION_SIZE_PPC64;
708 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
709 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
710 break;
711 #endif
712 default:
713 SHARED_REGION_TRACE_ERROR(
714 ("shared_region: create: unknown cpu type %d\n",
715 cputype));
716 kfree_type(struct vm_shared_region, shared_region);
717 shared_region = NULL;
718 goto done;
719 }
720 } else {
721 switch (cputype) {
722 #if defined(__arm64__)
723 case CPU_TYPE_ARM:
724 base_address = SHARED_REGION_BASE_ARM;
725 size = SHARED_REGION_SIZE_ARM;
726 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
727 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
728 break;
729 #else
730 case CPU_TYPE_I386:
731 base_address = SHARED_REGION_BASE_I386;
732 size = SHARED_REGION_SIZE_I386;
733 pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
734 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
735 break;
736 case CPU_TYPE_POWERPC:
737 base_address = SHARED_REGION_BASE_PPC;
738 size = SHARED_REGION_SIZE_PPC;
739 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
740 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
741 break;
742 #endif
743 default:
744 SHARED_REGION_TRACE_ERROR(
745 ("shared_region: create: unknown cpu type %d\n",
746 cputype));
747 kfree_type(struct vm_shared_region, shared_region);
748 shared_region = NULL;
749 goto done;
750 }
751 }
752
753 /* create a memory entry structure and a Mach port handle */
754 mem_entry = mach_memory_entry_allocate(&mem_entry_port);
755
756 #if defined(__arm64__)
757 {
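/* create a nested pmap (and below, its VM sub map) for this environment */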
758 struct pmap *pmap_nested;
759 int pmap_flags = 0;
760 pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
761
762
763 #if __ARM_MIXED_PAGE_SIZE__
764 if (cputype == CPU_TYPE_ARM64 &&
765 target_page_shift == FOURK_PAGE_SHIFT) {
766 /* arm64/4k address space */
767 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
768 }
769 #endif /* __ARM_MIXED_PAGE_SIZE__ */
770
771 pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
772 if (pmap_nested != PMAP_NULL) {
773 pmap_set_nested(pmap_nested);
774 sub_map = vm_map_create_options(pmap_nested, 0,
775 (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
776
777 if (is_64bit ||
778 page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
779 /* enforce 16KB alignment of VM map entries */
780 vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
781 }
782 #if __ARM_MIXED_PAGE_SIZE__
783 if (cputype == CPU_TYPE_ARM64 &&
784 target_page_shift == FOURK_PAGE_SHIFT) {
785 /* arm64/4k address space */
786 vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
787 }
788 #endif /* __ARM_MIXED_PAGE_SIZE__ */
789 } else {
790 sub_map = VM_MAP_NULL;
791 }
792 }
793 #else /* defined(__arm64__) */
794 {
795 /* create a VM sub map and its pmap */
796 pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
797 if (pmap != NULL) {
798 sub_map = vm_map_create_options(pmap, 0,
799 (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
800 } else {
801 sub_map = VM_MAP_NULL;
802 }
803 }
804 #endif /* defined(__arm64__) */
805 if (sub_map == VM_MAP_NULL) {
806 ipc_port_release_send(mem_entry_port);
807 kfree_type(struct vm_shared_region, shared_region);
808 shared_region = NULL;
809 SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
810 goto done;
811 }
812
813 /* shared regions should always enforce code-signing */
814 vm_map_cs_enforcement_set(sub_map, true);
815 assert(vm_map_cs_enforcement(sub_map));
816 assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
817
818 assert(!sub_map->disable_vmentry_reuse);
819 sub_map->is_nested_map = TRUE;
820
821 /* make the memory entry point to the VM sub map */
822 mem_entry->is_sub_map = TRUE;
823 mem_entry->backing.map = sub_map;
824 mem_entry->size = size;
825 mem_entry->protection = VM_PROT_ALL;
826
827 /* make the shared region point at the memory entry */
828 shared_region->sr_mem_entry = mem_entry_port;
829
830 /* fill in the shared region's environment and settings */
831 shared_region->sr_base_address = base_address;
832 shared_region->sr_size = size;
833 shared_region->sr_pmap_nesting_start = pmap_nesting_start;
834 shared_region->sr_pmap_nesting_size = pmap_nesting_size;
835 shared_region->sr_cpu_type = cputype;
836 shared_region->sr_cpu_subtype = cpu_subtype;
837 shared_region->sr_64bit = (uint8_t)is_64bit;
838 #if __ARM_MIXED_PAGE_SIZE__
839 shared_region->sr_page_shift = (uint8_t)target_page_shift;
840 #endif /* __ARM_MIXED_PAGE_SIZE__ */
841 shared_region->sr_driverkit = (uint8_t)is_driverkit;
842 shared_region->sr_rsr_version = rsr_version;
843 shared_region->sr_root_dir = root_dir;
844
845 queue_init(&shared_region->sr_q);
846 shared_region->sr_mapping_in_progress = FALSE;
847 shared_region->sr_slide_in_progress = FALSE;
848 shared_region->sr_persists = FALSE;
849 shared_region->sr_stale = FALSE;
850 shared_region->sr_timer_call = NULL;
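/* -1 means dyld has not established any mapping in this region yet */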
851 shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
852
853 /* grab a reference for the caller */
854 shared_region->sr_ref_count = 1;
855
856 shared_region->sr_slide = 0; /* not slid yet */
857
858 /* Initialize UUID and other metadata */
859 memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
860 shared_region->sr_uuid_copied = FALSE;
861 shared_region->sr_images_count = 0;
862 shared_region->sr_images = NULL;
863 #if __has_feature(ptrauth_calls)
864 shared_region->sr_reslide = reslide;
865 shared_region->sr_num_auth_section = 0;
866 shared_region->sr_next_auth_section = 0;
867 shared_region->sr_auth_section = NULL;
868 #endif /* __has_feature(ptrauth_calls) */
869
870 done:
871 if (shared_region) {
872 SHARED_REGION_TRACE_INFO(
873 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
874 "base=0x%llx,size=0x%llx) <- "
875 "%p mem=(%p,%p) map=%p pmap=%p\n",
876 (void *)VM_KERNEL_ADDRPERM(root_dir),
877 cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
878 (long long)base_address,
879 (long long)size,
880 (void *)VM_KERNEL_ADDRPERM(shared_region),
881 (void *)VM_KERNEL_ADDRPERM(mem_entry_port),
882 (void *)VM_KERNEL_ADDRPERM(mem_entry),
883 (void *)VM_KERNEL_ADDRPERM(sub_map),
884 (void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
885 } else {
886 SHARED_REGION_TRACE_INFO(
887 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
888 "base=0x%llx,size=0x%llx) <- NULL",
889 (void *)VM_KERNEL_ADDRPERM(root_dir),
890 cputype, cpu_subtype, is_64bit, is_driverkit,
891 (long long)base_address,
892 (long long)size));
893 }
894 return shared_region;
895 }
896
897 /*
898 * Destroy a now-unused shared region.
899 * The shared region is no longer in the queue and can not be looked up.
900 */
901 static void
902 vm_shared_region_destroy(
903 vm_shared_region_t shared_region)
904 {
905 vm_named_entry_t mem_entry;
906 vm_map_t map;
907
908 SHARED_REGION_TRACE_INFO(
909 ("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
910 (void *)VM_KERNEL_ADDRPERM(shared_region),
911 (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
912 shared_region->sr_cpu_type,
913 shared_region->sr_cpu_subtype,
914 shared_region->sr_64bit,
915 shared_region->sr_driverkit));
916
917 assert(shared_region->sr_ref_count == 0);
918 assert(!shared_region->sr_persists);
919
920 mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
921 assert(mem_entry->is_sub_map);
922 assert(!mem_entry->internal);
923 assert(!mem_entry->is_copy);
924 map = mem_entry->backing.map;
925
926 /*
927 * Clean up the pmap first. The virtual addresses that were
928 * entered in this possibly "nested" pmap may have different values
929 * than the VM map's min and max offsets, if the VM sub map was
930 * mapped at a non-zero offset in the processes' main VM maps, which
931 * is usually the case, so the clean-up we do in vm_map_destroy() would
932 * not be enough.
933 */
934 if (map->pmap) {
935 pmap_remove(map->pmap,
936 (vm_map_offset_t)shared_region->sr_base_address,
937 (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
938 }
939
940 /*
941 * Release our (one and only) handle on the memory entry.
942 * This will generate a no-senders notification, which will be processed
943 * by ipc_kobject_notify_no_senders(), which will release the one and only
944 * reference on the memory entry and cause it to be destroyed, along
945 * with the VM sub map and its pmap.
946 */
947 mach_memory_entry_port_release(shared_region->sr_mem_entry);
948 mem_entry = NULL;
949 shared_region->sr_mem_entry = IPC_PORT_NULL;
950
951 if (shared_region->sr_timer_call) {
952 thread_call_free(shared_region->sr_timer_call);
953 }
954
955 #if __has_feature(ptrauth_calls)
956 /*
957 * Free the cached copies of slide_info for the AUTH regions.
958 */
959 for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
960 vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
961 if (si != NULL) {
962 vm_object_deallocate(si->si_slide_object);
963 kfree_data(si->si_slide_info_entry,
964 si->si_slide_info_size);
965 kfree_type(struct vm_shared_region_slide_info, si);
966 shared_region->sr_auth_section[i] = NULL;
967 }
968 }
969 if (shared_region->sr_auth_section != NULL) {
970 assert(shared_region->sr_num_auth_section > 0);
971 kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
972 shared_region->sr_auth_section = NULL;
973 shared_region->sr_num_auth_section = 0;
974 }
975 #endif /* __has_feature(ptrauth_calls) */
976
977 /* release the shared region structure... */
978 kfree_type(struct vm_shared_region, shared_region);
979
980 SHARED_REGION_TRACE_DEBUG(
981 ("shared_region: destroy(%p) <-\n",
982 (void *)VM_KERNEL_ADDRPERM(shared_region)));
983 shared_region = NULL;
984 }
985
986 /*
987 * Gets the address of the first (in time) mapping in the shared region.
988 * If used during initial task setup by dyld, task should be non-NULL.
989 */
990 kern_return_t
991 vm_shared_region_start_address(
992 vm_shared_region_t shared_region,
993 mach_vm_offset_t *start_address,
994 task_t task)
995 {
996 kern_return_t kr;
997 mach_vm_offset_t sr_base_address;
998 mach_vm_offset_t sr_first_mapping;
999
1000 SHARED_REGION_TRACE_DEBUG(
1001 ("shared_region: -> start_address(%p)\n",
1002 (void *)VM_KERNEL_ADDRPERM(shared_region)));
1003
1004 vm_shared_region_lock();
1005
1006 /*
1007 * Wait if there's another thread establishing a mapping
1008 * in this shared region right when we're looking at it.
1009 * We want a consistent view of the map...
1010 */
1011 while (shared_region->sr_mapping_in_progress) {
1012 /* wait for our turn... */
1013 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1014 THREAD_UNINT);
1015 }
1016 assert(!shared_region->sr_mapping_in_progress);
1017 assert(shared_region->sr_ref_count > 0);
1018
1019 sr_base_address = shared_region->sr_base_address;
1020 sr_first_mapping = shared_region->sr_first_mapping;
1021
1022 if (sr_first_mapping == (mach_vm_offset_t) -1) {
1023 /* shared region is empty */
1024 kr = KERN_INVALID_ADDRESS;
1025 } else {
1026 kr = KERN_SUCCESS;
1027 *start_address = sr_base_address + sr_first_mapping;
1028 }
1029
1030
1031 uint32_t slide = shared_region->sr_slide;
1032
1033 vm_shared_region_unlock();
1034
1035 /*
1036 * Cache shared region info in the task for telemetry gathering, if we're
1037 * passed in the task. No task lock here as we're still in initial task setup.
1038 */
1039 if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
1040 uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1041 if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
1042 (char *)&task->task_shared_region_uuid,
1043 sizeof(task->task_shared_region_uuid)) == 0) {
1044 task->task_shared_region_slide = slide;
1045 }
1046 }
1047
1048 SHARED_REGION_TRACE_DEBUG(
1049 ("shared_region: start_address(%p) <- 0x%llx\n",
1050 (void *)VM_KERNEL_ADDRPERM(shared_region),
1051 (long long)shared_region->sr_base_address));
1052
1053 return kr;
1054 }
1055
1056 /*
1057 * Look up a pre-existing mapping in the shared region, for replacement.
1058 * Takes an extra object reference if found.
1059 */
1060 static kern_return_t
1061 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1062 {
1063 vm_map_entry_t found;
1064
1065 /* find the shared region's map entry to slide */
1066 vm_map_lock_read(map);
1067 if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
1068 /* no mapping there */
1069 vm_map_unlock(map);
1070 return KERN_INVALID_ARGUMENT;
1071 }
1072
1073 *entry = *found;
1074 /* extra ref to keep object alive while map is unlocked */
1075 vm_object_reference(VME_OBJECT(found));
1076 vm_map_unlock_read(map);
1077 return KERN_SUCCESS;
1078 }
1079
1080 static bool
1081 shared_region_make_permanent(
1082 vm_shared_region_t sr,
1083 vm_prot_t max_prot)
1084 {
1085 if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1086 return false;
1087 }
1088 if (max_prot & VM_PROT_WRITE) {
1089 /*
1090 * Potentially writable mapping: no major issue with allowing
1091 * it to be replaced since its contents could be modified
1092 * anyway.
1093 */
1094 return false;
1095 }
1096 if (max_prot & VM_PROT_EXECUTE) {
1097 /*
1098 * Potentially executable mapping: some software might want
1099 * to try and replace it to interpose their own code when a
1100 * given routine is called or returns, for example.
1101 * So let's not make it "permanent".
1102 */
1103 return false;
1104 }
1105 /*
1106 * Make this mapping "permanent" to prevent it from being deleted
1107 * and/or replaced with another mapping.
1108 */
1109 return true;
1110 }
1111
1112 #if __has_feature(ptrauth_calls)
1113
1114 /*
1115 * Determine if this task is actually using pointer signing.
1116 */
1117 static boolean_t
1118 task_sign_pointers(task_t task)
1119 {
1120 if (task->map &&
1121 task->map->pmap &&
1122 !task->map->pmap->disable_jop) {
1123 return TRUE;
1124 }
1125 return FALSE;
1126 }
1127
1128 /*
1129 * If the shared region contains mappings that are authenticated, then
1130 * remap them into the task private map.
1131 *
1132 * Failures are possible in this routine when jetsam kills a process
1133 * just as dyld is trying to set it up. The vm_map and task shared region
1134 * info get torn down w/o waiting for this thread to finish up.
1135 */
1136 __attribute__((noinline))
1137 kern_return_t
1138 vm_shared_region_auth_remap(vm_shared_region_t sr)
1139 {
1140 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
1141 task_t task = current_task();
1142 vm_shared_region_slide_info_t si;
1143 uint_t i;
1144 vm_object_t object;
1145 vm_map_t sr_map;
1146 struct vm_map_entry tmp_entry_store = {0};
1147 vm_map_entry_t tmp_entry = NULL;
1148 int vm_flags;
1149 vm_map_kernel_flags_t vmk_flags;
1150 vm_map_offset_t map_addr;
1151 kern_return_t kr = KERN_SUCCESS;
1152 boolean_t use_ptr_auth = task_sign_pointers(task);
1153
1154 /*
1155 * Don't do this more than once and avoid any race conditions in finishing it.
1156 */
1157 vm_shared_region_lock();
1158 while (sr->sr_mapping_in_progress) {
1159 /* wait for our turn... */
1160 vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
1161 }
1162 assert(!sr->sr_mapping_in_progress);
1163 assert(sr->sr_ref_count > 0);
1164
1165 /* Just return if already done. */
1166 if (task->shared_region_auth_remapped) {
1167 vm_shared_region_unlock();
1168 return KERN_SUCCESS;
1169 }
1170
1171 /* let others know to wait while we're working in this shared region */
1172 sr->sr_mapping_in_progress = TRUE;
1173 vm_shared_region_unlock();
1174
1175 /*
1176 * Remap any sections with pointer authentications into the private map.
1177 */
1178 for (i = 0; i < sr->sr_num_auth_section; ++i) {
1179 si = sr->sr_auth_section[i];
1180 assert(si != NULL);
1181 assert(si->si_ptrauth);
1182
1183 /*
1184 * We have a mapping that needs to be private.
1185 * Look for an existing slid mapping's pager with matching
1186 * object, offset, slide info and shared_region_id to reuse.
1187 */
1188 object = si->si_slide_object;
1189 sr_pager = shared_region_pager_match(object, si->si_start, si,
1190 use_ptr_auth ? task->jop_pid : 0);
1191 if (sr_pager == MEMORY_OBJECT_NULL) {
1192 printf("%s(): shared_region_pager_match() failed\n", __func__);
1193 kr = KERN_FAILURE;
1194 goto done;
1195 }
1196
1197 /*
1198 * verify matching jop_pid for this task and this pager
1199 */
1200 if (use_ptr_auth) {
1201 shared_region_pager_match_task_key(sr_pager, task);
1202 }
1203
1204 sr_map = vm_shared_region_vm_map(sr);
1205 tmp_entry = NULL;
1206
1207 kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1208 if (kr != KERN_SUCCESS) {
1209 printf("%s(): find_mapping_to_slide() failed\n", __func__);
1210 goto done;
1211 }
1212 tmp_entry = &tmp_entry_store;
1213
1214 /*
1215 * Check that the object exactly covers the region to slide.
1216 */
1217 if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1218 printf("%s(): doesn't fully cover\n", __func__);
1219 kr = KERN_FAILURE;
1220 goto done;
1221 }
1222
1223 /*
1224 * map the pager over the portion of the mapping that needs sliding
1225 */
1226 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
1227 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1228 vmk_flags.vmkf_overwrite_immutable = TRUE;
1229 vmk_flags.vmkf_permanent = shared_region_make_permanent(sr,
1230 tmp_entry->max_protection);
1231
1232 map_addr = si->si_slid_address;
1233 kr = vm_map_enter_mem_object(task->map,
1234 &map_addr,
1235 si->si_end - si->si_start,
1236 (mach_vm_offset_t) 0,
1237 vm_flags,
1238 vmk_flags,
1239 VM_KERN_MEMORY_NONE,
1240 (ipc_port_t)(uintptr_t) sr_pager,
1241 0,
1242 TRUE,
1243 tmp_entry->protection,
1244 tmp_entry->max_protection,
1245 tmp_entry->inheritance);
1246 memory_object_deallocate(sr_pager);
1247 sr_pager = MEMORY_OBJECT_NULL;
1248 if (kr != KERN_SUCCESS) {
1249 printf("%s(): vm_map_enter_mem_object() failed\n", __func__);
1250 goto done;
1251 }
1252 assertf(map_addr == si->si_slid_address,
1253 "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1254 (uint64_t)map_addr,
1255 (uint64_t)si->si_slid_address,
1256 tmp_entry);
1257
1258 /* Drop the ref count grabbed by find_mapping_to_slide */
1259 vm_object_deallocate(VME_OBJECT(tmp_entry));
1260 tmp_entry = NULL;
1261 }
1262
1263 done:
1264 if (tmp_entry) {
1265 /* Drop the ref count grabbed by find_mapping_to_slide */
1266 vm_object_deallocate(VME_OBJECT(tmp_entry));
1267 tmp_entry = NULL;
1268 }
1269
1270 /*
1271 * Drop any extra reference to the pager in case we're quitting due to an error above.
1272 */
1273 if (sr_pager != MEMORY_OBJECT_NULL) {
1274 memory_object_deallocate(sr_pager);
1275 }
1276
1277 /*
1278 * Mark the region as having its auth sections remapped.
1279 */
1280 vm_shared_region_lock();
1281 task->shared_region_auth_remapped = TRUE;
1282 sr->sr_mapping_in_progress = FALSE;
1283 thread_wakeup((event_t)&sr->sr_mapping_in_progress);
1284 vm_shared_region_unlock();
1285 return kr;
1286 }
1287 #endif /* __has_feature(ptrauth_calls) */
1288
1289 void
1290 vm_shared_region_undo_mappings(
1291 vm_map_t sr_map,
1292 mach_vm_offset_t sr_base_address,
1293 struct _sr_file_mappings *srf_mappings,
1294 struct _sr_file_mappings *srf_mappings_current,
1295 unsigned int srf_current_mappings_count)
1296 {
1297 unsigned int j = 0;
1298 vm_shared_region_t shared_region = NULL;
1299 boolean_t reset_shared_region_state = FALSE;
1300 struct _sr_file_mappings *srfmp;
1301 unsigned int mappings_count;
1302 struct shared_file_mapping_slide_np *mappings;
1303
1304 shared_region = vm_shared_region_get(current_task());
1305 if (shared_region == NULL) {
1306 printf("Failed to undo mappings because of NULL shared region.\n");
1307 return;
1308 }
1309
1310 shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
1311
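/*
 * A NULL sr_map means the caller didn't resolve the shared region's
 * submap: look it up from the region ourselves and take the
 * "mapping in progress" token while we tear the mappings down.
 */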
1312 if (sr_map == NULL) {
1313 ipc_port_t sr_handle;
1314 vm_named_entry_t sr_mem_entry;
1315
1316 vm_shared_region_lock();
1317 assert(shared_region->sr_ref_count > 0);
1318
1319 while (shared_region->sr_mapping_in_progress) {
1320 /* wait for our turn... */
1321 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1322 THREAD_UNINT);
1323 }
1324 assert(!shared_region->sr_mapping_in_progress);
1325 assert(shared_region->sr_ref_count > 0);
1326 /* let others know we're working in this shared region */
1327 shared_region->sr_mapping_in_progress = TRUE;
1328
1329 vm_shared_region_unlock();
1330
1331 reset_shared_region_state = TRUE;
1332
1333 /* no need to lock because this data is never modified... */
1334 sr_handle = shared_region->sr_mem_entry;
1335 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1336 sr_map = sr_mem_entry->backing.map;
1337 sr_base_address = shared_region->sr_base_address;
1338 }
1339 /*
1340 * Undo the mappings we've established so far.
1341 */
1342 for (srfmp = &srf_mappings[0];
1343 srfmp <= srf_mappings_current;
1344 srfmp++) {
1345 mappings = srfmp->mappings;
1346 mappings_count = srfmp->mappings_count;
1347 if (srfmp == srf_mappings_current) {
1348 mappings_count = srf_current_mappings_count;
1349 }
1350
1351 for (j = 0; j < mappings_count; j++) {
1352 kern_return_t kr2;
1353 mach_vm_offset_t start, end;
1354
1355 if (mappings[j].sms_size == 0) {
1356 /*
1357 * We didn't establish this
1358 * mapping, so nothing to undo.
1359 */
1360 continue;
1361 }
1362 SHARED_REGION_TRACE_INFO(
1363 ("shared_region: mapping[%d]: "
1364 "address:0x%016llx "
1365 "size:0x%016llx "
1366 "offset:0x%016llx "
1367 "maxprot:0x%x prot:0x%x: "
1368 "undoing...\n",
1369 j,
1370 (long long)mappings[j].sms_address,
1371 (long long)mappings[j].sms_size,
1372 (long long)mappings[j].sms_file_offset,
1373 mappings[j].sms_max_prot,
1374 mappings[j].sms_init_prot));
1375 start = (mappings[j].sms_address - sr_base_address);
1376 end = start + mappings[j].sms_size;
1377 start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
1378 end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
1379 kr2 = vm_map_remove_guard(sr_map,
1380 start,
1381 end,
1382 VM_MAP_REMOVE_IMMUTABLE,
1383 KMEM_GUARD_NONE).kmr_return;
1384 assert(kr2 == KERN_SUCCESS);
1385 }
1386 }
1387
1388 if (reset_shared_region_state) {
1389 vm_shared_region_lock();
1390 assert(shared_region->sr_ref_count > 0);
1391 assert(shared_region->sr_mapping_in_progress);
1392 /* we're done working on that shared region */
1393 shared_region->sr_mapping_in_progress = FALSE;
1394 thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1395 vm_shared_region_unlock();
1396 reset_shared_region_state = FALSE;
1397 }
1398
1399 vm_shared_region_deallocate(shared_region);
1400 }
1401
1402 /*
1403 * First part of vm_shared_region_map_file(). Split out to
1404 * avoid kernel stack overflow.
1405 */
1406 __attribute__((noinline))
1407 static kern_return_t
1408 vm_shared_region_map_file_setup(
1409 vm_shared_region_t shared_region,
1410 int sr_file_mappings_count,
1411 struct _sr_file_mappings *sr_file_mappings,
1412 unsigned int *mappings_to_slide_cnt,
1413 struct shared_file_mapping_slide_np **mappings_to_slide,
1414 mach_vm_offset_t *slid_mappings,
1415 memory_object_control_t *slid_file_controls,
1416 mach_vm_offset_t *sfm_min_address,
1417 mach_vm_offset_t *sfm_max_address,
1418 vm_map_t *sr_map_ptr,
1419 vm_map_offset_t *lowest_unnestable_addr_ptr,
1420 unsigned int vmsr_num_slides)
1421 {
1422 kern_return_t kr = KERN_SUCCESS;
1423 memory_object_control_t file_control;
1424 vm_object_t file_object;
1425 ipc_port_t sr_handle;
1426 vm_named_entry_t sr_mem_entry;
1427 vm_map_t sr_map;
1428 mach_vm_offset_t sr_base_address;
1429 unsigned int i = 0;
1430 mach_port_t map_port;
1431 vm_map_offset_t target_address;
1432 vm_object_t object;
1433 vm_object_size_t obj_size;
1434 vm_map_offset_t lowest_unnestable_addr = 0;
1435 vm_map_kernel_flags_t vmk_flags;
1436 mach_vm_offset_t sfm_end;
1437 uint32_t mappings_count;
1438 struct shared_file_mapping_slide_np *mappings;
1439 struct _sr_file_mappings *srfmp;
1440
1441 vm_shared_region_lock();
1442 assert(shared_region->sr_ref_count > 0);
1443
1444 /*
1445 * Make sure we handle only one mapping at a time in a given
1446 * shared region, to avoid race conditions. This should not
1447 * happen frequently...
1448 */
1449 while (shared_region->sr_mapping_in_progress) {
1450 /* wait for our turn... */
1451 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1452 THREAD_UNINT);
1453 }
1454 assert(!shared_region->sr_mapping_in_progress);
1455 assert(shared_region->sr_ref_count > 0);
1456
1457
1458 /* let others know we're working in this shared region */
1459 shared_region->sr_mapping_in_progress = TRUE;
1460
1461 /*
1462 * Did someone race in and map this shared region already?
1463 */
1464 if (shared_region->sr_first_mapping != -1) {
1465 vm_shared_region_unlock();
1466 #if DEVELOPMENT || DEBUG
1467 printf("shared_region: caught race in map and slide\n");
1468 #endif /* DEVELOPMENT || DEBUG */
1469 return KERN_FAILURE;
1470 }
1471
1472 vm_shared_region_unlock();
1473
1474 /* no need to lock because this data is never modified... */
1475 sr_handle = shared_region->sr_mem_entry;
1476 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1477 sr_map = sr_mem_entry->backing.map;
1478 sr_base_address = shared_region->sr_base_address;
1479
1480 SHARED_REGION_TRACE_DEBUG(
1481 ("shared_region: -> map(%p)\n",
1482 (void *)VM_KERNEL_ADDRPERM(shared_region)));
1483
1484 mappings_count = 0;
1485 mappings = NULL;
1486 srfmp = NULL;
1487
1488 /* process all the files to be mapped */
1489 for (srfmp = &sr_file_mappings[0];
1490 srfmp < &sr_file_mappings[sr_file_mappings_count];
1491 srfmp++) {
1492 mappings_count = srfmp->mappings_count;
1493 mappings = srfmp->mappings;
1494 file_control = srfmp->file_control;
1495
1496 if (mappings_count == 0) {
1497 /* no mappings here... */
1498 continue;
1499 }
1500
1501 /*
1502 * The code below can only correctly "slide" (perform relocations) for one
1503 * value of the slide amount. So if a file has a non-zero slide, it has to
1504 * match any previous value. A zero slide value is ok for things that are
1505 * just directly mapped.
1506 */
1507 if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1508 shared_region->sr_slide = srfmp->slide;
1509 } else if (shared_region->sr_slide != 0 &&
1510 srfmp->slide != 0 &&
1511 shared_region->sr_slide != srfmp->slide) {
1512 SHARED_REGION_TRACE_ERROR(
1513 ("shared_region: more than 1 non-zero slide value amount "
1514 "slide 1:0x%x slide 2:0x%x\n ",
1515 shared_region->sr_slide, srfmp->slide));
1516 kr = KERN_INVALID_ARGUMENT;
1517 break;
1518 }
1519
1520 #if __arm64__
1521 if ((shared_region->sr_64bit ||
1522 page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
1523 ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
1524 printf("FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
1525 __FUNCTION__, srfmp->slide);
1526 kr = KERN_INVALID_ARGUMENT;
1527 break;
1528 }
1529 #endif /* __arm64__ */
1530
1531 /*
1532 * An FD of -1 means we need to copyin the data to an anonymous object.
1533 */
1534 if (srfmp->fd == -1) {
1535 assert(mappings_count == 1);
1536 SHARED_REGION_TRACE_INFO(
1537 ("shared_region: mapping[0]: "
1538 "address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
1539 "maxprot:0x%x prot:0x%x fd==-1\n",
1540 (long long)mappings[0].sms_address,
1541 (long long)mappings[0].sms_size,
1542 (long long)mappings[0].sms_file_offset,
1543 mappings[0].sms_max_prot,
1544 mappings[0].sms_init_prot));
1545
1546 /*
1547 * We need an anon object to hold the data in the shared region.
1548 * The size needs to be suitable to map into the kernel.
1549 */
1550 obj_size = vm_object_round_page(mappings->sms_size);
1551 object = vm_object_allocate(obj_size);
1552 if (object == VM_OBJECT_NULL) {
1553 printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
1554 kr = KERN_RESOURCE_SHORTAGE;
1555 break;
1556 }
1557
1558 /*
1559 * map the object into the kernel
1560 */
1561 vm_map_offset_t kaddr = 0;
1562 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1563 vmk_flags.vmkf_no_copy_on_read = 1;
1564 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1565 kr = vm_map_enter(kernel_map,
1566 &kaddr,
1567 obj_size,
1568 0,
1569 VM_FLAGS_ANYWHERE,
1570 vmk_flags,
1571 VM_KERN_MEMORY_NONE,
1572 object,
1573 0,
1574 FALSE,
1575 (VM_PROT_READ | VM_PROT_WRITE),
1576 (VM_PROT_READ | VM_PROT_WRITE),
1577 VM_INHERIT_NONE);
1578 if (kr != KERN_SUCCESS) {
1579 printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
1580 vm_object_deallocate(object);
1581 object = VM_OBJECT_NULL;
1582 break;
1583 }
1584
1585 /*
1586 * We'll need another reference to keep the object alive after
1587 * we vm_map_remove() it from the kernel.
1588 */
1589 vm_object_reference(object);
1590
1591 /*
1592 * Zero out the object's pages, so we can't leak data.
1593 */
1594 bzero((void *)kaddr, obj_size);
1595
1596 /*
1597 * Copyin the data from dyld to the new object.
1598 * Then remove the kernel mapping.
1599 */
1600 int copyin_err =
1601 copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
1602 vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
1603 if (copyin_err) {
1604 printf("%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
1605 switch (copyin_err) {
1606 case EPERM:
1607 case EACCES:
1608 kr = KERN_PROTECTION_FAILURE;
1609 break;
1610 case EFAULT:
1611 kr = KERN_INVALID_ADDRESS;
1612 break;
1613 default:
1614 kr = KERN_FAILURE;
1615 break;
1616 }
1617 vm_object_deallocate(object);
1618 object = VM_OBJECT_NULL;
1619 break;
1620 }
1621
1622 /*
1623 * Finally map the object into the shared region.
1624 */
1625 target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
1626 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1627 vmk_flags.vmkf_already = TRUE;
1628 vmk_flags.vmkf_no_copy_on_read = 1;
1629 vmk_flags.vmkf_permanent = shared_region_make_permanent(shared_region,
1630 mappings[0].sms_max_prot);
1631 kr = vm_map_enter(
1632 sr_map,
1633 &target_address,
1634 vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1635 0,
1636 VM_FLAGS_FIXED,
1637 vmk_flags,
1638 VM_KERN_MEMORY_NONE,
1639 object,
1640 0,
1641 TRUE,
1642 mappings[0].sms_init_prot & VM_PROT_ALL,
1643 mappings[0].sms_max_prot & VM_PROT_ALL,
1644 VM_INHERIT_DEFAULT);
1645 if (kr != KERN_SUCCESS) {
1646 printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
1647 vm_object_deallocate(object);
1648 break;
1649 }
1650
1651 if (mappings[0].sms_address < *sfm_min_address) {
1652 *sfm_min_address = mappings[0].sms_address;
1653 }
1654
1655 if (os_add_overflow(mappings[0].sms_address,
1656 mappings[0].sms_size,
1657 &sfm_end) ||
1658 (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1659 mappings[0].sms_address)) {
1660 /* overflow */
1661 kr = KERN_INVALID_ARGUMENT;
1662 break;
1663 }
1664
1665 if (sfm_end > *sfm_max_address) {
1666 *sfm_max_address = sfm_end;
1667 }
1668
1669 continue;
1670 }
1671
1672 /* get the VM object associated with the file to be mapped */
1673 file_object = memory_object_control_to_vm_object(file_control);
1674 assert(file_object);
1675
1676 #if CONFIG_SECLUDED_MEMORY
1677 /*
1678 * Camera will need the shared cache, so don't put the pages
1679 * on the secluded queue, assume that's the primary region.
1680 * Also keep DEXT shared cache pages off secluded.
1681 */
1682 if (primary_system_shared_region == NULL ||
1683 primary_system_shared_region == shared_region ||
1684 shared_region->sr_driverkit) {
1685 memory_object_mark_eligible_for_secluded(file_control, FALSE);
1686 }
1687 #endif /* CONFIG_SECLUDED_MEMORY */
1688
1689 /* establish the mappings for that file */
1690 for (i = 0; i < mappings_count; i++) {
1691 SHARED_REGION_TRACE_INFO(
1692 ("shared_region: mapping[%d]: "
1693 "address:0x%016llx size:0x%016llx offset:0x%016llx "
1694 "maxprot:0x%x prot:0x%x\n",
1695 i,
1696 (long long)mappings[i].sms_address,
1697 (long long)mappings[i].sms_size,
1698 (long long)mappings[i].sms_file_offset,
1699 mappings[i].sms_max_prot,
1700 mappings[i].sms_init_prot));
1701
1702 if (mappings[i].sms_address < *sfm_min_address) {
1703 *sfm_min_address = mappings[i].sms_address;
1704 }
1705
1706 if (os_add_overflow(mappings[i].sms_address,
1707 mappings[i].sms_size,
1708 &sfm_end) ||
1709 (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1710 mappings[i].sms_address)) {
1711 /* overflow */
1712 kr = KERN_INVALID_ARGUMENT;
1713 break;
1714 }
1715
1716 if (sfm_end > *sfm_max_address) {
1717 *sfm_max_address = sfm_end;
1718 }
1719
1720 if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1721 /* zero-filled memory */
1722 map_port = MACH_PORT_NULL;
1723 } else {
1724 /* file-backed memory */
1725 __IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1726 }
1727
1728 /*
1729 * Remember which mappings need sliding.
1730 */
1731 if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1732 if (*mappings_to_slide_cnt == vmsr_num_slides) {
1733 SHARED_REGION_TRACE_INFO(
1734 ("shared_region: mapping[%d]: "
1735 "address:0x%016llx size:0x%016llx "
1736 "offset:0x%016llx "
1737 "maxprot:0x%x prot:0x%x "
1738 "too many mappings to slide...\n",
1739 i,
1740 (long long)mappings[i].sms_address,
1741 (long long)mappings[i].sms_size,
1742 (long long)mappings[i].sms_file_offset,
1743 mappings[i].sms_max_prot,
1744 mappings[i].sms_init_prot));
1745 } else {
1746 mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1747 *mappings_to_slide_cnt += 1;
1748 }
1749 }
1750
1751 /* mapping's address is relative to the shared region base */
1752 target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);
1753
1754 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1755 vmk_flags.vmkf_already = TRUE;
1756 /* no copy-on-read for mapped binaries */
1757 vmk_flags.vmkf_no_copy_on_read = 1;
1758 vmk_flags.vmkf_permanent = shared_region_make_permanent(
1759 shared_region,
1760 mappings[i].sms_max_prot);
1761
1762
1763 /* establish that mapping, OK if it's "already" there */
1764 if (map_port == MACH_PORT_NULL) {
1765 /*
1766 * We want to map some anonymous memory in a shared region.
1767 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1768 */
1769 obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1770 object = vm_object_allocate(obj_size);
1771 if (object == VM_OBJECT_NULL) {
1772 kr = KERN_RESOURCE_SHORTAGE;
1773 } else {
1774 kr = vm_map_enter(
1775 sr_map,
1776 &target_address,
1777 vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1778 0,
1779 VM_FLAGS_FIXED,
1780 vmk_flags,
1781 VM_KERN_MEMORY_NONE,
1782 object,
1783 0,
1784 TRUE,
1785 mappings[i].sms_init_prot & VM_PROT_ALL,
1786 mappings[i].sms_max_prot & VM_PROT_ALL,
1787 VM_INHERIT_DEFAULT);
1788 }
1789 } else {
1790 object = VM_OBJECT_NULL; /* no anonymous memory here */
1791 kr = vm_map_enter_mem_object(
1792 sr_map,
1793 &target_address,
1794 vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1795 0,
1796 VM_FLAGS_FIXED,
1797 vmk_flags,
1798 VM_KERN_MEMORY_NONE,
1799 map_port,
1800 mappings[i].sms_file_offset,
1801 TRUE,
1802 mappings[i].sms_init_prot & VM_PROT_ALL,
1803 mappings[i].sms_max_prot & VM_PROT_ALL,
1804 VM_INHERIT_DEFAULT);
1805 }
1806
1807 if (kr == KERN_SUCCESS) {
1808 /*
1809 * Record the first successful mapping(s) in the shared
1810 * region by file. We're protected by "sr_mapping_in_progress"
1811 * here, so no need to lock "shared_region".
1812 *
1813 * Note that if we have an AOT shared cache (ARM) for a
1814 * translated task, then it's always the first file.
1815 * The original "native" (i.e. x86) shared cache is the
1816 * second file.
1817 */
1818
1819 if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1820 shared_region->sr_first_mapping = target_address;
1821 }
1822
1823 if (*mappings_to_slide_cnt > 0 &&
1824 mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1825 slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1826 slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1827 }
1828
1829 /*
1830 * Record the lowest writable address in this
1831 * sub map, to log any unexpected unnesting below
1832 * that address (see log_unnest_badness()).
1833 */
1834 if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1835 sr_map->is_nested_map &&
1836 (lowest_unnestable_addr == 0 ||
1837 (target_address < lowest_unnestable_addr))) {
1838 lowest_unnestable_addr = target_address;
1839 }
1840 } else {
1841 if (map_port == MACH_PORT_NULL) {
1842 /*
1843 * Get rid of the VM object we just created
1844 * but failed to map.
1845 */
1846 vm_object_deallocate(object);
1847 object = VM_OBJECT_NULL;
1848 }
1849 if (kr == KERN_MEMORY_PRESENT) {
1850 /*
1851 * This exact mapping was already there:
1852 * that's fine.
1853 */
1854 SHARED_REGION_TRACE_INFO(
1855 ("shared_region: mapping[%d]: "
1856 "address:0x%016llx size:0x%016llx "
1857 "offset:0x%016llx "
1858 "maxprot:0x%x prot:0x%x "
1859 "already mapped...\n",
1860 i,
1861 (long long)mappings[i].sms_address,
1862 (long long)mappings[i].sms_size,
1863 (long long)mappings[i].sms_file_offset,
1864 mappings[i].sms_max_prot,
1865 mappings[i].sms_init_prot));
1866 /*
1867 * We didn't establish this mapping ourselves;
1868 * let's reset its size, so that we do not
1869 * attempt to undo it if an error occurs later.
1870 */
1871 mappings[i].sms_size = 0;
1872 kr = KERN_SUCCESS;
1873 } else {
1874 break;
1875 }
1876 }
1877 }
1878
1879 if (kr != KERN_SUCCESS) {
1880 break;
1881 }
1882 }
1883
1884 if (kr != KERN_SUCCESS) {
1885 /* the last mapping we tried (mappings[i]) failed ! */
1886 assert(i < mappings_count);
1887 SHARED_REGION_TRACE_ERROR(
1888 ("shared_region: mapping[%d]: "
1889 "address:0x%016llx size:0x%016llx "
1890 "offset:0x%016llx "
1891 "maxprot:0x%x prot:0x%x failed 0x%x\n",
1892 i,
1893 (long long)mappings[i].sms_address,
1894 (long long)mappings[i].sms_size,
1895 (long long)mappings[i].sms_file_offset,
1896 mappings[i].sms_max_prot,
1897 mappings[i].sms_init_prot,
1898 kr));
1899
1900 /*
1901 * vm_shared_region_undo_mappings() must be given a non-NULL sr_map here:
1902 * we are still holding sr_mapping_in_progress, and if sr_map were NULL
1903 * that routine would look up the shared region itself and block waiting
1904 * for sr_mapping_in_progress to become false, i.e. deadlock.
1905 */
1906 assert(sr_map != NULL);
1907 /* undo all the previous mappings */
1908 vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
1909 return kr;
1910 }
1911
1912 *lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1913 *sr_map_ptr = sr_map;
1914 return KERN_SUCCESS;
1915 }
1916
1917 /* forward declaration */
1918 __attribute__((noinline))
1919 static void
1920 vm_shared_region_map_file_final(
1921 vm_shared_region_t shared_region,
1922 vm_map_t sr_map,
1923 mach_vm_offset_t sfm_min_address,
1924 mach_vm_offset_t sfm_max_address);
1925
1926 /*
1927 * Establish some mappings of a file in the shared region.
1928 * This is used by "dyld" via the shared_region_map_and_slide_2_np() system call
1929 * to populate the shared region with the appropriate shared cache.
1930 *
1931 * One could also call it several times to incrementally load several
1932 * libraries, as long as they do not overlap.
1933 * It will return KERN_SUCCESS if the mappings were successfully established
1934 * or if they were already established identically by another process.
1935 */
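/*
 * Illustrative sketch (addresses are made up, not from this file): a first
 * call might map [sr_base, sr_base + 0x20000000) for the main shared cache
 * file and a second call [sr_base + 0x20000000, sr_base + 0x24000000) for
 * another file.  Since the ranges do not overlap, both calls return
 * KERN_SUCCESS; re-submitting one of those exact ranges also succeeds,
 * thanks to the KERN_MEMORY_PRESENT ("already mapped") handling in
 * vm_shared_region_map_file_setup() above.
 */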
1936 __attribute__((noinline))
1937 kern_return_t
1938 vm_shared_region_map_file(
1939 vm_shared_region_t shared_region,
1940 int sr_file_mappings_count,
1941 struct _sr_file_mappings *sr_file_mappings)
1942 {
1943 kern_return_t kr = KERN_SUCCESS;
1944 unsigned int i;
1945 unsigned int mappings_to_slide_cnt = 0;
1946 mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1;
1947 mach_vm_offset_t sfm_max_address = 0;
1948 vm_map_t sr_map = NULL;
1949 vm_map_offset_t lowest_unnestable_addr = 0;
1950 unsigned int vmsr_num_slides = 0;
1951 typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
1952 slid_mappings_t *slid_mappings = NULL; /* [0..vmsr_num_slides] */
1953 memory_object_control_t *slid_file_controls = NULL; /* [0..vmsr_num_slides] */
1954 struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
1955 struct _sr_file_mappings *srfmp;
1956
1957 /*
1958 * Figure out how many of the mappings have slides.
1959 */
1960 for (srfmp = &sr_file_mappings[0];
1961 srfmp < &sr_file_mappings[sr_file_mappings_count];
1962 srfmp++) {
1963 for (i = 0; i < srfmp->mappings_count; ++i) {
1964 if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1965 ++vmsr_num_slides;
1966 }
1967 }
1968 }
1969
1970 /* Allocate per slide data structures */
1971 if (vmsr_num_slides > 0) {
1972 slid_mappings =
1973 kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
1974 slid_file_controls =
1975 kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
1976 mappings_to_slide =
1977 kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
1978 }
1979
1980 kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
1981 &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
1982 &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
1983 if (kr != KERN_SUCCESS) {
1984 vm_shared_region_lock();
1985 goto done;
1986 }
1987 assert(vmsr_num_slides == mappings_to_slide_cnt);
1988
1989 /*
1990 * The call above installed direct mappings to the shared cache file.
1991 * Now we go back and overwrite the mappings that need relocation
1992 * with a special shared region pager.
1993 *
1994 * Note that this does copyin() of data, needed by the pager, which
1995 * the previous code just established mappings for. This is why we
1996 * do it in a separate pass.
1997 */
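/*
 * Rough sketch of the two passes (summary only; the authoritative logic is
 * in the code itself):
 *   pass 1: vm_shared_region_map_file_setup() maps each file-backed or
 *           zero-fill mapping directly into sr_map and records which of
 *           them have VM_PROT_SLIDE set;
 *   pass 2: the loop below calls vm_shared_region_slide() for each recorded
 *           mapping, which copies in the slide info (data whose mappings
 *           pass 1 just established) and installs a shared region pager
 *           over that range.
 */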
1998 #if __has_feature(ptrauth_calls)
1999 /*
2000 * Allocate the storage needed for any sr_auth_sections.
2001 */
2002 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2003 if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
2004 shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2005 !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
2006 ++shared_region->sr_num_auth_section;
2007 }
2008 }
2009 if (shared_region->sr_num_auth_section > 0) {
2010 shared_region->sr_auth_section =
2011 kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
2012 Z_WAITOK | Z_ZERO);
2013 }
2014 #endif /* __has_feature(ptrauth_calls) */
2015 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2016 kr = vm_shared_region_slide(shared_region->sr_slide,
2017 mappings_to_slide[i]->sms_file_offset,
2018 mappings_to_slide[i]->sms_size,
2019 mappings_to_slide[i]->sms_slide_start,
2020 mappings_to_slide[i]->sms_slide_size,
2021 slid_mappings[i],
2022 slid_file_controls[i],
2023 mappings_to_slide[i]->sms_max_prot);
2024 if (kr != KERN_SUCCESS) {
2025 SHARED_REGION_TRACE_ERROR(
2026 ("shared_region: region_slide("
2027 "slide:0x%x start:0x%016llx "
2028 "size:0x%016llx) failed 0x%x\n",
2029 shared_region->sr_slide,
2030 (long long)mappings_to_slide[i]->sms_slide_start,
2031 (long long)mappings_to_slide[i]->sms_slide_size,
2032 kr));
2033 vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
2034 &sr_file_mappings[0],
2035 &sr_file_mappings[sr_file_mappings_count - 1],
2036 sr_file_mappings_count);
2037 vm_shared_region_lock();
2038 goto done;
2039 }
2040 }
2041
2042 assert(kr == KERN_SUCCESS);
2043
2044 /* adjust the map's "lowest_unnestable_start" */
2045 lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
2046 if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
2047 vm_map_lock(sr_map);
2048 sr_map->lowest_unnestable_start = lowest_unnestable_addr;
2049 vm_map_unlock(sr_map);
2050 }
2051
2052 vm_shared_region_lock();
2053 assert(shared_region->sr_ref_count > 0);
2054 assert(shared_region->sr_mapping_in_progress);
2055
2056 vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
2057
2058 done:
2059 /*
2060 * We're done working on that shared region.
2061 * Wake up any waiting threads.
2062 */
2063 shared_region->sr_mapping_in_progress = FALSE;
2064 thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
2065 vm_shared_region_unlock();
2066
2067 #if __has_feature(ptrauth_calls)
2068 if (kr == KERN_SUCCESS) {
2069 /*
2070 * Since authenticated mappings were just added to the shared region,
2071 * go back and remap them into private mappings for this task.
2072 */
2073 kr = vm_shared_region_auth_remap(shared_region);
2074 }
2075 #endif /* __has_feature(ptrauth_calls) */
2076
2077 /* Cache shared region info needed for telemetry in the task */
2078 task_t task;
2079 if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
2080 mach_vm_offset_t start_address;
2081 (void)vm_shared_region_start_address(shared_region, &start_address, task);
2082 }
2083
2084 SHARED_REGION_TRACE_DEBUG(
2085 ("shared_region: map(%p) <- 0x%x \n",
2086 (void *)VM_KERNEL_ADDRPERM(shared_region), kr));
2087 if (vmsr_num_slides > 0) {
2088 kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
2089 kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
2090 kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
2091 mappings_to_slide);
2092 }
2093 return kr;
2094 }
2095
2096 /*
2097 * Final part of vm_shared_region_map_file().
2098 * Kept in a separate function to avoid blowing out the stack.
2099 */
2100 __attribute__((noinline))
2101 static void
2102 vm_shared_region_map_file_final(
2103 vm_shared_region_t shared_region,
2104 vm_map_t sr_map,
2105 mach_vm_offset_t sfm_min_address,
2106 mach_vm_offset_t sfm_max_address)
2107 {
2108 struct _dyld_cache_header sr_cache_header;
2109 int error;
2110 size_t image_array_length;
2111 struct _dyld_cache_image_text_info *sr_image_layout;
2112 boolean_t locally_built = FALSE;
2113
2114
2115 /*
2116 * copy in the shared region UUID to the shared region structure.
2117 * we do this indirectly by first copying in the shared cache header
2118 * and then copying the UUID from there because we'll need to look
2119 * at other content from the shared cache header.
2120 */
2121 if (!shared_region->sr_uuid_copied) {
2122 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
2123 (char *)&sr_cache_header,
2124 sizeof(sr_cache_header));
2125 if (error == 0) {
2126 memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
2127 shared_region->sr_uuid_copied = TRUE;
2128 locally_built = sr_cache_header.locallyBuiltCache;
2129 } else {
2130 #if DEVELOPMENT || DEBUG
2131 panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2132 "offset:0 size:0x%016llx) failed with %d\n",
2133 (long long)shared_region->sr_base_address,
2134 (long long)shared_region->sr_first_mapping,
2135 (long long)sizeof(sr_cache_header),
2136 error);
2137 #endif /* DEVELOPMENT || DEBUG */
2138 shared_region->sr_uuid_copied = FALSE;
2139 }
2140 }
2141
2142 /*
2143 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by
2144 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
2145 * One gotcha is that a "userspace reboot" can occur, which causes a new shared region to become the primary
2146 * region. In that case, launchd re-execs itself, so we may go through this path multiple times. We
2147 * let the most recent one win.
2148 *
2149 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
2150 */
2151 bool is_init_task = (task_pid(current_task()) == 1);
2152 if (shared_region->sr_uuid_copied && is_init_task) {
2153 /* Copy in the shared cache layout if we're running with a locally built shared cache */
2154 if (locally_built) {
2155 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
2156 image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
2157 sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
2158 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
2159 sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
2160 if (error == 0) {
2161 if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
2162 panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
2163 }
2164 shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
2165 for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
2166 memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
2167 sizeof(shared_region->sr_images[index].imageUUID));
2168 shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
2169 }
2170
2171 shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
2172 } else {
2173 #if DEVELOPMENT || DEBUG
2174 panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2175 "offset:0x%016llx size:0x%016llx) failed with %d\n",
2176 (long long)shared_region->sr_base_address,
2177 (long long)shared_region->sr_first_mapping,
2178 (long long)sr_cache_header.imagesTextOffset,
2179 (long long)image_array_length,
2180 error);
2181 #endif /* DEVELOPMENT || DEBUG */
2182 }
2183 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
2184 kfree_data(sr_image_layout, image_array_length);
2185 sr_image_layout = NULL;
2186 }
2187 primary_system_shared_region = shared_region;
2188 }
2189
2190 /*
2191 * If we succeeded, we know the bounds of the shared region.
2192 * Trim our pmaps to only cover this range (if applicable to
2193 * this platform).
2194 */
2195 if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
2196 pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
2197 }
2198 }
2199
2200 /*
2201 * Retrieve a task's shared region and grab an extra reference to
2202 * make sure it doesn't disappear while the caller is using it.
2203 * The caller is responsible for consuming that extra reference if
2204 * necessary.
2205 *
2206 * This also tries to trim the pmap for the shared region.
2207 */
2208 vm_shared_region_t
2209 vm_shared_region_trim_and_get(task_t task)
2210 {
2211 vm_shared_region_t shared_region;
2212 ipc_port_t sr_handle;
2213 vm_named_entry_t sr_mem_entry;
2214 vm_map_t sr_map;
2215
2216 /* Get the shared region and the map. */
2217 shared_region = vm_shared_region_get(task);
2218 if (shared_region == NULL) {
2219 return NULL;
2220 }
2221
2222 sr_handle = shared_region->sr_mem_entry;
2223 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
2224 sr_map = sr_mem_entry->backing.map;
2225
2226 /* Trim the pmap if possible. */
2227 if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
2228 pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
2229 }
2230
2231 return shared_region;
2232 }
2233
2234 /*
2235 * Enter the appropriate shared region into "map" for "task".
2236 * This involves looking up the shared region (and possibly creating a new
2237 * one) for the desired environment, then mapping the VM sub map into the
2238 * task's VM "map", with the appropriate level of pmap-nesting.
2239 */
2240 kern_return_t
2241 vm_shared_region_enter(
2242 struct _vm_map *map,
2243 struct task *task,
2244 boolean_t is_64bit,
2245 void *fsroot,
2246 cpu_type_t cpu,
2247 cpu_subtype_t cpu_subtype,
2248 boolean_t reslide,
2249 boolean_t is_driverkit,
2250 uint32_t rsr_version)
2251 {
2252 kern_return_t kr;
2253 vm_shared_region_t shared_region;
2254 vm_map_offset_t sr_address, sr_offset, target_address;
2255 vm_map_size_t sr_size, mapping_size;
2256 vm_map_offset_t sr_pmap_nesting_start;
2257 vm_map_size_t sr_pmap_nesting_size;
2258 ipc_port_t sr_handle;
2259 vm_prot_t cur_prot, max_prot;
2260 vm_map_kernel_flags_t vmk_flags;
2261
2262 SHARED_REGION_TRACE_DEBUG(
2263 ("shared_region: -> "
2264 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2265 (void *)VM_KERNEL_ADDRPERM(map),
2266 (void *)VM_KERNEL_ADDRPERM(task),
2267 (void *)VM_KERNEL_ADDRPERM(fsroot),
2268 cpu, cpu_subtype, is_64bit, is_driverkit));
2269
2270 /* lookup (create if needed) the shared region for this environment */
2271 shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
2272 if (shared_region == NULL) {
2273 /* this should not happen ! */
2274 SHARED_REGION_TRACE_ERROR(
2275 ("shared_region: -> "
2276 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2277 "lookup failed !\n",
2278 (void *)VM_KERNEL_ADDRPERM(map),
2279 (void *)VM_KERNEL_ADDRPERM(task),
2280 (void *)VM_KERNEL_ADDRPERM(fsroot),
2281 cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2282 //panic("shared_region_enter: lookup failed");
2283 return KERN_FAILURE;
2284 }
2285
2286 kr = KERN_SUCCESS;
2287 /* no need to lock since this data is never modified */
2288 sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2289 sr_size = (vm_map_size_t)shared_region->sr_size;
2290 sr_handle = shared_region->sr_mem_entry;
2291 sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2292 sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2293 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2294
2295 cur_prot = VM_PROT_READ;
2296 if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2297 /*
2298 * XXX BINARY COMPATIBILITY
2299 * java6 apparently needs to modify some code in the
2300 * dyld shared cache and needs to be allowed to add
2301 * write access...
2302 */
2303 max_prot = VM_PROT_ALL;
2304 } else {
2305 max_prot = VM_PROT_READ;
2306 /* make it "permanent" to protect against re-mappings */
2307 vmk_flags.vmkf_permanent = true;
2308 }
2309
2310 /*
2311 * Start mapping the shared region's VM sub map into the task's VM map.
2312 */
2313 sr_offset = 0;
2314
2315 if (sr_pmap_nesting_start > sr_address) {
2316 /* we need to map a range without pmap-nesting first */
2317 target_address = sr_address;
2318 mapping_size = sr_pmap_nesting_start - sr_address;
2319 kr = vm_map_enter_mem_object(
2320 map,
2321 &target_address,
2322 mapping_size,
2323 0,
2324 VM_FLAGS_FIXED,
2325 vmk_flags,
2326 VM_KERN_MEMORY_NONE,
2327 sr_handle,
2328 sr_offset,
2329 TRUE,
2330 cur_prot,
2331 max_prot,
2332 VM_INHERIT_SHARE);
2333 if (kr != KERN_SUCCESS) {
2334 SHARED_REGION_TRACE_ERROR(
2335 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2336 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2337 (void *)VM_KERNEL_ADDRPERM(map),
2338 (void *)VM_KERNEL_ADDRPERM(task),
2339 (void *)VM_KERNEL_ADDRPERM(fsroot),
2340 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2341 (long long)target_address,
2342 (long long)mapping_size,
2343 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2344 goto done;
2345 }
2346 SHARED_REGION_TRACE_DEBUG(
2347 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2348 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2349 (void *)VM_KERNEL_ADDRPERM(map),
2350 (void *)VM_KERNEL_ADDRPERM(task),
2351 (void *)VM_KERNEL_ADDRPERM(fsroot),
2352 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2353 (long long)target_address, (long long)mapping_size,
2354 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2355 sr_offset += mapping_size;
2356 sr_size -= mapping_size;
2357 }
2358
2359 /* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2360 vmk_flags.vmkf_nested_pmap = TRUE;
2361
2362 /*
2363 * Use pmap-nesting to map the majority of the shared region into the task's
2364 * VM space. Very rarely will architectures have a shared region that isn't
2365 * the same size as the pmap-nesting region, or start at a different address
2366 * than the pmap-nesting region, so this code will map the entirety of the
2367 * shared region for most architectures.
2368 */
2369 assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2370 target_address = sr_pmap_nesting_start;
2371 kr = vm_map_enter_mem_object(
2372 map,
2373 &target_address,
2374 sr_pmap_nesting_size,
2375 0,
2376 VM_FLAGS_FIXED,
2377 vmk_flags,
2378 VM_MEMORY_SHARED_PMAP,
2379 sr_handle,
2380 sr_offset,
2381 TRUE,
2382 cur_prot,
2383 max_prot,
2384 VM_INHERIT_SHARE);
2385 if (kr != KERN_SUCCESS) {
2386 SHARED_REGION_TRACE_ERROR(
2387 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2388 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2389 (void *)VM_KERNEL_ADDRPERM(map),
2390 (void *)VM_KERNEL_ADDRPERM(task),
2391 (void *)VM_KERNEL_ADDRPERM(fsroot),
2392 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2393 (long long)target_address,
2394 (long long)sr_pmap_nesting_size,
2395 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2396 goto done;
2397 }
2398 SHARED_REGION_TRACE_DEBUG(
2399 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2400 "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2401 (void *)VM_KERNEL_ADDRPERM(map),
2402 (void *)VM_KERNEL_ADDRPERM(task),
2403 (void *)VM_KERNEL_ADDRPERM(fsroot),
2404 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2405 (long long)target_address, (long long)sr_pmap_nesting_size,
2406 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2407
2408 sr_offset += sr_pmap_nesting_size;
2409 sr_size -= sr_pmap_nesting_size;
2410
2411 if (sr_size > 0) {
2412 /* and there's some left to be mapped without pmap-nesting */
2413 vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
2414 target_address = sr_address + sr_offset;
2415 mapping_size = sr_size;
2416 kr = vm_map_enter_mem_object(
2417 map,
2418 &target_address,
2419 mapping_size,
2420 0,
2421 VM_FLAGS_FIXED,
2422 VM_MAP_KERNEL_FLAGS_NONE,
2423 VM_KERN_MEMORY_NONE,
2424 sr_handle,
2425 sr_offset,
2426 TRUE,
2427 cur_prot,
2428 max_prot,
2429 VM_INHERIT_SHARE);
2430 if (kr != KERN_SUCCESS) {
2431 SHARED_REGION_TRACE_ERROR(
2432 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2433 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2434 (void *)VM_KERNEL_ADDRPERM(map),
2435 (void *)VM_KERNEL_ADDRPERM(task),
2436 (void *)VM_KERNEL_ADDRPERM(fsroot),
2437 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2438 (long long)target_address,
2439 (long long)mapping_size,
2440 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2441 goto done;
2442 }
2443 SHARED_REGION_TRACE_DEBUG(
2444 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2445 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2446 (void *)VM_KERNEL_ADDRPERM(map),
2447 (void *)VM_KERNEL_ADDRPERM(task),
2448 (void *)VM_KERNEL_ADDRPERM(fsroot),
2449 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2450 (long long)target_address, (long long)mapping_size,
2451 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2452 sr_offset += mapping_size;
2453 sr_size -= mapping_size;
2454 }
2455 assert(sr_size == 0);
2456
2457 done:
2458 if (kr == KERN_SUCCESS) {
2459 /* let the task use that shared region */
2460 vm_shared_region_set(task, shared_region);
2461 } else {
2462 /* drop our reference since we're not using it */
2463 vm_shared_region_deallocate(shared_region);
2464 vm_shared_region_set(task, NULL);
2465 }
2466
2467 SHARED_REGION_TRACE_DEBUG(
2468 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2469 (void *)VM_KERNEL_ADDRPERM(map),
2470 (void *)VM_KERNEL_ADDRPERM(task),
2471 (void *)VM_KERNEL_ADDRPERM(fsroot),
2472 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2473 kr));
2474 return kr;
2475 }
2476
2477 void
2478 vm_shared_region_remove(
2479 task_t task,
2480 vm_shared_region_t sr)
2481 {
2482 vm_map_t map;
2483 mach_vm_offset_t start;
2484 mach_vm_size_t size;
2485 vm_tag_t tag;
2486 vm_map_kernel_flags_t vmk_flags;
2487 kern_return_t kr;
2488
2489 if (sr == NULL) {
2490 return;
2491 }
2492 map = get_task_map(task);
2493 start = sr->sr_base_address;
2494 size = sr->sr_size;
2495
2496 tag = VM_MEMORY_DYLD;
2497 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2498 vmk_flags.vmkf_overwrite_immutable = true;
2499 vmk_flags.vmkf_range_id = VM_MAP_RANGE_ID(map, tag);
2500
2501 kr = mach_vm_map_kernel(map,
2502 &start,
2503 size,
2504 0, /* mask */
2505 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
2506 vmk_flags,
2507 tag,
2508 MACH_PORT_NULL,
2509 0,
2510 FALSE, /* copy */
2511 VM_PROT_NONE,
2512 VM_PROT_NONE,
2513 VM_INHERIT_DEFAULT);
2514 if (kr != KERN_SUCCESS) {
2515 printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2516 }
2517 }
2518
2519 #define SANE_SLIDE_INFO_SIZE (2560*1024) /* Can be changed if needed */
2520
2521 kern_return_t
2522 vm_shared_region_sliding_valid(uint32_t slide)
2523 {
2524 kern_return_t kr = KERN_SUCCESS;
2525 vm_shared_region_t sr = vm_shared_region_get(current_task());
2526
2527 /* No region yet? we're fine. */
2528 if (sr == NULL) {
2529 return kr;
2530 }
2531
2532 if (sr->sr_slide != 0 && slide != 0) {
2533 if (slide == sr->sr_slide) {
2534 /*
2535 * Request for sliding when we've
2536 * already done it with exactly the
2537 * same slide value before.
2538 * That's technically not wrong, but
2539 * we don't want to slide again, so
2540 * we return KERN_INVALID_ARGUMENT.
2541 */
2542 kr = KERN_INVALID_ARGUMENT;
2543 } else {
2544 printf("Mismatched shared region slide\n");
2545 kr = KERN_FAILURE;
2546 }
2547 }
2548 vm_shared_region_deallocate(sr);
2549 return kr;
2550 }
2551
2552 /*
2553 * Actually create (really overwrite) the mapping to part of the shared cache which
2554 * undergoes relocation. This routine reads in the relocation info from dyld and
2555 * verifies it. It then creates a (or finds a matching) shared region pager which
2556 * handles the actual modification of the page contents and installs the mapping
2557 * using that pager.
2558 */
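/*
 * Rough sketch of the steps below:
 *   1. copyin() and sanity-check the slide info blob provided by dyld;
 *   2. fill in a vm_shared_region_slide_info describing the slid range;
 *   3. on arm64e, for authenticated sections, just stash that info in
 *      sr_auth_section[] and return; the pager setup is deferred to
 *      vm_shared_region_auth_remap();
 *   4. otherwise, find the existing map entry, set up (or find a matching)
 *      shared region pager, and re-enter the mapping on top of it with
 *      VM_FLAGS_OVERWRITE.
 */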
2559 kern_return_t
2560 vm_shared_region_slide_mapping(
2561 vm_shared_region_t sr,
2562 user_addr_t slide_info_addr,
2563 mach_vm_size_t slide_info_size,
2564 mach_vm_offset_t start,
2565 mach_vm_size_t size,
2566 mach_vm_offset_t slid_mapping,
2567 uint32_t slide,
2568 memory_object_control_t sr_file_control,
2569 vm_prot_t prot)
2570 {
2571 kern_return_t kr;
2572 vm_object_t object = VM_OBJECT_NULL;
2573 vm_shared_region_slide_info_t si = NULL;
2574 vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL;
2575 struct vm_map_entry tmp_entry_store;
2576 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
2577 vm_map_t sr_map;
2578 int vm_flags;
2579 vm_map_kernel_flags_t vmk_flags;
2580 vm_map_offset_t map_addr;
2581 void *slide_info_entry = NULL;
2582 int error;
2583
2584 assert(sr->sr_slide_in_progress);
2585
2586 if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2587 return KERN_INVALID_ARGUMENT;
2588 }
2589
2590 /*
2591 * Copy in and verify the relocation information.
2592 */
2593 if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2594 printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2595 return KERN_FAILURE;
2596 }
2597 if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2598 printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2599 return KERN_FAILURE;
2600 }
2601
2602 slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2603 if (slide_info_entry == NULL) {
2604 return KERN_RESOURCE_SHORTAGE;
2605 }
2606 error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2607 if (error) {
2608 printf("copyin of slide_info failed\n");
2609 kr = KERN_INVALID_ADDRESS;
2610 goto done;
2611 }
2612
2613 if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2614 printf("Sanity Check failed for slide_info\n");
2615 goto done;
2616 }
2617
2618 /*
2619 * Allocate and fill in a vm_shared_region_slide_info.
2620 * This will either be used by a new pager, or used to find
2621 * a pre-existing matching pager.
2622 */
2623 object = memory_object_control_to_vm_object(sr_file_control);
2624 if (object == VM_OBJECT_NULL || object->internal) {
2625 object = VM_OBJECT_NULL;
2626 kr = KERN_INVALID_ADDRESS;
2627 goto done;
2628 }
2629
2630 si = kalloc_type(struct vm_shared_region_slide_info,
2631 Z_WAITOK | Z_NOFAIL);
2632 vm_object_lock(object);
2633
2634 vm_object_reference_locked(object); /* for si->slide_object */
2635 object->object_is_shared_cache = TRUE;
2636 vm_object_unlock(object);
2637
2638 si->si_slide_info_entry = slide_info_entry;
2639 si->si_slide_info_size = slide_info_size;
2640
2641 assert(slid_mapping != (mach_vm_offset_t) -1);
2642 si->si_slid_address = slid_mapping + sr->sr_base_address;
2643 si->si_slide_object = object;
2644 si->si_start = start;
2645 si->si_end = si->si_start + size;
2646 si->si_slide = slide;
2647 #if __has_feature(ptrauth_calls)
2648 /*
2649 * If there is authenticated pointer data in this slid mapping,
2650 * then just add the information needed to create new pagers for
2651 * different shared_region_id's later.
2652 */
2653 if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2654 sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2655 !(prot & VM_PROT_NOAUTH)) {
2656 if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
2657 printf("Too many auth/private sections for shared region!!\n");
2658 kr = KERN_INVALID_ARGUMENT;
2659 goto done;
2660 }
2661 si->si_ptrauth = TRUE;
2662 sr->sr_auth_section[sr->sr_next_auth_section++] = si;
2663 /*
2664 * Remember the shared region, since that's where we'll
2665 * stash this info for all auth pagers to share. Each pager
2666 * will need to take a reference to it.
2667 */
2668 si->si_shared_region = sr;
2669 kr = KERN_SUCCESS;
2670 goto done;
2671 }
2672 si->si_shared_region = NULL;
2673 si->si_ptrauth = FALSE;
2674 #else /* __has_feature(ptrauth_calls) */
2675 (void)prot; /* silence unused warning */
2676 #endif /* __has_feature(ptrauth_calls) */
2677
2678 /*
2679 * find the pre-existing shared region's map entry to slide
2680 */
2681 sr_map = vm_shared_region_vm_map(sr);
2682 kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2683 if (kr != KERN_SUCCESS) {
2684 goto done;
2685 }
2686 tmp_entry = &tmp_entry_store;
2687
2688 /*
2689 * The object must exactly cover the region to slide.
2690 */
2691 assert(VME_OFFSET(tmp_entry) == start);
2692 assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2693
2694 /* create a "shared_region" sliding pager */
2695 sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2696 if (sr_pager == MEMORY_OBJECT_NULL) {
2697 kr = KERN_RESOURCE_SHORTAGE;
2698 goto done;
2699 }
2700
2701 #if CONFIG_SECLUDED_MEMORY
2702 /*
2703 * Pages brought in through the shared region pager for the primary
2704 * (camera) or DriverKit shared regions must not go on the secluded queue.
2705 */
2706 if (primary_system_shared_region == NULL ||
2707 primary_system_shared_region == sr ||
2708 sr->sr_driverkit) {
2709 memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2710 }
2711 #endif /* CONFIG_SECLUDED_MEMORY */
2712
2713 /* map that pager over the portion of the mapping that needs sliding */
2714 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
2715 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2716 vmk_flags.vmkf_overwrite_immutable = TRUE;
2717 map_addr = tmp_entry->vme_start;
2718 vmk_flags.vmkf_permanent = shared_region_make_permanent(sr,
2719 tmp_entry->max_protection);
2720 kr = vm_map_enter_mem_object(sr_map,
2721 &map_addr,
2722 (tmp_entry->vme_end - tmp_entry->vme_start),
2723 (mach_vm_offset_t) 0,
2724 vm_flags,
2725 vmk_flags,
2726 VM_KERN_MEMORY_NONE,
2727 (ipc_port_t)(uintptr_t) sr_pager,
2728 0,
2729 TRUE,
2730 tmp_entry->protection,
2731 tmp_entry->max_protection,
2732 tmp_entry->inheritance);
2733 assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2734 assertf(map_addr == tmp_entry->vme_start,
2735 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2736 (uint64_t)map_addr,
2737 (uint64_t) tmp_entry->vme_start,
2738 tmp_entry);
2739
2740 /* success! */
2741 kr = KERN_SUCCESS;
2742
2743 done:
2744 if (sr_pager != NULL) {
2745 /*
2746 * Release the sr_pager reference obtained by shared_region_pager_setup().
2747 * The mapping, if it succeeded, is now holding a reference on the memory object.
2748 */
2749 memory_object_deallocate(sr_pager);
2750 sr_pager = MEMORY_OBJECT_NULL;
2751 }
2752 if (tmp_entry != NULL) {
2753 /* release extra ref on tmp_entry's VM object */
2754 vm_object_deallocate(VME_OBJECT(tmp_entry));
2755 tmp_entry = VM_MAP_ENTRY_NULL;
2756 }
2757
2758 if (kr != KERN_SUCCESS) {
2759 /* cleanup */
2760 if (si != NULL) {
2761 if (si->si_slide_object) {
2762 vm_object_deallocate(si->si_slide_object);
2763 si->si_slide_object = VM_OBJECT_NULL;
2764 }
2765 kfree_type(struct vm_shared_region_slide_info, si);
2766 si = NULL;
2767 }
2768 if (slide_info_entry != NULL) {
2769 kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2770 slide_info_entry = NULL;
2771 }
2772 }
2773 return kr;
2774 }
2775
2776 static kern_return_t
2777 vm_shared_region_slide_sanity_check_v1(
2778 vm_shared_region_slide_info_entry_v1_t s_info)
2779 {
2780 uint32_t pageIndex = 0;
2781 uint16_t entryIndex = 0;
2782 uint16_t *toc = NULL;
2783
2784 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2785 for (; pageIndex < s_info->toc_count; pageIndex++) {
2786 entryIndex = (uint16_t)(toc[pageIndex]);
2787
2788 if (entryIndex >= s_info->entry_count) {
2789 printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2790 return KERN_FAILURE;
2791 }
2792 }
2793 return KERN_SUCCESS;
2794 }
2795
2796 static kern_return_t
2797 vm_shared_region_slide_sanity_check_v2(
2798 vm_shared_region_slide_info_entry_v2_t s_info,
2799 mach_vm_size_t slide_info_size)
2800 {
2801 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2802 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2803 return KERN_FAILURE;
2804 }
2805 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2806 return KERN_FAILURE;
2807 }
2808
2809 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2810
2811 uint32_t page_starts_count = s_info->page_starts_count;
2812 uint32_t page_extras_count = s_info->page_extras_count;
2813 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2814 if (num_trailing_entries < page_starts_count) {
2815 return KERN_FAILURE;
2816 }
2817
2818 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
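/*
 * The shift-and-check idiom below detects 64-bit overflow: if (hypothetically)
 * num_trailing_entries were 0x8000000000000000, then trailing_size would wrap
 * to 0, and 0 >> 1 != 0x8000000000000000, so the blob would be rejected.
 */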
2819 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2820 if (trailing_size >> 1 != num_trailing_entries) {
2821 return KERN_FAILURE;
2822 }
2823
2824 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2825 if (required_size < sizeof(*s_info)) {
2826 return KERN_FAILURE;
2827 }
2828
2829 if (required_size > slide_info_size) {
2830 return KERN_FAILURE;
2831 }
2832
2833 return KERN_SUCCESS;
2834 }
2835
2836 static kern_return_t
2837 vm_shared_region_slide_sanity_check_v3(
2838 vm_shared_region_slide_info_entry_v3_t s_info,
2839 mach_vm_size_t slide_info_size)
2840 {
2841 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2842 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2843 return KERN_FAILURE;
2844 }
2845 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2846 printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SL 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2847 return KERN_FAILURE;
2848 }
2849
2850 uint32_t page_starts_count = s_info->page_starts_count;
2851 mach_vm_size_t num_trailing_entries = page_starts_count;
2852 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2853 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2854 if (required_size < sizeof(*s_info)) {
2855 printf("vm_shared_region_slide_sanity_check_v3: required_size != sizeof(*s_info) 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2856 return KERN_FAILURE;
2857 }
2858
2859 if (required_size > slide_info_size) {
2860 printf("vm_shared_region_slide_sanity_check_v3: required_size != slide_info_size 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2861 return KERN_FAILURE;
2862 }
2863
2864 return KERN_SUCCESS;
2865 }
2866
2867 static kern_return_t
2868 vm_shared_region_slide_sanity_check_v4(
2869 vm_shared_region_slide_info_entry_v4_t s_info,
2870 mach_vm_size_t slide_info_size)
2871 {
2872 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2873 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2874 return KERN_FAILURE;
2875 }
2876 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2877 return KERN_FAILURE;
2878 }
2879
2880 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2881
2882 uint32_t page_starts_count = s_info->page_starts_count;
2883 uint32_t page_extras_count = s_info->page_extras_count;
2884 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2885 if (num_trailing_entries < page_starts_count) {
2886 return KERN_FAILURE;
2887 }
2888
2889 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2890 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2891 if (trailing_size >> 1 != num_trailing_entries) {
2892 return KERN_FAILURE;
2893 }
2894
2895 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2896 if (required_size < sizeof(*s_info)) {
2897 return KERN_FAILURE;
2898 }
2899
2900 if (required_size > slide_info_size) {
2901 return KERN_FAILURE;
2902 }
2903
2904 return KERN_SUCCESS;
2905 }
2906
2907
2908 static kern_return_t
2909 vm_shared_region_slide_sanity_check(
2910 vm_shared_region_slide_info_entry_t s_info,
2911 mach_vm_size_t s_info_size)
2912 {
2913 kern_return_t kr;
2914
2915 switch (s_info->version) {
2916 case 1:
2917 kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
2918 break;
2919 case 2:
2920 kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2921 break;
2922 case 3:
2923 kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2924 break;
2925 case 4:
2926 kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2927 break;
2928 default:
2929 kr = KERN_FAILURE;
2930 }
2931 return kr;
2932 }
2933
2934 static kern_return_t
2935 vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2936 {
2937 uint16_t *toc = NULL;
2938 slide_info_entry_toc_t bitmap = NULL;
2939 uint32_t i = 0, j = 0;
2940 uint8_t b = 0;
2941 uint32_t slide = si->si_slide;
2942 int is_64 = task_has_64Bit_addr(current_task());
2943
2944 vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
2945 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2946
2947 if (pageIndex >= s_info->toc_count) {
2948 printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
2949 } else {
2950 uint16_t entryIndex = (uint16_t)(toc[pageIndex]);
2951 slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
2952
2953 if (entryIndex >= s_info->entry_count) {
2954 printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
2955 } else {
2956 bitmap = &slide_info_entries[entryIndex];
2957
2958 for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
2959 b = bitmap->entry[i];
2960 if (b != 0) {
2961 for (j = 0; j < 8; ++j) {
2962 if (b & (1 << j)) {
2963 uint32_t *ptr_to_slide;
2964 uint32_t old_value;
2965
2966 ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
2967 old_value = *ptr_to_slide;
2968 *ptr_to_slide += slide;
2969 if (is_64 && *ptr_to_slide < old_value) {
2970 /*
2971 * We just slid the low 32 bits of a 64-bit pointer
2972 * and it looks like there should have been a carry-over
2973 * to the upper 32 bits.
2974 * The sliding failed...
2975 */
2976 printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
2977 i, j, b, slide, old_value, *ptr_to_slide);
2978 return KERN_FAILURE;
2979 }
2980 }
2981 }
2982 }
2983 }
2984 }
2985 }
2986
2987 return KERN_SUCCESS;
2988 }
2989
2990 static kern_return_t
2991 rebase_chain_32(
2992 uint8_t *page_content,
2993 uint16_t start_offset,
2994 uint32_t slide_amount,
2995 vm_shared_region_slide_info_entry_v2_t s_info)
2996 {
2997 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
2998
2999 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3000 const uint32_t value_mask = ~delta_mask;
3001 const uint32_t value_add = (uint32_t)(s_info->value_add);
3002 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
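/*
 * Example of the "- 2" scaling (the mask value is an assumption, not taken
 * from this file): with delta_mask == 0xC0000000, __builtin_ctzll() returns
 * 30 and delta_shift is 28, so a 2-bit delta field value of N advances
 * page_offset by N * 4 bytes, i.e. N 32-bit slots.
 */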
3003
3004 uint32_t page_offset = start_offset;
3005 uint32_t delta = 1;
3006
3007 while (delta != 0 && page_offset <= last_page_offset) {
3008 uint8_t *loc;
3009 uint32_t value;
3010
3011 loc = page_content + page_offset;
3012 memcpy(&value, loc, sizeof(value));
3013 delta = (value & delta_mask) >> delta_shift;
3014 value &= value_mask;
3015
3016 if (value != 0) {
3017 value += value_add;
3018 value += slide_amount;
3019 }
3020 memcpy(loc, &value, sizeof(value));
3021 page_offset += delta;
3022 }
3023
3024 /* If the offset went past the end of the page, then the slide data is invalid. */
3025 if (page_offset > last_page_offset) {
3026 return KERN_FAILURE;
3027 }
3028 return KERN_SUCCESS;
3029 }
3030
3031 static kern_return_t
3032 rebase_chain_64(
3033 uint8_t *page_content,
3034 uint16_t start_offset,
3035 uint32_t slide_amount,
3036 vm_shared_region_slide_info_entry_v2_t s_info)
3037 {
3038 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
3039
3040 const uint64_t delta_mask = s_info->delta_mask;
3041 const uint64_t value_mask = ~delta_mask;
3042 const uint64_t value_add = s_info->value_add;
3043 const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
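/*
 * Example (hypothetical mask): with delta_mask == 0x00FFFF0000000000,
 * __builtin_ctzll() returns 40 and delta_shift is 38, so the 16-bit delta
 * field is scaled by 4; a field value of 2 means the next pointer starts
 * 8 bytes further into the page.
 */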
3044
3045 uint32_t page_offset = start_offset;
3046 uint32_t delta = 1;
3047
3048 while (delta != 0 && page_offset <= last_page_offset) {
3049 uint8_t *loc;
3050 uint64_t value;
3051
3052 loc = page_content + page_offset;
3053 memcpy(&value, loc, sizeof(value));
3054 delta = (uint32_t)((value & delta_mask) >> delta_shift);
3055 value &= value_mask;
3056
3057 if (value != 0) {
3058 value += value_add;
3059 value += slide_amount;
3060 }
3061 memcpy(loc, &value, sizeof(value));
3062 page_offset += delta;
3063 }
3064
3065 if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
3066 /* If a pointer straddling the page boundary needs to be adjusted, then
3067 * add the slide to the lower half. The encoding guarantees that the upper
3068 * half on the next page will need no masking.
3069 *
3070 * This assumes a little-endian machine and that the region being slid
3071 * never crosses a 4 GB boundary. */
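/*
 * Illustration (values made up): if the last 4 bytes of this page hold a
 * low word of 0x80004000 and slide_amount is 0x20000, the patched low word
 * is 0x80024000 with no carry into the upper word on the next page, which
 * is exactly what the "no 4 GB crossing" assumption guarantees.
 */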
3072
3073 uint8_t *loc = page_content + page_offset;
3074 uint32_t value;
3075
3076 memcpy(&value, loc, sizeof(value));
3077 value += slide_amount;
3078 memcpy(loc, &value, sizeof(value));
3079 } else if (page_offset > last_page_offset) {
3080 return KERN_FAILURE;
3081 }
3082
3083 return KERN_SUCCESS;
3084 }
3085
3086 static kern_return_t
3087 rebase_chain(
3088 boolean_t is_64,
3089 uint32_t pageIndex,
3090 uint8_t *page_content,
3091 uint16_t start_offset,
3092 uint32_t slide_amount,
3093 vm_shared_region_slide_info_entry_v2_t s_info)
3094 {
3095 kern_return_t kr;
3096 if (is_64) {
3097 kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3098 } else {
3099 kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3100 }
3101
3102 if (kr != KERN_SUCCESS) {
3103 printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3104 pageIndex, start_offset, slide_amount);
3105 }
3106 return kr;
3107 }
3108
3109 static kern_return_t
3110 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3111 {
3112 vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
3113 const uint32_t slide_amount = si->si_slide;
3114
3115 /* The high bits of the delta_mask field are nonzero precisely when the shared
3116 * cache is 64-bit. */
3117 const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
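/*
 * Example (mask values are assumptions, not from this file): a 64-bit cache
 * might use delta_mask == 0x00FFFF0000000000, whose upper 32 bits are
 * nonzero, while a 32-bit cache might use 0xC0000000, whose upper 32 bits
 * are zero; hence the test above.
 */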
3118
3119 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3120 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3121
3122 uint8_t *page_content = (uint8_t *)vaddr;
3123 uint16_t page_entry;
3124
3125 if (pageIndex >= s_info->page_starts_count) {
3126 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3127 pageIndex, s_info->page_starts_count);
3128 return KERN_FAILURE;
3129 }
3130 page_entry = page_starts[pageIndex];
3131
3132 if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
3133 return KERN_SUCCESS;
3134 }
3135
3136 if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
3137 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
3138 uint16_t info;
3139
3140 do {
3141 uint16_t page_start_offset;
3142 kern_return_t kr;
3143
3144 if (chain_index >= s_info->page_extras_count) {
3145 printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3146 chain_index, s_info->page_extras_count);
3147 return KERN_FAILURE;
3148 }
3149 info = page_extras[chain_index];
3150 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3151
3152 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3153 if (kr != KERN_SUCCESS) {
3154 return KERN_FAILURE;
3155 }
3156
3157 chain_index++;
3158 } while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
3159 } else {
3160 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3161 kern_return_t kr;
3162
3163 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3164 if (kr != KERN_SUCCESS) {
3165 return KERN_FAILURE;
3166 }
3167 }
3168
3169 return KERN_SUCCESS;
3170 }
3171
3172
3173 static kern_return_t
3174 vm_shared_region_slide_page_v3(
3175 vm_shared_region_slide_info_t si,
3176 vm_offset_t vaddr,
3177 __unused mach_vm_offset_t uservaddr,
3178 uint32_t pageIndex,
3179 #if !__has_feature(ptrauth_calls)
3180 __unused
3181 #endif /* !__has_feature(ptrauth_calls) */
3182 uint64_t jop_key)
3183 {
3184 vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
3185 const uint32_t slide_amount = si->si_slide;
3186
3187 uint8_t *page_content = (uint8_t *)vaddr;
3188 uint16_t page_entry;
3189
3190 if (pageIndex >= s_info->page_starts_count) {
3191 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3192 pageIndex, s_info->page_starts_count);
3193 return KERN_FAILURE;
3194 }
3195 page_entry = s_info->page_starts[pageIndex];
3196
3197 if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
3198 return KERN_SUCCESS;
3199 }
3200
3201 uint8_t* rebaseLocation = page_content;
3202 uint64_t delta = page_entry;
3203 do {
3204 rebaseLocation += delta;
3205 uint64_t value;
3206 memcpy(&value, rebaseLocation, sizeof(value));
3207 delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
3208
3209 // A pointer is one of :
3210 // {
3211 // uint64_t pointerValue : 51;
3212 // uint64_t offsetToNextPointer : 11;
3213 // uint64_t isBind : 1 = 0;
3214 // uint64_t authenticated : 1 = 0;
3215 // }
3216 // {
3217 // uint32_t offsetFromSharedCacheBase;
3218 // uint16_t diversityData;
3219 // uint16_t hasAddressDiversity : 1;
3220 // uint16_t hasDKey : 1;
3221 // uint16_t hasBKey : 1;
3222 // uint16_t offsetToNextPointer : 11;
3223 // uint16_t isBind : 1;
3224 // uint16_t authenticated : 1 = 1;
3225 // }
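// Example (the constant is made up for illustration): for a plain
// non-bind, non-authenticated value of 0x0010000000004000, bits 51..61
// are 2, so the loop advances rebaseLocation by 2 * 8 = 16 bytes to the
// next pointer, and the remaining pointer bits (0x4000) are what gets
// rebased below.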
3226
3227 bool isBind = (value & (1ULL << 62)) != 0;
3228 if (isBind) {
3229 return KERN_FAILURE;
3230 }
3231
3232 #if __has_feature(ptrauth_calls)
3233 uint16_t diversity_data = (uint16_t)(value >> 32);
3234 bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
3235 ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
3236 #endif /* __has_feature(ptrauth_calls) */
3237 bool isAuthenticated = (value & (1ULL << 63)) != 0;
3238
3239 if (isAuthenticated) {
3240 // The new value for a rebase is the low 32-bits of the threaded value plus the slide.
3241 value = (value & 0xFFFFFFFF) + slide_amount;
3242 // Add in the offset from the mach_header
3243 const uint64_t value_add = s_info->value_add;
3244 value += value_add;
3245
3246 #if __has_feature(ptrauth_calls)
3247 uint64_t discriminator = diversity_data;
3248 if (hasAddressDiversity) {
3249 // First calculate a new discriminator using the address of where we are trying to store the value
3250 uintptr_t pageOffset = rebaseLocation - page_content;
3251 discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3252 }
3253
3254 if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3255 /*
3256 * these pointers are used in user mode. disable the kernel key diversification
3257 * so we can sign them for use in user mode.
3258 */
3259 value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3260 }
3261 #endif /* __has_feature(ptrauth_calls) */
3262 } else {
3263 // The new value for a rebase is the low 51-bits of the threaded value plus the slide.
3264 // Regular pointer which needs to fit in 51-bits of value.
3265 // C++ RTTI uses the top bit, so we allow the whole top byte
3266 // and the bottom 43 bits to fit into the 51-bit value.
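// Example of the unpacking (the constant is illustrative): for
// value == 0x0000080000000010, top8Bits is 0x0000080000000000 (bit 43)
// and bottom43Bits is 0x10; shifting left by 13 moves bit 43 up to
// bit 56, giving targetValue == 0x0100000000000010 before the slide
// is added.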
3267 uint64_t top8Bits = value & 0x0007F80000000000ULL;
3268 uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
3269 uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
3270 value = targetValue + slide_amount;
3271 }
3272
3273 memcpy(rebaseLocation, &value, sizeof(value));
3274 } while (delta != 0);
3275
3276 return KERN_SUCCESS;
3277 }
3278
3279 static kern_return_t
3280 rebase_chainv4(
3281 uint8_t *page_content,
3282 uint16_t start_offset,
3283 uint32_t slide_amount,
3284 vm_shared_region_slide_info_entry_v4_t s_info)
3285 {
3286 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3287
3288 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3289 const uint32_t value_mask = ~delta_mask;
3290 const uint32_t value_add = (uint32_t)(s_info->value_add);
3291 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
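/*
 * Note: the delta field embedded in each slid value counts 4-byte units,
 * so shifting by (ctz(delta_mask) - 2) rather than ctz(delta_mask) folds
 * the multiply-by-4 into the extraction; the `delta` computed in the loop
 * below is already a byte offset.
 */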
3292
3293 uint32_t page_offset = start_offset;
3294 uint32_t delta = 1;
3295
3296 while (delta != 0 && page_offset <= last_page_offset) {
3297 uint8_t *loc;
3298 uint32_t value;
3299
3300 loc = page_content + page_offset;
3301 memcpy(&value, loc, sizeof(value));
3302 delta = (value & delta_mask) >> delta_shift;
3303 value &= value_mask;
3304
3305 if ((value & 0xFFFF8000) == 0) {
3306 // small positive non-pointer, use as-is
3307 } else if ((value & 0x3FFF8000) == 0x3FFF8000) {
3308 // small negative non-pointer
3309 value |= 0xC0000000;
3310 } else {
3311 // pointer that needs rebasing
3312 value += value_add;
3313 value += slide_amount;
3314 }
3315 memcpy(loc, &value, sizeof(value));
3316 page_offset += delta;
3317 }
3318
3319 /* If the offset went past the end of the page, then the slide data is invalid. */
3320 if (page_offset > last_page_offset) {
3321 return KERN_FAILURE;
3322 }
3323 return KERN_SUCCESS;
3324 }
3325
3326 static kern_return_t
3327 vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3328 {
3329 vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
3330 const uint32_t slide_amount = si->si_slide;
3331
3332 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3333 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3334
3335 uint8_t *page_content = (uint8_t *)vaddr;
3336 uint16_t page_entry;
3337
3338 if (pageIndex >= s_info->page_starts_count) {
3339 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3340 pageIndex, s_info->page_starts_count);
3341 return KERN_FAILURE;
3342 }
3343 page_entry = page_starts[pageIndex];
3344
3345 if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
3346 return KERN_SUCCESS;
3347 }
3348
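/*
 * A v4 page_starts[] entry either encodes a single chain start directly
 * (its DYLD_CACHE_SLIDE4_PAGE_INDEX bits, in units of
 * (1 << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT) bytes), or, when
 * DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA is set, an index into page_extras[],
 * which lists several chain starts and is terminated by an entry with
 * DYLD_CACHE_SLIDE4_PAGE_EXTRA_END set.
 */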
3349 if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
3350 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
3351 uint16_t info;
3352
3353 do {
3354 uint16_t page_start_offset;
3355 kern_return_t kr;
3356
3357 if (chain_index >= s_info->page_extras_count) {
3358 printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3359 chain_index, s_info->page_extras_count);
3360 return KERN_FAILURE;
3361 }
3362 info = page_extras[chain_index];
3363 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3364
3365 kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3366 if (kr != KERN_SUCCESS) {
3367 return KERN_FAILURE;
3368 }
3369
3370 chain_index++;
3371 } while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
3372 } else {
3373 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3374 kern_return_t kr;
3375
3376 kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3377 if (kr != KERN_SUCCESS) {
3378 return KERN_FAILURE;
3379 }
3380 }
3381
3382 return KERN_SUCCESS;
3383 }
3384
3385
3386
3387 kern_return_t
3388 vm_shared_region_slide_page(
3389 vm_shared_region_slide_info_t si,
3390 vm_offset_t vaddr,
3391 mach_vm_offset_t uservaddr,
3392 uint32_t pageIndex,
3393 uint64_t jop_key)
3394 {
3395 switch (si->si_slide_info_entry->version) {
3396 case 1:
3397 return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
3398 case 2:
3399 return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3400 case 3:
3401 return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3402 case 4:
3403 return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3404 default:
3405 return KERN_FAILURE;
3406 }
3407 }
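/*
 * Informational note: these version numbers track the dyld_cache_slide_info
 * structure versions produced by dyld's shared cache builder (v3 is the
 * chained, optionally authenticated format used by arm64/arm64e caches;
 * v4 is the 32-bit variant). The authoritative layouts live in dyld's
 * dyld_cache_format.h.
 */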
3408
3409 /******************************************************************************/
3410 /* Comm page support */
3411 /******************************************************************************/
3412
3413 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
3414 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
3415 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
3416 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
3417 SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
3418 SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;
3419
3420 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
3421 SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
3422 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
3423 SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
3424 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
3425 SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;
3426
3427 SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
3428 SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3429
3430 #if defined(__i386__) || defined(__x86_64__)
3431 /*
3432 * Create a memory entry, VM submap and pmap for one commpage.
3433 */
3434 static void
3435 _vm_commpage_init(
3436 ipc_port_t *handlep,
3437 vm_map_size_t size)
3438 {
3439 vm_named_entry_t mem_entry;
3440 vm_map_t new_map;
3441
3442 SHARED_REGION_TRACE_DEBUG(
3443 ("commpage: -> _init(0x%llx)\n",
3444 (long long)size));
3445
3446 pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
3447 if (new_pmap == NULL) {
3448 panic("_vm_commpage_init: could not allocate pmap");
3449 }
3450 new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);
3451
3452 mem_entry = mach_memory_entry_allocate(handlep);
3453 mem_entry->backing.map = new_map;
3454 mem_entry->internal = TRUE;
3455 mem_entry->is_sub_map = TRUE;
3456 mem_entry->offset = 0;
3457 mem_entry->protection = VM_PROT_ALL;
3458 mem_entry->size = size;
3459
3460 SHARED_REGION_TRACE_DEBUG(
3461 ("commpage: _init(0x%llx) <- %p\n",
3462 (long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
3463 }
3464 #endif /* __i386__ || __x86_64__ */
3465
3466
3467 /*
3468 * Initialize the comm text pages at boot time
3469 */
3470 void
3471 vm_commpage_text_init(void)
3472 {
3473 SHARED_REGION_TRACE_DEBUG(
3474 ("commpage text: ->init()\n"));
3475 #if defined(__i386__) || defined(__x86_64__)
3476 /* create the 32-bit comm text page */
3477 unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to 32-bit max minus 2 pages */
3478 _vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3479 commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
3480 commpage_text32_map = commpage_text32_entry->backing.map;
3481 commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
3482 /* XXX if (cpu_is_64bit_capable()) ? */
3483 /* create the 64-bit comm page */
3484 offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to a 2MB range */
3485 _vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3486 commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
3487 commpage_text64_map = commpage_text64_entry->backing.map;
3488 commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
3489 #endif /* __i386__ || __x86_64__ */
3490
3491 /* populate the routines in here */
3492 commpage_text_populate();
3493
3494 SHARED_REGION_TRACE_DEBUG(
3495 ("commpage text: init() <-\n"));
3496 }
3497
3498 /*
3499 * Initialize the comm pages at boot time.
3500 */
3501 void
3502 vm_commpage_init(void)
3503 {
3504 SHARED_REGION_TRACE_DEBUG(
3505 ("commpage: -> init()\n"));
3506
3507 #if defined(__i386__) || defined(__x86_64__)
3508 /* create the 32-bit comm page */
3509 _vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
3510 commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
3511 commpage32_map = commpage32_entry->backing.map;
3512
3513 /* XXX if (cpu_is_64bit_capable()) ? */
3514 /* create the 64-bit comm page */
3515 _vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
3516 commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
3517 commpage64_map = commpage64_entry->backing.map;
3518
3519 #endif /* __i386__ || __x86_64__ */
3520
3521 /* populate them according to this specific platform */
3522 commpage_populate();
3523 __commpage_setup = 1;
3524 #if XNU_TARGET_OS_OSX
3525 if (__system_power_source == 0) {
3526 post_sys_powersource_internal(0, 1);
3527 }
3528 #endif /* XNU_TARGET_OS_OSX */
3529
3530 SHARED_REGION_TRACE_DEBUG(
3531 ("commpage: init() <-\n"));
3532 }
3533
3534 /*
3535 * Enter the appropriate comm page into the task's address space.
3536 * This is called at exec() time via vm_map_exec().
3537 */
3538 kern_return_t
3539 vm_commpage_enter(
3540 vm_map_t map,
3541 task_t task,
3542 boolean_t is64bit)
3543 {
3544 #if defined(__arm64__)
3545 #pragma unused(is64bit)
3546 (void)task;
3547 (void)map;
3548 pmap_insert_sharedpage(vm_map_pmap(map));
3549 return KERN_SUCCESS;
3550 #else
3551 ipc_port_t commpage_handle, commpage_text_handle;
3552 vm_map_offset_t commpage_address, objc_address, commpage_text_address;
3553 vm_map_size_t commpage_size, objc_size, commpage_text_size;
3554 int vm_flags;
3555 vm_map_kernel_flags_t vmk_flags;
3556 kern_return_t kr;
3557
3558 SHARED_REGION_TRACE_DEBUG(
3559 ("commpage: -> enter(%p,%p)\n",
3560 (void *)VM_KERNEL_ADDRPERM(map),
3561 (void *)VM_KERNEL_ADDRPERM(task)));
3562
3563 commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
3564 /* the comm page is likely to be beyond the actual end of the VM map */
3565 vm_flags = VM_FLAGS_FIXED;
3566 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
3567 vmk_flags.vmkf_beyond_max = TRUE;
3568
3569 /* select the appropriate comm page for this task */
3570 assert(!(is64bit ^ vm_map_is_64bit(map)));
3571 if (is64bit) {
3572 commpage_handle = commpage64_handle;
3573 commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
3574 commpage_size = _COMM_PAGE64_AREA_LENGTH;
3575 objc_size = _COMM_PAGE64_OBJC_SIZE;
3576 objc_address = _COMM_PAGE64_OBJC_BASE;
3577 commpage_text_handle = commpage_text64_handle;
3578 commpage_text_address = (vm_map_offset_t) commpage_text64_location;
3579 } else {
3580 commpage_handle = commpage32_handle;
3581 commpage_address =
3582 (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
3583 commpage_size = _COMM_PAGE32_AREA_LENGTH;
3584 objc_size = _COMM_PAGE32_OBJC_SIZE;
3585 objc_address = _COMM_PAGE32_OBJC_BASE;
3586 commpage_text_handle = commpage_text32_handle;
3587 commpage_text_address = (vm_map_offset_t) commpage_text32_location;
3588 }
3589
3590 vm_tag_t tag = VM_KERN_MEMORY_NONE;
3591 if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
3592 (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
3593 /* the commpage is properly aligned and sized for pmap-nesting */
3594 tag = VM_MEMORY_SHARED_PMAP;
3595 vmk_flags.vmkf_nested_pmap = TRUE;
3596 }
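/*
 * Pmap nesting shares the commpage submap's translation tables directly
 * with the task's pmap, so the commpage range must be aligned and sized
 * to the pmap's minimum nesting granularity; otherwise it is simply
 * entered as ordinary, unnested mappings below.
 */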
3597 /* map the comm page in the task's address space */
3598 assert(commpage_handle != IPC_PORT_NULL);
3599 kr = vm_map_enter_mem_object(
3600 map,
3601 &commpage_address,
3602 commpage_size,
3603 0,
3604 vm_flags,
3605 vmk_flags,
3606 tag,
3607 commpage_handle,
3608 0,
3609 FALSE,
3610 VM_PROT_READ,
3611 VM_PROT_READ,
3612 VM_INHERIT_SHARE);
3613 if (kr != KERN_SUCCESS) {
3614 SHARED_REGION_TRACE_ERROR(
3615 ("commpage: enter(%p,0x%llx,0x%llx) "
3616 "commpage %p mapping failed 0x%x\n",
3617 (void *)VM_KERNEL_ADDRPERM(map),
3618 (long long)commpage_address,
3619 (long long)commpage_size,
3620 (void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
3621 }
3622
3623 /* map the comm text page in the task's address space */
3624 assert(commpage_text_handle != IPC_PORT_NULL);
3625 kr = vm_map_enter_mem_object(
3626 map,
3627 &commpage_text_address,
3628 commpage_text_size,
3629 0,
3630 vm_flags,
3631 vmk_flags,
3632 tag,
3633 commpage_text_handle,
3634 0,
3635 FALSE,
3636 VM_PROT_READ | VM_PROT_EXECUTE,
3637 VM_PROT_READ | VM_PROT_EXECUTE,
3638 VM_INHERIT_SHARE);
3639 if (kr != KERN_SUCCESS) {
3640 SHARED_REGION_TRACE_ERROR(
3641 ("commpage text: enter(%p,0x%llx,0x%llx) "
3642 "commpage text %p mapping failed 0x%x\n",
3643 (void *)VM_KERNEL_ADDRPERM(map),
3644 (long long)commpage_text_address,
3645 (long long)commpage_text_size,
3646 (void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
3647 }
3648
3649 /*
3650 * Since we're here, we also pre-allocate some virtual space for the
3651 * Objective-C run-time, if needed...
3652 */
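/*
 * This is just a fixed, anonymous reservation (IPC_PORT_NULL backing) at
 * objc_address, so nothing else gets mapped into that range.
 */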
3653 if (objc_size != 0) {
3654 kr = vm_map_enter_mem_object(
3655 map,
3656 &objc_address,
3657 objc_size,
3658 0,
3659 VM_FLAGS_FIXED,
3660 vmk_flags,
3661 tag,
3662 IPC_PORT_NULL,
3663 0,
3664 FALSE,
3665 VM_PROT_ALL,
3666 VM_PROT_ALL,
3667 VM_INHERIT_DEFAULT);
3668 if (kr != KERN_SUCCESS) {
3669 SHARED_REGION_TRACE_ERROR(
3670 ("commpage: enter(%p,0x%llx,0x%llx) "
3671 "objc mapping failed 0x%x\n",
3672 (void *)VM_KERNEL_ADDRPERM(map),
3673 (long long)objc_address,
3674 (long long)objc_size, kr));
3675 }
3676 }
3677
3678 SHARED_REGION_TRACE_DEBUG(
3679 ("commpage: enter(%p,%p) <- 0x%x\n",
3680 (void *)VM_KERNEL_ADDRPERM(map),
3681 (void *)VM_KERNEL_ADDRPERM(task), kr));
3682 return kr;
3683 #endif /* __arm64__ */
3684 }
3685
3686 int
3687 vm_shared_region_slide(
3688 uint32_t slide,
3689 mach_vm_offset_t entry_start_address,
3690 mach_vm_size_t entry_size,
3691 mach_vm_offset_t slide_start,
3692 mach_vm_size_t slide_size,
3693 mach_vm_offset_t slid_mapping,
3694 memory_object_control_t sr_file_control,
3695 vm_prot_t prot)
3696 {
3697 vm_shared_region_t sr;
3698 kern_return_t error;
3699
3700 SHARED_REGION_TRACE_DEBUG(
3701 ("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
3702 slide, entry_start_address, entry_size, slide_start, slide_size));
3703
3704 sr = vm_shared_region_get(current_task());
3705 if (sr == NULL) {
3706 printf("%s: no shared region?\n", __FUNCTION__);
3707 SHARED_REGION_TRACE_DEBUG(
3708 ("vm_shared_region_slide: <- %d (no shared region)\n",
3709 KERN_FAILURE));
3710 return KERN_FAILURE;
3711 }
3712
3713 /*
3714 * Protect from concurrent access.
3715 */
3716 vm_shared_region_lock();
3717 while (sr->sr_slide_in_progress) {
3718 vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
3719 }
3720
3721 sr->sr_slide_in_progress = TRUE;
3722 vm_shared_region_unlock();
3723
3724 error = vm_shared_region_slide_mapping(sr,
3725 (user_addr_t)slide_start,
3726 slide_size,
3727 entry_start_address,
3728 entry_size,
3729 slid_mapping,
3730 slide,
3731 sr_file_control,
3732 prot);
3733 if (error) {
3734 printf("slide_info initialization failed with kr=%d\n", error);
3735 }
3736
3737 vm_shared_region_lock();
3738
3739 assert(sr->sr_slide_in_progress);
3740 sr->sr_slide_in_progress = FALSE;
3741 thread_wakeup(&sr->sr_slide_in_progress);
3742
3743 #if XNU_TARGET_OS_OSX
3744 if (error == KERN_SUCCESS) {
3745 shared_region_completed_slide = TRUE;
3746 }
3747 #endif /* XNU_TARGET_OS_OSX */
3748 vm_shared_region_unlock();
3749
3750 vm_shared_region_deallocate(sr);
3751
3752 SHARED_REGION_TRACE_DEBUG(
3753 ("vm_shared_region_slide: <- %d\n",
3754 error));
3755
3756 return error;
3757 }
3758
3759 /*
3760 * Used during Authenticated Root Volume macOS boot.
3761 * Launchd re-execs itself and wants the new launchd to use
3762 * the shared cache from the new root volume. This call
3763 * makes all the existing shared caches stale to allow
3764 * that to happen.
3765 */
3766 void
3767 vm_shared_region_pivot(void)
3768 {
3769 vm_shared_region_t shared_region = NULL;
3770
3771 vm_shared_region_lock();
3772
3773 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3774 assert(shared_region->sr_ref_count > 0);
3775 shared_region->sr_stale = TRUE;
3776 if (shared_region->sr_timer_call) {
3777 /*
3778 * We have a shared region ready to be destroyed
3779 * and just waiting for a delayed timer to fire.
3780 * Marking it stale cements its ineligibility to
3781 * be used ever again. So let's shorten the timer
3782 * aggressively down to 10 milliseconds and get rid of it.
3783 * This is a single quantum and we don't need to go
3784 * shorter than this duration. We want it to be short
3785 * enough, however, because we could have an unmount
3786 * of the volume hosting this shared region just behind
3787 * us.
3788 */
3789 uint64_t deadline;
3790 assert(shared_region->sr_ref_count == 1);
3791
3792 /*
3793 * Free the old timer call; this returns with a reference held.
3794 * If the old timer has already fired and is waiting for the
3795 * vm_shared_region lock, we will just return with an additional
3796 * ref_count, i.e. 2. The old timer will then fire and just drop
3797 * the ref count back down to 1 with no other modifications.
3798 */
3799 vm_shared_region_reference_locked(shared_region);
3800
3801 /* set up the timer. Keep the reference from above for this timer.*/
3802 shared_region->sr_timer_call = thread_call_allocate(
3803 (thread_call_func_t) vm_shared_region_timeout,
3804 (thread_call_param_t) shared_region);
3805
3806 /* schedule the timer */
3807 clock_interval_to_deadline(10, /* 10 milliseconds */
3808 NSEC_PER_MSEC,
3809 &deadline);
3810 thread_call_enter_delayed(shared_region->sr_timer_call,
3811 deadline);
3812
3813 SHARED_REGION_TRACE_DEBUG(
3814 ("shared_region: pivot(%p): armed timer\n",
3815 (void *)VM_KERNEL_ADDRPERM(shared_region)));
3816 }
3817 }
3818
3819 vm_shared_region_unlock();
3820 }
3821
3822 /*
3823 * Routine to mark any non-standard slide shared cache region as stale.
3824 * This causes the next "reslide" spawn to create a new shared region.
3825 */
3826 void
3827 vm_shared_region_reslide_stale(boolean_t driverkit)
3828 {
3829 #if __has_feature(ptrauth_calls)
3830 vm_shared_region_t shared_region = NULL;
3831
3832 vm_shared_region_lock();
3833
3834 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3835 assert(shared_region->sr_ref_count > 0);
3836 if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
3837 shared_region->sr_stale = TRUE;
3838 vm_shared_region_reslide_count++;
3839 }
3840 }
3841
3842 vm_shared_region_unlock();
3843 #else
3844 (void)driverkit;
3845 #endif /* __has_feature(ptrauth_calls) */
3846 }
3847
3848 /*
3849 * Report whether the task is using a reslide shared cache region.
3850 */
3851 bool
3852 vm_shared_region_is_reslide(__unused struct task *task)
3853 {
3854 bool is_reslide = FALSE;
3855 #if __has_feature(ptrauth_calls)
3856 vm_shared_region_t sr = vm_shared_region_get(task);
3857
3858 if (sr != NULL) {
3859 is_reslide = sr->sr_reslide;
3860 vm_shared_region_deallocate(sr);
3861 }
3862 #endif /* __has_feature(ptrauth_calls) */
3863 return is_reslide;
3864 }
3865
3866 /*
3867 * This is called from power management code to let the kernel know the current power source:
3868 * 0 if it is an external source (connected to AC power),
3869 * 1 if it is an internal power source, i.e. battery.
3870 */
3871 void
3872 #if XNU_TARGET_OS_OSX
3873 post_sys_powersource(int i)
3874 #else /* XNU_TARGET_OS_OSX */
3875 post_sys_powersource(__unused int i)
3876 #endif /* XNU_TARGET_OS_OSX */
3877 {
3878 #if XNU_TARGET_OS_OSX
3879 post_sys_powersource_internal(i, 0);
3880 #endif /* XNU_TARGET_OS_OSX */
3881 }
3882
3883
3884 #if XNU_TARGET_OS_OSX
3885 static void
3886 post_sys_powersource_internal(int i, int internal)
3887 {
3888 if (internal == 0) {
3889 __system_power_source = i;
3890 }
3891 }
3892 #endif /* XNU_TARGET_OS_OSX */
3893
3894 void *
3895 vm_shared_region_root_dir(
3896 struct vm_shared_region *sr)
3897 {
3898 void *vnode;
3899
3900 vm_shared_region_lock();
3901 vnode = sr->sr_root_dir;
3902 vm_shared_region_unlock();
3903 return vnode;
3904 }
3905