/*
 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * Shared region (... and comm page)
 *
 * This file handles the VM shared region and comm page.
 *
 */
/*
 * SHARED REGIONS
 * --------------
 *
 * A shared region is a submap that contains the most common system shared
 * libraries for a given environment, which is defined by:
 * - cpu-type
 * - 64-bitness
 * - root directory
 * - Team ID - when we have pointer authentication.
 *
 * The point of a shared region is to reduce the setup overhead when exec'ing
 * a new process. A shared region uses a shared VM submap that gets mapped
 * automatically at exec() time, see vm_map_exec(). The first process of a given
 * environment sets up the shared region and all further processes in that
 * environment can re-use that shared region without having to re-create
 * the same mappings in their VM map. Everything they need is contained in
 * the shared region.
 *
 * The region can also share a pmap (mostly for read-only parts but also for the
 * initial version of some writable parts), which gets "nested" into the
 * process's pmap. This reduces the number of soft faults: once one process
 * brings in a page in the shared region, all the other processes can access
 * it without having to enter it in their own pmap.
 *
 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
 * to map the appropriate shared region in the process's address space.
 * We look up the appropriate shared region for the process's environment.
 * If we can't find one, we create a new (empty) one and add it to the list.
 * Otherwise, we just take an extra reference on the shared region we found.
 *
 * The "dyld" runtime, mapped into the process's address space at exec() time,
 * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
 * system calls to validate and/or populate the shared region with the
 * appropriate dyld_shared_cache file.
 *
 * The shared region is inherited on fork() and the child simply takes an
 * extra reference on its parent's shared region.
 *
 * When the task terminates, we release the reference on its shared region.
 * When the last reference is released, we destroy the shared region.
 *
 * After a chroot(), the calling process keeps using its original shared region,
 * since that's what was mapped when it was started. But its children
 * will use a different shared region, because they need to use the shared
 * cache that's relative to the new root directory.
 */
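
/*
 * For illustration only, a minimal sketch of the userspace side of the
 * handshake described above: dyld first asks whether this process's shared
 * region is already populated. The wrapper name matches the syscall named
 * above; treat the exact signature and return convention as assumptions.
 *
 *	#include <stdint.h>
 *
 *	extern int shared_region_check_np(uint64_t *start_address);
 *
 *	static int
 *	shared_cache_is_mapped(uint64_t *base)
 *	{
 *		// 0 on success: *base holds the address of the first mapping
 *		// and the region is already populated; otherwise dyld would
 *		// go on to map and slide the dyld_shared_cache file itself.
 *		return shared_region_check_np(base) == 0;
 *	}
 */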

/*
 * COMM PAGE
 *
 * A "comm page" is an area of memory that is populated by the kernel with
 * the appropriate platform-specific version of some commonly used code.
 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
 * for the native cpu-type. No need to overly optimize translated code
 * for hardware that is not really there!
 *
 * The comm pages are created and populated at boot time.
 *
 * The appropriate comm page is mapped into a process's address space
 * at exec() time, in vm_map_exec(). It is then inherited on fork().
 *
 * The comm page is shared between the kernel and all applications of
 * a given platform. Only the kernel can modify it.
 *
 * Applications just branch to fixed addresses in the comm page and find
 * the right version of the code for the platform. There is also some
 * data provided and updated by the kernel for processes to retrieve easily
 * without having to do a system call.
 */
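
/*
 * Illustrative sketch only: retrieving kernel-maintained data from the comm
 * page in userspace is a plain load from a fixed address, no syscall needed.
 * _COMM_PAGE_CPU_CAPABILITIES64 comes from <machine/cpu_capabilities.h>
 * (included below); treat the choice of field as an assumption here.
 *
 *	#include <stdint.h>
 *	#include <machine/cpu_capabilities.h>
 *
 *	static uint64_t
 *	read_cpu_capabilities(void)
 *	{
 *		// The comm page is already mapped at a fixed address in
 *		// every process of this platform; the kernel keeps it
 *		// up to date.
 *		return *(volatile uint64_t *)_COMM_PAGE_CPU_CAPABILITIES64;
 *	}
 */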

#include <debug.h>

#include <kern/ipc_tt.h>
#include <kern/kalloc.h>
#include <kern/thread_call.h>

#include <mach/mach_vm.h>
#include <mach/machine.h>

#include <vm/vm_map.h>
#include <vm/vm_map_internal.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <machine/commpage.h>
#include <machine/cpu_capabilities.h>
#include <sys/random.h>
#include <sys/errno.h>

#if defined(__arm64__)
#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#endif

/*
 * the following codes are used in the subclass
 * of the DBG_MACH_SHAREDREGION class
 */
#define PROCESS_SHARED_CACHE_LAYOUT 0x00

#if __has_feature(ptrauth_calls)
#include <ptrauth.h>
#endif /* __has_feature(ptrauth_calls) */

/* "dyld" uses this to figure out what the kernel supports */
int shared_region_version = 3;

/* trace level, output is sent to the system log file */
int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;

/* should local (non-chroot) shared regions persist when no task uses them? */
int shared_region_persistence = 0;	/* no by default */


/* delay in seconds before reclaiming an unused shared region */
TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
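
/*
 * Since TUNABLE_WRITEABLE registers the string above as a boot-arg name, the
 * reclaim delay can be tuned without a kernel rebuild, e.g. (assumed syntax,
 * on a device where boot-args can be set):
 *
 *	nvram boot-args="vm_shared_region_destroy_delay=240"
 */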

/*
 * Cached pointer to the most recently mapped shared region from PID 1, which should
 * be the most commonly mapped shared region in the system. There are many processes
 * which do not use this, for a variety of reasons.
 *
 * The main consumer of this is stackshot.
 */
struct vm_shared_region *primary_system_shared_region = NULL;

#if XNU_TARGET_OS_OSX
/*
 * Only one cache gets to slide on Desktop, since we can't
 * tear down slide info properly today and the desktop actually
 * produces lots of shared caches.
 */
boolean_t shared_region_completed_slide = FALSE;
#endif /* XNU_TARGET_OS_OSX */

/* this lock protects all the shared region data structures */
static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);

#define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
#define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
#define vm_shared_region_sleep(event, interruptible)	\
	lck_mtx_sleep(&vm_shared_region_lock,		\
	    LCK_SLEEP_DEFAULT,				\
	    (event_t) (event),				\
	    (interruptible))
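
/*
 * The canonical wait pattern built on these macros, used throughout this
 * file: sleep on a flag in the region while holding vm_shared_region_lock,
 * then wake waiters once the flag is cleared. A minimal sketch mirroring
 * the real uses below:
 *
 *	vm_shared_region_lock();
 *	while (sr->sr_mapping_in_progress) {
 *		vm_shared_region_sleep(&sr->sr_mapping_in_progress,
 *		    THREAD_UNINT);
 *	}
 *	sr->sr_mapping_in_progress = TRUE;
 *	vm_shared_region_unlock();
 *	// ... work on the shared region without the lock held ...
 *	vm_shared_region_lock();
 *	sr->sr_mapping_in_progress = FALSE;
 *	thread_wakeup((event_t) &sr->sr_mapping_in_progress);
 *	vm_shared_region_unlock();
 */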

/* the list of currently available shared regions (one per environment) */
queue_head_t	vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
int		vm_shared_region_count = 0;
int		vm_shared_region_peak = 0;
static uint32_t	vm_shared_region_lastid = 0;	/* for sr_id field */

/*
 * the number of times an event has forced the recalculation of the reslide
 * shared region slide.
 */
#if __has_feature(ptrauth_calls)
int vm_shared_region_reslide_count = 0;
#endif /* __has_feature(ptrauth_calls) */

static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
static vm_shared_region_t vm_shared_region_create(
	void			*root_dir,
	cpu_type_t		cputype,
	cpu_subtype_t		cpu_subtype,
	boolean_t		is_64bit,
	int			target_page_shift,
	boolean_t		reslide,
	boolean_t		is_driverkit,
	uint32_t		rsr_version);
static void vm_shared_region_destroy(vm_shared_region_t shared_region);

static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
static void vm_shared_region_timeout(thread_call_param_t param0,
    thread_call_param_t param1);
static kern_return_t vm_shared_region_slide_mapping(
	vm_shared_region_t	sr,
	user_addr_t		slide_info_addr,
	mach_vm_size_t		slide_info_size,
	mach_vm_offset_t	start,
	mach_vm_size_t		size,
	mach_vm_offset_t	slid_mapping,
	uint32_t		slide,
	memory_object_control_t,
	vm_prot_t		prot); /* forward */

static int __commpage_setup = 0;
#if XNU_TARGET_OS_OSX
static int __system_power_source = 1;	/* init to external power source */
static void post_sys_powersource_internal(int i, int internal);
#endif /* XNU_TARGET_OS_OSX */

extern u_int32_t random(void);

/*
 * Retrieve a task's shared region and grab an extra reference to
 * make sure it doesn't disappear while the caller is using it.
 * The caller is responsible for consuming that extra reference if
 * necessary.
 */
vm_shared_region_t
vm_shared_region_get(
	task_t		task)
{
	vm_shared_region_t	shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> get(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(task)));

	task_lock(task);
	vm_shared_region_lock();
	shared_region = task->shared_region;
	if (shared_region) {
		assert(shared_region->sr_ref_count > 0);
		vm_shared_region_reference_locked(shared_region);
	}
	vm_shared_region_unlock();
	task_unlock(task);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: get(%p) <- %p\n",
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	return shared_region;
}
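
/*
 * Sketch of the intended get/put pairing for callers, per the contract in
 * the comment above (the task pointer here is just a placeholder):
 *
 *	vm_shared_region_t sr = vm_shared_region_get(task);
 *	if (sr != NULL) {
 *		vm_map_t map = vm_shared_region_vm_map(sr);
 *		// ... use sr/map while the extra reference pins them ...
 *		vm_shared_region_deallocate(sr);	// consume the extra ref
 *	}
 */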

vm_map_t
vm_shared_region_vm_map(
	vm_shared_region_t	shared_region)
{
	ipc_port_t		sr_handle;
	vm_named_entry_t	sr_mem_entry;
	vm_map_t		sr_map;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> vm_map(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
	assert(shared_region->sr_ref_count > 0);

	sr_handle = shared_region->sr_mem_entry;
	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
	sr_map = sr_mem_entry->backing.map;
	assert(sr_mem_entry->is_sub_map);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: vm_map(%p) <- %p\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		(void *)VM_KERNEL_ADDRPERM(sr_map)));
	return sr_map;
}

/*
 * Set the shared region the process should use.
 * A NULL new shared region means that we just want to release the old
 * shared region.
 * The caller should already have an extra reference on the new shared region
 * (if any). We release a reference on the old shared region (if any).
 */
void
vm_shared_region_set(
	task_t			task,
	vm_shared_region_t	new_shared_region)
{
	vm_shared_region_t	old_shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> set(%p, %p)\n",
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));

	task_lock(task);
	vm_shared_region_lock();

	old_shared_region = task->shared_region;
	if (new_shared_region) {
		assert(new_shared_region->sr_ref_count > 0);
	}

	task->shared_region = new_shared_region;

	vm_shared_region_unlock();
	task_unlock(task);

	if (old_shared_region) {
		assert(old_shared_region->sr_ref_count > 0);
		vm_shared_region_deallocate(old_shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: set(%p) <- old=%p new=%p\n",
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(old_shared_region),
		(void *)VM_KERNEL_ADDRPERM(new_shared_region)));
}

/*
 * A new arm64 shared region can match an existing arm64e region.
 * It just gets a private non-authenticating pager.
 */
static inline bool
match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
{
	if (exist == new) {
		return true;
	}
	if (cputype == CPU_TYPE_ARM64 &&
	    exist == CPU_SUBTYPE_ARM64E &&
	    new == CPU_SUBTYPE_ARM64_ALL) {
		return true;
	}
	return false;
}
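
/*
 * In other words (descriptive only):
 *
 *	existing region		incoming process	match_subtype()
 *	---------------		----------------	---------------
 *	X			X			true
 *	CPU_SUBTYPE_ARM64E	CPU_SUBTYPE_ARM64_ALL	true (arm64 reuses arm64e)
 *	anything else		different subtype	false
 */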


/*
 * Look up the shared region for the desired environment.
 * If none is found, create a new (empty) one.
 * Grab an extra reference on the returned shared region, to make sure
 * it doesn't get destroyed before the caller is done with it. The caller
 * is responsible for consuming that extra reference if necessary.
 */
vm_shared_region_t
vm_shared_region_lookup(
	void		*root_dir,
	cpu_type_t	cputype,
	cpu_subtype_t	cpu_subtype,
	boolean_t	is_64bit,
	int		target_page_shift,
	boolean_t	reslide,
	boolean_t	is_driverkit,
	uint32_t	rsr_version)
{
	vm_shared_region_t	shared_region;
	vm_shared_region_t	new_shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit));

	shared_region = NULL;
	new_shared_region = NULL;

	vm_shared_region_lock();
	for (;;) {
		queue_iterate(&vm_shared_region_queue,
		    shared_region,
		    vm_shared_region_t,
		    sr_q) {
			assert(shared_region->sr_ref_count > 0);
			if (shared_region->sr_cpu_type == cputype &&
			    match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
			    shared_region->sr_root_dir == root_dir &&
			    shared_region->sr_64bit == is_64bit &&
#if __ARM_MIXED_PAGE_SIZE__
			    shared_region->sr_page_shift == target_page_shift &&
#endif /* __ARM_MIXED_PAGE_SIZE__ */
#if __has_feature(ptrauth_calls)
			    shared_region->sr_reslide == reslide &&
#endif /* __has_feature(ptrauth_calls) */
			    shared_region->sr_driverkit == is_driverkit &&
			    shared_region->sr_rsr_version == rsr_version &&
			    !shared_region->sr_stale) {
				/* found a match! */
				vm_shared_region_reference_locked(shared_region);
				goto done;
			}
		}
		if (new_shared_region == NULL) {
			/* no match: create a new one */
			vm_shared_region_unlock();
			new_shared_region = vm_shared_region_create(root_dir,
			    cputype,
			    cpu_subtype,
			    is_64bit,
			    target_page_shift,
			    reslide,
			    is_driverkit,
			    rsr_version);
			/* do the lookup again, in case we lost a race */
			vm_shared_region_lock();
			continue;
		}
		/* still no match: use our new one */
		shared_region = new_shared_region;
		new_shared_region = NULL;
		uint32_t newid = ++vm_shared_region_lastid;
		if (newid == 0) {
			panic("shared_region: vm_shared_region_lastid wrapped");
		}
		shared_region->sr_id = newid;
		shared_region->sr_install_time = mach_absolute_time();
		queue_enter(&vm_shared_region_queue,
		    shared_region,
		    vm_shared_region_t,
		    sr_q);
		vm_shared_region_count++;
		if (vm_shared_region_count > vm_shared_region_peak) {
			vm_shared_region_peak = vm_shared_region_count;
		}
		break;
	}

done:
	vm_shared_region_unlock();

	if (new_shared_region) {
		/*
		 * We lost a race with someone else to create a new shared
		 * region for that environment. Get rid of our unused one.
		 */
		assert(new_shared_region->sr_ref_count == 1);
		new_shared_region->sr_ref_count--;
		vm_shared_region_destroy(new_shared_region);
		new_shared_region = NULL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit,
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	assert(shared_region->sr_ref_count > 0);
	return shared_region;
}

/*
 * Take an extra reference on a shared region.
 * The vm_shared_region_lock should already be held by the caller.
 */
static void
vm_shared_region_reference_locked(
	vm_shared_region_t	shared_region)
{
	LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> reference_locked(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
	assert(shared_region->sr_ref_count > 0);
	shared_region->sr_ref_count++;
	assert(shared_region->sr_ref_count != 0);

	if (shared_region->sr_timer_call != NULL) {
		boolean_t cancelled;

		/* cancel and free any pending timeout */
		cancelled = thread_call_cancel(shared_region->sr_timer_call);
		if (cancelled) {
			thread_call_free(shared_region->sr_timer_call);
			shared_region->sr_timer_call = NULL;
			/* release the reference held by the cancelled timer */
			shared_region->sr_ref_count--;
		} else {
			/* the timer will drop the reference and free itself */
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: reference_locked(%p) <- %d\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		shared_region->sr_ref_count));
}

/*
 * Take a reference on a shared region.
 */
void
vm_shared_region_reference(vm_shared_region_t shared_region)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> reference(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();
	vm_shared_region_reference_locked(shared_region);
	vm_shared_region_unlock();

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: reference(%p) <- %d\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		shared_region->sr_ref_count));
}

/*
 * Release a reference on the shared region.
 * Destroy it if there are no references left.
 */
void
vm_shared_region_deallocate(
	vm_shared_region_t	shared_region)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> deallocate(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();

	assert(shared_region->sr_ref_count > 0);

	if (shared_region->sr_root_dir == NULL) {
		/*
		 * Local (i.e. based on the boot volume) shared regions
		 * can persist or not based on the "shared_region_persistence"
		 * sysctl.
		 * Make sure that this one complies.
		 *
		 * See comments in vm_shared_region_slide() for notes about
		 * shared regions we have slid (which are not torn down currently).
		 */
		if (shared_region_persistence &&
		    !shared_region->sr_persists) {
			/* make this one persistent */
			shared_region->sr_ref_count++;
			shared_region->sr_persists = TRUE;
		} else if (!shared_region_persistence &&
		    shared_region->sr_persists) {
			/* make this one no longer persistent */
			assert(shared_region->sr_ref_count > 1);
			shared_region->sr_ref_count--;
			shared_region->sr_persists = FALSE;
		}
	}

	assert(shared_region->sr_ref_count > 0);
	shared_region->sr_ref_count--;
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: deallocate(%p): ref now %d\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		shared_region->sr_ref_count));

	if (shared_region->sr_ref_count == 0) {
		uint64_t deadline;

		/*
		 * Even though a shared region is unused, delay a while before
		 * tearing it down, in case a new app launch can use it.
		 * We don't keep around stale shared regions, nor older RSR ones.
		 */
		if (shared_region->sr_timer_call == NULL &&
		    shared_region_destroy_delay != 0 &&
		    !shared_region->sr_stale &&
		    !(shared_region->sr_rsr_version != 0 &&
		    shared_region->sr_rsr_version != rsr_get_version())) {
			/* hold one reference for the timer */
			assert(!shared_region->sr_mapping_in_progress);
			shared_region->sr_ref_count++;

			/* set up the timer */
			shared_region->sr_timer_call = thread_call_allocate(
				(thread_call_func_t) vm_shared_region_timeout,
				(thread_call_param_t) shared_region);

			/* schedule the timer */
			clock_interval_to_deadline(shared_region_destroy_delay,
			    NSEC_PER_SEC,
			    &deadline);
			thread_call_enter_delayed(shared_region->sr_timer_call,
			    deadline);

			SHARED_REGION_TRACE_DEBUG(
				("shared_region: deallocate(%p): armed timer\n",
				(void *)VM_KERNEL_ADDRPERM(shared_region)));

			vm_shared_region_unlock();
		} else {
			/* timer expired: let go of this shared region */

			/* Make sure there's no cached pointer to the region. */
			if (primary_system_shared_region == shared_region) {
				primary_system_shared_region = NULL;
			}

			/*
			 * Remove it from the queue first, so no one can find
			 * it...
			 */
			queue_remove(&vm_shared_region_queue,
			    shared_region,
			    vm_shared_region_t,
			    sr_q);
			vm_shared_region_count--;
			vm_shared_region_unlock();

			/* ... and destroy it */
			vm_shared_region_destroy(shared_region);
			shared_region = NULL;
		}
	} else {
		vm_shared_region_unlock();
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: deallocate(%p) <-\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
}

void
vm_shared_region_timeout(
	thread_call_param_t	param0,
	__unused thread_call_param_t	param1)
{
	vm_shared_region_t	shared_region;

	shared_region = (vm_shared_region_t) param0;

	vm_shared_region_deallocate(shared_region);
}


/*
 * Create a new (empty) shared region for a new environment.
 */
static vm_shared_region_t
vm_shared_region_create(
	void			*root_dir,
	cpu_type_t		cputype,
	cpu_subtype_t		cpu_subtype,
	boolean_t		is_64bit,
	int			target_page_shift,
#if !__has_feature(ptrauth_calls)
	__unused
#endif /* __has_feature(ptrauth_calls) */
	boolean_t		reslide,
	boolean_t		is_driverkit,
	uint32_t		rsr_version)
{
	vm_named_entry_t	mem_entry;
	ipc_port_t		mem_entry_port;
	vm_shared_region_t	shared_region;
	vm_map_t		sub_map;
	mach_vm_offset_t	base_address, pmap_nesting_start;
	mach_vm_size_t		size, pmap_nesting_size;

	SHARED_REGION_TRACE_INFO(
		("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit));

	base_address = 0;
	size = 0;
	mem_entry = NULL;
	mem_entry_port = IPC_PORT_NULL;
	sub_map = VM_MAP_NULL;

	/* create a new shared region structure... */
	shared_region = kalloc_type(struct vm_shared_region,
	    Z_WAITOK | Z_NOFAIL);

	/* figure out the correct settings for the desired environment */
	if (is_64bit) {
		switch (cputype) {
#if defined(__arm64__)
		case CPU_TYPE_ARM64:
			base_address = SHARED_REGION_BASE_ARM64;
			size = SHARED_REGION_SIZE_ARM64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
			break;
#else
		case CPU_TYPE_I386:
			base_address = SHARED_REGION_BASE_X86_64;
			size = SHARED_REGION_SIZE_X86_64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
			break;
		case CPU_TYPE_POWERPC:
			base_address = SHARED_REGION_BASE_PPC64;
			size = SHARED_REGION_SIZE_PPC64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
			break;
#endif
		default:
			SHARED_REGION_TRACE_ERROR(
				("shared_region: create: unknown cpu type %d\n",
				cputype));
			kfree_type(struct vm_shared_region, shared_region);
			shared_region = NULL;
			goto done;
		}
	} else {
		switch (cputype) {
#if defined(__arm64__)
		case CPU_TYPE_ARM:
			base_address = SHARED_REGION_BASE_ARM;
			size = SHARED_REGION_SIZE_ARM;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
			break;
#else
		case CPU_TYPE_I386:
			base_address = SHARED_REGION_BASE_I386;
			size = SHARED_REGION_SIZE_I386;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
			break;
		case CPU_TYPE_POWERPC:
			base_address = SHARED_REGION_BASE_PPC;
			size = SHARED_REGION_SIZE_PPC;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
			break;
#endif
		default:
			SHARED_REGION_TRACE_ERROR(
				("shared_region: create: unknown cpu type %d\n",
				cputype));
			kfree_type(struct vm_shared_region, shared_region);
			shared_region = NULL;
			goto done;
		}
	}

	/* create a memory entry structure and a Mach port handle */
	mem_entry = mach_memory_entry_allocate(&mem_entry_port);

#if defined(__arm64__)
	{
		struct pmap *pmap_nested;
		int pmap_flags = 0;
		pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;


#if __ARM_MIXED_PAGE_SIZE__
		if (cputype == CPU_TYPE_ARM64 &&
		    target_page_shift == FOURK_PAGE_SHIFT) {
			/* arm64/4k address space */
			pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
		}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

		pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
		if (pmap_nested != PMAP_NULL) {
			pmap_set_nested(pmap_nested);
			sub_map = vm_map_create_options(pmap_nested, 0,
			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);

			if (is_64bit ||
			    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
				/* enforce 16KB alignment of VM map entries */
				vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
			}
#if __ARM_MIXED_PAGE_SIZE__
			if (cputype == CPU_TYPE_ARM64 &&
			    target_page_shift == FOURK_PAGE_SHIFT) {
				/* arm64/4k address space */
				vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
			}
#endif /* __ARM_MIXED_PAGE_SIZE__ */
		} else {
			sub_map = VM_MAP_NULL;
		}
	}
#else /* defined(__arm64__) */
	{
		/* create a VM sub map and its pmap */
		pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
		if (pmap != NULL) {
			sub_map = vm_map_create_options(pmap, 0,
			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
		} else {
			sub_map = VM_MAP_NULL;
		}
	}
#endif /* defined(__arm64__) */
	if (sub_map == VM_MAP_NULL) {
		ipc_port_release_send(mem_entry_port);
		kfree_type(struct vm_shared_region, shared_region);
		shared_region = NULL;
		SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
		goto done;
	}

	/* shared regions should always enforce code-signing */
	vm_map_cs_enforcement_set(sub_map, true);
	assert(vm_map_cs_enforcement(sub_map));
	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));

	assert(!sub_map->disable_vmentry_reuse);
	sub_map->is_nested_map = TRUE;

	/* make the memory entry point to the VM sub map */
	mem_entry->is_sub_map = TRUE;
	mem_entry->backing.map = sub_map;
	mem_entry->size = size;
	mem_entry->protection = VM_PROT_ALL;

	/* make the shared region point at the memory entry */
	shared_region->sr_mem_entry = mem_entry_port;

	/* fill in the shared region's environment and settings */
	shared_region->sr_base_address = base_address;
	shared_region->sr_size = size;
	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
	shared_region->sr_cpu_type = cputype;
	shared_region->sr_cpu_subtype = cpu_subtype;
	shared_region->sr_64bit = (uint8_t)is_64bit;
#if __ARM_MIXED_PAGE_SIZE__
	shared_region->sr_page_shift = (uint8_t)target_page_shift;
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	shared_region->sr_driverkit = (uint8_t)is_driverkit;
	shared_region->sr_rsr_version = rsr_version;
	shared_region->sr_root_dir = root_dir;

	queue_init(&shared_region->sr_q);
	shared_region->sr_mapping_in_progress = FALSE;
	shared_region->sr_slide_in_progress = FALSE;
	shared_region->sr_persists = FALSE;
	shared_region->sr_stale = FALSE;
	shared_region->sr_timer_call = NULL;
	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;

	/* grab a reference for the caller */
	shared_region->sr_ref_count = 1;

	shared_region->sr_slide = 0; /* not slid yet */

	/* Initialize UUID and other metadata */
	memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
	shared_region->sr_uuid_copied = FALSE;
	shared_region->sr_images_count = 0;
	shared_region->sr_images = NULL;
#if __has_feature(ptrauth_calls)
	shared_region->sr_reslide = reslide;
	shared_region->sr_num_auth_section = 0;
	shared_region->sr_next_auth_section = 0;
	shared_region->sr_auth_section = NULL;
#endif /* __has_feature(ptrauth_calls) */

done:
	if (shared_region) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
			"base=0x%llx,size=0x%llx) <- "
			"%p mem=(%p,%p) map=%p pmap=%p\n",
			(void *)VM_KERNEL_ADDRPERM(root_dir),
			cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)base_address,
			(long long)size,
			(void *)VM_KERNEL_ADDRPERM(shared_region),
			(void *)VM_KERNEL_ADDRPERM(mem_entry_port),
			(void *)VM_KERNEL_ADDRPERM(mem_entry),
			(void *)VM_KERNEL_ADDRPERM(sub_map),
			(void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
	} else {
		SHARED_REGION_TRACE_INFO(
			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
			"base=0x%llx,size=0x%llx) <- NULL",
			(void *)VM_KERNEL_ADDRPERM(root_dir),
			cputype, cpu_subtype, is_64bit, is_driverkit,
			(long long)base_address,
			(long long)size));
	}
	return shared_region;
}

/*
 * Destroy a now-unused shared region.
 * The shared region is no longer in the queue and cannot be looked up.
 */
static void
vm_shared_region_destroy(
	vm_shared_region_t	shared_region)
{
	vm_named_entry_t	mem_entry;
	vm_map_t		map;

	SHARED_REGION_TRACE_INFO(
		("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		(void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
		shared_region->sr_cpu_type,
		shared_region->sr_cpu_subtype,
		shared_region->sr_64bit,
		shared_region->sr_driverkit));

	assert(shared_region->sr_ref_count == 0);
	assert(!shared_region->sr_persists);

	mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
	assert(mem_entry->is_sub_map);
	assert(!mem_entry->internal);
	assert(!mem_entry->is_copy);
	map = mem_entry->backing.map;

	/*
	 * Clean up the pmap first. The virtual addresses that were
	 * entered in this possibly "nested" pmap may have different values
	 * than the VM map's min and max offsets, if the VM sub map was
	 * mapped at a non-zero offset in the processes' main VM maps, which
	 * is usually the case, so the clean-up we do in vm_map_destroy() would
	 * not be enough.
	 */
	if (map->pmap) {
		pmap_remove(map->pmap,
		    (vm_map_offset_t)shared_region->sr_base_address,
		    (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
	}

	/*
	 * Release our (one and only) handle on the memory entry.
	 * This will generate a no-senders notification, which will be processed
	 * by ipc_kobject_notify_no_senders(), which will release the one and only
	 * reference on the memory entry and cause it to be destroyed, along
	 * with the VM sub map and its pmap.
	 */
	mach_memory_entry_port_release(shared_region->sr_mem_entry);
	mem_entry = NULL;
	shared_region->sr_mem_entry = IPC_PORT_NULL;

	if (shared_region->sr_timer_call) {
		thread_call_free(shared_region->sr_timer_call);
	}

#if __has_feature(ptrauth_calls)
	/*
	 * Free the cached copies of slide_info for the AUTH regions.
	 */
	for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
		vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
		if (si != NULL) {
			vm_object_deallocate(si->si_slide_object);
			kfree_data(si->si_slide_info_entry,
			    si->si_slide_info_size);
			kfree_type(struct vm_shared_region_slide_info, si);
			shared_region->sr_auth_section[i] = NULL;
		}
	}
	if (shared_region->sr_auth_section != NULL) {
		assert(shared_region->sr_num_auth_section > 0);
		kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
		shared_region->sr_auth_section = NULL;
		shared_region->sr_num_auth_section = 0;
	}
#endif /* __has_feature(ptrauth_calls) */

	/* release the shared region structure... */
	kfree_type(struct vm_shared_region, shared_region);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: destroy(%p) <-\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
	shared_region = NULL;
}

/*
 * Gets the address of the first (in time) mapping in the shared region.
 * If used during initial task setup by dyld, task should be non-NULL.
 */
kern_return_t
vm_shared_region_start_address(
	vm_shared_region_t	shared_region,
	mach_vm_offset_t	*start_address,
	task_t			task)
{
	kern_return_t		kr;
	mach_vm_offset_t	sr_base_address;
	mach_vm_offset_t	sr_first_mapping;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> start_address(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();

	/*
	 * Wait if there's another thread establishing a mapping
	 * in this shared region right when we're looking at it.
	 * We want a consistent view of the map...
	 */
	while (shared_region->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
		    THREAD_UNINT);
	}
	assert(!shared_region->sr_mapping_in_progress);
	assert(shared_region->sr_ref_count > 0);

	sr_base_address = shared_region->sr_base_address;
	sr_first_mapping = shared_region->sr_first_mapping;

	if (sr_first_mapping == (mach_vm_offset_t) -1) {
		/* shared region is empty */
		kr = KERN_INVALID_ADDRESS;
	} else {
		kr = KERN_SUCCESS;
		*start_address = sr_base_address + sr_first_mapping;
	}


	uint32_t slide = shared_region->sr_slide;

	vm_shared_region_unlock();

	/*
	 * Cache shared region info in the task for telemetry gathering, if we're
	 * passed in the task. No task lock here as we're still in initial task setup.
	 */
	if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
		uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
		if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
		    (char *)&task->task_shared_region_uuid,
		    sizeof(task->task_shared_region_uuid)) == 0) {
			task->task_shared_region_slide = slide;
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: start_address(%p) <- 0x%llx\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		(long long)shared_region->sr_base_address));

	return kr;
}

/*
 * Look up a pre-existing mapping in the shared region, for replacement.
 * Takes an extra object reference if found.
 */
static kern_return_t
find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
{
	vm_map_entry_t	found;

	/* find the shared region's map entry to slide */
	vm_map_lock_read(map);
	if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
		/* no mapping there */
		vm_map_unlock(map);
		return KERN_INVALID_ARGUMENT;
	}

	*entry = *found;
	/* extra ref to keep object alive while map is unlocked */
	vm_object_reference(VME_OBJECT(found));
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}

static bool
shared_region_make_permanent(
	vm_shared_region_t	sr,
	vm_prot_t		max_prot)
{
	if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
		return false;
	}
	if (max_prot & VM_PROT_WRITE) {
		/*
		 * Potentially writable mapping: no major issue with allowing
		 * it to be replaced since its contents could be modified
		 * anyway.
		 */
		return false;
	}
	if (max_prot & VM_PROT_EXECUTE) {
		/*
		 * Potentially executable mapping: some software might want
		 * to try and replace it to interpose their own code when a
		 * given routine is called or returns, for example.
		 * So let's not make it "permanent".
		 */
		return false;
	}
	/*
	 * Make this mapping "permanent" to prevent it from being deleted
	 * and/or replaced with another mapping.
	 */
	return true;
}
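
/*
 * Summary of the policy above (descriptive only): a mapping becomes
 * "permanent" only when it is read-only data on a non-x86_64 region, i.e.
 *
 *	x86_64 region			-> never permanent
 *	max_prot includes WRITE		-> not permanent (contents mutable anyway)
 *	max_prot includes EXECUTE	-> not permanent (allow interposing)
 *	otherwise (read-only data)	-> permanent
 */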

#if __has_feature(ptrauth_calls)

/*
 * Determine if this task is actually using pointer signing.
 */
static boolean_t
task_sign_pointers(task_t task)
{
	if (task->map &&
	    task->map->pmap &&
	    !task->map->pmap->disable_jop) {
		return TRUE;
	}
	return FALSE;
}

/*
 * If the shared region contains mappings that are authenticated, then
 * remap them into the task private map.
 *
 * Failures are possible in this routine when jetsam kills a process
 * just as dyld is trying to set it up. The vm_map and task shared region
 * info get torn down w/o waiting for this thread to finish up.
 */
__attribute__((noinline))
kern_return_t
vm_shared_region_auth_remap(vm_shared_region_t sr)
{
	memory_object_t sr_pager = MEMORY_OBJECT_NULL;
	task_t		task = current_task();
	vm_shared_region_slide_info_t si;
	uint_t		i;
	vm_object_t	object;
	vm_map_t	sr_map;
	struct vm_map_entry tmp_entry_store = {0};
	vm_map_entry_t	tmp_entry = NULL;
	int		vm_flags;
	vm_map_kernel_flags_t vmk_flags;
	vm_map_offset_t	map_addr;
	kern_return_t	kr = KERN_SUCCESS;
	boolean_t	use_ptr_auth = task_sign_pointers(task);

	/*
	 * Don't do this more than once and avoid any race conditions in finishing it.
	 */
	vm_shared_region_lock();
	while (sr->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
	}
	assert(!sr->sr_mapping_in_progress);
	assert(sr->sr_ref_count > 0);

	/* Just return if already done. */
	if (task->shared_region_auth_remapped) {
		vm_shared_region_unlock();
		return KERN_SUCCESS;
	}

	/* let others know to wait while we're working in this shared region */
	sr->sr_mapping_in_progress = TRUE;
	vm_shared_region_unlock();

	/*
	 * Remap any sections with pointer authentications into the private map.
	 */
	for (i = 0; i < sr->sr_num_auth_section; ++i) {
		si = sr->sr_auth_section[i];
		assert(si != NULL);
		assert(si->si_ptrauth);

		/*
		 * We have a mapping that needs to be private.
		 * Look for an existing slid mapping's pager with matching
		 * object, offset, slide info and shared_region_id to reuse.
		 */
		object = si->si_slide_object;
		sr_pager = shared_region_pager_match(object, si->si_start, si,
		    use_ptr_auth ? task->jop_pid : 0);
		if (sr_pager == MEMORY_OBJECT_NULL) {
			printf("%s(): shared_region_pager_match() failed\n", __func__);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * verify matching jop_pid for this task and this pager
		 */
		if (use_ptr_auth) {
			shared_region_pager_match_task_key(sr_pager, task);
		}

		sr_map = vm_shared_region_vm_map(sr);
		tmp_entry = NULL;

		kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
		if (kr != KERN_SUCCESS) {
			printf("%s(): find_mapping_to_slide() failed\n", __func__);
			goto done;
		}
		tmp_entry = &tmp_entry_store;

		/*
		 * Check that the object exactly covers the region to slide.
		 */
		if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
			printf("%s(): doesn't fully cover\n", __func__);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * map the pager over the portion of the mapping that needs sliding
		 */
		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
		vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
		vmk_flags.vmkf_overwrite_immutable = TRUE;
		vmk_flags.vmkf_permanent = shared_region_make_permanent(sr,
		    tmp_entry->max_protection);

		map_addr = si->si_slid_address;
		kr = vm_map_enter_mem_object(task->map,
		    &map_addr,
		    si->si_end - si->si_start,
		    (mach_vm_offset_t) 0,
		    vm_flags,
		    vmk_flags,
		    VM_KERN_MEMORY_NONE,
		    (ipc_port_t)(uintptr_t) sr_pager,
		    0,
		    TRUE,
		    tmp_entry->protection,
		    tmp_entry->max_protection,
		    tmp_entry->inheritance);
		memory_object_deallocate(sr_pager);
		sr_pager = MEMORY_OBJECT_NULL;
		if (kr != KERN_SUCCESS) {
			printf("%s(): vm_map_enter_mem_object() failed\n", __func__);
			goto done;
		}
		assertf(map_addr == si->si_slid_address,
		    "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t)si->si_slid_address,
		    tmp_entry);

		/* Drop the ref count grabbed by find_mapping_to_slide */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = NULL;
	}

done:
	if (tmp_entry) {
		/* Drop the ref count grabbed by find_mapping_to_slide */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = NULL;
	}

	/*
	 * Drop any extra reference to the pager in case we're quitting due to an error above.
	 */
	if (sr_pager != MEMORY_OBJECT_NULL) {
		memory_object_deallocate(sr_pager);
	}

	/*
	 * Mark the region as having its auth sections remapped.
	 */
	vm_shared_region_lock();
	task->shared_region_auth_remapped = TRUE;
	sr->sr_mapping_in_progress = FALSE;
	thread_wakeup((event_t)&sr->sr_mapping_in_progress);
	vm_shared_region_unlock();
	return kr;
}
#endif /* __has_feature(ptrauth_calls) */

void
vm_shared_region_undo_mappings(
	vm_map_t		sr_map,
	mach_vm_offset_t	sr_base_address,
	struct _sr_file_mappings *srf_mappings,
	struct _sr_file_mappings *srf_mappings_current,
	unsigned int		srf_current_mappings_count)
{
	unsigned int		j = 0;
	vm_shared_region_t	shared_region = NULL;
	boolean_t		reset_shared_region_state = FALSE;
	struct _sr_file_mappings *srfmp;
	unsigned int		mappings_count;
	struct shared_file_mapping_slide_np *mappings;

	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		printf("Failed to undo mappings because of NULL shared region.\n");
		return;
	}

	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;

	if (sr_map == NULL) {
		ipc_port_t		sr_handle;
		vm_named_entry_t	sr_mem_entry;

		vm_shared_region_lock();
		assert(shared_region->sr_ref_count > 0);

		while (shared_region->sr_mapping_in_progress) {
			/* wait for our turn... */
			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
			    THREAD_UNINT);
		}
		assert(!shared_region->sr_mapping_in_progress);
		assert(shared_region->sr_ref_count > 0);
		/* let others know we're working in this shared region */
		shared_region->sr_mapping_in_progress = TRUE;

		vm_shared_region_unlock();

		reset_shared_region_state = TRUE;

		/* no need to lock because this data is never modified... */
		sr_handle = shared_region->sr_mem_entry;
		sr_mem_entry = mach_memory_entry_from_port(sr_handle);
		sr_map = sr_mem_entry->backing.map;
		sr_base_address = shared_region->sr_base_address;
	}
	/*
	 * Undo the mappings we've established so far.
	 */
	for (srfmp = &srf_mappings[0];
	    srfmp <= srf_mappings_current;
	    srfmp++) {
		mappings = srfmp->mappings;
		mappings_count = srfmp->mappings_count;
		if (srfmp == srf_mappings_current) {
			mappings_count = srf_current_mappings_count;
		}

		for (j = 0; j < mappings_count; j++) {
			kern_return_t kr2;
			mach_vm_offset_t start, end;

			if (mappings[j].sms_size == 0) {
				/*
				 * We didn't establish this
				 * mapping, so nothing to undo.
				 */
				continue;
			}
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[%d]: "
				"address:0x%016llx "
				"size:0x%016llx "
				"offset:0x%016llx "
				"maxprot:0x%x prot:0x%x: "
				"undoing...\n",
				j,
				(long long)mappings[j].sms_address,
				(long long)mappings[j].sms_size,
				(long long)mappings[j].sms_file_offset,
				mappings[j].sms_max_prot,
				mappings[j].sms_init_prot));
			start = (mappings[j].sms_address - sr_base_address);
			end = start + mappings[j].sms_size;
			start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
			end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
			kr2 = vm_map_remove_guard(sr_map,
			    start,
			    end,
			    VM_MAP_REMOVE_IMMUTABLE,
			    KMEM_GUARD_NONE).kmr_return;
			assert(kr2 == KERN_SUCCESS);
		}
	}

	if (reset_shared_region_state) {
		vm_shared_region_lock();
		assert(shared_region->sr_ref_count > 0);
		assert(shared_region->sr_mapping_in_progress);
		/* we're done working on that shared region */
		shared_region->sr_mapping_in_progress = FALSE;
		thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
		vm_shared_region_unlock();
		reset_shared_region_state = FALSE;
	}

	vm_shared_region_deallocate(shared_region);
}

/*
 * First part of vm_shared_region_map_file(). Split out to
 * avoid kernel stack overflow.
 */
__attribute__((noinline))
static kern_return_t
vm_shared_region_map_file_setup(
	vm_shared_region_t	shared_region,
	int			sr_file_mappings_count,
	struct _sr_file_mappings *sr_file_mappings,
	unsigned int		*mappings_to_slide_cnt,
	struct shared_file_mapping_slide_np **mappings_to_slide,
	mach_vm_offset_t	*slid_mappings,
	memory_object_control_t	*slid_file_controls,
	mach_vm_offset_t	*sfm_min_address,
	mach_vm_offset_t	*sfm_max_address,
	vm_map_t		*sr_map_ptr,
	vm_map_offset_t		*lowest_unnestable_addr_ptr,
	unsigned int		vmsr_num_slides)
{
	kern_return_t		kr = KERN_SUCCESS;
	memory_object_control_t	file_control;
	vm_object_t		file_object;
	ipc_port_t		sr_handle;
	vm_named_entry_t	sr_mem_entry;
	vm_map_t		sr_map;
	mach_vm_offset_t	sr_base_address;
	unsigned int		i = 0;
	mach_port_t		map_port;
	vm_map_offset_t		target_address;
	vm_object_t		object;
	vm_object_size_t	obj_size;
	vm_map_offset_t		lowest_unnestable_addr = 0;
	vm_map_kernel_flags_t	vmk_flags;
	mach_vm_offset_t	sfm_end;
	uint32_t		mappings_count;
	struct shared_file_mapping_slide_np *mappings;
	struct _sr_file_mappings *srfmp;

	vm_shared_region_lock();
	assert(shared_region->sr_ref_count > 0);

	/*
	 * Make sure we handle only one mapping at a time in a given
	 * shared region, to avoid race conditions. This should not
	 * happen frequently...
	 */
	while (shared_region->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
		    THREAD_UNINT);
	}
	assert(!shared_region->sr_mapping_in_progress);
	assert(shared_region->sr_ref_count > 0);


	/* let others know we're working in this shared region */
	shared_region->sr_mapping_in_progress = TRUE;

	/*
	 * Did someone race in and map this shared region already?
	 */
	if (shared_region->sr_first_mapping != -1) {
		vm_shared_region_unlock();
#if DEVELOPMENT || DEBUG
		printf("shared_region: caught race in map and slide\n");
#endif /* DEVELOPMENT || DEBUG */
		return KERN_FAILURE;
	}

	vm_shared_region_unlock();

	/* no need to lock because this data is never modified... */
	sr_handle = shared_region->sr_mem_entry;
	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
	sr_map = sr_mem_entry->backing.map;
	sr_base_address = shared_region->sr_base_address;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> map(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	mappings_count = 0;
	mappings = NULL;
	srfmp = NULL;

	/* process all the files to be mapped */
	for (srfmp = &sr_file_mappings[0];
	    srfmp < &sr_file_mappings[sr_file_mappings_count];
	    srfmp++) {
		mappings_count = srfmp->mappings_count;
		mappings = srfmp->mappings;
		file_control = srfmp->file_control;

		if (mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * The code below can only correctly "slide" (perform relocations) for one
		 * value of the slide amount. So if a file has a non-zero slide, it has to
		 * match any previous value. A zero slide value is ok for things that are
		 * just directly mapped.
		 */
		if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
			shared_region->sr_slide = srfmp->slide;
		} else if (shared_region->sr_slide != 0 &&
		    srfmp->slide != 0 &&
		    shared_region->sr_slide != srfmp->slide) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: more than one non-zero slide value "
				"slide 1:0x%x slide 2:0x%x\n ",
				shared_region->sr_slide, srfmp->slide));
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

#if __arm64__
		if ((shared_region->sr_64bit ||
		    page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
		    ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
			printf("FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
			    __FUNCTION__, srfmp->slide);
			kr = KERN_INVALID_ARGUMENT;
			break;
		}
#endif /* __arm64__ */

		/*
		 * An FD of -1 means we need to copyin the data to an anonymous object.
		 */
		if (srfmp->fd == -1) {
			assert(mappings_count == 1);
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[0]: "
				"address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
				"maxprot:0x%x prot:0x%x fd==-1\n",
				(long long)mappings[0].sms_address,
				(long long)mappings[0].sms_size,
				(long long)mappings[0].sms_file_offset,
				mappings[0].sms_max_prot,
				mappings[0].sms_init_prot));

			/*
			 * We need an anon object to hold the data in the shared region.
			 * The size needs to be suitable to map into kernel.
			 */
			obj_size = vm_object_round_page(mappings->sms_size);
			object = vm_object_allocate(obj_size);
			if (object == VM_OBJECT_NULL) {
				printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
				kr = KERN_RESOURCE_SHORTAGE;
				break;
			}

			/*
			 * map the object into the kernel
			 */
			vm_map_offset_t kaddr = 0;
			vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
			kr = vm_map_enter(kernel_map,
			    &kaddr,
			    obj_size,
			    0,
			    VM_FLAGS_ANYWHERE,
			    vmk_flags,
			    VM_KERN_MEMORY_NONE,
			    object,
			    0,
			    FALSE,
			    (VM_PROT_READ | VM_PROT_WRITE),
			    (VM_PROT_READ | VM_PROT_WRITE),
			    VM_INHERIT_NONE);
			if (kr != KERN_SUCCESS) {
				printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
				vm_object_deallocate(object);
				object = VM_OBJECT_NULL;
				break;
			}

			/*
			 * We'll need another reference to keep the object alive after
			 * we vm_map_remove() it from the kernel.
			 */
			vm_object_reference(object);

			/*
			 * Zero out the object's pages, so we can't leak data.
			 */
			bzero((void *)kaddr, obj_size);

			/*
			 * Copyin the data from dyld to the new object.
			 * Then remove the kernel mapping.
			 */
			int copyin_err =
			    copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
			vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
			if (copyin_err) {
				printf("%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
				switch (copyin_err) {
				case EPERM:
				case EACCES:
					kr = KERN_PROTECTION_FAILURE;
					break;
				case EFAULT:
					kr = KERN_INVALID_ADDRESS;
					break;
				default:
					kr = KERN_FAILURE;
					break;
				}
				vm_object_deallocate(object);
				object = VM_OBJECT_NULL;
				break;
			}

			/*
			 * Finally map the object into the shared region.
			 */
			target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
			vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
			vmk_flags.vmkf_already = TRUE;
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmkf_permanent = shared_region_make_permanent(shared_region,
			    mappings[0].sms_max_prot);
			kr = vm_map_enter(
				sr_map,
				&target_address,
				vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
				0,
				VM_FLAGS_FIXED,
				vmk_flags,
				VM_KERN_MEMORY_NONE,
				object,
				0,
				TRUE,
				mappings[0].sms_init_prot & VM_PROT_ALL,
				mappings[0].sms_max_prot & VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
			if (kr != KERN_SUCCESS) {
				printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
				vm_object_deallocate(object);
				break;
			}

			if (mappings[0].sms_address < *sfm_min_address) {
				*sfm_min_address = mappings[0].sms_address;
			}

			if (os_add_overflow(mappings[0].sms_address,
			    mappings[0].sms_size,
			    &sfm_end) ||
			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
			    mappings[0].sms_address)) {
				/* overflow */
				kr = KERN_INVALID_ARGUMENT;
				break;
			}

			if (sfm_end > *sfm_max_address) {
				*sfm_max_address = sfm_end;
			}

			continue;
		}

		/* get the VM object associated with the file to be mapped */
		file_object = memory_object_control_to_vm_object(file_control);
		assert(file_object);

		if (!file_object->object_is_shared_cache) {
			vm_object_lock(file_object);
			file_object->object_is_shared_cache = true;
			vm_object_unlock(file_object);
		}

#if CONFIG_SECLUDED_MEMORY
		/*
		 * Camera will need the shared cache, so don't put the pages
		 * on the secluded queue, assume that's the primary region.
		 * Also keep DEXT shared cache pages off secluded.
		 */
		if (primary_system_shared_region == NULL ||
		    primary_system_shared_region == shared_region ||
		    shared_region->sr_driverkit) {
			memory_object_mark_eligible_for_secluded(file_control, FALSE);
		}
#endif /* CONFIG_SECLUDED_MEMORY */

		/* establish the mappings for that file */
		for (i = 0; i < mappings_count; i++) {
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[%d]: "
				"address:0x%016llx size:0x%016llx offset:0x%016llx "
				"maxprot:0x%x prot:0x%x\n",
				i,
				(long long)mappings[i].sms_address,
				(long long)mappings[i].sms_size,
				(long long)mappings[i].sms_file_offset,
				mappings[i].sms_max_prot,
				mappings[i].sms_init_prot));

			if (mappings[i].sms_address < *sfm_min_address) {
				*sfm_min_address = mappings[i].sms_address;
			}

			if (os_add_overflow(mappings[i].sms_address,
			    mappings[i].sms_size,
			    &sfm_end) ||
			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
			    mappings[i].sms_address)) {
				/* overflow */
				kr = KERN_INVALID_ARGUMENT;
				break;
			}

			if (sfm_end > *sfm_max_address) {
				*sfm_max_address = sfm_end;
			}

			if (mappings[i].sms_init_prot & VM_PROT_ZF) {
				/* zero-filled memory */
				map_port = MACH_PORT_NULL;
			} else {
				/* file-backed memory */
				__IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
			}

			/*
			 * Remember which mappings need sliding.
			 */
			if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
				if (*mappings_to_slide_cnt == vmsr_num_slides) {
					SHARED_REGION_TRACE_INFO(
						("shared_region: mapping[%d]: "
						"address:0x%016llx size:0x%016llx "
						"offset:0x%016llx "
						"maxprot:0x%x prot:0x%x "
						"too many mappings to slide...\n",
						i,
						(long long)mappings[i].sms_address,
						(long long)mappings[i].sms_size,
						(long long)mappings[i].sms_file_offset,
						mappings[i].sms_max_prot,
						mappings[i].sms_init_prot));
				} else {
					mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
					*mappings_to_slide_cnt += 1;
				}
			}

			/* mapping's address is relative to the shared region base */
			target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);

			vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
			vmk_flags.vmkf_already = TRUE;
			/* no copy-on-read for mapped binaries */
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmkf_permanent = shared_region_make_permanent(
				shared_region,
				mappings[i].sms_max_prot);


			/* establish that mapping, OK if it's "already" there */
			if (map_port == MACH_PORT_NULL) {
				/*
				 * We want to map some anonymous memory in a shared region.
				 * We have to create the VM object now, so that it can be mapped "copy-on-write".
				 */
				obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
				object = vm_object_allocate(obj_size);
				if (object == VM_OBJECT_NULL) {
					kr = KERN_RESOURCE_SHORTAGE;
				} else {
					kr = vm_map_enter(
						sr_map,
						&target_address,
						vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
						0,
						VM_FLAGS_FIXED,
						vmk_flags,
						VM_KERN_MEMORY_NONE,
						object,
						0,
						TRUE,
						mappings[i].sms_init_prot & VM_PROT_ALL,
						mappings[i].sms_max_prot & VM_PROT_ALL,
						VM_INHERIT_DEFAULT);
				}
			} else {
				object = VM_OBJECT_NULL; /* no anonymous memory here */
				kr = vm_map_enter_mem_object(
					sr_map,
					&target_address,
					vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1801 0,
1802 VM_FLAGS_FIXED,
1803 vmk_flags,
1804 VM_KERN_MEMORY_NONE,
1805 map_port,
1806 mappings[i].sms_file_offset,
1807 TRUE,
1808 mappings[i].sms_init_prot & VM_PROT_ALL,
1809 mappings[i].sms_max_prot & VM_PROT_ALL,
1810 VM_INHERIT_DEFAULT);
1811 }
1812
1813 if (kr == KERN_SUCCESS) {
1814 /*
1815 * Record the first successful mapping(s) in the shared
1816 * region by file. We're protected by "sr_mapping_in_progress"
1817 * here, so no need to lock "shared_region".
1818 *
1819 * Note that if we have an AOT shared cache (ARM) for a
1820 * translated task, then it's always the first file.
1821 * The original "native" (i.e. x86) shared cache is the
1822 * second file.
1823 */
1824
1825 if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1826 shared_region->sr_first_mapping = target_address;
1827 }
1828
1829 if (*mappings_to_slide_cnt > 0 &&
1830 mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1831 slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1832 slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1833 }
1834
1835 /*
1836 * Record the lowest writable address in this
1837 * sub map, to log any unexpected unnesting below
1838 * that address (see log_unnest_badness()).
1839 */
1840 if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1841 sr_map->is_nested_map &&
1842 (lowest_unnestable_addr == 0 ||
1843 (target_address < lowest_unnestable_addr))) {
1844 lowest_unnestable_addr = target_address;
1845 }
1846 } else {
1847 if (map_port == MACH_PORT_NULL) {
1848 /*
1849 * Get rid of the VM object we just created
1850 * but failed to map.
1851 */
1852 vm_object_deallocate(object);
1853 object = VM_OBJECT_NULL;
1854 }
1855 if (kr == KERN_MEMORY_PRESENT) {
1856 /*
1857 * This exact mapping was already there:
1858 * that's fine.
1859 */
1860 SHARED_REGION_TRACE_INFO(
1861 ("shared_region: mapping[%d]: "
1862 "address:0x%016llx size:0x%016llx "
1863 "offset:0x%016llx "
1864 "maxprot:0x%x prot:0x%x "
1865 "already mapped...\n",
1866 i,
1867 (long long)mappings[i].sms_address,
1868 (long long)mappings[i].sms_size,
1869 (long long)mappings[i].sms_file_offset,
1870 mappings[i].sms_max_prot,
1871 mappings[i].sms_init_prot));
1872 /*
1873 * We didn't establish this mapping ourselves;
1874 * let's reset its size, so that we do not
1875 * attempt to undo it if an error occurs later.
1876 */
1877 mappings[i].sms_size = 0;
1878 kr = KERN_SUCCESS;
1879 } else {
1880 break;
1881 }
1882 }
1883 }
1884
1885 if (kr != KERN_SUCCESS) {
1886 break;
1887 }
1888 }
1889
1890 if (kr != KERN_SUCCESS) {
1891 /* the last mapping we tried (mappings[i]) failed ! */
1892 assert(i < mappings_count);
1893 SHARED_REGION_TRACE_ERROR(
1894 ("shared_region: mapping[%d]: "
1895 "address:0x%016llx size:0x%016llx "
1896 "offset:0x%016llx "
1897 "maxprot:0x%x prot:0x%x failed 0x%x\n",
1898 i,
1899 (long long)mappings[i].sms_address,
1900 (long long)mappings[i].sms_size,
1901 (long long)mappings[i].sms_file_offset,
1902 mappings[i].sms_max_prot,
1903 mappings[i].sms_init_prot,
1904 kr));
1905
1906 /*
1907 * vm_shared_region_undo_mappings(), when passed a NULL map,
1908 * waits for "sr_mapping_in_progress" to become false -- but we
1909 * are the ones holding it true here. Assert that sr_map is
1910 * non-NULL so we cannot deadlock waiting on ourselves.
1911 */
1912 assert(sr_map != NULL);
1913 /* undo all the previous mappings */
1914 vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
1915 return kr;
1916 }
1917
1918 *lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1919 *sr_map_ptr = sr_map;
1920 return KERN_SUCCESS;
1921 }
1922
1923 /* forward declaration */
1924 __attribute__((noinline))
1925 static void
1926 vm_shared_region_map_file_final(
1927 vm_shared_region_t shared_region,
1928 vm_map_t sr_map,
1929 mach_vm_offset_t sfm_min_address,
1930 mach_vm_offset_t sfm_max_address);
1931
1932 /*
1933 * Establish some mappings of a file in the shared region.
1934 * This is used by "dyld" via the shared_region_map_and_slide_2_np() system call
1935 * to populate the shared region with the appropriate shared cache.
1936 *
1937 * One could also call it several times to incrementally load several
1938 * libraries, as long as they do not overlap.
1939 * It will return KERN_SUCCESS if the mappings were successfully established
1940 * or if they were already established identically by another process.
1941 */
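/*
 * For orientation: a minimal sketch of how dyld is expected to drive
 * this path from userspace via shared_region_map_and_slide_2_np().
 * The authoritative struct layouts live in the mach headers; only the
 * sms_* fields this file actually consumes are shown, and the variable
 * names are illustrative, so treat this as a sketch, not the API:
 *
 *	struct shared_file_mapping_slide_np mapping = {
 *		.sms_address     = cache_text_address,     // where to map it
 *		.sms_size        = cache_text_size,
 *		.sms_file_offset = cache_text_file_offset,
 *		.sms_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE,
 *		.sms_init_prot   = VM_PROT_READ | VM_PROT_EXECUTE,
 *	};
 *	// one call can establish all the mappings for the cache file(s)
 *	shared_region_map_and_slide_2_np(files_count, files,
 *	    mappings_count, &mapping);
 */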
1942 __attribute__((noinline))
1943 kern_return_t
1944 vm_shared_region_map_file(
1945 vm_shared_region_t shared_region,
1946 int sr_file_mappings_count,
1947 struct _sr_file_mappings *sr_file_mappings)
1948 {
1949 kern_return_t kr = KERN_SUCCESS;
1950 unsigned int i;
1951 unsigned int mappings_to_slide_cnt = 0;
1952 mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1;
1953 mach_vm_offset_t sfm_max_address = 0;
1954 vm_map_t sr_map = NULL;
1955 vm_map_offset_t lowest_unnestable_addr = 0;
1956 unsigned int vmsr_num_slides = 0;
1957 typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
1958 slid_mappings_t *slid_mappings = NULL; /* [0..vmsr_num_slides] */
1959 memory_object_control_t *slid_file_controls = NULL; /* [0..vmsr_num_slides] */
1960 struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
1961 struct _sr_file_mappings *srfmp;
1962
1963 /*
1964 * Figure out how many of the mappings have slides.
1965 */
1966 for (srfmp = &sr_file_mappings[0];
1967 srfmp < &sr_file_mappings[sr_file_mappings_count];
1968 srfmp++) {
1969 for (i = 0; i < srfmp->mappings_count; ++i) {
1970 if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1971 ++vmsr_num_slides;
1972 }
1973 }
1974 }
1975
1976 /* Allocate per slide data structures */
1977 if (vmsr_num_slides > 0) {
1978 slid_mappings =
1979 kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
1980 slid_file_controls =
1981 kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
1982 mappings_to_slide =
1983 kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
1984 }
1985
1986 kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
1987 &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
1988 &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
1989 if (kr != KERN_SUCCESS) {
1990 vm_shared_region_lock();
1991 goto done;
1992 }
1993 assert(vmsr_num_slides == mappings_to_slide_cnt);
1994
1995 /*
1996 * The call above installed direct mappings to the shared cache file.
1997 * Now we go back and overwrite the mappings that need relocation
1998 * with a special shared region pager.
1999 *
2000 * Note that this does copyin() of data, needed by the pager, which
2001 * the previous code just established mappings for. This is why we
2002 * do it in a separate pass.
2003 */
2004 #if __has_feature(ptrauth_calls)
2005 /*
2006 * Allocate the storage needed for the sr_auth_section array.
2007 */
2008 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2009 if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
2010 shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2011 !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
2012 ++shared_region->sr_num_auth_section;
2013 }
2014 }
2015 if (shared_region->sr_num_auth_section > 0) {
2016 shared_region->sr_auth_section =
2017 kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
2018 Z_WAITOK | Z_ZERO);
2019 }
2020 #endif /* __has_feature(ptrauth_calls) */
2021 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2022 kr = vm_shared_region_slide(shared_region->sr_slide,
2023 mappings_to_slide[i]->sms_file_offset,
2024 mappings_to_slide[i]->sms_size,
2025 mappings_to_slide[i]->sms_slide_start,
2026 mappings_to_slide[i]->sms_slide_size,
2027 slid_mappings[i],
2028 slid_file_controls[i],
2029 mappings_to_slide[i]->sms_max_prot);
2030 if (kr != KERN_SUCCESS) {
2031 SHARED_REGION_TRACE_ERROR(
2032 ("shared_region: region_slide("
2033 "slide:0x%x start:0x%016llx "
2034 "size:0x%016llx) failed 0x%x\n",
2035 shared_region->sr_slide,
2036 (long long)mappings_to_slide[i]->sms_slide_start,
2037 (long long)mappings_to_slide[i]->sms_slide_size,
2038 kr));
2039 vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
2040 &sr_file_mappings[0],
2041 &sr_file_mappings[sr_file_mappings_count - 1],
2042 sr_file_mappings_count);
2043 vm_shared_region_lock();
2044 goto done;
2045 }
2046 }
2047
2048 assert(kr == KERN_SUCCESS);
2049
2050 /* adjust the map's "lowest_unnestable_start" */
2051 lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
2052 if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
2053 vm_map_lock(sr_map);
2054 sr_map->lowest_unnestable_start = lowest_unnestable_addr;
2055 vm_map_unlock(sr_map);
2056 }
2057
2058 vm_shared_region_lock();
2059 assert(shared_region->sr_ref_count > 0);
2060 assert(shared_region->sr_mapping_in_progress);
2061
2062 vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
2063
2064 done:
2065 /*
2066 * We're done working on that shared region.
2067 * Wake up any waiting threads.
2068 */
2069 shared_region->sr_mapping_in_progress = FALSE;
2070 thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
2071 vm_shared_region_unlock();
2072
2073 #if __has_feature(ptrauth_calls)
2074 if (kr == KERN_SUCCESS) {
2075 /*
2076 * Since authenticated mappings were just added to the shared region,
2077 * go back and remap them into private mappings for this task.
2078 */
2079 kr = vm_shared_region_auth_remap(shared_region);
2080 }
2081 #endif /* __has_feature(ptrauth_calls) */
2082
2083 /* Cache shared region info needed for telemetry in the task */
2084 task_t task;
2085 if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
2086 mach_vm_offset_t start_address;
2087 (void)vm_shared_region_start_address(shared_region, &start_address, task);
2088 }
2089
2090 SHARED_REGION_TRACE_DEBUG(
2091 ("shared_region: map(%p) <- 0x%x \n",
2092 (void *)VM_KERNEL_ADDRPERM(shared_region), kr));
2093 if (vmsr_num_slides > 0) {
2094 kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
2095 kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
2096 kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
2097 mappings_to_slide);
2098 }
2099 return kr;
2100 }
2101
2102 /*
2103 * Final part of vm_shared_region_map_file().
2104 * Kept in a separate function to avoid blowing out the stack.
2105 */
2106 __attribute__((noinline))
2107 static void
2108 vm_shared_region_map_file_final(
2109 vm_shared_region_t shared_region,
2110 vm_map_t sr_map,
2111 mach_vm_offset_t sfm_min_address,
2112 mach_vm_offset_t sfm_max_address)
2113 {
2114 struct _dyld_cache_header sr_cache_header;
2115 int error;
2116 size_t image_array_length;
2117 struct _dyld_cache_image_text_info *sr_image_layout;
2118 boolean_t locally_built = FALSE;
2119
2120
2121 /*
2122 * copy in the shared region UUID to the shared region structure.
2123 * we do this indirectly by first copying in the shared cache header
2124 * and then copying the UUID from there because we'll need to look
2125 * at other content from the shared cache header.
2126 */
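/*
 * Only a handful of _dyld_cache_header fields are consumed below. As a
 * rough sketch (offsets and the many other fields omitted -- see the
 * dyld project for the full layout), the relevant subset looks like:
 *
 *	struct _dyld_cache_header {
 *		...
 *		uuid_t   uuid;               // copied into sr_uuid
 *		...
 *		uint64_t imagesTextOffset;   // where the image text infos live
 *		uint64_t imagesTextCount;    // how many of them there are
 *		...
 *		unsigned locallyBuiltCache : 1;  // was the cache built locally?
 *		...
 *	};
 */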
2127 if (!shared_region->sr_uuid_copied) {
2128 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
2129 (char *)&sr_cache_header,
2130 sizeof(sr_cache_header));
2131 if (error == 0) {
2132 memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
2133 shared_region->sr_uuid_copied = TRUE;
2134 locally_built = sr_cache_header.locallyBuiltCache;
2135 } else {
2136 #if DEVELOPMENT || DEBUG
2137 panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2138 "offset:0 size:0x%016llx) failed with %d\n",
2139 (long long)shared_region->sr_base_address,
2140 (long long)shared_region->sr_first_mapping,
2141 (long long)sizeof(sr_cache_header),
2142 error);
2143 #endif /* DEVELOPMENT || DEBUG */
2144 shared_region->sr_uuid_copied = FALSE;
2145 }
2146 }
2147
2148 /*
2149 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by
2150 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
2151 * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
2152 * region. In that case, launchd re-exec's itself, so we may go through this path multiple times. We
2153 * let the most recent one win.
2154 *
2155 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
2156 */
2157 bool is_init_task = (task_pid(current_task()) == 1);
2158 if (shared_region->sr_uuid_copied && is_init_task) {
2159 /* Copy in the shared cache layout if we're running with a locally built shared cache */
2160 if (locally_built) {
2161 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
2162 image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
2163 sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
2164 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
2165 sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
2166 if (error == 0) {
2167 if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
2168 panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
2169 }
2170 shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
2171 for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
2172 memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
2173 sizeof(shared_region->sr_images[index].imageUUID));
2174 shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
2175 }
2176
2177 shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
2178 } else {
2179 #if DEVELOPMENT || DEBUG
2180 panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2181 "offset:0x%016llx size:0x%016llx) failed with %d\n",
2182 (long long)shared_region->sr_base_address,
2183 (long long)shared_region->sr_first_mapping,
2184 (long long)sr_cache_header.imagesTextOffset,
2185 (long long)image_array_length,
2186 error);
2187 #endif /* DEVELOPMENT || DEBUG */
2188 }
2189 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
2190 kfree_data(sr_image_layout, image_array_length);
2191 sr_image_layout = NULL;
2192 }
2193 primary_system_shared_region = shared_region;
2194 }
2195
2196 /*
2197 * If we succeeded, we know the bounds of the shared region.
2198 * Trim our pmaps to only cover this range (if applicable to
2199 * this platform).
2200 */
2201 if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
2202 pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
2203 }
2204 }
2205
2206 /*
2207 * Retrieve a task's shared region and grab an extra reference to
2208 * make sure it doesn't disappear while the caller is using it.
2209 * The caller is responsible for consuming that extra reference if
2210 * necessary.
2211 *
2212 * This also tries to trim the pmap for the shared region.
2213 */
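/*
 * Typical usage, sketched; the deallocate consumes the extra
 * reference this function takes:
 *
 *	vm_shared_region_t sr = vm_shared_region_trim_and_get(task);
 *	if (sr != NULL) {
 *		... use sr ...
 *		vm_shared_region_deallocate(sr);
 *	}
 */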
2214 vm_shared_region_t
2215 vm_shared_region_trim_and_get(task_t task)
2216 {
2217 vm_shared_region_t shared_region;
2218 ipc_port_t sr_handle;
2219 vm_named_entry_t sr_mem_entry;
2220 vm_map_t sr_map;
2221
2222 /* Get the shared region and the map. */
2223 shared_region = vm_shared_region_get(task);
2224 if (shared_region == NULL) {
2225 return NULL;
2226 }
2227
2228 sr_handle = shared_region->sr_mem_entry;
2229 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
2230 sr_map = sr_mem_entry->backing.map;
2231
2232 /* Trim the pmap if possible. */
2233 if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
2234 pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
2235 }
2236
2237 return shared_region;
2238 }
2239
2240 /*
2241 * Enter the appropriate shared region into "map" for "task".
2242 * This involves looking up the shared region (and possibly creating a new
2243 * one) for the desired environment, then mapping the VM sub map into the
2244 * task's VM "map", with the appropriate level of pmap-nesting.
2245 */
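/*
 * The body below maps the submap in up to three consecutive chunks
 * (the address ranges are illustrative only):
 *
 *	[sr_address, sr_pmap_nesting_start)              plain mapping
 *	[sr_pmap_nesting_start, +sr_pmap_nesting_size)   vmkf_nested_pmap
 *	[end of nested range, sr_address + sr_size)      plain mapping
 *
 * On most architectures the nested range covers the entire shared
 * region, so only the middle mapping is actually made.
 */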
2246 kern_return_t
2247 vm_shared_region_enter(
2248 struct _vm_map *map,
2249 struct task *task,
2250 boolean_t is_64bit,
2251 void *fsroot,
2252 cpu_type_t cpu,
2253 cpu_subtype_t cpu_subtype,
2254 boolean_t reslide,
2255 boolean_t is_driverkit,
2256 uint32_t rsr_version)
2257 {
2258 kern_return_t kr;
2259 vm_shared_region_t shared_region;
2260 vm_map_offset_t sr_address, sr_offset, target_address;
2261 vm_map_size_t sr_size, mapping_size;
2262 vm_map_offset_t sr_pmap_nesting_start;
2263 vm_map_size_t sr_pmap_nesting_size;
2264 ipc_port_t sr_handle;
2265 vm_prot_t cur_prot, max_prot;
2266 vm_map_kernel_flags_t vmk_flags;
2267
2268 SHARED_REGION_TRACE_DEBUG(
2269 ("shared_region: -> "
2270 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2271 (void *)VM_KERNEL_ADDRPERM(map),
2272 (void *)VM_KERNEL_ADDRPERM(task),
2273 (void *)VM_KERNEL_ADDRPERM(fsroot),
2274 cpu, cpu_subtype, is_64bit, is_driverkit));
2275
2276 /* lookup (create if needed) the shared region for this environment */
2277 shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
2278 if (shared_region == NULL) {
2279 /* this should not happen ! */
2280 SHARED_REGION_TRACE_ERROR(
2281 ("shared_region: -> "
2282 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2283 "lookup failed !\n",
2284 (void *)VM_KERNEL_ADDRPERM(map),
2285 (void *)VM_KERNEL_ADDRPERM(task),
2286 (void *)VM_KERNEL_ADDRPERM(fsroot),
2287 cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2288 //panic("shared_region_enter: lookup failed");
2289 return KERN_FAILURE;
2290 }
2291
2292 kr = KERN_SUCCESS;
2293 /* no need to lock since this data is never modified */
2294 sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2295 sr_size = (vm_map_size_t)shared_region->sr_size;
2296 sr_handle = shared_region->sr_mem_entry;
2297 sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2298 sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2299 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2300
2301 cur_prot = VM_PROT_READ;
2302 if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2303 /*
2304 * XXX BINARY COMPATIBILITY
2305 * java6 apparently needs to modify some code in the
2306 * dyld shared cache and needs to be allowed to add
2307 * write access...
2308 */
2309 max_prot = VM_PROT_ALL;
2310 } else {
2311 max_prot = VM_PROT_READ;
2312 /* make it "permanent" to protect against re-mappings */
2313 vmk_flags.vmkf_permanent = true;
2314 }
2315
2316 /*
2317 * Start mapping the shared region's VM sub map into the task's VM map.
2318 */
2319 sr_offset = 0;
2320
2321 if (sr_pmap_nesting_start > sr_address) {
2322 /* we need to map a range without pmap-nesting first */
2323 target_address = sr_address;
2324 mapping_size = sr_pmap_nesting_start - sr_address;
2325 kr = vm_map_enter_mem_object(
2326 map,
2327 &target_address,
2328 mapping_size,
2329 0,
2330 VM_FLAGS_FIXED,
2331 vmk_flags,
2332 VM_KERN_MEMORY_NONE,
2333 sr_handle,
2334 sr_offset,
2335 TRUE,
2336 cur_prot,
2337 max_prot,
2338 VM_INHERIT_SHARE);
2339 if (kr != KERN_SUCCESS) {
2340 SHARED_REGION_TRACE_ERROR(
2341 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2342 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2343 (void *)VM_KERNEL_ADDRPERM(map),
2344 (void *)VM_KERNEL_ADDRPERM(task),
2345 (void *)VM_KERNEL_ADDRPERM(fsroot),
2346 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2347 (long long)target_address,
2348 (long long)mapping_size,
2349 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2350 goto done;
2351 }
2352 SHARED_REGION_TRACE_DEBUG(
2353 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2354 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2355 (void *)VM_KERNEL_ADDRPERM(map),
2356 (void *)VM_KERNEL_ADDRPERM(task),
2357 (void *)VM_KERNEL_ADDRPERM(fsroot),
2358 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2359 (long long)target_address, (long long)mapping_size,
2360 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2361 sr_offset += mapping_size;
2362 sr_size -= mapping_size;
2363 }
2364
2365 /* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2366 vmk_flags.vmkf_nested_pmap = TRUE;
2367
2368 /*
2369 * Use pmap-nesting to map the majority of the shared region into the task's
2370 * VM space. Very rarely will architectures have a shared region that isn't
2371 * the same size as the pmap-nesting region, or start at a different address
2372 * than the pmap-nesting region, so this code will map the entirety of the
2373 * shared region for most architectures.
2374 */
2375 assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2376 target_address = sr_pmap_nesting_start;
2377 kr = vm_map_enter_mem_object(
2378 map,
2379 &target_address,
2380 sr_pmap_nesting_size,
2381 0,
2382 VM_FLAGS_FIXED,
2383 vmk_flags,
2384 VM_MEMORY_SHARED_PMAP,
2385 sr_handle,
2386 sr_offset,
2387 TRUE,
2388 cur_prot,
2389 max_prot,
2390 VM_INHERIT_SHARE);
2391 if (kr != KERN_SUCCESS) {
2392 SHARED_REGION_TRACE_ERROR(
2393 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2394 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2395 (void *)VM_KERNEL_ADDRPERM(map),
2396 (void *)VM_KERNEL_ADDRPERM(task),
2397 (void *)VM_KERNEL_ADDRPERM(fsroot),
2398 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2399 (long long)target_address,
2400 (long long)sr_pmap_nesting_size,
2401 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2402 goto done;
2403 }
2404 SHARED_REGION_TRACE_DEBUG(
2405 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2406 "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2407 (void *)VM_KERNEL_ADDRPERM(map),
2408 (void *)VM_KERNEL_ADDRPERM(task),
2409 (void *)VM_KERNEL_ADDRPERM(fsroot),
2410 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2411 (long long)target_address, (long long)sr_pmap_nesting_size,
2412 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2413
2414 sr_offset += sr_pmap_nesting_size;
2415 sr_size -= sr_pmap_nesting_size;
2416
2417 if (sr_size > 0) {
2418 /* and there's some left to be mapped without pmap-nesting */
2419 vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
2420 target_address = sr_address + sr_offset;
2421 mapping_size = sr_size;
2422 kr = vm_map_enter_mem_object(
2423 map,
2424 &target_address,
2425 mapping_size,
2426 0,
2427 VM_FLAGS_FIXED,
2428 VM_MAP_KERNEL_FLAGS_NONE,
2429 VM_KERN_MEMORY_NONE,
2430 sr_handle,
2431 sr_offset,
2432 TRUE,
2433 cur_prot,
2434 max_prot,
2435 VM_INHERIT_SHARE);
2436 if (kr != KERN_SUCCESS) {
2437 SHARED_REGION_TRACE_ERROR(
2438 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2439 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2440 (void *)VM_KERNEL_ADDRPERM(map),
2441 (void *)VM_KERNEL_ADDRPERM(task),
2442 (void *)VM_KERNEL_ADDRPERM(fsroot),
2443 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2444 (long long)target_address,
2445 (long long)mapping_size,
2446 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2447 goto done;
2448 }
2449 SHARED_REGION_TRACE_DEBUG(
2450 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2451 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2452 (void *)VM_KERNEL_ADDRPERM(map),
2453 (void *)VM_KERNEL_ADDRPERM(task),
2454 (void *)VM_KERNEL_ADDRPERM(fsroot),
2455 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2456 (long long)target_address, (long long)mapping_size,
2457 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2458 sr_offset += mapping_size;
2459 sr_size -= mapping_size;
2460 }
2461 assert(sr_size == 0);
2462
2463 done:
2464 if (kr == KERN_SUCCESS) {
2465 /* let the task use that shared region */
2466 vm_shared_region_set(task, shared_region);
2467 } else {
2468 /* drop our reference since we're not using it */
2469 vm_shared_region_deallocate(shared_region);
2470 vm_shared_region_set(task, NULL);
2471 }
2472
2473 SHARED_REGION_TRACE_DEBUG(
2474 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2475 (void *)VM_KERNEL_ADDRPERM(map),
2476 (void *)VM_KERNEL_ADDRPERM(task),
2477 (void *)VM_KERNEL_ADDRPERM(fsroot),
2478 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2479 kr));
2480 return kr;
2481 }
2482
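/*
 * Remove the shared region from a task's address space. Rather than
 * leaving a hole, the whole range is overwritten in place with a
 * VM_PROT_NONE mapping (VM_FLAGS_OVERWRITE); vmkf_overwrite_immutable
 * allows replacing the otherwise-permanent entries created at enter time.
 */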
2483 void
2484 vm_shared_region_remove(
2485 task_t task,
2486 vm_shared_region_t sr)
2487 {
2488 vm_map_t map;
2489 mach_vm_offset_t start;
2490 mach_vm_size_t size;
2491 vm_tag_t tag;
2492 vm_map_kernel_flags_t vmk_flags;
2493 kern_return_t kr;
2494
2495 if (sr == NULL) {
2496 return;
2497 }
2498 map = get_task_map(task);
2499 start = sr->sr_base_address;
2500 size = sr->sr_size;
2501
2502 tag = VM_MEMORY_DYLD;
2503 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2504 vmk_flags.vmkf_overwrite_immutable = true;
2505 vmk_flags.vmkf_range_id = VM_MAP_RANGE_ID(map, tag);
2506
2507 kr = mach_vm_map_kernel(map,
2508 &start,
2509 size,
2510 0, /* mask */
2511 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
2512 vmk_flags,
2513 tag,
2514 MACH_PORT_NULL,
2515 0,
2516 FALSE, /* copy */
2517 VM_PROT_NONE,
2518 VM_PROT_NONE,
2519 VM_INHERIT_DEFAULT);
2520 if (kr != KERN_SUCCESS) {
2521 printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2522 }
2523 }
2524
2525 #define SANE_SLIDE_INFO_SIZE (2560*1024) /* can be changed if needed */
2526
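/*
 * Check a requested slide value against the current task's shared
 * region. Returns KERN_SUCCESS if sliding should proceed,
 * KERN_INVALID_ARGUMENT if that exact slide was already applied (so
 * the caller can skip re-sliding), and KERN_FAILURE on a mismatch.
 */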
2527 kern_return_t
2528 vm_shared_region_sliding_valid(uint32_t slide)
2529 {
2530 kern_return_t kr = KERN_SUCCESS;
2531 vm_shared_region_t sr = vm_shared_region_get(current_task());
2532
2533 /* No region yet? we're fine. */
2534 if (sr == NULL) {
2535 return kr;
2536 }
2537
2538 if (sr->sr_slide != 0 && slide != 0) {
2539 if (slide == sr->sr_slide) {
2540 /*
2541 * Request for sliding when we've
2542 * already done it with exactly the
2543 * same slide value before.
2544 * This isn't technically wrong, but
2545 * we don't want to slide again, so
2546 * we return KERN_INVALID_ARGUMENT.
2547 */
2548 kr = KERN_INVALID_ARGUMENT;
2549 } else {
2550 printf("Mismatched shared region slide\n");
2551 kr = KERN_FAILURE;
2552 }
2553 }
2554 vm_shared_region_deallocate(sr);
2555 return kr;
2556 }
2557
2558 /*
2559 * Actually create (really overwrite) the mapping to part of the shared cache which
2560 * undergoes relocation. This routine reads in the relocation info from dyld and
2561 * verifies it. It then creates a (or finds a matching) shared region pager which
2562 * handles the actual modification of the page contents and installs the mapping
2563 * using that pager.
2564 */
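/*
 * In outline, the steps below are:
 *
 *	1. copyin() and sanity-check the slide info supplied by dyld;
 *	2. build a vm_shared_region_slide_info describing the range;
 *	3. (arm64e auth sections only) stash that info for later
 *	   per-task auth pagers and return;
 *	4. otherwise create (or reuse) a shared_region_pager and map it
 *	   over the existing entry with VM_FLAGS_OVERWRITE.
 */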
2565 kern_return_t
2566 vm_shared_region_slide_mapping(
2567 vm_shared_region_t sr,
2568 user_addr_t slide_info_addr,
2569 mach_vm_size_t slide_info_size,
2570 mach_vm_offset_t start,
2571 mach_vm_size_t size,
2572 mach_vm_offset_t slid_mapping,
2573 uint32_t slide,
2574 memory_object_control_t sr_file_control,
2575 vm_prot_t prot)
2576 {
2577 kern_return_t kr;
2578 vm_object_t object = VM_OBJECT_NULL;
2579 vm_shared_region_slide_info_t si = NULL;
2580 vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL;
2581 struct vm_map_entry tmp_entry_store;
2582 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
2583 vm_map_t sr_map;
2584 int vm_flags;
2585 vm_map_kernel_flags_t vmk_flags;
2586 vm_map_offset_t map_addr;
2587 void *slide_info_entry = NULL;
2588 int error;
2589
2590 assert(sr->sr_slide_in_progress);
2591
2592 if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2593 return KERN_INVALID_ARGUMENT;
2594 }
2595
2596 /*
2597 * Copy in and verify the relocation information.
2598 */
2599 if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2600 printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2601 return KERN_FAILURE;
2602 }
2603 if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2604 printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2605 return KERN_FAILURE;
2606 }
2607
2608 slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2609 if (slide_info_entry == NULL) {
2610 return KERN_RESOURCE_SHORTAGE;
2611 }
2612 error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2613 if (error) {
2614 printf("copyin of slide_info failed\n");
2615 kr = KERN_INVALID_ADDRESS;
2616 goto done;
2617 }
2618
2619 if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
2620 printf("Sanity Check failed for slide_info\n");
2621 goto done;
2622 }
2623
2624 /*
2625 * Allocate and fill in a vm_shared_region_slide_info.
2626 * This will either be used by a new pager, or used to find
2627 * a pre-existing matching pager.
2628 */
2629 object = memory_object_control_to_vm_object(sr_file_control);
2630 if (object == VM_OBJECT_NULL || object->internal) {
2631 object = VM_OBJECT_NULL;
2632 kr = KERN_INVALID_ADDRESS;
2633 goto done;
2634 }
2635
2636 si = kalloc_type(struct vm_shared_region_slide_info,
2637 Z_WAITOK | Z_NOFAIL);
2638 vm_object_lock(object);
2639
2640 vm_object_reference_locked(object); /* for si->slide_object */
2641 object->object_is_shared_cache = TRUE;
2642 vm_object_unlock(object);
2643
2644 si->si_slide_info_entry = slide_info_entry;
2645 si->si_slide_info_size = slide_info_size;
2646
2647 assert(slid_mapping != (mach_vm_offset_t) -1);
2648 si->si_slid_address = slid_mapping + sr->sr_base_address;
2649 si->si_slide_object = object;
2650 si->si_start = start;
2651 si->si_end = si->si_start + size;
2652 si->si_slide = slide;
2653 #if __has_feature(ptrauth_calls)
2654 /*
2655 * If there is authenticated pointer data in this slid mapping,
2656 * then just add the information needed to create new pagers for
2657 * different shared_region_id's later.
2658 */
2659 if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2660 sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2661 !(prot & VM_PROT_NOAUTH)) {
2662 if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
2663 printf("Too many auth/private sections for shared region!!\n");
2664 kr = KERN_INVALID_ARGUMENT;
2665 goto done;
2666 }
2667 si->si_ptrauth = TRUE;
2668 sr->sr_auth_section[sr->sr_next_auth_section++] = si;
2669 /*
2670 * Remember the shared region, since that's where we'll
2671 * stash this info for all auth pagers to share. Each pager
2672 * will need to take a reference to it.
2673 */
2674 si->si_shared_region = sr;
2675 kr = KERN_SUCCESS;
2676 goto done;
2677 }
2678 si->si_shared_region = NULL;
2679 si->si_ptrauth = FALSE;
2680 #else /* __has_feature(ptrauth_calls) */
2681 (void)prot; /* silence unused warning */
2682 #endif /* __has_feature(ptrauth_calls) */
2683
2684 /*
2685 * find the pre-existing shared region's map entry to slide
2686 */
2687 sr_map = vm_shared_region_vm_map(sr);
2688 kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
2689 if (kr != KERN_SUCCESS) {
2690 goto done;
2691 }
2692 tmp_entry = &tmp_entry_store;
2693
2694 /*
2695 * The object must exactly cover the region to slide.
2696 */
2697 assert(VME_OFFSET(tmp_entry) == start);
2698 assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2699
2700 /* create a "shared_region" sliding pager */
2701 sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
2702 if (sr_pager == MEMORY_OBJECT_NULL) {
2703 kr = KERN_RESOURCE_SHORTAGE;
2704 goto done;
2705 }
2706
2707 #if CONFIG_SECLUDED_MEMORY
2708 /*
2709 * Pages backed by the shared region pagers used by camera or
2710 * DEXT should stay off the secluded queue.
2711 */
2712 if (primary_system_shared_region == NULL ||
2713 primary_system_shared_region == sr ||
2714 sr->sr_driverkit) {
2715 memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2716 }
2717 #endif /* CONFIG_SECLUDED_MEMORY */
2718
2719 /* map that pager over the portion of the mapping that needs sliding */
2720 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
2721 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2722 vmk_flags.vmkf_overwrite_immutable = TRUE;
2723 map_addr = tmp_entry->vme_start;
2724 vmk_flags.vmkf_permanent = shared_region_make_permanent(sr,
2725 tmp_entry->max_protection);
2726 kr = vm_map_enter_mem_object(sr_map,
2727 &map_addr,
2728 (tmp_entry->vme_end - tmp_entry->vme_start),
2729 (mach_vm_offset_t) 0,
2730 vm_flags,
2731 vmk_flags,
2732 VM_KERN_MEMORY_NONE,
2733 (ipc_port_t)(uintptr_t) sr_pager,
2734 0,
2735 TRUE,
2736 tmp_entry->protection,
2737 tmp_entry->max_protection,
2738 tmp_entry->inheritance);
2739 assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2740 assertf(map_addr == tmp_entry->vme_start,
2741 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2742 (uint64_t)map_addr,
2743 (uint64_t) tmp_entry->vme_start,
2744 tmp_entry);
2745
2746 /* success! */
2747 kr = KERN_SUCCESS;
2748
2749 done:
2750 if (sr_pager != NULL) {
2751 /*
2752 * Release the sr_pager reference obtained by shared_region_pager_setup().
2753 * The mapping, if it succeeded, is now holding a reference on the memory object.
2754 */
2755 memory_object_deallocate(sr_pager);
2756 sr_pager = MEMORY_OBJECT_NULL;
2757 }
2758 if (tmp_entry != NULL) {
2759 /* release extra ref on tmp_entry's VM object */
2760 vm_object_deallocate(VME_OBJECT(tmp_entry));
2761 tmp_entry = VM_MAP_ENTRY_NULL;
2762 }
2763
2764 if (kr != KERN_SUCCESS) {
2765 /* cleanup */
2766 if (si != NULL) {
2767 if (si->si_slide_object) {
2768 vm_object_deallocate(si->si_slide_object);
2769 si->si_slide_object = VM_OBJECT_NULL;
2770 }
2771 kfree_type(struct vm_shared_region_slide_info, si);
2772 si = NULL;
2773 }
2774 if (slide_info_entry != NULL) {
2775 kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2776 slide_info_entry = NULL;
2777 }
2778 }
2779 return kr;
2780 }
2781
2782 static kern_return_t
2783 vm_shared_region_slide_sanity_check_v1(
2784 vm_shared_region_slide_info_entry_v1_t s_info)
2785 {
2786 uint32_t pageIndex = 0;
2787 uint16_t entryIndex = 0;
2788 uint16_t *toc = NULL;
2789
2790 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2791 for (; pageIndex < s_info->toc_count; pageIndex++) {
2792 entryIndex = (uint16_t)(toc[pageIndex]);
2793
2794 if (entryIndex >= s_info->entry_count) {
2795 printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2796 return KERN_FAILURE;
2797 }
2798 }
2799 return KERN_SUCCESS;
2800 }
2801
2802 static kern_return_t
2803 vm_shared_region_slide_sanity_check_v2(
2804 vm_shared_region_slide_info_entry_v2_t s_info,
2805 mach_vm_size_t slide_info_size)
2806 {
2807 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2808 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2809 return KERN_FAILURE;
2810 }
2811 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2812 return KERN_FAILURE;
2813 }
2814
2815 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2816
2817 uint32_t page_starts_count = s_info->page_starts_count;
2818 uint32_t page_extras_count = s_info->page_extras_count;
2819 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2820 if (num_trailing_entries < page_starts_count) {
2821 return KERN_FAILURE;
2822 }
2823
2824 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2825 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2826 if (trailing_size >> 1 != num_trailing_entries) {
2827 return KERN_FAILURE;
2828 }
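/*
 * Example: with num_trailing_entries = 0x8000000000000000, the left
 * shift drops the top bit, so (trailing_size >> 1) yields 0, which no
 * longer equals num_trailing_entries and the overflow is caught.
 */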
2829
2830 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2831 if (required_size < sizeof(*s_info)) {
2832 return KERN_FAILURE;
2833 }
2834
2835 if (required_size > slide_info_size) {
2836 return KERN_FAILURE;
2837 }
2838
2839 return KERN_SUCCESS;
2840 }
2841
2842 static kern_return_t
2843 vm_shared_region_slide_sanity_check_v3(
2844 vm_shared_region_slide_info_entry_v3_t s_info,
2845 mach_vm_size_t slide_info_size)
2846 {
2847 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2848 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2849 return KERN_FAILURE;
2850 }
2851 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2852 printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SL 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2853 return KERN_FAILURE;
2854 }
2855
2856 uint32_t page_starts_count = s_info->page_starts_count;
2857 mach_vm_size_t num_trailing_entries = page_starts_count;
2858 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2859 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2860 if (required_size < sizeof(*s_info)) {
2861 printf("vm_shared_region_slide_sanity_check_v3: required_size != sizeof(*s_info) 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2862 return KERN_FAILURE;
2863 }
2864
2865 if (required_size > slide_info_size) {
2866 printf("vm_shared_region_slide_sanity_check_v3: required_size != slide_info_size 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2867 return KERN_FAILURE;
2868 }
2869
2870 return KERN_SUCCESS;
2871 }
2872
2873 static kern_return_t
2874 vm_shared_region_slide_sanity_check_v4(
2875 vm_shared_region_slide_info_entry_v4_t s_info,
2876 mach_vm_size_t slide_info_size)
2877 {
2878 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2879 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2880 return KERN_FAILURE;
2881 }
2882 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2883 return KERN_FAILURE;
2884 }
2885
2886 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2887
2888 uint32_t page_starts_count = s_info->page_starts_count;
2889 uint32_t page_extras_count = s_info->page_extras_count;
2890 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2891 if (num_trailing_entries < page_starts_count) {
2892 return KERN_FAILURE;
2893 }
2894
2895 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2896 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2897 if (trailing_size >> 1 != num_trailing_entries) {
2898 return KERN_FAILURE;
2899 }
2900
2901 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2902 if (required_size < sizeof(*s_info)) {
2903 return KERN_FAILURE;
2904 }
2905
2906 if (required_size > slide_info_size) {
2907 return KERN_FAILURE;
2908 }
2909
2910 return KERN_SUCCESS;
2911 }
2912
2913
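/*
 * Dispatch to the format-specific sanity check, based on the version
 * field of the slide info that was copied in from userspace.
 */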
2914 static kern_return_t
2915 vm_shared_region_slide_sanity_check(
2916 vm_shared_region_slide_info_entry_t s_info,
2917 mach_vm_size_t s_info_size)
2918 {
2919 kern_return_t kr;
2920
2921 switch (s_info->version) {
2922 case 1:
2923 kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
2924 break;
2925 case 2:
2926 kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2927 break;
2928 case 3:
2929 kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2930 break;
2931 case 4:
2932 kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2933 break;
2934 default:
2935 kr = KERN_FAILURE;
2936 }
2937 return kr;
2938 }
2939
2940 static kern_return_t
2941 vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2942 {
2943 uint16_t *toc = NULL;
2944 slide_info_entry_toc_t bitmap = NULL;
2945 uint32_t i = 0, j = 0;
2946 uint8_t b = 0;
2947 uint32_t slide = si->si_slide;
2948 int is_64 = task_has_64Bit_addr(current_task());
2949
2950 vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
2951 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2952
2953 if (pageIndex >= s_info->toc_count) {
2954 printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
2955 } else {
2956 uint16_t entryIndex = (uint16_t)(toc[pageIndex]);
2957 slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
2958
2959 if (entryIndex >= s_info->entry_count) {
2960 printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
2961 } else {
2962 bitmap = &slide_info_entries[entryIndex];
2963
2964 for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
2965 b = bitmap->entry[i];
2966 if (b != 0) {
2967 for (j = 0; j < 8; ++j) {
2968 if (b & (1 << j)) {
2969 uint32_t *ptr_to_slide;
2970 uint32_t old_value;
2971
2972 ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
2973 old_value = *ptr_to_slide;
2974 *ptr_to_slide += slide;
2975 if (is_64 && *ptr_to_slide < old_value) {
2976 /*
2977 * We just slid the low 32 bits of a 64-bit pointer
2978 * and it looks like there should have been a carry-over
2979 * to the upper 32 bits.
2980 * The sliding failed...
2981 */
2982 printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
2983 i, j, b, slide, old_value, *ptr_to_slide);
2984 return KERN_FAILURE;
2985 }
2986 }
2987 }
2988 }
2989 }
2990 }
2991 }
2992
2993 return KERN_SUCCESS;
2994 }
2995
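/*
 * Walk one page's chain of 32-bit rebase locations. Each location
 * packs the delta to the next location into the bits selected by
 * delta_mask and the pointer value into the rest; value_add is the
 * unslid base added back to non-zero values before the slide is
 * applied. A delta of 0 terminates the chain.
 */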
2996 static kern_return_t
2997 rebase_chain_32(
2998 uint8_t *page_content,
2999 uint16_t start_offset,
3000 uint32_t slide_amount,
3001 vm_shared_region_slide_info_entry_v2_t s_info)
3002 {
3003 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3004
3005 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3006 const uint32_t value_mask = ~delta_mask;
3007 const uint32_t value_add = (uint32_t)(s_info->value_add);
3008 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3009
3010 uint32_t page_offset = start_offset;
3011 uint32_t delta = 1;
3012
3013 while (delta != 0 && page_offset <= last_page_offset) {
3014 uint8_t *loc;
3015 uint32_t value;
3016
3017 loc = page_content + page_offset;
3018 memcpy(&value, loc, sizeof(value));
3019 delta = (value & delta_mask) >> delta_shift;
3020 value &= value_mask;
3021
3022 if (value != 0) {
3023 value += value_add;
3024 value += slide_amount;
3025 }
3026 memcpy(loc, &value, sizeof(value));
3027 page_offset += delta;
3028 }
3029
3030 /* If the offset went past the end of the page, then the slide data is invalid. */
3031 if (page_offset > last_page_offset) {
3032 return KERN_FAILURE;
3033 }
3034 return KERN_SUCCESS;
3035 }
3036
3037 static kern_return_t
3038 rebase_chain_64(
3039 uint8_t *page_content,
3040 uint16_t start_offset,
3041 uint32_t slide_amount,
3042 vm_shared_region_slide_info_entry_v2_t s_info)
3043 {
3044 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
3045
3046 const uint64_t delta_mask = s_info->delta_mask;
3047 const uint64_t value_mask = ~delta_mask;
3048 const uint64_t value_add = s_info->value_add;
3049 const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3050
3051 uint32_t page_offset = start_offset;
3052 uint32_t delta = 1;
3053
3054 while (delta != 0 && page_offset <= last_page_offset) {
3055 uint8_t *loc;
3056 uint64_t value;
3057
3058 loc = page_content + page_offset;
3059 memcpy(&value, loc, sizeof(value));
3060 delta = (uint32_t)((value & delta_mask) >> delta_shift);
3061 value &= value_mask;
3062
3063 if (value != 0) {
3064 value += value_add;
3065 value += slide_amount;
3066 }
3067 memcpy(loc, &value, sizeof(value));
3068 page_offset += delta;
3069 }
3070
3071 if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
3072 /* If a pointer straddling the page boundary needs to be adjusted, then
3073 * add the slide to the lower half. The encoding guarantees that the upper
3074 * half on the next page will need no masking.
3075 *
3076 * This assumes a little-endian machine and that the region being slid
3077 * never crosses a 4 GB boundary. */
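/*
 * Concretely: with 4096-byte slide pages, a pointer starting at page
 * offset 4092 has only its low 4 bytes on this page. Adding the
 * 32-bit slide to that low half suffices because no carry into the
 * high half (on the next page) is needed as long as the slid region
 * stays within a single 4 GB window.
 */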
3078
3079 uint8_t *loc = page_content + page_offset;
3080 uint32_t value;
3081
3082 memcpy(&value, loc, sizeof(value));
3083 value += slide_amount;
3084 memcpy(loc, &value, sizeof(value));
3085 } else if (page_offset > last_page_offset) {
3086 return KERN_FAILURE;
3087 }
3088
3089 return KERN_SUCCESS;
3090 }
3091
3092 static kern_return_t
3093 rebase_chain(
3094 boolean_t is_64,
3095 uint32_t pageIndex,
3096 uint8_t *page_content,
3097 uint16_t start_offset,
3098 uint32_t slide_amount,
3099 vm_shared_region_slide_info_entry_v2_t s_info)
3100 {
3101 kern_return_t kr;
3102 if (is_64) {
3103 kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3104 } else {
3105 kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3106 }
3107
3108 if (kr != KERN_SUCCESS) {
3109 printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3110 pageIndex, start_offset, slide_amount);
3111 }
3112 return kr;
3113 }
3114
3115 static kern_return_t
3116 vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3117 {
3118 vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
3119 const uint32_t slide_amount = si->si_slide;
3120
3121 /* The high bits of the delta_mask field are nonzero precisely when the shared
3122 * cache is 64-bit. */
3123 const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
3124
3125 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3126 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3127
3128 uint8_t *page_content = (uint8_t *)vaddr;
3129 uint16_t page_entry;
3130
3131 if (pageIndex >= s_info->page_starts_count) {
3132 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3133 pageIndex, s_info->page_starts_count);
3134 return KERN_FAILURE;
3135 }
3136 page_entry = page_starts[pageIndex];
3137
3138 if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
3139 return KERN_SUCCESS;
3140 }
3141
3142 if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
3143 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
3144 uint16_t info;
3145
3146 do {
3147 uint16_t page_start_offset;
3148 kern_return_t kr;
3149
3150 if (chain_index >= s_info->page_extras_count) {
3151 printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3152 chain_index, s_info->page_extras_count);
3153 return KERN_FAILURE;
3154 }
3155 info = page_extras[chain_index];
3156 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3157
3158 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3159 if (kr != KERN_SUCCESS) {
3160 return KERN_FAILURE;
3161 }
3162
3163 chain_index++;
3164 } while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
3165 } else {
3166 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3167 kern_return_t kr;
3168
3169 kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3170 if (kr != KERN_SUCCESS) {
3171 return KERN_FAILURE;
3172 }
3173 }
3174
3175 return KERN_SUCCESS;
3176 }
3177
3178
3179 static kern_return_t
3180 vm_shared_region_slide_page_v3(
3181 vm_shared_region_slide_info_t si,
3182 vm_offset_t vaddr,
3183 __unused mach_vm_offset_t uservaddr,
3184 uint32_t pageIndex,
3185 #if !__has_feature(ptrauth_calls)
3186 __unused
3187 #endif /* !__has_feature(ptrauth_calls) */
3188 uint64_t jop_key)
3189 {
3190 vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
3191 const uint32_t slide_amount = si->si_slide;
3192
3193 uint8_t *page_content = (uint8_t *)vaddr;
3194 uint16_t page_entry;
3195
3196 if (pageIndex >= s_info->page_starts_count) {
3197 printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3198 pageIndex, s_info->page_starts_count);
3199 return KERN_FAILURE;
3200 }
3201 page_entry = s_info->page_starts[pageIndex];
3202
3203 if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
3204 return KERN_SUCCESS;
3205 }
3206
3207 uint8_t* rebaseLocation = page_content;
3208 uint64_t delta = page_entry;
3209 do {
3210 rebaseLocation += delta;
3211 uint64_t value;
3212 memcpy(&value, rebaseLocation, sizeof(value));
3213 delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
3214
3215 // A pointer is one of :
3216 // {
3217 // uint64_t pointerValue : 51;
3218 // uint64_t offsetToNextPointer : 11;
3219 // uint64_t isBind : 1 = 0;
3220 // uint64_t authenticated : 1 = 0;
3221 // }
3222 // {
3223 // uint32_t offsetFromSharedCacheBase;
3224 // uint16_t diversityData;
3225 // uint16_t hasAddressDiversity : 1;
3226 // uint16_t hasDKey : 1;
3227 // uint16_t hasBKey : 1;
3228 // uint16_t offsetToNextPointer : 11;
3229 // uint16_t isBind : 1;
3230 // uint16_t authenticated : 1 = 1;
3231 // }
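// Worked example (illustrative value) of the non-authenticated rebase
// below: for value = 0x0004800000001000,
//   top8Bits     = value & 0x0007F80000000000 = 0x0004800000000000
//   bottom43Bits = value & 0x000007FFFFFFFFFF = 0x0000000000001000
//   targetValue  = (top8Bits << 13) | bottom43Bits = 0x9000000000001000
// i.e. the packed top byte moves up to bits 63..56, then the slide is
// added to the reconstructed pointer.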
3232
3233 bool isBind = (value & (1ULL << 62)) != 0; /* bit 62 set => bind; the masked value can never equal 1 */
3234 if (isBind) {
3235 return KERN_FAILURE;
3236 }
3237
3238 #if __has_feature(ptrauth_calls)
3239 uint16_t diversity_data = (uint16_t)(value >> 32);
3240 bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
3241 ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
3242 #endif /* __has_feature(ptrauth_calls) */
3243 bool isAuthenticated = (value & (1ULL << 63)) != 0;
3244
3245 if (isAuthenticated) {
3246 // The new value for a rebase is the low 32-bits of the threaded value plus the slide.
3247 value = (value & 0xFFFFFFFF) + slide_amount;
3248 // Add in the offset from the mach_header
3249 const uint64_t value_add = s_info->value_add;
3250 value += value_add;
3251
3252 #if __has_feature(ptrauth_calls)
3253 uint64_t discriminator = diversity_data;
3254 if (hasAddressDiversity) {
3255 // First calculate a new discriminator using the address of where we are trying to store the value
3256 uintptr_t pageOffset = rebaseLocation - page_content;
3257 discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3258 }
3259
3260 if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3261 /*
3262 * these pointers are used in user mode. disable the kernel key diversification
3263 * so we can sign them for use in user mode.
3264 */
3265 value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3266 }
3267 #endif /* __has_feature(ptrauth_calls) */
3268 } else {
3269 // The new value for a rebase is the low 51-bits of the threaded value plus the slide.
3270 // Regular pointer which needs to fit in 51-bits of value.
3271 // C++ RTTI uses the top bit, so we'll allow the whole top-byte
3272 // and the bottom 43-bits to be fit in to 51-bits.
			uint64_t top8Bits = value & 0x0007F80000000000ULL;
			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
			value = targetValue + slide_amount;
		}

		memcpy(rebaseLocation, &value, sizeof(value));
	} while (delta != 0);

	return KERN_SUCCESS;
}

static kern_return_t
rebase_chainv4(
	uint8_t *page_content,
	uint16_t start_offset,
	uint32_t slide_amount,
	vm_shared_region_slide_info_entry_v4_t s_info)
{
	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);

	const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
	const uint32_t value_mask = ~delta_mask;
	const uint32_t value_add = (uint32_t)(s_info->value_add);
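	/*
	 * The encoded delta counts 32-bit words: shifting right by
	 * (ctz(delta_mask) - 2) instead of ctz(delta_mask) leaves a
	 * factor of 4 in place, so "delta" comes out in bytes.
	 */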
	const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;

	uint32_t page_offset = start_offset;
	uint32_t delta = 1;

	while (delta != 0 && page_offset <= last_page_offset) {
		uint8_t *loc;
		uint32_t value;

		loc = page_content + page_offset;
		memcpy(&value, loc, sizeof(value));
		delta = (value & delta_mask) >> delta_shift;
		value &= value_mask;

		if ((value & 0xFFFF8000) == 0) {
			// small positive non-pointer, use as-is
		} else if ((value & 0x3FFF8000) == 0x3FFF8000) {
			// small negative non-pointer
			value |= 0xC0000000;
		} else {
			// pointer that needs rebasing
			value += value_add;
			value += slide_amount;
		}
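		// Illustrative values (invented, not from a real cache): a stored
		// 0x00004000 is below 0x8000 and passes through unchanged, while
		// 0x3FFF8004 gets its top bits restored to give 0xFFFF8004, i.e.
		// -32764; anything else is treated as a pointer and slid.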
		memcpy(loc, &value, sizeof(value));
		page_offset += delta;
	}

	/* If the offset went past the end of the page, then the slide data is invalid. */
	if (page_offset > last_page_offset) {
		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}

static kern_return_t
vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
	const uint32_t slide_amount = si->si_slide;

	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = page_starts[pageIndex];

	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
		return KERN_SUCCESS;
	}

	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
		uint16_t info;

		do {
			uint16_t page_start_offset;
			kern_return_t kr;

			if (chain_index >= s_info->page_extras_count) {
				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
				    chain_index, s_info->page_extras_count);
				return KERN_FAILURE;
			}
			info = page_extras[chain_index];
			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);

			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
			if (kr != KERN_SUCCESS) {
				return KERN_FAILURE;
			}

			chain_index++;
		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
	} else {
		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
		kern_return_t kr;

		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
		if (kr != KERN_SUCCESS) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}



kern_return_t
vm_shared_region_slide_page(
	vm_shared_region_slide_info_t si,
	vm_offset_t vaddr,
	mach_vm_offset_t uservaddr,
	uint32_t pageIndex,
	uint64_t jop_key)
{
	switch (si->si_slide_info_entry->version) {
	case 1:
		return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
	case 2:
		return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
	case 3:
		return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
	case 4:
		return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
	default:
		return KERN_FAILURE;
	}
}
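
/*
 * Illustrative calling pattern (a sketch, not the actual call site, which
 * lives elsewhere in the VM code): the routine is applied once per
 * PAGE_SIZE_FOR_SR_SLIDE-sized page of the slid mapping, where "vaddr" is
 * the kernel mapping of the page being rebased and "uservaddr" is the user
 * address it will appear at (needed for ptrauth address diversification).
 * "num_pages", "kernel_base" and "user_base" below are placeholders:
 *
 *	for (uint32_t i = 0; i < num_pages; i++) {
 *		kr = vm_shared_region_slide_page(si,
 *		    kernel_base + i * PAGE_SIZE_FOR_SR_SLIDE,
 *		    user_base + i * PAGE_SIZE_FOR_SR_SLIDE,
 *		    i, jop_key);
 *	}
 */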

/******************************************************************************/
/* Comm page support */
/******************************************************************************/

SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;

SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;

SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;

#if defined(__i386__) || defined(__x86_64__)
/*
 * Create a memory entry, VM submap and pmap for one commpage.
 */
static void
_vm_commpage_init(
	ipc_port_t *handlep,
	vm_map_size_t size)
{
	vm_named_entry_t mem_entry;
	vm_map_t new_map;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> _init(0x%llx)\n",
		(long long)size));

	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
	if (new_pmap == NULL) {
		panic("_vm_commpage_init: could not allocate pmap");
	}
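	/* wrap the new pmap in a fresh VM map covering offsets [0, size) */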
	new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);

	mem_entry = mach_memory_entry_allocate(handlep);
	mem_entry->backing.map = new_map;
	mem_entry->internal = TRUE;
	mem_entry->is_sub_map = TRUE;
	mem_entry->offset = 0;
	mem_entry->protection = VM_PROT_ALL;
	mem_entry->size = size;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: _init(0x%llx) <- %p\n",
		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
}
#endif


/*
 * Initialize the comm text pages at boot time
 */
void
vm_commpage_text_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage text: ->init()\n"));
#if defined(__i386__) || defined(__x86_64__)
	/* create the 32-bit comm text page */
	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to 32-bit max minus 2 pages */
	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
	commpage_text32_map = commpage_text32_entry->backing.map;
	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm text page */
	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to a 2MB range */
	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
	commpage_text64_map = commpage_text64_entry->backing.map;
	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
#endif

	/* populate the commpage text routines */
	commpage_text_populate();

	SHARED_REGION_TRACE_DEBUG(
		("commpage text: init() <-\n"));
}

/*
 * Initialize the comm pages at boot time.
 */
void
vm_commpage_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> init()\n"));

#if defined(__i386__) || defined(__x86_64__)
	/* create the 32-bit comm page */
	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
	commpage32_map = commpage32_entry->backing.map;

	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm page */
	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
	commpage64_map = commpage64_entry->backing.map;

#endif /* __i386__ || __x86_64__ */

	/* populate them according to this specific platform */
	commpage_populate();
	__commpage_setup = 1;
#if XNU_TARGET_OS_OSX
	if (__system_power_source == 0) {
		post_sys_powersource_internal(0, 1);
	}
#endif /* XNU_TARGET_OS_OSX */

	SHARED_REGION_TRACE_DEBUG(
		("commpage: init() <-\n"));
}

/*
 * Enter the appropriate comm page into the task's address space.
 * This is called at exec() time via vm_map_exec().
 */
kern_return_t
vm_commpage_enter(
	vm_map_t map,
	task_t task,
	boolean_t is64bit)
{
#if defined(__arm64__)
#pragma unused(is64bit)
	(void)task;
	(void)map;
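	/*
	 * On arm64 the commpage is built once at boot and entered directly
	 * into the task's pmap by the pmap layer; no VM mapping is needed.
	 */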
	pmap_insert_sharedpage(vm_map_pmap(map));
	return KERN_SUCCESS;
#else
	ipc_port_t commpage_handle, commpage_text_handle;
	vm_map_offset_t commpage_address, objc_address, commpage_text_address;
	vm_map_size_t commpage_size, objc_size, commpage_text_size;
	int vm_flags;
	vm_map_kernel_flags_t vmk_flags;
	kern_return_t kr;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> enter(%p,%p)\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task)));

	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
	/* the comm page is likely to be beyond the actual end of the VM map */
	vm_flags = VM_FLAGS_FIXED;
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_beyond_max = TRUE;

	/* select the appropriate comm page for this task */
	assert(!(is64bit ^ vm_map_is_64bit(map)));
	if (is64bit) {
		commpage_handle = commpage64_handle;
		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
		commpage_size = _COMM_PAGE64_AREA_LENGTH;
		objc_size = _COMM_PAGE64_OBJC_SIZE;
		objc_address = _COMM_PAGE64_OBJC_BASE;
		commpage_text_handle = commpage_text64_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
	} else {
		commpage_handle = commpage32_handle;
		commpage_address =
		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
		commpage_size = _COMM_PAGE32_AREA_LENGTH;
		objc_size = _COMM_PAGE32_OBJC_SIZE;
		objc_address = _COMM_PAGE32_OBJC_BASE;
		commpage_text_handle = commpage_text32_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
	}

	vm_tag_t tag = VM_KERN_MEMORY_NONE;
	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
		/* the commpage is properly aligned and sized for pmap-nesting */
		tag = VM_MEMORY_SHARED_PMAP;
		vmk_flags.vmkf_nested_pmap = TRUE;
	}
	/* map the comm page in the task's address space */
	assert(commpage_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_address,
		commpage_size,
		0,
		vm_flags,
		vmk_flags,
		tag,
		commpage_handle,
		0,
		FALSE,
		VM_PROT_READ,
		VM_PROT_READ,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("commpage: enter(%p,0x%llx,0x%llx) "
			"commpage %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_address,
			(long long)commpage_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
	}

	/* map the comm text page in the task's address space */
	assert(commpage_text_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_text_address,
		commpage_text_size,
		0,
		vm_flags,
		vmk_flags,
		tag,
		commpage_text_handle,
		0,
		FALSE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("commpage text: enter(%p,0x%llx,0x%llx) "
			"commpage text %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_text_address,
			(long long)commpage_text_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
	}

	/*
	 * Since we're here, we also pre-allocate some virtual space for the
	 * Objective-C run-time, if needed...
	 */
	if (objc_size != 0) {
		kr = vm_map_enter_mem_object(
			map,
			&objc_address,
			objc_size,
			0,
			VM_FLAGS_FIXED,
			vmk_flags,
			tag,
			IPC_PORT_NULL,
			0,
			FALSE,
			VM_PROT_ALL,
			VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("commpage: enter(%p,0x%llx,0x%llx) "
				"objc mapping failed 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(long long)objc_address,
				(long long)objc_size, kr));
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("commpage: enter(%p,%p) <- 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task), kr));
	return kr;
#endif
}

int
vm_shared_region_slide(
	uint32_t slide,
	mach_vm_offset_t entry_start_address,
	mach_vm_size_t entry_size,
	mach_vm_offset_t slide_start,
	mach_vm_size_t slide_size,
	mach_vm_offset_t slid_mapping,
	memory_object_control_t sr_file_control,
	vm_prot_t prot)
{
	vm_shared_region_t sr;
	kern_return_t error;

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
		slide, entry_start_address, entry_size, slide_start, slide_size));

	sr = vm_shared_region_get(current_task());
	if (sr == NULL) {
		printf("%s: no shared region?\n", __FUNCTION__);
		SHARED_REGION_TRACE_DEBUG(
			("vm_shared_region_slide: <- %d (no shared region)\n",
			KERN_FAILURE));
		return KERN_FAILURE;
	}

	/*
	 * Protect from concurrent access.
	 */
	vm_shared_region_lock();
	while (sr->sr_slide_in_progress) {
		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
	}

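	/* no other slide is in progress; mark ourselves as the active slider */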
	sr->sr_slide_in_progress = TRUE;
	vm_shared_region_unlock();

	error = vm_shared_region_slide_mapping(sr,
	    (user_addr_t)slide_start,
	    slide_size,
	    entry_start_address,
	    entry_size,
	    slid_mapping,
	    slide,
	    sr_file_control,
	    prot);
	if (error) {
		printf("slide_info initialization failed with kr=%d\n", error);
	}

	vm_shared_region_lock();

	assert(sr->sr_slide_in_progress);
	sr->sr_slide_in_progress = FALSE;
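	/* wake up any threads sleeping in the wait loop above */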
	thread_wakeup(&sr->sr_slide_in_progress);

#if XNU_TARGET_OS_OSX
	if (error == KERN_SUCCESS) {
		shared_region_completed_slide = TRUE;
	}
#endif /* XNU_TARGET_OS_OSX */
	vm_shared_region_unlock();

	vm_shared_region_deallocate(sr);

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: <- %d\n",
		error));

	return error;
}

/*
 * Used during Authenticated Root Volume macOS boot.
 * Launchd re-execs itself and wants the new launchd to use
 * the shared cache from the new root volume. This call
 * makes all the existing shared caches stale to allow
 * that to happen.
 */
void
vm_shared_region_pivot(void)
{
	vm_shared_region_t shared_region = NULL;

	vm_shared_region_lock();

	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
		assert(shared_region->sr_ref_count > 0);
		shared_region->sr_stale = TRUE;
		if (shared_region->sr_timer_call) {
			/*
			 * We have a shared region ready to be destroyed
			 * and just waiting for a delayed timer to fire.
			 * Marking it stale cements its ineligibility to
			 * be used ever again. So let's shorten the timer
			 * aggressively down to 10 milliseconds and get rid of it.
			 * This is a single quantum and we don't need to go
			 * shorter than this duration. We want it to be short
			 * enough, however, because we could have an unmount
			 * of the volume hosting this shared region just behind
			 * us.
			 */
			uint64_t deadline;
			assert(shared_region->sr_ref_count == 1);

			/*
			 * Free the old timer call. Returns with a reference held.
			 * If the old timer has fired and is waiting for the vm_shared_region_lock
			 * lock, we will just return with an additional ref_count i.e. 2.
			 * The old timer will then fire and just drop the ref count down to 1
			 * with no other modifications.
			 */
			vm_shared_region_reference_locked(shared_region);

			/* set up the timer. Keep the reference from above for this timer. */
			shared_region->sr_timer_call = thread_call_allocate(
				(thread_call_func_t) vm_shared_region_timeout,
				(thread_call_param_t) shared_region);

			/* schedule the timer */
			clock_interval_to_deadline(10, /* 10 milliseconds */
			    NSEC_PER_MSEC,
			    &deadline);
			thread_call_enter_delayed(shared_region->sr_timer_call,
			    deadline);

			SHARED_REGION_TRACE_DEBUG(
				("shared_region: pivot(%p): armed timer\n",
				(void *)VM_KERNEL_ADDRPERM(shared_region)));
		}
	}

	vm_shared_region_unlock();
}

/*
 * Routine to mark any non-standard slide shared cache region as stale.
 * This causes the next "reslide" spawn to create a new shared region.
 */
void
vm_shared_region_reslide_stale(boolean_t driverkit)
{
#if __has_feature(ptrauth_calls)
	vm_shared_region_t shared_region = NULL;

	vm_shared_region_lock();

	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
		assert(shared_region->sr_ref_count > 0);
		if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
			shared_region->sr_stale = TRUE;
			vm_shared_region_reslide_count++;
		}
	}

	vm_shared_region_unlock();
#else
	(void)driverkit;
#endif /* __has_feature(ptrauth_calls) */
}

/*
 * Report whether the task is using a reslide shared cache region.
 */
bool
vm_shared_region_is_reslide(__unused struct task *task)
{
	bool is_reslide = FALSE;
#if __has_feature(ptrauth_calls)
	vm_shared_region_t sr = vm_shared_region_get(task);

	if (sr != NULL) {
		is_reslide = sr->sr_reslide;
		vm_shared_region_deallocate(sr);
	}
#endif /* __has_feature(ptrauth_calls) */
	return is_reslide;
}

/*
 * This is called from power-management code to let the kernel know the
 * current source of power:
 * 0 if it is an external source (connected to power)
 * 1 if it is an internal power source, i.e. battery
 */
void
#if XNU_TARGET_OS_OSX
post_sys_powersource(int i)
#else /* XNU_TARGET_OS_OSX */
post_sys_powersource(__unused int i)
#endif /* XNU_TARGET_OS_OSX */
{
#if XNU_TARGET_OS_OSX
	post_sys_powersource_internal(i, 0);
#endif /* XNU_TARGET_OS_OSX */
}


#if XNU_TARGET_OS_OSX
static void
post_sys_powersource_internal(int i, int internal)
{
	if (internal == 0) {
		__system_power_source = i;
	}
}
#endif /* XNU_TARGET_OS_OSX */

void *
vm_shared_region_root_dir(
	struct vm_shared_region *sr)
{
	void *vnode;

	vm_shared_region_lock();
	vnode = sr->sr_root_dir;
	vm_shared_region_unlock();
	return vnode;
}