1 /*
2 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * Shared region (... and comm page)
26 *
27 * This file handles the VM shared region and comm page.
28 *
29 */
30 /*
31 * SHARED REGIONS
32 * --------------
33 *
34 * A shared region is a submap that contains the most common system shared
35 * libraries for a given environment which is defined by:
36 * - cpu-type
37 * - 64-bitness
38 * - root directory
39 * - Team ID - when we have pointer authentication.
40 *
41 * The point of a shared region is to reduce the setup overhead when exec'ing
42 * a new process. A shared region uses a shared VM submap that gets mapped
43 * automatically at exec() time, see vm_map_exec(). The first process of a given
44 * environment sets up the shared region and all further processes in that
45 * environment can re-use that shared region without having to re-create
46 * the same mappings in their VM map. All they need is contained in the shared
47 * region.
48 *
49 * The region can also share a pmap (mostly for read-only parts but also for the
50 * initial version of some writable parts), which gets "nested" into the
51 * process's pmap. This reduces the number of soft faults: once one process
52 * brings in a page in the shared region, all the other processes can access
53 * it without having to enter it in their own pmap.
54 *
55 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56 * to map the appropriate shared region in the process's address space.
57 * We look up the appropriate shared region for the process's environment.
58 * If we can't find one, we create a new (empty) one and add it to the list.
59 * Otherwise, we just take an extra reference on the shared region we found.
60 *
61 * The "dyld" runtime, mapped into the process's address space at exec() time,
62 * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
63 * system calls to validate and/or populate the shared region with the
64 * appropriate dyld_shared_cache file.
65 *
66 * The shared region is inherited on fork() and the child simply takes an
67 * extra reference on its parent's shared region.
68 *
69 * When the task terminates, we release the reference on its shared region.
70 * When the last reference is released, we destroy the shared region.
71 *
72 * After a chroot(), the calling process keeps using its original shared region,
73 * since that's what was mapped when it was started. But its children
74 * will use a different shared region, because they need to use the shared
75 * cache that's relative to the new root directory.
76 */
77
78 /*
79 * COMM PAGE
80 *
81 * A "comm page" is an area of memory that is populated by the kernel with
82 * the appropriate platform-specific version of some commonly used code.
83 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84 * for the native cpu-type. No need to overly optimize translated code
85 * for hardware that is not really there !
86 *
87 * The comm pages are created and populated at boot time.
88 *
89 * The appropriate comm page is mapped into a process's address space
90 * at exec() time, in vm_map_exec(). It is then inherited on fork().
91 *
92 * The comm page is shared between the kernel and all applications of
93 * a given platform. Only the kernel can modify it.
94 *
95 * Applications just branch to fixed addresses in the comm page and find
96 * the right version of the code for the platform. There is also some
97 * data provided and updated by the kernel for processes to retrieve easily
98 * without having to do a system call.
99 */
100
101 #include <debug.h>
102
103 #include <kern/ipc_tt.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread_call.h>
106
107 #include <mach/mach_vm.h>
108 #include <mach/machine.h>
109
110 #include <vm/vm_map.h>
111 #include <vm/vm_map_internal.h>
112 #include <vm/vm_shared_region.h>
113
114 #include <vm/vm_protos.h>
115
116 #include <machine/commpage.h>
117 #include <machine/cpu_capabilities.h>
118 #include <sys/random.h>
119 #include <sys/errno.h>
120
121 #if defined(__arm64__)
122 #include <arm/cpu_data_internal.h>
123 #include <arm/misc_protos.h>
124 #endif
125
/*
 * the following codes are used in the subclass
 * of the DBG_MACH_SHAREDREGION class
 */
#define PROCESS_SHARED_CACHE_LAYOUT 0x00

#if __has_feature(ptrauth_calls)
#include <ptrauth.h>
#endif /* __has_feature(ptrauth_calls) */

/* "dyld" uses this to figure out what the kernel supports */
int shared_region_version = 3;

/* trace level, output is sent to the system log file */
int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;

/* should local (non-chroot) shared regions persist when no task uses them ? */
int shared_region_persistence = 0;      /* no by default */


/* delay in seconds before reclaiming an unused shared region */
TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);

/*
 * Cached pointer to the most recently mapped shared region from PID 1, which should
 * be the most commonly mapped shared region in the system. There are many processes
 * which do not use this, for a variety of reasons.
 *
 * The main consumer of this is stackshot.
 */
struct vm_shared_region *primary_system_shared_region = NULL;

#if XNU_TARGET_OS_OSX
/*
 * Only one cache gets to slide on Desktop, since we can't
 * tear down slide info properly today and the desktop actually
 * produces lots of shared caches.
 */
boolean_t shared_region_completed_slide = FALSE;
#endif /* XNU_TARGET_OS_OSX */

/* this lock protects all the shared region data structures */
static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);

#define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
#define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
/*
 * Block on "event" (a pointer to a thread_t-sized field), atomically
 * dropping and re-acquiring vm_shared_region_lock.  The dereferenced
 * value *(event) is passed as the inheritor thread so the waiter can
 * push its priority onto the thread currently doing the work.
 */
#define vm_shared_region_sleep(event, interruptible) \
	lck_mtx_sleep_with_inheritor(&vm_shared_region_lock, \
	    LCK_SLEEP_DEFAULT, \
	    (event_t) (event), \
	    *(event), \
	    (interruptible) | THREAD_WAIT_NOREPORT, \
	    TIMEOUT_WAIT_FOREVER)
#define vm_shared_region_wakeup(event) \
	wakeup_all_with_inheritor((event), THREAD_AWAKENED)

/* the list of currently available shared regions (one per environment) */
queue_head_t vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
int vm_shared_region_count = 0;         /* current number of regions on the queue */
int vm_shared_region_peak = 0;          /* high-water mark of vm_shared_region_count */
static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */

/*
 * the number of times an event has forced the recalculation of the reslide
 * shared region slide.
 */
#if __has_feature(ptrauth_calls)
int vm_shared_region_reslide_count = 0;
#endif /* __has_feature(ptrauth_calls) */

/* forward declarations for this file's internal helpers */
static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
static vm_shared_region_t vm_shared_region_create(
	void *root_dir,
	cpu_type_t cputype,
	cpu_subtype_t cpu_subtype,
	boolean_t is_64bit,
	int target_page_shift,
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version);
static void vm_shared_region_destroy(vm_shared_region_t shared_region);

static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
static void vm_shared_region_timeout(thread_call_param_t param0,
    thread_call_param_t param1);
static kern_return_t vm_shared_region_slide_mapping(
	vm_shared_region_t sr,
	user_addr_t slide_info_addr,
	mach_vm_size_t slide_info_size,
	mach_vm_offset_t start,
	mach_vm_size_t size,
	mach_vm_offset_t slid_mapping,
	uint32_t slide,
	memory_object_control_t,
	vm_prot_t prot); /* forward */

/* non-zero once the comm pages have been set up at boot */
static int __commpage_setup = 0;
#if XNU_TARGET_OS_OSX
static int __system_power_source = 1; /* init to external power source */
static void post_sys_powersource_internal(int i, int internal);
#endif /* XNU_TARGET_OS_OSX */

extern u_int32_t random(void);
230
231 /*
232 * Retrieve a task's shared region and grab an extra reference to
233 * make sure it doesn't disappear while the caller is using it.
234 * The caller is responsible for consuming that extra reference if
235 * necessary.
236 */
237 vm_shared_region_t
vm_shared_region_get(task_t task)238 vm_shared_region_get(
239 task_t task)
240 {
241 vm_shared_region_t shared_region;
242
243 SHARED_REGION_TRACE_DEBUG(
244 ("shared_region: -> get(%p)\n",
245 (void *)VM_KERNEL_ADDRPERM(task)));
246
247 task_lock(task);
248 vm_shared_region_lock();
249 shared_region = task->shared_region;
250 if (shared_region) {
251 assert(shared_region->sr_ref_count > 0);
252 vm_shared_region_reference_locked(shared_region);
253 }
254 vm_shared_region_unlock();
255 task_unlock(task);
256
257 SHARED_REGION_TRACE_DEBUG(
258 ("shared_region: get(%p) <- %p\n",
259 (void *)VM_KERNEL_ADDRPERM(task),
260 (void *)VM_KERNEL_ADDRPERM(shared_region)));
261
262 return shared_region;
263 }
264
265 vm_map_t
vm_shared_region_vm_map(vm_shared_region_t shared_region)266 vm_shared_region_vm_map(
267 vm_shared_region_t shared_region)
268 {
269 ipc_port_t sr_handle;
270 vm_named_entry_t sr_mem_entry;
271 vm_map_t sr_map;
272
273 SHARED_REGION_TRACE_DEBUG(
274 ("shared_region: -> vm_map(%p)\n",
275 (void *)VM_KERNEL_ADDRPERM(shared_region)));
276 assert(shared_region->sr_ref_count > 0);
277
278 sr_handle = shared_region->sr_mem_entry;
279 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
280 sr_map = sr_mem_entry->backing.map;
281 assert(sr_mem_entry->is_sub_map);
282
283 SHARED_REGION_TRACE_DEBUG(
284 ("shared_region: vm_map(%p) <- %p\n",
285 (void *)VM_KERNEL_ADDRPERM(shared_region),
286 (void *)VM_KERNEL_ADDRPERM(sr_map)));
287 return sr_map;
288 }
289
290 /*
291 * Set the shared region the process should use.
292 * A NULL new shared region means that we just want to release the old
293 * shared region.
294 * The caller should already have an extra reference on the new shared region
295 * (if any). We release a reference on the old shared region (if any).
296 */
297 void
vm_shared_region_set(task_t task,vm_shared_region_t new_shared_region)298 vm_shared_region_set(
299 task_t task,
300 vm_shared_region_t new_shared_region)
301 {
302 vm_shared_region_t old_shared_region;
303
304 SHARED_REGION_TRACE_DEBUG(
305 ("shared_region: -> set(%p, %p)\n",
306 (void *)VM_KERNEL_ADDRPERM(task),
307 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
308
309 task_lock(task);
310 vm_shared_region_lock();
311
312 old_shared_region = task->shared_region;
313 if (new_shared_region) {
314 assert(new_shared_region->sr_ref_count > 0);
315 }
316
317 task->shared_region = new_shared_region;
318
319 vm_shared_region_unlock();
320 task_unlock(task);
321
322 if (old_shared_region) {
323 assert(old_shared_region->sr_ref_count > 0);
324 vm_shared_region_deallocate(old_shared_region);
325 }
326
327 SHARED_REGION_TRACE_DEBUG(
328 ("shared_region: set(%p) <- old=%p new=%p\n",
329 (void *)VM_KERNEL_ADDRPERM(task),
330 (void *)VM_KERNEL_ADDRPERM(old_shared_region),
331 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
332 }
333
334 /*
335 * New arm64 shared regions match with an existing arm64e region.
336 * They just get a private non-authenticating pager.
337 */
338 static inline bool
match_subtype(cpu_type_t cputype,cpu_subtype_t exist,cpu_subtype_t new)339 match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
340 {
341 if (exist == new) {
342 return true;
343 }
344 if (cputype == CPU_TYPE_ARM64 &&
345 exist == CPU_SUBTYPE_ARM64E &&
346 new == CPU_SUBTYPE_ARM64_ALL) {
347 return true;
348 }
349 return false;
350 }
351
352
/*
 * Lookup up the shared region for the desired environment.
 * If none is found, create a new (empty) one.
 * Grab an extra reference on the returned shared region, to make sure
 * it doesn't get destroyed before the caller is done with it. The caller
 * is responsible for consuming that extra reference if necessary.
 */
vm_shared_region_t
vm_shared_region_lookup(
	void *root_dir,
	cpu_type_t cputype,
	cpu_subtype_t cpu_subtype,
	boolean_t is_64bit,
	int target_page_shift,
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version)
{
	vm_shared_region_t shared_region;
	vm_shared_region_t new_shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit));

	shared_region = NULL;
	new_shared_region = NULL;

	vm_shared_region_lock();
	for (;;) {
		/*
		 * Scan the existing regions for one matching every aspect of
		 * the requested environment.  Only the environment fields
		 * that are compiled in for this platform participate.
		 */
		queue_iterate(&vm_shared_region_queue,
		    shared_region,
		    vm_shared_region_t,
		    sr_q) {
			assert(shared_region->sr_ref_count > 0);
			if (shared_region->sr_cpu_type == cputype &&
			    match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
			    shared_region->sr_root_dir == root_dir &&
			    shared_region->sr_64bit == is_64bit &&
#if __ARM_MIXED_PAGE_SIZE__
			    shared_region->sr_page_shift == target_page_shift &&
#endif /* __ARM_MIXED_PAGE_SIZE__ */
#if __has_feature(ptrauth_calls)
			    shared_region->sr_reslide == reslide &&
#endif /* __has_feature(ptrauth_calls) */
			    shared_region->sr_driverkit == is_driverkit &&
			    shared_region->sr_rsr_version == rsr_version &&
			    !shared_region->sr_stale) {
				/* found a match ! */
				vm_shared_region_reference_locked(shared_region);
				goto done;
			}
		}
		if (new_shared_region == NULL) {
			/*
			 * No match: create a new one.  We must drop the lock
			 * to call vm_shared_region_create(), so another
			 * thread may concurrently insert an equivalent
			 * region; re-run the lookup afterwards to catch
			 * that race.
			 */
			vm_shared_region_unlock();
			new_shared_region = vm_shared_region_create(root_dir,
			    cputype,
			    cpu_subtype,
			    is_64bit,
			    target_page_shift,
			    reslide,
			    is_driverkit,
			    rsr_version);
			/* do the lookup again, in case we lost a race */
			vm_shared_region_lock();
			continue;
		}
		/* still no match: use our new one */
		shared_region = new_shared_region;
		new_shared_region = NULL;
		/* assign a unique id; ids are never reused, so a wrap is fatal */
		uint32_t newid = ++vm_shared_region_lastid;
		if (newid == 0) {
			panic("shared_region: vm_shared_region_lastid wrapped");
		}
		shared_region->sr_id = newid;
		shared_region->sr_install_time = mach_absolute_time();
		queue_enter(&vm_shared_region_queue,
		    shared_region,
		    vm_shared_region_t,
		    sr_q);
		vm_shared_region_count++;
		if (vm_shared_region_count > vm_shared_region_peak) {
			vm_shared_region_peak = vm_shared_region_count;
		}
		break;
	}

done:
	vm_shared_region_unlock();

	if (new_shared_region) {
		/*
		 * We lost a race with someone else to create a new shared
		 * region for that environment. Get rid of our unused one.
		 */
		assert(new_shared_region->sr_ref_count == 1);
		new_shared_region->sr_ref_count--;
		vm_shared_region_destroy(new_shared_region);
		new_shared_region = NULL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit,
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	assert(shared_region->sr_ref_count > 0);
	return shared_region;
}
467
468 /*
469 * Take an extra reference on a shared region.
470 * The vm_shared_region_lock should already be held by the caller.
471 */
472 static void
vm_shared_region_reference_locked(vm_shared_region_t shared_region)473 vm_shared_region_reference_locked(
474 vm_shared_region_t shared_region)
475 {
476 LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
477
478 SHARED_REGION_TRACE_DEBUG(
479 ("shared_region: -> reference_locked(%p)\n",
480 (void *)VM_KERNEL_ADDRPERM(shared_region)));
481 assert(shared_region->sr_ref_count > 0);
482 shared_region->sr_ref_count++;
483 assert(shared_region->sr_ref_count != 0);
484
485 if (shared_region->sr_timer_call != NULL) {
486 boolean_t cancelled;
487
488 /* cancel and free any pending timeout */
489 cancelled = thread_call_cancel(shared_region->sr_timer_call);
490 if (cancelled) {
491 thread_call_free(shared_region->sr_timer_call);
492 shared_region->sr_timer_call = NULL;
493 /* release the reference held by the cancelled timer */
494 shared_region->sr_ref_count--;
495 } else {
496 /* the timer will drop the reference and free itself */
497 }
498 }
499
500 SHARED_REGION_TRACE_DEBUG(
501 ("shared_region: reference_locked(%p) <- %d\n",
502 (void *)VM_KERNEL_ADDRPERM(shared_region),
503 shared_region->sr_ref_count));
504 }
505
/*
 * Take a reference on a shared region.
 * Locked wrapper around vm_shared_region_reference_locked(); callers
 * must NOT hold vm_shared_region_lock.
 */
void
vm_shared_region_reference(vm_shared_region_t shared_region)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> reference(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();
	vm_shared_region_reference_locked(shared_region);
	vm_shared_region_unlock();

	/* NOTE(review): sr_ref_count is read here without the lock, for tracing only */
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: reference(%p) <- %d\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		shared_region->sr_ref_count));
}
525
/*
 * Release a reference on the shared region.
 * Destroy it if there are no references left.
 * Unless a destroy-delay timer is armed, in which case actual teardown
 * happens later, from vm_shared_region_timeout().
 */
void
vm_shared_region_deallocate(
	vm_shared_region_t shared_region)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> deallocate(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();

	assert(shared_region->sr_ref_count > 0);

	if (shared_region->sr_root_dir == NULL) {
		/*
		 * Local (i.e. based on the boot volume) shared regions
		 * can persist or not based on the "shared_region_persistence"
		 * sysctl.
		 * Make sure that this one complies.
		 *
		 * See comments in vm_shared_region_slide() for notes about
		 * shared regions we have slid (which are not torn down currently).
		 */
		if (shared_region_persistence &&
		    !shared_region->sr_persists) {
			/* make this one persistent: hold a ref on its behalf */
			shared_region->sr_ref_count++;
			shared_region->sr_persists = TRUE;
		} else if (!shared_region_persistence &&
		    shared_region->sr_persists) {
			/* make this one no longer persistent */
			assert(shared_region->sr_ref_count > 1);
			shared_region->sr_ref_count--;
			shared_region->sr_persists = FALSE;
		}
	}

	assert(shared_region->sr_ref_count > 0);
	shared_region->sr_ref_count--;
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: deallocate(%p): ref now %d\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		shared_region->sr_ref_count));

	if (shared_region->sr_ref_count == 0) {
		uint64_t deadline;

		/*
		 * Even though a shared region is unused, delay a while before
		 * tearing it down, in case a new app launch can use it.
		 * We don't keep around stale shared regions, nor older RSR ones.
		 */
		if (shared_region->sr_timer_call == NULL &&
		    shared_region_destroy_delay != 0 &&
		    !shared_region->sr_stale &&
		    !(shared_region->sr_rsr_version != 0 &&
		    shared_region->sr_rsr_version != rsr_get_version())) {
			/* hold one reference for the timer */
			assert(!shared_region->sr_mapping_in_progress);
			shared_region->sr_ref_count++;

			/* set up the timer */
			shared_region->sr_timer_call = thread_call_allocate(
				(thread_call_func_t) vm_shared_region_timeout,
				(thread_call_param_t) shared_region);

			/* schedule the timer */
			clock_interval_to_deadline(shared_region_destroy_delay,
			    NSEC_PER_SEC,
			    &deadline);
			thread_call_enter_delayed(shared_region->sr_timer_call,
			    deadline);

			SHARED_REGION_TRACE_DEBUG(
				("shared_region: deallocate(%p): armed timer\n",
				(void *)VM_KERNEL_ADDRPERM(shared_region)));

			vm_shared_region_unlock();
		} else {
			/*
			 * No grace period: either the delayed-destroy timer
			 * already fired (we are on the timeout path), the
			 * delay is disabled, or the region is stale/outdated.
			 * Let go of this shared region now.
			 */

			/* Make sure there's no cached pointer to the region. */
			if (primary_system_shared_region == shared_region) {
				primary_system_shared_region = NULL;
			}

			/*
			 * Remove it from the queue first, so no one can find
			 * it...
			 */
			queue_remove(&vm_shared_region_queue,
			    shared_region,
			    vm_shared_region_t,
			    sr_q);
			vm_shared_region_count--;
			vm_shared_region_unlock();

			/* ... and destroy it */
			vm_shared_region_destroy(shared_region);
			shared_region = NULL;
		}
	} else {
		vm_shared_region_unlock();
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: deallocate(%p) <-\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
}
638
639 void
vm_shared_region_timeout(thread_call_param_t param0,__unused thread_call_param_t param1)640 vm_shared_region_timeout(
641 thread_call_param_t param0,
642 __unused thread_call_param_t param1)
643 {
644 vm_shared_region_t shared_region;
645
646 shared_region = (vm_shared_region_t) param0;
647
648 vm_shared_region_deallocate(shared_region);
649 }
650
651
/*
 * Create a new (empty) shared region for a new environment.
 * Allocates the vm_shared_region structure, a nested pmap + VM submap
 * sized for the environment, and a memory entry / port wrapping that
 * submap.  Returns NULL on unknown cpu type or pmap/map allocation
 * failure.  The returned region carries one reference for the caller.
 */
static vm_shared_region_t
vm_shared_region_create(
	void *root_dir,
	cpu_type_t cputype,
	cpu_subtype_t cpu_subtype,
	boolean_t is_64bit,
	int target_page_shift,
#if !__has_feature(ptrauth_calls)
	__unused
#endif /* !__has_feature(ptrauth_calls) */
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version)
{
	vm_named_entry_t mem_entry;
	ipc_port_t mem_entry_port;
	vm_shared_region_t shared_region;
	vm_map_t sub_map;
	mach_vm_offset_t base_address, pmap_nesting_start;
	mach_vm_size_t size, pmap_nesting_size;

	SHARED_REGION_TRACE_INFO(
		("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(root_dir),
		cputype, cpu_subtype, is_64bit, target_page_shift,
		reslide, is_driverkit));

	base_address = 0;
	size = 0;
	mem_entry = NULL;
	mem_entry_port = IPC_PORT_NULL;
	sub_map = VM_MAP_NULL;

	/* create a new shared region structure... */
	shared_region = kalloc_type(struct vm_shared_region,
	    Z_WAITOK | Z_NOFAIL);

	/*
	 * figure out the correct settings for the desired environment:
	 * base address / size of the region and of the pmap-nesting window,
	 * all platform constants from the SHARED_REGION_* defines.
	 */
	if (is_64bit) {
		switch (cputype) {
#if defined(__arm64__)
		case CPU_TYPE_ARM64:
			base_address = SHARED_REGION_BASE_ARM64;
			size = SHARED_REGION_SIZE_ARM64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
			break;
#else
		case CPU_TYPE_I386:
			base_address = SHARED_REGION_BASE_X86_64;
			size = SHARED_REGION_SIZE_X86_64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
			break;
		case CPU_TYPE_POWERPC:
			base_address = SHARED_REGION_BASE_PPC64;
			size = SHARED_REGION_SIZE_PPC64;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
			break;
#endif
		default:
			SHARED_REGION_TRACE_ERROR(
				("shared_region: create: unknown cpu type %d\n",
				cputype));
			kfree_type(struct vm_shared_region, shared_region);
			shared_region = NULL;
			goto done;
		}
	} else {
		switch (cputype) {
#if defined(__arm64__)
		case CPU_TYPE_ARM:
			base_address = SHARED_REGION_BASE_ARM;
			size = SHARED_REGION_SIZE_ARM;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
			break;
#else
		case CPU_TYPE_I386:
			base_address = SHARED_REGION_BASE_I386;
			size = SHARED_REGION_SIZE_I386;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
			break;
		case CPU_TYPE_POWERPC:
			base_address = SHARED_REGION_BASE_PPC;
			size = SHARED_REGION_SIZE_PPC;
			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
			break;
#endif
		default:
			SHARED_REGION_TRACE_ERROR(
				("shared_region: create: unknown cpu type %d\n",
				cputype));
			kfree_type(struct vm_shared_region, shared_region);
			shared_region = NULL;
			goto done;
		}
	}

	/* create a memory entry structure and a Mach port handle */
	mem_entry = mach_memory_entry_allocate(&mem_entry_port);

#if defined(__arm64__)
	{
		struct pmap *pmap_nested;
		int pmap_flags = 0;
		pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;


#if __ARM_MIXED_PAGE_SIZE__
		if (cputype == CPU_TYPE_ARM64 &&
		    target_page_shift == FOURK_PAGE_SHIFT) {
			/* arm64/4k address space */
			pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
		}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

		/*
		 * Create a nested pmap that the per-task pmaps can share,
		 * then a pageable VM submap on top of it.
		 */
		pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
		if (pmap_nested != PMAP_NULL) {
			pmap_set_nested(pmap_nested);
			sub_map = vm_map_create_options(pmap_nested, 0,
			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);

			if (is_64bit ||
			    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
				/* enforce 16KB alignment of VM map entries */
				vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
			}
#if __ARM_MIXED_PAGE_SIZE__
			if (cputype == CPU_TYPE_ARM64 &&
			    target_page_shift == FOURK_PAGE_SHIFT) {
				/* arm64/4k address space */
				vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
			}
#endif /* __ARM_MIXED_PAGE_SIZE__ */
		} else {
			sub_map = VM_MAP_NULL;
		}
	}
#else /* defined(__arm64__) */
	{
		/* create a VM sub map and its pmap */
		pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
		if (pmap != NULL) {
			sub_map = vm_map_create_options(pmap, 0,
			    (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
		} else {
			sub_map = VM_MAP_NULL;
		}
	}
#endif /* defined(__arm64__) */
	if (sub_map == VM_MAP_NULL) {
		/* pmap or map creation failed: undo everything */
		ipc_port_release_send(mem_entry_port);
		kfree_type(struct vm_shared_region, shared_region);
		shared_region = NULL;
		SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
		goto done;
	}

	/* shared regions should always enforce code-signing */
	vm_map_cs_enforcement_set(sub_map, true);
	assert(vm_map_cs_enforcement(sub_map));
	assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));

	assert(!sub_map->disable_vmentry_reuse);
	sub_map->is_nested_map = TRUE;

	/* make the memory entry point to the VM sub map */
	mem_entry->is_sub_map = TRUE;
	mem_entry->backing.map = sub_map;
	mem_entry->size = size;
	mem_entry->protection = VM_PROT_ALL;

	/* make the shared region point at the memory entry */
	shared_region->sr_mem_entry = mem_entry_port;

	/* fill in the shared region's environment and settings */
	shared_region->sr_base_address = base_address;
	shared_region->sr_size = size;
	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
	shared_region->sr_cpu_type = cputype;
	shared_region->sr_cpu_subtype = cpu_subtype;
	shared_region->sr_64bit = (uint8_t)is_64bit;
#if __ARM_MIXED_PAGE_SIZE__
	shared_region->sr_page_shift = (uint8_t)target_page_shift;
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	shared_region->sr_driverkit = (uint8_t)is_driverkit;
	shared_region->sr_rsr_version = rsr_version;
	shared_region->sr_root_dir = root_dir;

	queue_init(&shared_region->sr_q);
	shared_region->sr_mapping_in_progress = THREAD_NULL;
	shared_region->sr_slide_in_progress = THREAD_NULL;
	shared_region->sr_persists = FALSE;
	shared_region->sr_stale = FALSE;
	shared_region->sr_timer_call = NULL;
	/* -1 means "no mapping established yet" */
	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;

	/* grab a reference for the caller */
	shared_region->sr_ref_count = 1;

	shared_region->sr_slide = 0; /* not slid yet */

	/* Initialize UUID and other metadata */
	memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
	shared_region->sr_uuid_copied = FALSE;
	shared_region->sr_images_count = 0;
	shared_region->sr_images = NULL;
#if __has_feature(ptrauth_calls)
	shared_region->sr_reslide = reslide;
	shared_region->sr_num_auth_section = 0;
	shared_region->sr_next_auth_section = 0;
	shared_region->sr_auth_section = NULL;
#endif /* __has_feature(ptrauth_calls) */

done:
	if (shared_region) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
			"base=0x%llx,size=0x%llx) <- "
			"%p mem=(%p,%p) map=%p pmap=%p\n",
			(void *)VM_KERNEL_ADDRPERM(root_dir),
			cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)base_address,
			(long long)size,
			(void *)VM_KERNEL_ADDRPERM(shared_region),
			(void *)VM_KERNEL_ADDRPERM(mem_entry_port),
			(void *)VM_KERNEL_ADDRPERM(mem_entry),
			(void *)VM_KERNEL_ADDRPERM(sub_map),
			(void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
	} else {
		SHARED_REGION_TRACE_INFO(
			("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
			"base=0x%llx,size=0x%llx) <- NULL",
			(void *)VM_KERNEL_ADDRPERM(root_dir),
			cputype, cpu_subtype, is_64bit, is_driverkit,
			(long long)base_address,
			(long long)size));
	}
	return shared_region;
}
900
901 /*
902 * Destroy a now-unused shared region.
903 * The shared region is no longer in the queue and can not be looked up.
904 */
static void
vm_shared_region_destroy(
	vm_shared_region_t      shared_region)
{
	vm_named_entry_t        mem_entry;
	vm_map_t                map;

	SHARED_REGION_TRACE_INFO(
		("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		(void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
		shared_region->sr_cpu_type,
		shared_region->sr_cpu_subtype,
		shared_region->sr_64bit,
		shared_region->sr_driverkit));

	/*
	 * The caller must have already dropped the last reference and
	 * removed the region from the lookup queue.
	 */
	assert(shared_region->sr_ref_count == 0);
	assert(!shared_region->sr_persists);

	mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
	assert(mem_entry->is_sub_map);
	assert(!mem_entry->internal);
	assert(!mem_entry->is_copy);
	map = mem_entry->backing.map;

	/*
	 * Clean up the pmap first. The virtual addresses that were
	 * entered in this possibly "nested" pmap may have different values
	 * than the VM map's min and max offsets, if the VM sub map was
	 * mapped at a non-zero offset in the processes' main VM maps, which
	 * is usually the case, so the clean-up we do in vm_map_destroy() would
	 * not be enough.
	 */
	if (map->pmap) {
		pmap_remove(map->pmap,
		    (vm_map_offset_t)shared_region->sr_base_address,
		    (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
	}

	/*
	 * Release our (one and only) handle on the memory entry.
	 * This will generate a no-senders notification, which will be processed
	 * by ipc_kobject_notify_no_senders(), which will release the one and only
	 * reference on the memory entry and cause it to be destroyed, along
	 * with the VM sub map and its pmap.
	 */
	mach_memory_entry_port_release(shared_region->sr_mem_entry);
	mem_entry = NULL;
	shared_region->sr_mem_entry = IPC_PORT_NULL;

	/* the lazy-teardown timer may never have been allocated */
	if (shared_region->sr_timer_call) {
		thread_call_free(shared_region->sr_timer_call);
	}

#if __has_feature(ptrauth_calls)
	/*
	 * Free the cached copies of slide_info for the AUTH regions.
	 */
	for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
		vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
		if (si != NULL) {
			vm_object_deallocate(si->si_slide_object);
			kfree_data(si->si_slide_info_entry,
			    si->si_slide_info_size);
			kfree_type(struct vm_shared_region_slide_info, si);
			shared_region->sr_auth_section[i] = NULL;
		}
	}
	if (shared_region->sr_auth_section != NULL) {
		assert(shared_region->sr_num_auth_section > 0);
		kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
		shared_region->sr_auth_section = NULL;
		shared_region->sr_num_auth_section = 0;
	}
#endif /* __has_feature(ptrauth_calls) */

	/* release the shared region structure... */
	kfree_type(struct vm_shared_region, shared_region);

	/*
	 * NOTE: the structure was just freed; the trace below only logs the
	 * (permuted) pointer value and does not dereference it.
	 */
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: destroy(%p) <-\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));
	shared_region = NULL;
}
989
990 /*
991 * Gets the address of the first (in time) mapping in the shared region.
992 * If used during initial task setup by dyld, task should non-NULL.
993 */
kern_return_t
vm_shared_region_start_address(
	vm_shared_region_t      shared_region,
	mach_vm_offset_t        *start_address,
	task_t                  task)
{
	kern_return_t           kr;
	mach_vm_offset_t        sr_base_address;
	mach_vm_offset_t        sr_first_mapping;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> start_address(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	vm_shared_region_lock();

	/*
	 * Wait if there's another thread establishing a mapping
	 * in this shared region right when we're looking at it.
	 * We want a consistent view of the map...
	 */
	while (shared_region->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
		    THREAD_UNINT);
	}
	assert(!shared_region->sr_mapping_in_progress);
	assert(shared_region->sr_ref_count > 0);

	/* snapshot both values under the lock so they are consistent */
	sr_base_address = shared_region->sr_base_address;
	sr_first_mapping = shared_region->sr_first_mapping;

	if (sr_first_mapping == (mach_vm_offset_t) -1) {
		/* shared region is empty */
		kr = KERN_INVALID_ADDRESS;
	} else {
		kr = KERN_SUCCESS;
		*start_address = sr_base_address + sr_first_mapping;
	}


	uint32_t slide = shared_region->sr_slide;

	vm_shared_region_unlock();

	/*
	 * Cache shared region info in the task for telemetry gathering, if we're
	 * passed in the task. No task lock here as we're still in initial task set up.
	 */
	if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
		uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
		/*
		 * Copy the dyld cache UUID out of the user-mapped cache header;
		 * only record the slide if the copyin succeeded.
		 */
		if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
		    (char *)&task->task_shared_region_uuid,
		    sizeof(task->task_shared_region_uuid)) == 0) {
			task->task_shared_region_slide = slide;
		}
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: start_address(%p) <- 0x%llx\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region),
		(long long)shared_region->sr_base_address));

	return kr;
}
1059
1060 /*
1061 * Look up a pre-existing mapping in shared region, for replacement.
1062 * Takes an extra object reference if found.
1063 */
1064 static kern_return_t
find_mapping_to_slide(vm_map_t map,vm_map_address_t addr,vm_map_entry_t entry)1065 find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1066 {
1067 vm_map_entry_t found;
1068
1069 /* find the shared region's map entry to slide */
1070 vm_map_lock_read(map);
1071 if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
1072 /* no mapping there */
1073 vm_map_unlock(map);
1074 return KERN_INVALID_ARGUMENT;
1075 }
1076
1077 *entry = *found;
1078 /* extra ref to keep object alive while map is unlocked */
1079 vm_object_reference(VME_OBJECT(found));
1080 vm_map_unlock_read(map);
1081 return KERN_SUCCESS;
1082 }
1083
1084 static bool
shared_region_make_permanent(vm_shared_region_t sr,vm_prot_t max_prot)1085 shared_region_make_permanent(
1086 vm_shared_region_t sr,
1087 vm_prot_t max_prot)
1088 {
1089 if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1090 return false;
1091 }
1092 if (max_prot & VM_PROT_WRITE) {
1093 /*
1094 * Potentially writable mapping: no major issue with allowing
1095 * it to be replaced since its contents could be modified
1096 * anyway.
1097 */
1098 return false;
1099 }
1100 if (max_prot & VM_PROT_EXECUTE) {
1101 /*
1102 * Potentially executable mapping: some software might want
1103 * to try and replace it to interpose their own code when a
1104 * given routine is called or returns, for example.
1105 * So let's not make it "permanent".
1106 */
1107 return false;
1108 }
1109 /*
1110 * Make this mapping "permanent" to prevent it from being deleted
1111 * and/or replaced with another mapping.
1112 */
1113 return true;
1114 }
1115
1116 static bool
shared_region_tpro_protect(vm_shared_region_t sr,vm_prot_t max_prot __unused)1117 shared_region_tpro_protect(
1118 vm_shared_region_t sr,
1119 vm_prot_t max_prot __unused)
1120 {
1121 if (sr->sr_cpu_type != CPU_TYPE_ARM64 ||
1122 (sr->sr_cpu_subtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E) {
1123 return false;
1124 }
1125
1126
1127 /*
1128 * Unless otherwise explicitly requested all other mappings do not get
1129 * TPRO protection.
1130 */
1131 return false;
1132 }
1133
1134 #if __has_feature(ptrauth_calls)
1135
1136 /*
1137 * Determine if this task is actually using pointer signing.
1138 */
1139 static boolean_t
task_sign_pointers(task_t task)1140 task_sign_pointers(task_t task)
1141 {
1142 if (task->map &&
1143 task->map->pmap &&
1144 !task->map->pmap->disable_jop) {
1145 return TRUE;
1146 }
1147 return FALSE;
1148 }
1149
1150 /*
1151 * If the shared region contains mappings that are authenticated, then
1152 * remap them into the task private map.
1153 *
1154 * Failures are possible in this routine when jetsam kills a process
1155 * just as dyld is trying to set it up. The vm_map and task shared region
1156 * info get torn down w/o waiting for this thread to finish up.
1157 */
__attribute__((noinline))
kern_return_t
vm_shared_region_auth_remap(vm_shared_region_t sr)
{
	memory_object_t         sr_pager = MEMORY_OBJECT_NULL;
	task_t                  task = current_task();
	vm_shared_region_slide_info_t si;
	uint_t                  i;
	vm_object_t             object;
	vm_map_t                sr_map;
	struct vm_map_entry     tmp_entry_store = {0};
	vm_map_entry_t          tmp_entry = NULL;
	vm_map_kernel_flags_t   vmk_flags;
	vm_map_offset_t         map_addr;
	kern_return_t           kr = KERN_SUCCESS;
	boolean_t               use_ptr_auth = task_sign_pointers(task);

	/*
	 * Don't do this more than once and avoid any race conditions in finishing it.
	 */
	vm_shared_region_lock();
	while (sr->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
	}
	assert(!sr->sr_mapping_in_progress);
	assert(sr->sr_ref_count > 0);

	/* Just return if already done. */
	if (task->shared_region_auth_remapped) {
		vm_shared_region_unlock();
		return KERN_SUCCESS;
	}

	/* let others know to wait while we're working in this shared region */
	sr->sr_mapping_in_progress = current_thread();
	vm_shared_region_unlock();

	/*
	 * Remap any sections with pointer authentications into the private map.
	 */
	for (i = 0; i < sr->sr_num_auth_section; ++i) {
		si = sr->sr_auth_section[i];
		assert(si != NULL);
		assert(si->si_ptrauth);

		/*
		 * We have mapping that needs to be private.
		 * Look for an existing slid mapping's pager with matching
		 * object, offset, slide info and shared_region_id to reuse.
		 */
		object = si->si_slide_object;
		sr_pager = shared_region_pager_match(object, si->si_start, si,
		    use_ptr_auth ? task->jop_pid : 0);
		if (sr_pager == MEMORY_OBJECT_NULL) {
			printf("%s(): shared_region_pager_match() failed\n", __func__);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * verify matching jop_pid for this task and this pager
		 */
		if (use_ptr_auth) {
			shared_region_pager_match_task_key(sr_pager, task);
		}

		sr_map = vm_shared_region_vm_map(sr);
		tmp_entry = NULL;

		/* address passed is relative to the shared region base */
		kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
		if (kr != KERN_SUCCESS) {
			printf("%s(): find_mapping_to_slide() failed\n", __func__);
			goto done;
		}
		/* tmp_entry != NULL marks that we now hold an object ref */
		tmp_entry = &tmp_entry_store;

		/*
		 * Check that the object exactly covers the region to slide.
		 */
		if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
			printf("%s(): doesn't fully cover\n", __func__);
			kr = KERN_FAILURE;
			goto done;
		}

		/*
		 * map the pager over the portion of the mapping that needs sliding
		 */
		vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
		vmk_flags.vmkf_overwrite_immutable = true;
		vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
		    tmp_entry->max_protection);

		/* Preserve the TPRO flag if task has TPRO enabled */
		vmk_flags.vmf_tpro = (vm_map_tpro(task->map) &&
		    tmp_entry->used_for_tpro &&
		    task_get_platform_binary(task));

		map_addr = si->si_slid_address;
		kr = vm_map_enter_mem_object(task->map,
		    &map_addr,
		    si->si_end - si->si_start,
		    (mach_vm_offset_t) 0,
		    vmk_flags,
		    (ipc_port_t)(uintptr_t) sr_pager,
		    0,
		    TRUE,
		    tmp_entry->protection,
		    tmp_entry->max_protection,
		    tmp_entry->inheritance);
		/* vm_map_enter_mem_object() took its own reference on the pager */
		memory_object_deallocate(sr_pager);
		sr_pager = MEMORY_OBJECT_NULL;
		if (kr != KERN_SUCCESS) {
			printf("%s(): vm_map_enter_mem_object() failed\n", __func__);
			goto done;
		}
		/* vmf_overwrite was set, so the mapping must land exactly there */
		assertf(map_addr == si->si_slid_address,
		    "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t)si->si_slid_address,
		    tmp_entry);

		/* Drop the ref count grabbed by find_mapping_to_slide */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = NULL;
	}

done:
	if (tmp_entry) {
		/* Drop the ref count grabbed by find_mapping_to_slide */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = NULL;
	}

	/*
	 * Drop any extra reference to the pager in case we're quitting due to an error above.
	 */
	if (sr_pager != MEMORY_OBJECT_NULL) {
		memory_object_deallocate(sr_pager);
	}

	/*
	 * Mark the region as having its auth sections remapped.
	 * Note: the "remapped" flag is set even on the error paths above;
	 * waiters blocked on sr_mapping_in_progress are woken in any case.
	 */
	vm_shared_region_lock();
	task->shared_region_auth_remapped = TRUE;
	assert(sr->sr_mapping_in_progress == current_thread());
	sr->sr_mapping_in_progress = THREAD_NULL;
	vm_shared_region_wakeup((event_t)&sr->sr_mapping_in_progress);
	vm_shared_region_unlock();
	return kr;
}
1311 #endif /* __has_feature(ptrauth_calls) */
1312
/*
 * Undo (remove) mappings previously established in the current task's
 * shared region, typically after a failure partway through mapping the
 * shared cache.  The range [srf_mappings, srf_mappings_current] covers
 * the files processed so far; for the last file only the first
 * "srf_current_mappings_count" mappings were established.  Mappings
 * whose sms_size was reset to 0 are skipped (never established by us).
 * If sr_map is NULL, the caller does not hold sr_mapping_in_progress,
 * so this function acquires/releases it itself.
 */
void
vm_shared_region_undo_mappings(
	vm_map_t                sr_map,
	mach_vm_offset_t        sr_base_address,
	struct _sr_file_mappings *srf_mappings,
	struct _sr_file_mappings *srf_mappings_current,
	unsigned int            srf_current_mappings_count)
{
	unsigned int            j = 0;
	vm_shared_region_t      shared_region = NULL;
	boolean_t               reset_shared_region_state = FALSE;
	struct _sr_file_mappings *srfmp;
	unsigned int            mappings_count;
	struct shared_file_mapping_slide_np *mappings;

	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		printf("Failed to undo mappings because of NULL shared region.\n");
		return;
	}

	/* the region no longer has a valid "first mapping" */
	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;

	if (sr_map == NULL) {
		ipc_port_t              sr_handle;
		vm_named_entry_t        sr_mem_entry;

		vm_shared_region_lock();
		assert(shared_region->sr_ref_count > 0);

		while (shared_region->sr_mapping_in_progress) {
			/* wait for our turn... */
			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
			    THREAD_UNINT);
		}
		assert(!shared_region->sr_mapping_in_progress);
		assert(shared_region->sr_ref_count > 0);
		/* let others know we're working in this shared region */
		shared_region->sr_mapping_in_progress = current_thread();

		vm_shared_region_unlock();

		/* remember to release sr_mapping_in_progress on the way out */
		reset_shared_region_state = TRUE;

		/* no need to lock because this data is never modified... */
		sr_handle = shared_region->sr_mem_entry;
		sr_mem_entry = mach_memory_entry_from_port(sr_handle);
		sr_map = sr_mem_entry->backing.map;
		sr_base_address = shared_region->sr_base_address;
	}
	/*
	 * Undo the mappings we've established so far.
	 */
	for (srfmp = &srf_mappings[0];
	    srfmp <= srf_mappings_current;
	    srfmp++) {
		mappings = srfmp->mappings;
		mappings_count = srfmp->mappings_count;
		if (srfmp == srf_mappings_current) {
			/* the last file was only partially mapped */
			mappings_count = srf_current_mappings_count;
		}

		for (j = 0; j < mappings_count; j++) {
			kern_return_t kr2;
			mach_vm_offset_t start, end;

			if (mappings[j].sms_size == 0) {
				/*
				 * We didn't establish this
				 * mapping, so nothing to undo.
				 */
				continue;
			}
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[%d]: "
				"address:0x%016llx "
				"size:0x%016llx "
				"offset:0x%016llx "
				"maxprot:0x%x prot:0x%x: "
				"undoing...\n",
				j,
				(long long)mappings[j].sms_address,
				(long long)mappings[j].sms_size,
				(long long)mappings[j].sms_file_offset,
				mappings[j].sms_max_prot,
				mappings[j].sms_init_prot));
			/* mapping addresses are relative to the region base */
			start = (mappings[j].sms_address - sr_base_address);
			end = start + mappings[j].sms_size;
			start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
			end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
			kr2 = vm_map_remove_guard(sr_map,
			    start,
			    end,
			    VM_MAP_REMOVE_IMMUTABLE,
			    KMEM_GUARD_NONE).kmr_return;
			assert(kr2 == KERN_SUCCESS);
		}
	}

	if (reset_shared_region_state) {
		vm_shared_region_lock();
		assert(shared_region->sr_ref_count > 0);
		assert(shared_region->sr_mapping_in_progress == current_thread());
		/* we're done working on that shared region */
		shared_region->sr_mapping_in_progress = THREAD_NULL;
		vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
		vm_shared_region_unlock();
		reset_shared_region_state = FALSE;
	}

	/* drop the reference taken by vm_shared_region_get() above */
	vm_shared_region_deallocate(shared_region);
}
1425
1426 /*
1427 * First part of vm_shared_region_map_file(). Split out to
1428 * avoid kernel stack overflow.
1429 */
__attribute__((noinline))
static kern_return_t
vm_shared_region_map_file_setup(
	vm_shared_region_t      shared_region,
	int                     sr_file_mappings_count,
	struct _sr_file_mappings *sr_file_mappings,
	unsigned int            *mappings_to_slide_cnt,
	struct shared_file_mapping_slide_np **mappings_to_slide,
	mach_vm_offset_t        *slid_mappings,
	memory_object_control_t *slid_file_controls,
	mach_vm_offset_t        *sfm_min_address,
	mach_vm_offset_t        *sfm_max_address,
	vm_map_t                *sr_map_ptr,
	vm_map_offset_t         *lowest_unnestable_addr_ptr,
	unsigned int            vmsr_num_slides)
{
	kern_return_t           kr = KERN_SUCCESS;
	memory_object_control_t file_control;
	vm_object_t             file_object;
	ipc_port_t              sr_handle;
	vm_named_entry_t        sr_mem_entry;
	vm_map_t                sr_map;
	mach_vm_offset_t        sr_base_address;
	unsigned int            i = 0;
	mach_port_t             map_port;
	vm_map_offset_t         target_address;
	vm_object_t             object;
	vm_object_size_t        obj_size;
	vm_map_offset_t         lowest_unnestable_addr = 0;
	vm_map_kernel_flags_t   vmk_flags;
	mach_vm_offset_t        sfm_end;
	uint32_t                mappings_count;
	struct shared_file_mapping_slide_np *mappings;
	struct _sr_file_mappings *srfmp;

	vm_shared_region_lock();
	assert(shared_region->sr_ref_count > 0);

	/*
	 * Make sure we handle only one mapping at a time in a given
	 * shared region, to avoid race conditions. This should not
	 * happen frequently...
	 */
	while (shared_region->sr_mapping_in_progress) {
		/* wait for our turn... */
		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
		    THREAD_UNINT);
	}
	assert(!shared_region->sr_mapping_in_progress);
	assert(shared_region->sr_ref_count > 0);


	/* let others know we're working in this shared region */
	shared_region->sr_mapping_in_progress = current_thread();

	/*
	 * Did someone race in and map this shared region already?
	 */
	if (shared_region->sr_first_mapping != -1) {
		vm_shared_region_unlock();
#if DEVELOPMENT || DEBUG
		printf("shared_region: caught race in map and slide\n");
#endif /* DEVELOPMENT || DEBUG */
		return KERN_FAILURE;
	}

	vm_shared_region_unlock();

	/* no need to lock because this data is never modified... */
	sr_handle = shared_region->sr_mem_entry;
	sr_mem_entry = mach_memory_entry_from_port(sr_handle);
	sr_map = sr_mem_entry->backing.map;
	sr_base_address = shared_region->sr_base_address;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> map(%p)\n",
		(void *)VM_KERNEL_ADDRPERM(shared_region)));

	mappings_count = 0;
	mappings = NULL;
	srfmp = NULL;

	/* process all the files to be mapped */
	for (srfmp = &sr_file_mappings[0];
	    srfmp < &sr_file_mappings[sr_file_mappings_count];
	    srfmp++) {
		mappings_count = srfmp->mappings_count;
		mappings = srfmp->mappings;
		file_control = srfmp->file_control;

		if (mappings_count == 0) {
			/* no mappings here... */
			continue;
		}

		/*
		 * The code below can only correctly "slide" (perform relocations) for one
		 * value of the slide amount. So if a file has a non-zero slide, it has to
		 * match any previous value. A zero slide value is ok for things that are
		 * just directly mapped.
		 */
		if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
			/* first non-zero slide seen: record it for the region */
			shared_region->sr_slide = srfmp->slide;
		} else if (shared_region->sr_slide != 0 &&
		    srfmp->slide != 0 &&
		    shared_region->sr_slide != srfmp->slide) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: more than 1 non-zero slide value amount "
				"slide 1:0x%x slide 2:0x%x\n ",
				shared_region->sr_slide, srfmp->slide));
			kr = KERN_INVALID_ARGUMENT;
			break;
		}

#if __arm64__
		/* 16KB-page environments require a 16KB-aligned slide */
		if ((shared_region->sr_64bit ||
		    page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
		    ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
			printf("FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
			    __FUNCTION__, srfmp->slide);
			kr = KERN_INVALID_ARGUMENT;
			break;
		}
#endif /* __arm64__ */

		/*
		 * An FD of -1 means we need to copyin the data to an anonymous object.
		 */
		if (srfmp->fd == -1) {
			assert(mappings_count == 1);
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[0]: "
				"address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
				"maxprot:0x%x prot:0x%x fd==-1\n",
				(long long)mappings[0].sms_address,
				(long long)mappings[0].sms_size,
				(long long)mappings[0].sms_file_offset,
				mappings[0].sms_max_prot,
				mappings[0].sms_init_prot));

			/*
			 * We need an anon object to hold the data in the shared region.
			 * The size needs to be suitable to map into kernel.
			 */
			obj_size = vm_object_round_page(mappings->sms_size);
			object = vm_object_allocate(obj_size);
			if (object == VM_OBJECT_NULL) {
				printf("%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
				kr = KERN_RESOURCE_SHORTAGE;
				break;
			}

			/*
			 * map the object into the kernel
			 */
			vm_map_offset_t kaddr = 0;
			vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;

			kr = vm_map_enter(kernel_map,
			    &kaddr,
			    obj_size,
			    0,
			    vmk_flags,
			    object,
			    0,
			    FALSE,
			    (VM_PROT_READ | VM_PROT_WRITE),
			    (VM_PROT_READ | VM_PROT_WRITE),
			    VM_INHERIT_NONE);
			if (kr != KERN_SUCCESS) {
				printf("%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
				vm_object_deallocate(object);
				object = VM_OBJECT_NULL;
				break;
			}

			/*
			 * We'll need another reference to keep the object alive after
			 * we vm_map_remove() it from the kernel.
			 */
			vm_object_reference(object);

			/*
			 * Zero out the object's pages, so we can't leak data.
			 */
			bzero((void *)kaddr, obj_size);

			/*
			 * Copyin the data from dyld to the new object.
			 * Then remove the kernel mapping.
			 */
			int copyin_err =
			    copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
			vm_map_remove(kernel_map, kaddr, kaddr + obj_size);
			if (copyin_err) {
				printf("%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
				/* translate the BSD errno into a kern_return_t */
				switch (copyin_err) {
				case EPERM:
				case EACCES:
					kr = KERN_PROTECTION_FAILURE;
					break;
				case EFAULT:
					kr = KERN_INVALID_ADDRESS;
					break;
				default:
					kr = KERN_FAILURE;
					break;
				}
				vm_object_deallocate(object);
				object = VM_OBJECT_NULL;
				break;
			}

			/*
			 * Finally map the object into the shared region.
			 */
			target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
			vmk_flags.vmkf_already = TRUE;
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmf_permanent = shared_region_make_permanent(shared_region,
			    mappings[0].sms_max_prot);

			kr = vm_map_enter(
				sr_map,
				&target_address,
				vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
				0,
				vmk_flags,
				object,
				0,
				TRUE,
				mappings[0].sms_init_prot & VM_PROT_ALL,
				mappings[0].sms_max_prot & VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
			if (kr != KERN_SUCCESS) {
				printf("%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
				vm_object_deallocate(object);
				break;
			}

			/* track the overall [min, max) range covered by mappings */
			if (mappings[0].sms_address < *sfm_min_address) {
				*sfm_min_address = mappings[0].sms_address;
			}

			if (os_add_overflow(mappings[0].sms_address,
			    mappings[0].sms_size,
			    &sfm_end) ||
			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
			    mappings[0].sms_address)) {
				/* overflow */
				kr = KERN_INVALID_ARGUMENT;
				break;
			}

			if (sfm_end > *sfm_max_address) {
				*sfm_max_address = sfm_end;
			}

			continue;
		}

		/* get the VM object associated with the file to be mapped */
		file_object = memory_object_control_to_vm_object(file_control);
		assert(file_object);

		/* tag the backing object as shared-cache (idempotent) */
		if (!file_object->object_is_shared_cache) {
			vm_object_lock(file_object);
			file_object->object_is_shared_cache = true;
			vm_object_unlock(file_object);
		}

#if CONFIG_SECLUDED_MEMORY
		/*
		 * Camera will need the shared cache, so don't put the pages
		 * on the secluded queue, assume that's the primary region.
		 * Also keep DEXT shared cache pages off secluded.
		 */
		if (primary_system_shared_region == NULL ||
		    primary_system_shared_region == shared_region ||
		    shared_region->sr_driverkit) {
			memory_object_mark_eligible_for_secluded(file_control, FALSE);
		}
#endif /* CONFIG_SECLUDED_MEMORY */

		/* establish the mappings for that file */
		for (i = 0; i < mappings_count; i++) {
			SHARED_REGION_TRACE_INFO(
				("shared_region: mapping[%d]: "
				"address:0x%016llx size:0x%016llx offset:0x%016llx "
				"maxprot:0x%x prot:0x%x\n",
				i,
				(long long)mappings[i].sms_address,
				(long long)mappings[i].sms_size,
				(long long)mappings[i].sms_file_offset,
				mappings[i].sms_max_prot,
				mappings[i].sms_init_prot));

			if (mappings[i].sms_address < *sfm_min_address) {
				*sfm_min_address = mappings[i].sms_address;
			}

			if (os_add_overflow(mappings[i].sms_address,
			    mappings[i].sms_size,
			    &sfm_end) ||
			    (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
			    mappings[i].sms_address)) {
				/* overflow */
				kr = KERN_INVALID_ARGUMENT;
				break;
			}

			if (sfm_end > *sfm_max_address) {
				*sfm_max_address = sfm_end;
			}

			if (mappings[i].sms_init_prot & VM_PROT_ZF) {
				/* zero-filled memory */
				map_port = MACH_PORT_NULL;
			} else {
				/* file-backed memory */
				__IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
			}

			/*
			 * Remember which mappings need sliding.
			 */
			if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
				if (*mappings_to_slide_cnt == vmsr_num_slides) {
					/* capacity exceeded: log and skip, don't fail */
					SHARED_REGION_TRACE_INFO(
						("shared_region: mapping[%d]: "
						"address:0x%016llx size:0x%016llx "
						"offset:0x%016llx "
						"maxprot:0x%x prot:0x%x "
						"too many mappings to slide...\n",
						i,
						(long long)mappings[i].sms_address,
						(long long)mappings[i].sms_size,
						(long long)mappings[i].sms_file_offset,
						mappings[i].sms_max_prot,
						mappings[i].sms_init_prot));
				} else {
					mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
					*mappings_to_slide_cnt += 1;
				}
			}

			/* mapping's address is relative to the shared region base */
			target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);

			vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
			vmk_flags.vmkf_already = TRUE;
			/* no copy-on-read for mapped binaries */
			vmk_flags.vmkf_no_copy_on_read = 1;
			vmk_flags.vmf_permanent = shared_region_make_permanent(
				shared_region,
				mappings[i].sms_max_prot);
			vmk_flags.vmf_tpro = shared_region_tpro_protect(
				shared_region,
				mappings[i].sms_max_prot);

			/* establish that mapping, OK if it's "already" there */
			if (map_port == MACH_PORT_NULL) {
				/*
				 * We want to map some anonymous memory in a shared region.
				 * We have to create the VM object now, so that it can be mapped "copy-on-write".
				 */
				obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
				object = vm_object_allocate(obj_size);
				if (object == VM_OBJECT_NULL) {
					kr = KERN_RESOURCE_SHORTAGE;
				} else {
					kr = vm_map_enter(
						sr_map,
						&target_address,
						vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
						0,
						vmk_flags,
						object,
						0,
						TRUE,
						mappings[i].sms_init_prot & VM_PROT_ALL,
						mappings[i].sms_max_prot & VM_PROT_ALL,
						VM_INHERIT_DEFAULT);
				}
			} else {
				object = VM_OBJECT_NULL; /* no anonymous memory here */
				kr = vm_map_enter_mem_object(
					sr_map,
					&target_address,
					vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
					0,
					vmk_flags,
					map_port,
					mappings[i].sms_file_offset,
					TRUE,
					mappings[i].sms_init_prot & VM_PROT_ALL,
					mappings[i].sms_max_prot & VM_PROT_ALL,
					VM_INHERIT_DEFAULT);
			}

			if (kr == KERN_SUCCESS) {
				/*
				 * Record the first successful mapping(s) in the shared
				 * region by file. We're protected by "sr_mapping_in_progress"
				 * here, so no need to lock "shared_region".
				 *
				 * Note that if we have an AOT shared cache (ARM) for a
				 * translated task, then it's always the first file.
				 * The original "native" (i.e. x86) shared cache is the
				 * second file.
				 */

				if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
					shared_region->sr_first_mapping = target_address;
				}

				/*
				 * If this mapping was just queued for sliding, record
				 * where it actually landed and its file control.
				 */
				if (*mappings_to_slide_cnt > 0 &&
				    mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
					slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
					slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
				}

				/*
				 * Record the lowest writable address in this
				 * sub map, to log any unexpected unnesting below
				 * that address (see log_unnest_badness()).
				 */
				if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
				    sr_map->is_nested_map &&
				    (lowest_unnestable_addr == 0 ||
				    (target_address < lowest_unnestable_addr))) {
					lowest_unnestable_addr = target_address;
				}
			} else {
				if (map_port == MACH_PORT_NULL) {
					/*
					 * Get rid of the VM object we just created
					 * but failed to map.
					 */
					vm_object_deallocate(object);
					object = VM_OBJECT_NULL;
				}
				if (kr == KERN_MEMORY_PRESENT) {
					/*
					 * This exact mapping was already there:
					 * that's fine.
					 */
					SHARED_REGION_TRACE_INFO(
						("shared_region: mapping[%d]: "
						"address:0x%016llx size:0x%016llx "
						"offset:0x%016llx "
						"maxprot:0x%x prot:0x%x "
						"already mapped...\n",
						i,
						(long long)mappings[i].sms_address,
						(long long)mappings[i].sms_size,
						(long long)mappings[i].sms_file_offset,
						mappings[i].sms_max_prot,
						mappings[i].sms_init_prot));
					/*
					 * We didn't establish this mapping ourselves;
					 * let's reset its size, so that we do not
					 * attempt to undo it if an error occurs later.
					 */
					mappings[i].sms_size = 0;
					kr = KERN_SUCCESS;
				} else {
					break;
				}
			}
		}

		if (kr != KERN_SUCCESS) {
			break;
		}
	}

	if (kr != KERN_SUCCESS) {
		/* the last mapping we tried (mappings[i]) failed ! */
		assert(i < mappings_count);
		SHARED_REGION_TRACE_ERROR(
			("shared_region: mapping[%d]: "
			"address:0x%016llx size:0x%016llx "
			"offset:0x%016llx "
			"maxprot:0x%x prot:0x%x failed 0x%x\n",
			i,
			(long long)mappings[i].sms_address,
			(long long)mappings[i].sms_size,
			(long long)mappings[i].sms_file_offset,
			mappings[i].sms_max_prot,
			mappings[i].sms_init_prot,
			kr));

		/*
		 * Respect the design of vm_shared_region_undo_mappings
		 * as we are holding the sr_mapping_in_progress here.
		 * So don't allow sr_map == NULL otherwise vm_shared_region_undo_mappings
		 * will be blocked at waiting sr_mapping_in_progress to be NULL.
		 */
		assert(sr_map != NULL);
		/* undo all the previous mappings */
		vm_shared_region_undo_mappings(sr_map, sr_base_address, sr_file_mappings, srfmp, i);
		return kr;
	}

	/* success: hand results back to vm_shared_region_map_file() */
	*lowest_unnestable_addr_ptr = lowest_unnestable_addr;
	*sr_map_ptr = sr_map;
	return KERN_SUCCESS;
}
1942
/* forward declaration */
1944 __attribute__((noinline))
1945 static void
1946 vm_shared_region_map_file_final(
1947 vm_shared_region_t shared_region,
1948 vm_map_t sr_map,
1949 mach_vm_offset_t sfm_min_address,
1950 mach_vm_offset_t sfm_max_address);
1951
/*
 * Establish some mappings of a file in the shared region.
 * This is used by "dyld" via the shared_region_map_np() system call
 * to populate the shared region with the appropriate shared cache.
 *
 * One could also call it several times to incrementally load several
 * libraries, as long as they do not overlap.
 * It will return KERN_SUCCESS if the mappings were successfully established
 * or if they were already established identically by another process.
 */
__attribute__((noinline))
kern_return_t
vm_shared_region_map_file(
	vm_shared_region_t shared_region,
	int sr_file_mappings_count,
	struct _sr_file_mappings *sr_file_mappings)
{
	kern_return_t kr = KERN_SUCCESS;
	unsigned int i;
	unsigned int mappings_to_slide_cnt = 0;
	/* bounds of everything mapped by this call, used to trim pmaps later */
	mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1;
	mach_vm_offset_t sfm_max_address = 0;
	vm_map_t sr_map = NULL;
	vm_map_offset_t lowest_unnestable_addr = 0;
	unsigned int vmsr_num_slides = 0;
	typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
	/* per-slide arrays, filled in by vm_shared_region_map_file_setup() */
	slid_mappings_t *slid_mappings = NULL;                          /* [0..vmsr_num_slides] */
	memory_object_control_t *slid_file_controls = NULL;             /* [0..vmsr_num_slides] */
	struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
	struct _sr_file_mappings *srfmp;

	/*
	 * Figure out how many of the mappings have slides.
	 */
	for (srfmp = &sr_file_mappings[0];
	    srfmp < &sr_file_mappings[sr_file_mappings_count];
	    srfmp++) {
		for (i = 0; i < srfmp->mappings_count; ++i) {
			if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
				++vmsr_num_slides;
			}
		}
	}

	/* Allocate per slide data structures */
	if (vmsr_num_slides > 0) {
		slid_mappings =
		    kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
		slid_file_controls =
		    kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
		mappings_to_slide =
		    kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
	}

	/*
	 * First pass: install direct mappings of the shared cache file(s)
	 * and record which of them will need to be slid.  On failure, we
	 * re-take the shared region lock so the "done:" cleanup below runs
	 * with the lock held on every path.
	 */
	kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
	    &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
	    &sfm_min_address, &sfm_max_address, &sr_map, &lowest_unnestable_addr, vmsr_num_slides);
	if (kr != KERN_SUCCESS) {
		vm_shared_region_lock();
		goto done;
	}
	assert(vmsr_num_slides == mappings_to_slide_cnt);

	/*
	 * The call above installed direct mappings to the shared cache file.
	 * Now we go back and overwrite the mappings that need relocation
	 * with a special shared region pager.
	 *
	 * Note that this does copyin() of data, needed by the pager, which
	 * the previous code just established mappings for. This is why we
	 * do it in a separate pass.
	 */
#if __has_feature(ptrauth_calls)
	/*
	 * need to allocate storage needed for any sr_auth_sections
	 */
	for (i = 0; i < mappings_to_slide_cnt; ++i) {
		if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
		    shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
		    !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
			++shared_region->sr_num_auth_section;
		}
	}
	if (shared_region->sr_num_auth_section > 0) {
		shared_region->sr_auth_section =
		    kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
		    Z_WAITOK | Z_ZERO);
	}
#endif /* __has_feature(ptrauth_calls) */
	for (i = 0; i < mappings_to_slide_cnt; ++i) {
		kr = vm_shared_region_slide(shared_region->sr_slide,
		    mappings_to_slide[i]->sms_file_offset,
		    mappings_to_slide[i]->sms_size,
		    mappings_to_slide[i]->sms_slide_start,
		    mappings_to_slide[i]->sms_slide_size,
		    slid_mappings[i],
		    slid_file_controls[i],
		    mappings_to_slide[i]->sms_max_prot);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: region_slide("
				"slide:0x%x start:0x%016llx "
				"size:0x%016llx) failed 0x%x\n",
				shared_region->sr_slide,
				(long long)mappings_to_slide[i]->sms_slide_start,
				(long long)mappings_to_slide[i]->sms_slide_size,
				kr));
			/* undo everything established by the first pass */
			vm_shared_region_undo_mappings(sr_map, shared_region->sr_base_address,
			    &sr_file_mappings[0],
			    &sr_file_mappings[sr_file_mappings_count - 1],
			    sr_file_mappings_count);
			vm_shared_region_lock();
			goto done;
		}
	}

	assert(kr == KERN_SUCCESS);

	/* adjust the map's "lowest_unnestable_start" */
	lowest_unnestable_addr &= ~(pmap_shared_region_size_min(sr_map->pmap) - 1);
	if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
		vm_map_lock(sr_map);
		sr_map->lowest_unnestable_start = lowest_unnestable_addr;
		vm_map_unlock(sr_map);
	}

	vm_shared_region_lock();
	assert(shared_region->sr_ref_count > 0);
	assert(shared_region->sr_mapping_in_progress == current_thread());

	vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);

done:
	/*
	 * We're done working on that shared region.
	 * Wake up any waiting threads.
	 * (The shared region lock is held on every path reaching here.)
	 */
	assert(shared_region->sr_mapping_in_progress == current_thread());
	shared_region->sr_mapping_in_progress = THREAD_NULL;
	vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
	vm_shared_region_unlock();

#if __has_feature(ptrauth_calls)
	if (kr == KERN_SUCCESS) {
		/*
		 * Since authenticated mappings were just added to the shared region,
		 * go back and remap them into private mappings for this task.
		 */
		kr = vm_shared_region_auth_remap(shared_region);
	}
#endif /* __has_feature(ptrauth_calls) */

	/* Cache shared region info needed for telemetry in the task */
	task_t task;
	if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
		mach_vm_offset_t start_address;
		(void)vm_shared_region_start_address(shared_region, &start_address, task);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: map(%p) <- 0x%x \n",
		(void *)VM_KERNEL_ADDRPERM(shared_region), kr));
	if (vmsr_num_slides > 0) {
		kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
		kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
		kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
		    mappings_to_slide);
	}
	return kr;
}
2122
/*
 * Final part of vm_shared_region_map_file().
 * Kept in separate function to avoid blowing out the stack.
 *
 * Copies the shared cache header out of the now-mapped cache to record
 * its UUID in the shared region; when called by pid 1 (launchd), also
 * captures the cache's image-text layout for stackshot use, and finally
 * trims the current task's pmap to the mapped range.
 *
 * Called with the shared region lock held (see caller).
 */
__attribute__((noinline))
static void
vm_shared_region_map_file_final(
	vm_shared_region_t shared_region,
	vm_map_t sr_map __unused,
	mach_vm_offset_t sfm_min_address __unused,
	mach_vm_offset_t sfm_max_address __unused)
{
	struct _dyld_cache_header sr_cache_header;
	int error;
	size_t image_array_length;
	struct _dyld_cache_image_text_info *sr_image_layout;
	/* only becomes TRUE if the header is freshly copied in below */
	boolean_t locally_built = FALSE;


	/*
	 * copy in the shared region UUID to the shared region structure.
	 * we do this indirectly by first copying in the shared cache header
	 * and then copying the UUID from there because we'll need to look
	 * at other content from the shared cache header.
	 */
	if (!shared_region->sr_uuid_copied) {
		error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
		    (char *)&sr_cache_header,
		    sizeof(sr_cache_header));
		if (error == 0) {
			memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
			shared_region->sr_uuid_copied = TRUE;
			locally_built = sr_cache_header.locallyBuiltCache;
		} else {
#if DEVELOPMENT || DEBUG
			panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
			    "offset:0 size:0x%016llx) failed with %d\n",
			    (long long)shared_region->sr_base_address,
			    (long long)shared_region->sr_first_mapping,
			    (long long)sizeof(sr_cache_header),
			    error);
#endif /* DEVELOPMENT || DEBUG */
			shared_region->sr_uuid_copied = FALSE;
		}
	}

	/*
	 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by
	 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
	 * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
	 * region. In that case, launchd re-exec's itself, so we may go through this path multiple times. We
	 * let the most recent one win.
	 *
	 * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
	 */
	bool is_init_task = (task_pid(current_task()) == 1);
	if (shared_region->sr_uuid_copied && is_init_task) {
		/* Copy in the shared cache layout if we're running with a locally built shared cache */
		if (locally_built) {
			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
			/*
			 * NOTE(review): imagesTextCount comes from the user-mapped
			 * cache header, and the kalloc_data() result is not checked
			 * for NULL before the copyin below -- presumably acceptable
			 * because only pid 1 reaches this path; confirm.
			 */
			image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
			sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
			error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
			    sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
			if (error == 0) {
				if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
					panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
				}
				/* convert the layout entries into the (UUID, load address) records stackshot wants */
				shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
				for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
					memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
					    sizeof(shared_region->sr_images[index].imageUUID));
					shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
				}

				shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
			} else {
#if DEVELOPMENT || DEBUG
				panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
				    "offset:0x%016llx size:0x%016llx) failed with %d\n",
				    (long long)shared_region->sr_base_address,
				    (long long)shared_region->sr_first_mapping,
				    (long long)sr_cache_header.imagesTextOffset,
				    (long long)image_array_length,
				    error);
#endif /* DEVELOPMENT || DEBUG */
			}
			KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
			/* the raw layout is no longer needed once converted */
			kfree_data(sr_image_layout, image_array_length);
			sr_image_layout = NULL;
		}
		/* most recent pid-1 shared region wins (userspace reboot) */
		primary_system_shared_region = shared_region;
	}

	/*
	 * If we succeeded, we know the bounds of the shared region.
	 * Trim our pmaps to only cover this range (if applicable to
	 * this platform).
	 */
	if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(sr_map)) {
		pmap_trim(current_map()->pmap, sr_map->pmap, sfm_min_address, sfm_max_address - sfm_min_address);
	}
}
2226
2227 /*
2228 * Retrieve a task's shared region and grab an extra reference to
2229 * make sure it doesn't disappear while the caller is using it.
2230 * The caller is responsible for consuming that extra reference if
2231 * necessary.
2232 *
2233 * This also tries to trim the pmap for the shared region.
2234 */
2235 vm_shared_region_t
vm_shared_region_trim_and_get(task_t task)2236 vm_shared_region_trim_and_get(task_t task)
2237 {
2238 vm_shared_region_t shared_region;
2239 ipc_port_t sr_handle;
2240 vm_named_entry_t sr_mem_entry;
2241 vm_map_t sr_map;
2242
2243 /* Get the shared region and the map. */
2244 shared_region = vm_shared_region_get(task);
2245 if (shared_region == NULL) {
2246 return NULL;
2247 }
2248
2249 sr_handle = shared_region->sr_mem_entry;
2250 sr_mem_entry = mach_memory_entry_from_port(sr_handle);
2251 sr_map = sr_mem_entry->backing.map;
2252
2253 /* Trim the pmap if possible. */
2254 if (VM_MAP_PAGE_SHIFT(task->map) == VM_MAP_PAGE_SHIFT(sr_map)) {
2255 pmap_trim(task->map->pmap, sr_map->pmap, 0, 0);
2256 }
2257
2258 return shared_region;
2259 }
2260
/*
 * Enter the appropriate shared region into "map" for "task".
 * This involves looking up the shared region (and possibly creating a new
 * one) for the desired environment, then mapping the VM sub map into the
 * task's VM "map", with the appropriate level of pmap-nesting.
 *
 * The sub map is entered in up to three pieces:
 *   1. any range before the pmap-nesting window (no nesting),
 *   2. the pmap-nesting window itself (vmkf_nested_pmap),
 *   3. any remaining tail (no nesting).
 * On most architectures the nesting window covers the whole region, so
 * only piece 2 is mapped.
 */
kern_return_t
vm_shared_region_enter(
	struct _vm_map *map,
	struct task *task,
	boolean_t is_64bit,
	void *fsroot,
	cpu_type_t cpu,
	cpu_subtype_t cpu_subtype,
	boolean_t reslide,
	boolean_t is_driverkit,
	uint32_t rsr_version)
{
	kern_return_t kr;
	vm_shared_region_t shared_region;
	vm_map_offset_t sr_address, sr_offset, target_address;
	vm_map_size_t sr_size, mapping_size;
	vm_map_offset_t sr_pmap_nesting_start;
	vm_map_size_t sr_pmap_nesting_size;
	ipc_port_t sr_handle;
	vm_prot_t cur_prot, max_prot;
	vm_map_kernel_flags_t vmk_flags;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> "
		"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu, cpu_subtype, is_64bit, is_driverkit));

	/* lookup (create if needed) the shared region for this environment */
	/* this takes a reference that is either given to the task or dropped below */
	shared_region = vm_shared_region_lookup(fsroot, cpu, cpu_subtype, is_64bit, VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
	if (shared_region == NULL) {
		/* this should not happen ! */
		SHARED_REGION_TRACE_ERROR(
			("shared_region: -> "
			"enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
			"lookup failed !\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
		//panic("shared_region_enter: lookup failed");
		return KERN_FAILURE;
	}

	kr = KERN_SUCCESS;
	/* no need to lock since this data is never modified */
	sr_address = (vm_map_offset_t)shared_region->sr_base_address;
	sr_size = (vm_map_size_t)shared_region->sr_size;
	sr_handle = shared_region->sr_mem_entry;
	sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
	sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();

	cur_prot = VM_PROT_READ;
	if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
		/*
		 * XXX BINARY COMPATIBILITY
		 * java6 apparently needs to modify some code in the
		 * dyld shared cache and needs to be allowed to add
		 * write access...
		 */
		max_prot = VM_PROT_ALL;
	} else {
		max_prot = VM_PROT_READ;
		/* make it "permanent" to protect against re-mappings */
		vmk_flags.vmf_permanent = true;
	}

	/*
	 * Start mapping the shared region's VM sub map into the task's VM map.
	 */
	sr_offset = 0;

	if (sr_pmap_nesting_start > sr_address) {
		/* we need to map a range without pmap-nesting first */
		target_address = sr_address;
		mapping_size = sr_pmap_nesting_start - sr_address;
		kr = vm_map_enter_mem_object(
			map,
			&target_address,
			mapping_size,
			0,
			vmk_flags,
			sr_handle,
			sr_offset,
			TRUE,
			cur_prot,
			max_prot,
			VM_INHERIT_SHARE);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(void *)VM_KERNEL_ADDRPERM(task),
				(void *)VM_KERNEL_ADDRPERM(fsroot),
				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
				(long long)target_address,
				(long long)mapping_size,
				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
			goto done;
		}
		SHARED_REGION_TRACE_DEBUG(
			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)target_address, (long long)mapping_size,
			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
		sr_offset += mapping_size;
		sr_size -= mapping_size;
	}

	/* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
	vmk_flags.vmkf_nested_pmap = true;
	vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;

	/*
	 * Use pmap-nesting to map the majority of the shared region into the task's
	 * VM space. Very rarely will architectures have a shared region that isn't
	 * the same size as the pmap-nesting region, or start at a different address
	 * than the pmap-nesting region, so this code will map the entirety of the
	 * shared region for most architectures.
	 */
	assert((sr_address + sr_offset) == sr_pmap_nesting_start);
	target_address = sr_pmap_nesting_start;
	kr = vm_map_enter_mem_object(
		map,
		&target_address,
		sr_pmap_nesting_size,
		0,
		vmk_flags,
		sr_handle,
		sr_offset,
		TRUE,
		cur_prot,
		max_prot,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)target_address,
			(long long)sr_pmap_nesting_size,
			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
		goto done;
	}
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
		"nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
		(long long)target_address, (long long)sr_pmap_nesting_size,
		(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));

	sr_offset += sr_pmap_nesting_size;
	sr_size -= sr_pmap_nesting_size;

	if (sr_size > 0) {
		/* and there's some left to be mapped without pmap-nesting */
		vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
		target_address = sr_address + sr_offset;
		mapping_size = sr_size;
		kr = vm_map_enter_mem_object(
			map,
			&target_address,
			mapping_size,
			0,
			VM_MAP_KERNEL_FLAGS_FIXED(),
			sr_handle,
			sr_offset,
			TRUE,
			cur_prot,
			max_prot,
			VM_INHERIT_SHARE);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
				"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(void *)VM_KERNEL_ADDRPERM(task),
				(void *)VM_KERNEL_ADDRPERM(fsroot),
				cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
				(long long)target_address,
				(long long)mapping_size,
				(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
			goto done;
		}
		SHARED_REGION_TRACE_DEBUG(
			("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
			"vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(void *)VM_KERNEL_ADDRPERM(task),
			(void *)VM_KERNEL_ADDRPERM(fsroot),
			cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
			(long long)target_address, (long long)mapping_size,
			(void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
		sr_offset += mapping_size;
		sr_size -= mapping_size;
	}
	assert(sr_size == 0);

done:
	if (kr == KERN_SUCCESS) {
		/* let the task use that shared region */
		vm_shared_region_set(task, shared_region);
	} else {
		/* drop our reference since we're not using it */
		vm_shared_region_deallocate(shared_region);
		vm_shared_region_set(task, NULL);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
		kr));
	return kr;
}
2498
2499 void
vm_shared_region_remove(task_t task,vm_shared_region_t sr)2500 vm_shared_region_remove(
2501 task_t task,
2502 vm_shared_region_t sr)
2503 {
2504 vm_map_t map;
2505 mach_vm_offset_t start;
2506 mach_vm_size_t size;
2507 vm_map_kernel_flags_t vmk_flags;
2508 kern_return_t kr;
2509
2510 if (sr == NULL) {
2511 return;
2512 }
2513 map = get_task_map(task);
2514 start = sr->sr_base_address;
2515 size = sr->sr_size;
2516
2517 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2518 vmk_flags.vmkf_overwrite_immutable = true;
2519 vmk_flags.vm_tag = VM_MEMORY_DYLD;
2520
2521 /* range_id is set by mach_vm_map_kernel */
2522 kr = mach_vm_map_kernel(map,
2523 &start,
2524 size,
2525 0, /* mask */
2526 vmk_flags,
2527 MACH_PORT_NULL,
2528 0,
2529 FALSE, /* copy */
2530 VM_PROT_NONE,
2531 VM_PROT_NONE,
2532 VM_INHERIT_DEFAULT);
2533 if (kr != KERN_SUCCESS) {
2534 printf("%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2535 }
2536 }
2537
2538 #define SANE_SLIDE_INFO_SIZE (2560*1024) /*Can be changed if needed*/
2539
2540 kern_return_t
vm_shared_region_sliding_valid(uint32_t slide)2541 vm_shared_region_sliding_valid(uint32_t slide)
2542 {
2543 kern_return_t kr = KERN_SUCCESS;
2544 vm_shared_region_t sr = vm_shared_region_get(current_task());
2545
2546 /* No region yet? we're fine. */
2547 if (sr == NULL) {
2548 return kr;
2549 }
2550
2551 if (sr->sr_slide != 0 && slide != 0) {
2552 if (slide == sr->sr_slide) {
2553 /*
2554 * Request for sliding when we've
2555 * already done it with exactly the
2556 * same slide value before.
2557 * This isn't wrong technically but
2558 * we don't want to slide again and
2559 * so we return this value.
2560 */
2561 kr = KERN_INVALID_ARGUMENT;
2562 } else {
2563 printf("Mismatched shared region slide\n");
2564 kr = KERN_FAILURE;
2565 }
2566 }
2567 vm_shared_region_deallocate(sr);
2568 return kr;
2569 }
2570
/*
 * Actually create (really overwrite) the mapping to part of the shared cache which
 * undergoes relocation. This routine reads in the relocation info from dyld and
 * verifies it. It then creates a (or finds a matching) shared region pager which
 * handles the actual modification of the page contents and installs the mapping
 * using that pager.
 *
 * sr                shared region being slid; sr_slide_in_progress must be set
 * slide_info_addr   user-space address of dyld's relocation info
 * slide_info_size   size of that info (bounds-checked below)
 * start             file offset of the mapping to slide
 * size              size of the mapping to slide
 * slid_mapping      offset of that mapping within the shared region
 * slide             slide amount
 * sr_file_control   pager control for the shared cache file
 * prot              protections of the slid mapping (ptrauth decisions)
 */
kern_return_t
vm_shared_region_slide_mapping(
	vm_shared_region_t sr,
	user_addr_t slide_info_addr,
	mach_vm_size_t slide_info_size,
	mach_vm_offset_t start,
	mach_vm_size_t size,
	mach_vm_offset_t slid_mapping,
	uint32_t slide,
	memory_object_control_t sr_file_control,
	vm_prot_t prot)
{
	kern_return_t kr;
	vm_object_t object = VM_OBJECT_NULL;
	vm_shared_region_slide_info_t si = NULL;
	vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL;
	struct vm_map_entry tmp_entry_store;
	memory_object_t sr_pager = MEMORY_OBJECT_NULL;
	vm_map_t sr_map;
	vm_map_kernel_flags_t vmk_flags;
	vm_map_offset_t map_addr;
	void *slide_info_entry = NULL;
	int error;

	assert(sr->sr_slide_in_progress);

	if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Copy in and verify the relocation information.
	 */
	if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
		printf("Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
		return KERN_FAILURE;
	}
	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
		printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
		return KERN_FAILURE;
	}

	/* owned by this function until handed to "si" below */
	slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
	if (slide_info_entry == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
	if (error) {
		printf("copyin of slide_info failed\n");
		kr = KERN_INVALID_ADDRESS;
		goto done;
	}

	if ((kr = vm_shared_region_slide_sanity_check(slide_info_entry, slide_info_size)) != KERN_SUCCESS) {
		printf("Sanity Check failed for slide_info\n");
		goto done;
	}

	/*
	 * Allocate and fill in a vm_shared_region_slide_info.
	 * This will either be used by a new pager, or used to find
	 * a pre-existing matching pager.
	 */
	object = memory_object_control_to_vm_object(sr_file_control);
	if (object == VM_OBJECT_NULL || object->internal) {
		object = VM_OBJECT_NULL;
		kr = KERN_INVALID_ADDRESS;
		goto done;
	}

	si = kalloc_type(struct vm_shared_region_slide_info,
	    Z_WAITOK | Z_NOFAIL);
	vm_object_lock(object);

	vm_object_reference_locked(object);     /* for si->slide_object */
	object->object_is_shared_cache = TRUE;
	vm_object_unlock(object);

	/* "si" now owns slide_info_entry; the error path frees both together */
	si->si_slide_info_entry = slide_info_entry;
	si->si_slide_info_size = slide_info_size;

	assert(slid_mapping != (mach_vm_offset_t) -1);
	si->si_slid_address = slid_mapping + sr->sr_base_address;
	si->si_slide_object = object;
	si->si_start = start;
	si->si_end = si->si_start + size;
	si->si_slide = slide;
#if __has_feature(ptrauth_calls)
	/*
	 * If there is authenticated pointer data in this slid mapping,
	 * then just add the information needed to create new pagers for
	 * different shared_region_id's later.
	 */
	if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
	    sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
	    !(prot & VM_PROT_NOAUTH)) {
		if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
			printf("Too many auth/private sections for shared region!!\n");
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}
		si->si_ptrauth = TRUE;
		sr->sr_auth_section[sr->sr_next_auth_section++] = si;
		/*
		 * Remember the shared region, since that's where we'll
		 * stash this info for all auth pagers to share. Each pager
		 * will need to take a reference to it.
		 */
		si->si_shared_region = sr;
		kr = KERN_SUCCESS;
		goto done;
	}
	si->si_shared_region = NULL;
	si->si_ptrauth = FALSE;
#endif /* __has_feature(ptrauth_calls) */

	/*
	 * find the pre-existing shared region's map entry to slide
	 */
	sr_map = vm_shared_region_vm_map(sr);
	kr = find_mapping_to_slide(sr_map, (vm_map_address_t)slid_mapping, &tmp_entry_store);
	if (kr != KERN_SUCCESS) {
		goto done;
	}
	tmp_entry = &tmp_entry_store;

	/*
	 * The object must exactly cover the region to slide.
	 */
	assert(VME_OFFSET(tmp_entry) == start);
	assert(tmp_entry->vme_end - tmp_entry->vme_start == size);

	/* create a "shared_region" sliding pager */
	sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), VME_OFFSET(tmp_entry), si, 0);
	if (sr_pager == MEMORY_OBJECT_NULL) {
		kr = KERN_RESOURCE_SHORTAGE;
		goto done;
	}

#if CONFIG_SECLUDED_MEMORY
	/*
	 * The shared region pagers used by camera or DEXT should have
	 * pagers that won't go on the secluded queue.
	 */
	if (primary_system_shared_region == NULL ||
	    primary_system_shared_region == sr ||
	    sr->sr_driverkit) {
		memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	/* map that pager over the portion of the mapping that needs sliding */
	map_addr = tmp_entry->vme_start;
	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
	vmk_flags.vmkf_overwrite_immutable = true;
	vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
	    tmp_entry->max_protection);
	vmk_flags.vmf_tpro = shared_region_tpro_protect(sr,
	    prot);
	kr = vm_map_enter_mem_object(sr_map,
	    &map_addr,
	    (tmp_entry->vme_end - tmp_entry->vme_start),
	    (mach_vm_offset_t) 0,
	    vmk_flags,
	    (ipc_port_t)(uintptr_t) sr_pager,
	    0,
	    TRUE,
	    tmp_entry->protection,
	    tmp_entry->max_protection,
	    tmp_entry->inheritance);
	assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
	assertf(map_addr == tmp_entry->vme_start,
	    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
	    (uint64_t)map_addr,
	    (uint64_t) tmp_entry->vme_start,
	    tmp_entry);

	/* success! */
	kr = KERN_SUCCESS;

done:
	if (sr_pager != NULL) {
		/*
		 * Release the sr_pager reference obtained by shared_region_pager_setup().
		 * The mapping, if it succeeded, is now holding a reference on the memory object.
		 */
		memory_object_deallocate(sr_pager);
		sr_pager = MEMORY_OBJECT_NULL;
	}
	if (tmp_entry != NULL) {
		/* release extra ref on tmp_entry's VM object */
		vm_object_deallocate(VME_OBJECT(tmp_entry));
		tmp_entry = VM_MAP_ENTRY_NULL;
	}

	if (kr != KERN_SUCCESS) {
		/* cleanup */
		if (si != NULL) {
			if (si->si_slide_object) {
				/* drop the reference taken for si->slide_object */
				vm_object_deallocate(si->si_slide_object);
				si->si_slide_object = VM_OBJECT_NULL;
			}
			kfree_type(struct vm_shared_region_slide_info, si);
			si = NULL;
		}
		if (slide_info_entry != NULL) {
			kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
			slide_info_entry = NULL;
		}
	}
	return kr;
}
2790
2791 static kern_return_t
vm_shared_region_slide_sanity_check_v1(vm_shared_region_slide_info_entry_v1_t s_info)2792 vm_shared_region_slide_sanity_check_v1(
2793 vm_shared_region_slide_info_entry_v1_t s_info)
2794 {
2795 uint32_t pageIndex = 0;
2796 uint16_t entryIndex = 0;
2797 uint16_t *toc = NULL;
2798
2799 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2800 for (; pageIndex < s_info->toc_count; pageIndex++) {
2801 entryIndex = (uint16_t)(toc[pageIndex]);
2802
2803 if (entryIndex >= s_info->entry_count) {
2804 printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2805 return KERN_FAILURE;
2806 }
2807 }
2808 return KERN_SUCCESS;
2809 }
2810
2811 static kern_return_t
vm_shared_region_slide_sanity_check_v2(vm_shared_region_slide_info_entry_v2_t s_info,mach_vm_size_t slide_info_size)2812 vm_shared_region_slide_sanity_check_v2(
2813 vm_shared_region_slide_info_entry_v2_t s_info,
2814 mach_vm_size_t slide_info_size)
2815 {
2816 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2817 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2818 return KERN_FAILURE;
2819 }
2820 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2821 return KERN_FAILURE;
2822 }
2823
2824 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2825
2826 uint32_t page_starts_count = s_info->page_starts_count;
2827 uint32_t page_extras_count = s_info->page_extras_count;
2828 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2829 if (num_trailing_entries < page_starts_count) {
2830 return KERN_FAILURE;
2831 }
2832
2833 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2834 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2835 if (trailing_size >> 1 != num_trailing_entries) {
2836 return KERN_FAILURE;
2837 }
2838
2839 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2840 if (required_size < sizeof(*s_info)) {
2841 return KERN_FAILURE;
2842 }
2843
2844 if (required_size > slide_info_size) {
2845 return KERN_FAILURE;
2846 }
2847
2848 return KERN_SUCCESS;
2849 }
2850
/*
 * Validate version 3 slide info: the header must fit in the blob, the
 * page size must match what the slide code expects, and the trailing
 * page_starts array (one uint16_t per page) must lie entirely within
 * slide_info_size.
 */
static kern_return_t
vm_shared_region_slide_sanity_check_v3(
	vm_shared_region_slide_info_entry_v3_t s_info,
	mach_vm_size_t slide_info_size)
{
	if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
		printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
		return KERN_FAILURE;
	}
	if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
		printf("vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SL 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
		return KERN_FAILURE;
	}

	uint32_t page_starts_count = s_info->page_starts_count;
	mach_vm_size_t num_trailing_entries = page_starts_count;
	/* scale by sizeof(uint16_t); a 32-bit count shifted by 1 can't overflow 64 bits */
	mach_vm_size_t trailing_size = num_trailing_entries << 1;
	mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
	if (required_size < sizeof(*s_info)) {
		/* the addition above wrapped (message text is misleading: this is an overflow check) */
		printf("vm_shared_region_slide_sanity_check_v3: required_size != sizeof(*s_info) 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
		return KERN_FAILURE;
	}

	if (required_size > slide_info_size) {
		/* trailing page_starts array would extend past the end of the blob */
		printf("vm_shared_region_slide_sanity_check_v3: required_size != slide_info_size 0x%llx != 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
2881
2882 static kern_return_t
vm_shared_region_slide_sanity_check_v4(vm_shared_region_slide_info_entry_v4_t s_info,mach_vm_size_t slide_info_size)2883 vm_shared_region_slide_sanity_check_v4(
2884 vm_shared_region_slide_info_entry_v4_t s_info,
2885 mach_vm_size_t slide_info_size)
2886 {
2887 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2888 printf("%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2889 return KERN_FAILURE;
2890 }
2891 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2892 return KERN_FAILURE;
2893 }
2894
2895 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2896
2897 uint32_t page_starts_count = s_info->page_starts_count;
2898 uint32_t page_extras_count = s_info->page_extras_count;
2899 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2900 if (num_trailing_entries < page_starts_count) {
2901 return KERN_FAILURE;
2902 }
2903
2904 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2905 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2906 if (trailing_size >> 1 != num_trailing_entries) {
2907 return KERN_FAILURE;
2908 }
2909
2910 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2911 if (required_size < sizeof(*s_info)) {
2912 return KERN_FAILURE;
2913 }
2914
2915 if (required_size > slide_info_size) {
2916 return KERN_FAILURE;
2917 }
2918
2919 return KERN_SUCCESS;
2920 }
2921
2922
2923 static kern_return_t
vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t s_info,mach_vm_size_t s_info_size)2924 vm_shared_region_slide_sanity_check(
2925 vm_shared_region_slide_info_entry_t s_info,
2926 mach_vm_size_t s_info_size)
2927 {
2928 kern_return_t kr;
2929
2930 switch (s_info->version) {
2931 case 1:
2932 kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
2933 break;
2934 case 2:
2935 kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
2936 break;
2937 case 3:
2938 kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
2939 break;
2940 case 4:
2941 kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
2942 break;
2943 default:
2944 kr = KERN_FAILURE;
2945 }
2946 return kr;
2947 }
2948
/*
 * Apply version 1 slide info to one page of the shared cache.
 *
 * "si" supplies the slide amount and the v1 slide info blob, "vaddr" is
 * the kernel address of the page contents to modify, and "pageIndex"
 * selects this page's entry in the blob's table of contents.  Each TOC
 * entry names a bitmap with one bit per 32-bit word of the page; a set
 * bit means that word holds the low 32 bits of a pointer and must have
 * the slide added.  Returns KERN_FAILURE only when, for a 64-bit task,
 * the 32-bit addition wrapped (a carry into the upper half was lost).
 */
static kern_return_t
vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	uint16_t *toc = NULL;
	slide_info_entry_toc_t bitmap = NULL;
	uint32_t i = 0, j = 0;
	uint8_t b = 0;
	uint32_t slide = si->si_slide;
	int is_64 = task_has_64Bit_addr(current_task());

	vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);

	if (pageIndex >= s_info->toc_count) {
		/* no TOC entry: log and leave the page untouched (still "success") */
		printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
	} else {
		uint16_t entryIndex = (uint16_t)(toc[pageIndex]);
		slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);

		if (entryIndex >= s_info->entry_count) {
			/* out-of-range bitmap index: log and leave the page untouched */
			printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
		} else {
			bitmap = &slide_info_entries[entryIndex];

			for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
				b = bitmap->entry[i];
				if (b != 0) {
					for (j = 0; j < 8; ++j) {
						if (b & (1 << j)) {
							uint32_t *ptr_to_slide;
							uint32_t old_value;

							/* bit (i,j) covers the (i*8+j)'th 32-bit word of the page */
							ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
							old_value = *ptr_to_slide;
							*ptr_to_slide += slide;
							if (is_64 && *ptr_to_slide < old_value) {
								/*
								 * We just slid the low 32 bits of a 64-bit pointer
								 * and it looks like there should have been a carry-over
								 * to the upper 32 bits.
								 * The sliding failed...
								 */
								printf("vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
								    i, j, b, slide, old_value, *ptr_to_slide);
								return KERN_FAILURE;
							}
						}
					}
				}
			}
		}
	}

	return KERN_SUCCESS;
}
3004
3005 static kern_return_t
rebase_chain_32(uint8_t * page_content,uint16_t start_offset,uint32_t slide_amount,vm_shared_region_slide_info_entry_v2_t s_info)3006 rebase_chain_32(
3007 uint8_t *page_content,
3008 uint16_t start_offset,
3009 uint32_t slide_amount,
3010 vm_shared_region_slide_info_entry_v2_t s_info)
3011 {
3012 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3013
3014 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3015 const uint32_t value_mask = ~delta_mask;
3016 const uint32_t value_add = (uint32_t)(s_info->value_add);
3017 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3018
3019 uint32_t page_offset = start_offset;
3020 uint32_t delta = 1;
3021
3022 while (delta != 0 && page_offset <= last_page_offset) {
3023 uint8_t *loc;
3024 uint32_t value;
3025
3026 loc = page_content + page_offset;
3027 memcpy(&value, loc, sizeof(value));
3028 delta = (value & delta_mask) >> delta_shift;
3029 value &= value_mask;
3030
3031 if (value != 0) {
3032 value += value_add;
3033 value += slide_amount;
3034 }
3035 memcpy(loc, &value, sizeof(value));
3036 page_offset += delta;
3037 }
3038
3039 /* If the offset went past the end of the page, then the slide data is invalid. */
3040 if (page_offset > last_page_offset) {
3041 return KERN_FAILURE;
3042 }
3043 return KERN_SUCCESS;
3044 }
3045
/*
 * Walk one chain of 64-bit rebase entries within a page and add the
 * slide (plus value_add) to every non-zero pointer value.  Each value's
 * delta_mask bits encode the distance (in 4-byte units) to the next
 * entry; a delta of 0 terminates the chain.
 *
 * Fails if the chain would run off the end of the page, except for the
 * special case of a pointer straddling the page boundary (see below).
 */
static kern_return_t
rebase_chain_64(
	uint8_t *page_content,
	uint16_t start_offset,
	uint32_t slide_amount,
	vm_shared_region_slide_info_entry_v2_t s_info)
{
	/* last offset at which a full 64-bit value still fits in the page */
	const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);

	const uint64_t delta_mask = s_info->delta_mask;
	const uint64_t value_mask = ~delta_mask;
	const uint64_t value_add = s_info->value_add;
	/* delta is stored in units of 4 bytes, hence the -2 on the shift */
	const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;

	uint32_t page_offset = start_offset;
	uint32_t delta = 1;

	while (delta != 0 && page_offset <= last_page_offset) {
		uint8_t *loc;
		uint64_t value;

		/* memcpy: the entry may not be naturally aligned */
		loc = page_content + page_offset;
		memcpy(&value, loc, sizeof(value));
		delta = (uint32_t)((value & delta_mask) >> delta_shift);
		value &= value_mask;

		if (value != 0) {
			value += value_add;
			value += slide_amount;
		}
		memcpy(loc, &value, sizeof(value));
		page_offset += delta;
	}

	if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
		/* If a pointer straddling the page boundary needs to be adjusted, then
		 * add the slide to the lower half. The encoding guarantees that the upper
		 * half on the next page will need no masking.
		 *
		 * This assumes a little-endian machine and that the region being slid
		 * never crosses a 4 GB boundary. */

		uint8_t *loc = page_content + page_offset;
		uint32_t value;

		memcpy(&value, loc, sizeof(value));
		value += slide_amount;
		memcpy(loc, &value, sizeof(value));
	} else if (page_offset > last_page_offset) {
		/* chain walked past the end of the page: slide data is invalid */
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
3100
3101 static kern_return_t
rebase_chain(boolean_t is_64,uint32_t pageIndex,uint8_t * page_content,uint16_t start_offset,uint32_t slide_amount,vm_shared_region_slide_info_entry_v2_t s_info)3102 rebase_chain(
3103 boolean_t is_64,
3104 uint32_t pageIndex,
3105 uint8_t *page_content,
3106 uint16_t start_offset,
3107 uint32_t slide_amount,
3108 vm_shared_region_slide_info_entry_v2_t s_info)
3109 {
3110 kern_return_t kr;
3111 if (is_64) {
3112 kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3113 } else {
3114 kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3115 }
3116
3117 if (kr != KERN_SUCCESS) {
3118 printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3119 pageIndex, start_offset, slide_amount);
3120 }
3121 return kr;
3122 }
3123
/*
 * Apply version 2 slide info to one page of the shared cache.
 *
 * page_starts[pageIndex] either marks the page as needing no rebase,
 * gives the start offset of a single rebase chain, or (with the EXTRA
 * bit set) indexes into the page_extras table, which lists one or more
 * chain starts terminated by an entry with the END bit set.
 */
static kern_return_t
vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
	const uint32_t slide_amount = si->si_slide;

	/* The high bits of the delta_mask field are nonzero precisely when the shared
	 * cache is 64-bit. */
	const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;

	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = page_starts[pageIndex];

	if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
		/* page contains no pointers to slide */
		return KERN_SUCCESS;
	}

	if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
		/* multiple chains: walk the page_extras list until the END marker */
		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
		uint16_t info;

		do {
			uint16_t page_start_offset;
			kern_return_t kr;

			if (chain_index >= s_info->page_extras_count) {
				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
				    chain_index, s_info->page_extras_count);
				return KERN_FAILURE;
			}
			info = page_extras[chain_index];
			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);

			kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
			if (kr != KERN_SUCCESS) {
				return KERN_FAILURE;
			}

			chain_index++;
		} while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
	} else {
		/* single chain: entry encodes the start offset directly */
		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
		kern_return_t kr;

		kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
		if (kr != KERN_SUCCESS) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}
3186
3187
/*
 * Apply version 3 slide info to one page of the shared cache.
 *
 * Each page has a single chain of 64-bit "threaded" pointers; each
 * value's bits 51..61 encode the offset (in 8-byte units) to the next
 * pointer in the chain, with 0 terminating it.  Plain pointers get the
 * slide added to their reconstructed 51-bit value; authenticated
 * (ptrauth) pointers are rebuilt from their 32-bit offset, then
 * re-signed with the task's JOP key when ptrauth is in use.
 *
 * "vaddr" is the kernel mapping of the page being slid; "uservaddr" is
 * where the page will appear in user space, needed only to compute
 * address-diversified ptrauth discriminators.  "jop_key" is the user
 * signing key (ignored when ptrauth_calls is unavailable).
 */
static kern_return_t
vm_shared_region_slide_page_v3(
	vm_shared_region_slide_info_t si,
	vm_offset_t vaddr,
	__unused mach_vm_offset_t uservaddr,
	uint32_t pageIndex,
#if !__has_feature(ptrauth_calls)
	__unused
#endif /* !__has_feature(ptrauth_calls) */
	uint64_t jop_key)
{
	vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
	const uint32_t slide_amount = si->si_slide;

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = s_info->page_starts[pageIndex];

	if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
		/* page contains no pointers to slide */
		return KERN_SUCCESS;
	}

	uint8_t* rebaseLocation = page_content;
	uint64_t delta = page_entry;
	do {
		rebaseLocation += delta;
		uint64_t value;
		/* memcpy: the pointer slot may not be naturally aligned */
		memcpy(&value, rebaseLocation, sizeof(value));
		delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);

		// A pointer is one of :
		// {
		//	 uint64_t pointerValue : 51;
		//	 uint64_t offsetToNextPointer : 11;
		//	 uint64_t isBind : 1 = 0;
		//	 uint64_t authenticated : 1 = 0;
		// }
		// {
		//	 uint32_t offsetFromSharedCacheBase;
		//	 uint16_t diversityData;
		//	 uint16_t hasAddressDiversity : 1;
		//	 uint16_t hasDKey : 1;
		//	 uint16_t hasBKey : 1;
		//	 uint16_t offsetToNextPointer : 11;
		//	 uint16_t isBind : 1;
		//	 uint16_t authenticated : 1 = 1;
		// }

		/* binds are not legal in the shared cache's slide info */
		bool isBind = (value & (1ULL << 62)) != 0;
		if (isBind) {
			return KERN_FAILURE;
		}

#if __has_feature(ptrauth_calls)
		uint16_t diversity_data = (uint16_t)(value >> 32);
		bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
		ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
#endif /* __has_feature(ptrauth_calls) */
		bool isAuthenticated = (value & (1ULL << 63)) != 0;

		if (isAuthenticated) {
			// The new value for a rebase is the low 32-bits of the threaded value plus the slide.
			value = (value & 0xFFFFFFFF) + slide_amount;
			// Add in the offset from the mach_header
			const uint64_t value_add = s_info->value_add;
			value += value_add;

#if __has_feature(ptrauth_calls)
			uint64_t discriminator = diversity_data;
			if (hasAddressDiversity) {
				// First calculate a new discriminator using the address of where we are trying to store the value
				uintptr_t pageOffset = rebaseLocation - page_content;
				discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
			}

			if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
				/*
				 * these pointers are used in user mode. disable the kernel key diversification
				 * so we can sign them for use in user mode.
				 */
				value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
			}
#endif /* __has_feature(ptrauth_calls) */
		} else {
			// The new value for a rebase is the low 51-bits of the threaded value plus the slide.
			// Regular pointer which needs to fit in 51-bits of value.
			// C++ RTTI uses the top bit, so we'll allow the whole top-byte
			// and the bottom 43-bits to be fit in to 51-bits.
			uint64_t top8Bits = value & 0x0007F80000000000ULL;
			uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
			uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
			value = targetValue + slide_amount;
		}

		memcpy(rebaseLocation, &value, sizeof(value));
	} while (delta != 0);

	return KERN_SUCCESS;
}
3293
3294 static kern_return_t
rebase_chainv4(uint8_t * page_content,uint16_t start_offset,uint32_t slide_amount,vm_shared_region_slide_info_entry_v4_t s_info)3295 rebase_chainv4(
3296 uint8_t *page_content,
3297 uint16_t start_offset,
3298 uint32_t slide_amount,
3299 vm_shared_region_slide_info_entry_v4_t s_info)
3300 {
3301 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3302
3303 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3304 const uint32_t value_mask = ~delta_mask;
3305 const uint32_t value_add = (uint32_t)(s_info->value_add);
3306 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3307
3308 uint32_t page_offset = start_offset;
3309 uint32_t delta = 1;
3310
3311 while (delta != 0 && page_offset <= last_page_offset) {
3312 uint8_t *loc;
3313 uint32_t value;
3314
3315 loc = page_content + page_offset;
3316 memcpy(&value, loc, sizeof(value));
3317 delta = (value & delta_mask) >> delta_shift;
3318 value &= value_mask;
3319
3320 if ((value & 0xFFFF8000) == 0) {
3321 // small positive non-pointer, use as-is
3322 } else if ((value & 0x3FFF8000) == 0x3FFF8000) {
3323 // small negative non-pointer
3324 value |= 0xC0000000;
3325 } else {
3326 // pointer that needs rebasing
3327 value += value_add;
3328 value += slide_amount;
3329 }
3330 memcpy(loc, &value, sizeof(value));
3331 page_offset += delta;
3332 }
3333
3334 /* If the offset went past the end of the page, then the slide data is invalid. */
3335 if (page_offset > last_page_offset) {
3336 return KERN_FAILURE;
3337 }
3338 return KERN_SUCCESS;
3339 }
3340
/*
 * Apply version 4 slide info to one page of the shared cache.
 *
 * Mirrors the v2 logic: page_starts[pageIndex] either marks the page as
 * needing no rebase, encodes a single chain's start offset, or (with
 * the USE_EXTRA bit) indexes into page_extras, a list of chain starts
 * terminated by an entry carrying the EXTRA_END bit.
 */
static kern_return_t
vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
{
	vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
	const uint32_t slide_amount = si->si_slide;

	const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
	const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);

	uint8_t *page_content = (uint8_t *)vaddr;
	uint16_t page_entry;

	if (pageIndex >= s_info->page_starts_count) {
		printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
		    pageIndex, s_info->page_starts_count);
		return KERN_FAILURE;
	}
	page_entry = page_starts[pageIndex];

	if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
		/* page contains no pointers to slide */
		return KERN_SUCCESS;
	}

	if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
		/* multiple chains: walk the page_extras list until the END marker */
		uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
		uint16_t info;

		do {
			uint16_t page_start_offset;
			kern_return_t kr;

			if (chain_index >= s_info->page_extras_count) {
				printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
				    chain_index, s_info->page_extras_count);
				return KERN_FAILURE;
			}
			info = page_extras[chain_index];
			page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);

			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
			if (kr != KERN_SUCCESS) {
				return KERN_FAILURE;
			}

			chain_index++;
		} while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
	} else {
		/* single chain: entry encodes the start offset directly */
		const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
		kern_return_t kr;

		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
		if (kr != KERN_SUCCESS) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}
3399
3400
3401
3402 kern_return_t
vm_shared_region_slide_page(vm_shared_region_slide_info_t si,vm_offset_t vaddr,mach_vm_offset_t uservaddr,uint32_t pageIndex,uint64_t jop_key)3403 vm_shared_region_slide_page(
3404 vm_shared_region_slide_info_t si,
3405 vm_offset_t vaddr,
3406 mach_vm_offset_t uservaddr,
3407 uint32_t pageIndex,
3408 uint64_t jop_key)
3409 {
3410 switch (si->si_slide_info_entry->version) {
3411 case 1:
3412 return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
3413 case 2:
3414 return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3415 case 3:
3416 return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3417 case 4:
3418 return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3419 default:
3420 return KERN_FAILURE;
3421 }
3422 }
3423
3424 /******************************************************************************/
3425 /* Comm page support */
3426 /******************************************************************************/
3427
/*
 * Comm page state, initialized once at boot (read-only afterwards).
 * The "data" comm pages (32- and 64-bit) and the "text" comm pages each
 * get a Mach port handle, the named entry behind it, and the submap the
 * entry wraps.  On Intel these are populated by _vm_commpage_init().
 */
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;

SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;

/* randomized user-space base addresses of the text comm pages */
SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3444
3445 #if defined(__i386__) || defined(__x86_64__)
3446 /*
3447 * Create a memory entry, VM submap and pmap for one commpage.
3448 */
static void
_vm_commpage_init(
	ipc_port_t *handlep,     /* OUT: port for the new commpage memory entry */
	vm_map_size_t size)      /* size of the commpage area to create */
{
	vm_named_entry_t mem_entry;
	vm_map_t new_map;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> _init(0x%llx)\n",
		(long long)size));

	/* the commpage gets its own pmap, backing a dedicated submap */
	pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
	if (new_pmap == NULL) {
		/* boot-time allocation failure is unrecoverable */
		panic("_vm_commpage_init: could not allocate pmap");
	}
	new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);

	/* wrap the submap in a named memory entry, returned via *handlep */
	mem_entry = mach_memory_entry_allocate(handlep);
	mem_entry->backing.map = new_map;
	mem_entry->internal = TRUE;
	mem_entry->is_sub_map = TRUE;
	mem_entry->offset = 0;
	mem_entry->protection = VM_PROT_ALL;
	mem_entry->size = size;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: _init(0x%llx) <- %p\n",
		(long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
}
3479 #endif
3480
3481
3482 /*
3483 * Initialize the comm text pages at boot time
3484 */
void
vm_commpage_text_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage text: ->init()\n"));
#if defined(__i386__) || defined(__x86_64__)
	/* create the 32 bit comm text page */
	/* randomize the page's location within its slide range (ASLR for the PFZ) */
	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */
	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
	commpage_text32_map = commpage_text32_entry->backing.map;
	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm page */
	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding upto 2Mb range */
	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
	commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
	commpage_text64_map = commpage_text64_entry->backing.map;
	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
#endif

	/* fill in the comm text page contents for this platform */
	commpage_text_populate();

	/* populate the routines in here */
	SHARED_REGION_TRACE_DEBUG(
		("commpage text: init() <-\n"));
}
3512
3513 /*
3514 * Initialize the comm pages at boot time.
3515 */
void
vm_commpage_init(void)
{
	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> init()\n"));

#if defined(__i386__) || defined(__x86_64__)
	/* create the 32-bit comm page */
	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
	commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
	commpage32_map = commpage32_entry->backing.map;

	/* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm page */
	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
	commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
	commpage64_map = commpage64_entry->backing.map;

#endif /* __i386__ || __x86_64__ */

	/* populate them according to this specific platform */
	commpage_populate();
	/* record that the comm page is ready for use */
	__commpage_setup = 1;
#if XNU_TARGET_OS_OSX
	if (__system_power_source == 0) {
		/* power source notification arrived before setup: replay it */
		post_sys_powersource_internal(0, 1);
	}
#endif /* XNU_TARGET_OS_OSX */

	SHARED_REGION_TRACE_DEBUG(
		("commpage: init() <-\n"));
}
3548
3549 /*
3550 * Enter the appropriate comm page into the task's address space.
3551 * This is called at exec() time via vm_map_exec().
3552 */
kern_return_t
vm_commpage_enter(
	vm_map_t map,         /* target task's address space */
	task_t task,          /* target task (used for tracing only here) */
	boolean_t is64bit)    /* TRUE to map the 64-bit variants */
{
#if defined(__arm64__)
#pragma unused(is64bit)
	(void)task;
	(void)map;
	/* on arm64 the commpage lives in a shared pmap region; just nest it */
	pmap_insert_commpage(vm_map_pmap(map));
	return KERN_SUCCESS;
#else
	ipc_port_t commpage_handle, commpage_text_handle;
	vm_map_offset_t commpage_address, objc_address, commpage_text_address;
	vm_map_size_t commpage_size, objc_size, commpage_text_size;
	vm_map_kernel_flags_t vmk_flags;
	kern_return_t kr;

	SHARED_REGION_TRACE_DEBUG(
		("commpage: -> enter(%p,%p)\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task)));

	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
	/* the comm page is likely to be beyond the actual end of the VM map */
	vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
	vmk_flags.vmkf_beyond_max = TRUE;

	/* select the appropriate comm page for this task */
	assert(!(is64bit ^ vm_map_is_64bit(map)));
	if (is64bit) {
		commpage_handle = commpage64_handle;
		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
		commpage_size = _COMM_PAGE64_AREA_LENGTH;
		objc_size = _COMM_PAGE64_OBJC_SIZE;
		objc_address = _COMM_PAGE64_OBJC_BASE;
		commpage_text_handle = commpage_text64_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
	} else {
		commpage_handle = commpage32_handle;
		commpage_address =
		    (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
		commpage_size = _COMM_PAGE32_AREA_LENGTH;
		objc_size = _COMM_PAGE32_OBJC_SIZE;
		objc_address = _COMM_PAGE32_OBJC_BASE;
		commpage_text_handle = commpage_text32_handle;
		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
	}

	if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
	    (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
		/* the commpage is properly aligned or sized for pmap-nesting */
		vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
		vmk_flags.vmkf_nested_pmap = TRUE;
	}

	/* map the comm page in the task's address space */
	assert(commpage_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_address,
		commpage_size,
		0,
		vmk_flags,
		commpage_handle,
		0,
		FALSE,
		VM_PROT_READ,
		VM_PROT_READ,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		/* log and continue; remaining mappings are still attempted */
		SHARED_REGION_TRACE_ERROR(
			("commpage: enter(%p,0x%llx,0x%llx) "
			"commpage %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_address,
			(long long)commpage_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
	}

	/* map the comm text page in the task's address space */
	assert(commpage_text_handle != IPC_PORT_NULL);
	kr = vm_map_enter_mem_object(
		map,
		&commpage_text_address,
		commpage_text_size,
		0,
		vmk_flags,
		commpage_text_handle,
		0,
		FALSE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_PROT_READ | VM_PROT_EXECUTE,
		VM_INHERIT_SHARE);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("commpage text: enter(%p,0x%llx,0x%llx) "
			"commpage text %p mapping failed 0x%x\n",
			(void *)VM_KERNEL_ADDRPERM(map),
			(long long)commpage_text_address,
			(long long)commpage_text_size,
			(void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
	}

	/*
	 * Since we're here, we also pre-allocate some virtual space for the
	 * Objective-C run-time, if needed...
	 */
	if (objc_size != 0) {
		/* anonymous reservation (no backing object) at the ObjC base address */
		kr = vm_map_enter_mem_object(
			map,
			&objc_address,
			objc_size,
			0,
			vmk_flags,
			IPC_PORT_NULL,
			0,
			FALSE,
			VM_PROT_ALL,
			VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE_ERROR(
				("commpage: enter(%p,0x%llx,0x%llx) "
				"objc mapping failed 0x%x\n",
				(void *)VM_KERNEL_ADDRPERM(map),
				(long long)objc_address,
				(long long)objc_size, kr));
		}
	}

	/* returns the status of the last mapping attempted above */
	SHARED_REGION_TRACE_DEBUG(
		("commpage: enter(%p,%p) <- 0x%x\n",
		(void *)VM_KERNEL_ADDRPERM(map),
		(void *)VM_KERNEL_ADDRPERM(task), kr));
	return kr;
#endif
}
3692
/*
 * Apply slide (ASLR rebase) information to a mapping in the current task's
 * shared region.  The actual relocation bookkeeping is delegated to
 * vm_shared_region_slide_mapping(); this routine looks up the task's shared
 * region, serializes concurrent slide attempts on that region, and releases
 * the region reference when done.
 *
 * Returns KERN_SUCCESS, KERN_FAILURE if the task has no shared region, or
 * the error from vm_shared_region_slide_mapping().
 */
int
vm_shared_region_slide(
	uint32_t                slide,
	mach_vm_offset_t        entry_start_address,
	mach_vm_size_t          entry_size,
	mach_vm_offset_t        slide_start,
	mach_vm_size_t          slide_size,
	mach_vm_offset_t        slid_mapping,
	memory_object_control_t sr_file_control,
	vm_prot_t               prot)
{
	vm_shared_region_t      sr;
	kern_return_t           error;

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
		slide, entry_start_address, entry_size, slide_start, slide_size));

	/* takes a reference on the region; paired with the deallocate below */
	sr = vm_shared_region_get(current_task());
	if (sr == NULL) {
		printf("%s: no shared region?\n", __FUNCTION__);
		SHARED_REGION_TRACE_DEBUG(
			("vm_shared_region_slide: <- %d (no shared region)\n",
			KERN_FAILURE));
		return KERN_FAILURE;
	}

	/*
	 * Protect from concurrent access.
	 * Only one thread may slide a given shared region at a time:
	 * wait until no other slide is in progress, then claim the
	 * region by recording the current thread in sr_slide_in_progress.
	 */
	vm_shared_region_lock();
	while (sr->sr_slide_in_progress) {
		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
	}

	sr->sr_slide_in_progress = current_thread();
	vm_shared_region_unlock();

	/* do the actual slide work without holding the region lock */
	error = vm_shared_region_slide_mapping(sr,
	    (user_addr_t)slide_start,
	    slide_size,
	    entry_start_address,
	    entry_size,
	    slid_mapping,
	    slide,
	    sr_file_control,
	    prot);
	if (error) {
		printf("slide_info initialization failed with kr=%d\n", error);
	}

	vm_shared_region_lock();

	/* release our claim and wake up any thread waiting to slide */
	assert(sr->sr_slide_in_progress == current_thread());
	sr->sr_slide_in_progress = THREAD_NULL;
	vm_shared_region_wakeup(&sr->sr_slide_in_progress);

#if XNU_TARGET_OS_OSX
	if (error == KERN_SUCCESS) {
		shared_region_completed_slide = TRUE;
	}
#endif /* XNU_TARGET_OS_OSX */
	vm_shared_region_unlock();

	/* drop the reference taken by vm_shared_region_get() above */
	vm_shared_region_deallocate(sr);

	SHARED_REGION_TRACE_DEBUG(
		("vm_shared_region_slide: <- %d\n",
		error));

	return error;
}
3765
3766 /*
3767 * Used during Authenticated Root Volume macOS boot.
3768 * Launchd re-execs itself and wants the new launchd to use
3769 * the shared cache from the new root volume. This call
3770 * makes all the existing shared caches stale to allow
3771 * that to happen.
3772 */
/*
 * Mark every existing shared region stale so no new task can reuse it.
 * For regions that are already idle (only kept alive by a pending delayed
 * teardown timer), re-arm the teardown timer with a much shorter deadline
 * so the region goes away quickly.
 */
void
vm_shared_region_pivot(void)
{
	vm_shared_region_t shared_region = NULL;

	vm_shared_region_lock();

	queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
		assert(shared_region->sr_ref_count > 0);
		shared_region->sr_stale = TRUE;
		if (shared_region->sr_timer_call) {
			/*
			 * We have a shared region ready to be destroyed
			 * and just waiting for a delayed timer to fire.
			 * Marking it stale cements its ineligibility to
			 * be used ever again. So let's shorten the timer
			 * aggressively down to 10 milliseconds and get rid of it.
			 * This is a single quantum and we don't need to go
			 * shorter than this duration. We want it to be short
			 * enough, however, because we could have an unmount
			 * of the volume hosting this shared region just behind
			 * us.
			 */
			uint64_t deadline;
			assert(shared_region->sr_ref_count == 1);

			/*
			 * Free the old timer call. Returns with a reference held.
			 * If the old timer has fired and is waiting for the vm_shared_region_lock
			 * lock, we will just return with an additional ref_count i.e. 2.
			 * The old timer will then fire and just drop the ref count down to 1
			 * with no other modifications.
			 */
			vm_shared_region_reference_locked(shared_region);

			/* set up the timer. Keep the reference from above for this timer.*/
			shared_region->sr_timer_call = thread_call_allocate(
				(thread_call_func_t) vm_shared_region_timeout,
				(thread_call_param_t) shared_region);

			/* schedule the timer */
			clock_interval_to_deadline(10, /* 10 milliseconds */
			    NSEC_PER_MSEC,
			    &deadline);
			thread_call_enter_delayed(shared_region->sr_timer_call,
			    deadline);

			SHARED_REGION_TRACE_DEBUG(
				("shared_region: pivot(%p): armed timer\n",
				(void *)VM_KERNEL_ADDRPERM(shared_region)));
		}
	}

	vm_shared_region_unlock();
}
3828
3829 /*
3830 * Routine to mark any non-standard slide shared cache region as stale.
3831 * This causes the next "reslide" spawn to create a new shared region.
3832 */
3833 void
vm_shared_region_reslide_stale(boolean_t driverkit)3834 vm_shared_region_reslide_stale(boolean_t driverkit)
3835 {
3836 #if __has_feature(ptrauth_calls)
3837 vm_shared_region_t shared_region = NULL;
3838
3839 vm_shared_region_lock();
3840
3841 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3842 assert(shared_region->sr_ref_count > 0);
3843 if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
3844 shared_region->sr_stale = TRUE;
3845 vm_shared_region_reslide_count++;
3846 }
3847 }
3848
3849 vm_shared_region_unlock();
3850 #else
3851 (void)driverkit;
3852 #endif /* __has_feature(ptrauth_calls) */
3853 }
3854
3855 /*
3856 * report if the task is using a reslide shared cache region.
3857 */
3858 bool
vm_shared_region_is_reslide(__unused struct task * task)3859 vm_shared_region_is_reslide(__unused struct task *task)
3860 {
3861 bool is_reslide = FALSE;
3862 #if __has_feature(ptrauth_calls)
3863 vm_shared_region_t sr = vm_shared_region_get(task);
3864
3865 if (sr != NULL) {
3866 is_reslide = sr->sr_reslide;
3867 vm_shared_region_deallocate(sr);
3868 }
3869 #endif /* __has_feature(ptrauth_calls) */
3870 return is_reslide;
3871 }
3872
/*
 * Called from power-management code to let the kernel know the current
 * source of power:
 *   0 - external power source (i.e. connected to AC power)
 *   1 - internal power source (i.e. battery)
 */
/*
 * Record the current system power source (see the comment above for the
 * meaning of 'i').  Only does anything on macOS; on other targets the
 * argument is ignored and the call is a no-op.
 */
void
#if XNU_TARGET_OS_OSX
post_sys_powersource(int i)
#else /* XNU_TARGET_OS_OSX */
post_sys_powersource(__unused int i)
#endif /* XNU_TARGET_OS_OSX */
{
#if XNU_TARGET_OS_OSX
	post_sys_powersource_internal(i, 0);
#endif /* XNU_TARGET_OS_OSX */
}
3889
3890
#if XNU_TARGET_OS_OSX
/*
 * Update the cached power-source indicator, but only for external
 * (internal == 0) notifications; internal notifications are ignored.
 */
static void
post_sys_powersource_internal(int i, int internal)
{
	if (internal != 0) {
		return;
	}
	__system_power_source = i;
}
#endif /* XNU_TARGET_OS_OSX */
3900
3901 void *
vm_shared_region_root_dir(struct vm_shared_region * sr)3902 vm_shared_region_root_dir(
3903 struct vm_shared_region *sr)
3904 {
3905 void *vnode;
3906
3907 vm_shared_region_lock();
3908 vnode = sr->sr_root_dir;
3909 vm_shared_region_unlock();
3910 return vnode;
3911 }
3912