1 /*
2 * Copyright (c) 2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/memory_entry.h>
30 #include <mach/memory_entry_server.h>
31 #include <mach/vm_map_server.h>
32 #include <mach/mach_vm_server.h>
33 #include <vm/vm_purgeable_internal.h>
34 #include <mach/mach_host_server.h>
35 #include <IOKit/IOBSD.h>
36 #include <vm/vm_memory_entry_xnu.h>
37 #include <vm/vm_map_internal.h>
38 #include <vm/memory_object_internal.h>
39 #include <vm/vm_protos_internal.h>
40 #include <vm/vm_object_internal.h>
41 #include <vm/vm_iokit.h>
42
43 static void mach_memory_entry_no_senders(ipc_port_t, mach_port_mscount_t);
44
45 IPC_KOBJECT_DEFINE(IKOT_NAMED_ENTRY,
46 .iko_op_movable_send = true,
47 .iko_op_stable = true,
48 .iko_op_no_senders = mach_memory_entry_no_senders);
49
50 /*
51 * mach_make_memory_entry_64
52 *
53 * Think of it as a two-stage vm_remap() operation: first
54 * you get a handle; second, you map that handle somewhere
55 * else, rather than doing it all at once (and without
56 * needing access to the whole other map). See the usage sketch below.
57 */
58 kern_return_t
59 mach_make_memory_entry_64(
60 vm_map_t target_map,
61 memory_object_size_ut *size_u,
62 memory_object_offset_ut offset_u,
63 vm_prot_ut permission_u,
64 ipc_port_t *object_handle,
65 ipc_port_t parent_handle)
66 {
67 return mach_make_memory_entry_internal(target_map,
68 size_u,
69 offset_u,
70 permission_u,
71 VM_NAMED_ENTRY_KERNEL_FLAGS_NONE,
72 object_handle,
73 parent_handle);
74 }
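/*
 * Hedged usage sketch (user-space view, illustrative only): the MIG
 * interface takes plain scalar arguments rather than the sanitizer
 * wrapped "_ut" types used in-kernel. Names such as "len", "offset",
 * "addr" and "target_task" are hypothetical caller variables.
 *
 *	mach_vm_size_t len = ...;            // typically rounded to a page size on return
 *	mach_port_t handle = MACH_PORT_NULL;
 *
 *	// Stage 1: get a handle on [offset, offset + len) in this task.
 *	kr = mach_make_memory_entry_64(mach_task_self(), &len, offset,
 *	    VM_PROT_READ | VM_PROT_WRITE, &handle, MACH_PORT_NULL);
 *
 *	// Stage 2: map that handle into another (or the same) task.
 *	kr = mach_vm_map(target_task, &addr, len, 0, VM_FLAGS_ANYWHERE,
 *	    handle, 0, FALSE, VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE);
 */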
75
76 static inline void
77 vm_memory_entry_decode_perm(
78 vm_prot_t permission,
79 unsigned int *access,
80 vm_prot_t *protections,
81 bool *mask_protections,
82 bool *use_data_addr,
83 bool *use_4K_compat)
84 {
85 *protections = permission & VM_PROT_ALL;
86 *mask_protections = permission & VM_PROT_IS_MASK;
87 *access = GET_MAP_MEM(permission);
88 *use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0);
89 *use_4K_compat = ((permission & MAP_MEM_4K_DATA_ADDR) != 0);
90 }
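/*
 * Example decode (illustrative values, not taken from any caller in
 * this file): permission = VM_PROT_READ | VM_PROT_WRITE | MAP_MEM_WCOMB
 * yields protections = VM_PROT_READ | VM_PROT_WRITE, access =
 * MAP_MEM_WCOMB (the cache-mode bits extracted by GET_MAP_MEM()), and
 * mask_protections / use_data_addr / use_4K_compat all false, since
 * neither VM_PROT_IS_MASK nor the MAP_MEM_*_DATA_ADDR flags are set.
 */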
91
92 static inline vm_map_offset_t
93 vm_memory_entry_get_offset_in_page(
94 vm_map_offset_t offset,
95 vm_map_offset_t map_start,
96 bool use_data_addr,
97 bool use_4K_compat)
98 {
99 vm_map_offset_t offset_in_page;
100
101 if (use_data_addr || use_4K_compat) {
102 offset_in_page = offset - map_start;
103 if (use_4K_compat) {
104 offset_in_page &= ~((signed)(0xFFF));
105 }
106 } else {
107 offset_in_page = 0;
108 }
109
110 return offset_in_page;
111 }
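/*
 * Example (hypothetical numbers): with use_data_addr set, offset 0x5280
 * and map_start 0x4000 yield offset_in_page = 0x1280; if use_4K_compat
 * is also set, the low 12 bits are cleared and the result is 0x1000.
 * Without either flag the function always returns 0.
 */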
112
113 static inline kern_return_t
114 mach_make_memory_entry_cleanup(
115 kern_return_t kr,
116 vm_map_t target_map __unused,
117 memory_object_size_ut *size_u,
118 vm_map_offset_ut offset_u __unused,
119 vm_prot_t permission __unused,
120 vm_named_entry_t user_entry __unused,
121 ipc_port_t *object_handle)
122 {
123 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
124 "%p kr 0x%x\n", target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u),
125 VM_SANITIZE_UNSAFE_UNWRAP(*size_u), permission, user_entry,
126 vm_sanitize_get_kr(kr));
127 /*
128 * Set safe size and object_handle value on failed return
129 */
130 *size_u = vm_sanitize_wrap_size(0);
131 *object_handle = IPC_PORT_NULL;
132 return vm_sanitize_get_kr(kr);
133 }
134
135 static __attribute__((always_inline, warn_unused_result))
136 kern_return_t
137 mach_make_memory_entry_mem_only_sanitize(
138 vm_map_t target_map,
139 memory_object_size_ut size_u,
140 vm_map_offset_ut offset_u,
141 vm_map_offset_t *map_start,
142 vm_map_offset_t *map_end,
143 vm_map_size_t *map_size)
144 {
145 /*
146 * This code path doesn't use offset and size, so they don't need to be
147 * validated. However, in order to maintain backward compatibility, some
148 * checks on offset and size have been kept.
149 */
150 return vm_sanitize_addr_size(offset_u, size_u,
151 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
152 target_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
153 map_start, map_end, map_size);
154 }
155
156 static kern_return_t
157 mach_make_memory_entry_mem_only(
158 vm_map_t target_map,
159 memory_object_size_ut *size_u,
160 memory_object_offset_ut offset_u,
161 vm_prot_t permission,
162 ipc_port_t *object_handle,
163 vm_named_entry_t parent_entry)
164 {
165 boolean_t parent_is_object;
166 vm_object_t object;
167 unsigned int access;
168 vm_prot_t protections;
169 bool mask_protections;
170 unsigned int wimg_mode;
171 bool use_data_addr;
172 bool use_4K_compat;
173 vm_named_entry_t user_entry __unused = NULL;
174 kern_return_t kr;
175 vm_map_size_t map_size;
176 vm_map_offset_t map_start, map_end;
177
178 /*
179 * Sanitize addr and size. Permissions have been sanitized prior to
180 * dispatch.
181 */
182 kr = mach_make_memory_entry_mem_only_sanitize(target_map,
183 *size_u,
184 offset_u,
185 &map_start,
186 &map_end,
187 &map_size);
188 if (__improbable(kr != KERN_SUCCESS)) {
189 return mach_make_memory_entry_cleanup(kr, target_map,
190 size_u, offset_u, permission, user_entry, object_handle);
191 }
192
193 vm_memory_entry_decode_perm(permission, &access, &protections,
194 &mask_protections, &use_data_addr, &use_4K_compat);
195
196 if (use_data_addr || use_4K_compat || parent_entry == NULL) {
197 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
198 size_u, offset_u, permission, user_entry, object_handle);
199 }
200
201 parent_is_object = parent_entry->is_object;
202 if (!parent_is_object) {
203 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
204 size_u, offset_u, permission, user_entry, object_handle);
205 }
206
207 if ((access != parent_entry->access) &&
208 !(parent_entry->protection & VM_PROT_WRITE)) {
209 return mach_make_memory_entry_cleanup(KERN_INVALID_RIGHT, target_map,
210 size_u, offset_u, permission, user_entry, object_handle);
211 }
212
213 object = vm_named_entry_to_vm_object(parent_entry);
214 if (parent_is_object && object != VM_OBJECT_NULL) {
215 wimg_mode = object->wimg_bits;
216 } else {
217 wimg_mode = VM_WIMG_USE_DEFAULT;
218 }
219 vm_prot_to_wimg(access, &wimg_mode);
220 if (parent_is_object && object &&
221 (access != MAP_MEM_NOOP) &&
222 (!(object->nophyscache))) {
223 if (object->wimg_bits != wimg_mode) {
224 vm_object_lock(object);
225 #if HAS_MTE
226 if (vm_object_is_mte_mappable(object)) {
227 vm_object_unlock(object);
228 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT,
229 target_map, size_u, offset_u, permission, user_entry,
230 object_handle);
231 }
232 #endif /* HAS_MTE */
233 vm_object_change_wimg_mode(object, wimg_mode);
234 vm_object_unlock(object);
235 }
236 }
237 if (access != MAP_MEM_NOOP) {
238 parent_entry->access = access;
239 }
240 if (object_handle) {
241 *object_handle = IP_NULL;
242 }
243 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
244 "%p kr 0x%x\n", target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u),
245 VM_SANITIZE_UNSAFE_UNWRAP(*size_u), permission, user_entry, KERN_SUCCESS);
246 /*
247 * TODO: Size isn't being set in this path
248 */
249 return KERN_SUCCESS;
250 }
251
252 static __attribute__((always_inline, warn_unused_result))
253 kern_return_t
254 mach_make_memory_entry_generic_sanitize(
255 vm_map_t target_map,
256 memory_object_size_ut size_u,
257 vm_map_offset_ut offset_u,
258 vm_map_offset_t *map_start,
259 vm_map_offset_t *map_end,
260 vm_map_size_t *map_size,
261 vm_map_offset_t *offset)
262 {
263 kern_return_t kr;
264
265 /*
266 * Validate start and end
267 */
268 kr = vm_sanitize_addr_size(offset_u, size_u,
269 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
270 target_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
271 map_start, map_end, map_size);
272 if (__improbable(kr != KERN_SUCCESS)) {
273 return kr;
274 }
275 /*
276 * Validate offset
277 */
278 kr = vm_sanitize_offset(offset_u, VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
279 *map_start, *map_end, offset);
280 if (__improbable(kr != KERN_SUCCESS)) {
281 return kr;
282 }
283
284 return KERN_SUCCESS;
285 }
286
287 static kern_return_t
288 mach_make_memory_entry_named_create(
289 vm_map_t target_map,
290 memory_object_size_ut *size_u,
291 vm_map_offset_ut offset_u,
292 vm_prot_t permission,
293 vm_named_entry_kernel_flags_t vmne_kflags,
294 ipc_port_t *object_handle)
295 {
296 vm_object_t object;
297 unsigned int access;
298 vm_prot_t protections;
299 bool mask_protections;
300 unsigned int wimg_mode;
301 bool use_data_addr;
302 bool use_4K_compat;
303 int ledger_flags = 0;
304 task_t owner;
305 bool fully_owned = false;
306 vm_named_entry_t user_entry = NULL;
307 kern_return_t kr;
308 vm_map_size_t map_size;
309 vm_map_offset_t map_start, map_end, offset;
310
311 if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
312 return mach_make_memory_entry_cleanup(KERN_SUCCESS, target_map,
313 size_u, offset_u, permission, user_entry, object_handle);
314 }
315
316 /*
317 * Sanitize addr and size. Permissions have been sanitized prior to
318 * dispatch.
319 */
320 kr = mach_make_memory_entry_generic_sanitize(target_map,
321 *size_u,
322 offset_u,
323 &map_start,
324 &map_end,
325 &map_size,
326 &offset);
327 if (__improbable(kr != KERN_SUCCESS)) {
328 return mach_make_memory_entry_cleanup(kr, target_map,
329 size_u, offset_u, permission, user_entry, object_handle);
330 }
331
332 assert(map_size != 0);
333
334 vm_memory_entry_decode_perm(permission, &access, &protections,
335 &mask_protections, &use_data_addr, &use_4K_compat);
336
337 if (use_data_addr || use_4K_compat) {
338 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
339 size_u, offset_u, permission, user_entry, object_handle);
340 }
341
342 /*
343 * Force the creation of the VM object now.
344 */
345 #if __LP64__
346 if (map_size > ANON_MAX_SIZE) {
347 return mach_make_memory_entry_cleanup(KERN_FAILURE, target_map,
348 size_u, offset_u, permission, user_entry, object_handle);
349 }
350 #endif /* __LP64__ */
351
352 object = vm_object_allocate(map_size, vm_map_maybe_serial_id(target_map));
353 assert(object != VM_OBJECT_NULL);
354 vm_object_lock(object);
355
356 /*
357 * XXX
358 * We use this path when we want to make sure that
359 * nobody messes with the object (coalesce, for
360 * example) before we map it.
361 * We might want to use these objects for transposition via
362 * vm_object_transpose() too, so we don't want any copy or
363 * shadow objects either...
364 */
365 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
366 VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
367
368 owner = current_task();
369 if ((permission & MAP_MEM_PURGABLE) ||
370 vmne_kflags.vmnekf_ledger_tag) {
371 assert(object->vo_owner == NULL);
372 assert(object->resident_page_count == 0);
373 assert(object->wired_page_count == 0);
374 assert(owner != TASK_NULL);
375 if (vmne_kflags.vmnekf_ledger_no_footprint) {
376 ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
377 object->vo_no_footprint = TRUE;
378 }
379 if (permission & MAP_MEM_PURGABLE) {
380 if (!(permission & VM_PROT_WRITE)) {
381 /* if we can't write, we can't purge */
382 vm_object_unlock(object);
383 vm_object_deallocate(object);
384 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT,
385 target_map, size_u, offset_u, permission, user_entry,
386 object_handle);
387 }
388 VM_OBJECT_SET_PURGABLE(object, VM_PURGABLE_NONVOLATILE);
389 if (permission & MAP_MEM_PURGABLE_KERNEL_ONLY) {
390 VM_OBJECT_SET_PURGEABLE_ONLY_BY_KERNEL(object, TRUE);
391 }
392 #if __arm64__
393 if (owner->task_legacy_footprint) {
394 /*
395 * For ios11, we failed to account for
396 * this memory. Keep doing that for
397 * legacy apps (built before ios12),
398 * for backwards compatibility's sake...
399 */
400 owner = kernel_task;
401 }
402 #endif /* __arm64__ */
403 vm_purgeable_nonvolatile_enqueue(object, owner);
404 /* all memory in this named entry is "owned" */
405 fully_owned = true;
406 }
407 }
408
409 if (vmne_kflags.vmnekf_ledger_tag) {
410 /*
411 * Bill this object to the current task's
412 * ledgers for the given tag.
413 */
414 if (vmne_kflags.vmnekf_ledger_no_footprint) {
415 ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
416 }
417 kr = vm_object_ownership_change(
418 object,
419 vmne_kflags.vmnekf_ledger_tag,
420 owner, /* new owner */
421 ledger_flags,
422 FALSE); /* task_objq locked? */
423 if (kr != KERN_SUCCESS) {
424 vm_object_unlock(object);
425 vm_object_deallocate(object);
426 return mach_make_memory_entry_cleanup(kr, target_map,
427 size_u, offset_u, permission, user_entry, object_handle);
428 }
429 /* all memory in this named entry is "owned" */
430 fully_owned = true;
431 }
432
433 #if CONFIG_SECLUDED_MEMORY
434 if (secluded_for_iokit && /* global boot-arg */
435 ((permission & MAP_MEM_GRAB_SECLUDED))) {
436 object->can_grab_secluded = TRUE;
437 assert(!object->eligible_for_secluded);
438 }
439 #endif /* CONFIG_SECLUDED_MEMORY */
440
441 /*
442 * The VM object is brand new and nobody else knows about it,
443 * so we don't need to lock it.
444 */
445
446 wimg_mode = object->wimg_bits;
447 vm_prot_to_wimg(access, &wimg_mode);
448 if (access != MAP_MEM_NOOP) {
449 object->wimg_bits = wimg_mode;
450 }
451
452 vm_object_unlock(object);
453
454 /* the object has no pages, so no WIMG bits to update here */
455
456 user_entry = mach_memory_entry_allocate(object_handle);
457 vm_named_entry_associate_vm_object(
458 user_entry,
459 object,
460 0,
461 map_size,
462 (protections & VM_PROT_ALL));
463 user_entry->internal = TRUE;
464 user_entry->is_sub_map = FALSE;
465 user_entry->offset = 0;
466 user_entry->data_offset = 0;
467 user_entry->protection = protections;
468 user_entry->access = access;
469 user_entry->size = map_size;
470 user_entry->is_fully_owned = fully_owned;
471
472 /* user_object pager and internal fields are not used */
473 /* when the object field is filled in. */
474
475 *size_u = vm_sanitize_wrap_size(user_entry->size - user_entry->data_offset);
476 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
477 "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
478 permission, user_entry, KERN_SUCCESS);
479 return KERN_SUCCESS;
480 }
481
482 static kern_return_t
483 mach_make_memory_entry_copy(
484 vm_map_t target_map,
485 memory_object_size_ut *size_u,
486 vm_map_offset_ut offset_u,
487 vm_prot_t permission,
488 __unused vm_named_entry_kernel_flags_t vmne_kflags,
489 ipc_port_t *object_handle)
490 {
491 unsigned int access;
492 vm_prot_t protections;
493 bool mask_protections;
494 bool use_data_addr;
495 bool use_4K_compat;
496 vm_named_entry_t user_entry = NULL;
497 vm_map_copy_t copy;
498 /*
499 * Stash the offset in the page for use by vm_map_enter_mem_object()
500 * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
501 */
502 vm_object_offset_t offset_in_page;
503 kern_return_t kr;
504 vm_map_size_t map_size;
505 vm_map_offset_t map_start, map_end, offset;
506
507 if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
508 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
509 size_u, offset_u, permission, user_entry, object_handle);
510 }
511
512 /*
513 * Sanitize addr and size. Permissions have been sanitized prior to
514 * dispatch.
515 */
516 kr = mach_make_memory_entry_generic_sanitize(target_map,
517 *size_u,
518 offset_u,
519 &map_start,
520 &map_end,
521 &map_size,
522 &offset);
523 if (__improbable(kr != KERN_SUCCESS)) {
524 return mach_make_memory_entry_cleanup(kr, target_map,
525 size_u, offset_u, permission, user_entry, object_handle);
526 }
527
528 assert(map_size != 0);
529
530 vm_memory_entry_decode_perm(permission, &access, &protections,
531 &mask_protections, &use_data_addr, &use_4K_compat);
532
533 if (target_map == VM_MAP_NULL) {
534 return mach_make_memory_entry_cleanup(KERN_INVALID_TASK, target_map,
535 size_u, offset_u, permission, user_entry, object_handle);
536 }
537
538 offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
539 use_data_addr, use_4K_compat);
540
541 int copyin_flags = VM_MAP_COPYIN_ENTRY_LIST;
542 #if HAS_MTE
543 copyin_flags |= VM_MAP_COPYIN_DEST_UNKNOWN;
544 copyin_flags |= vmne_kflags.vmnekf_is_iokit ? VM_MAP_COPYIN_IOKIT : 0;
545 #endif
546 kr = vm_map_copyin_internal(target_map,
547 map_start,
548 map_size,
549 copyin_flags,
550 &copy);
551 if (kr != KERN_SUCCESS) {
552 return mach_make_memory_entry_cleanup(kr, target_map,
553 size_u, offset_u, permission, user_entry, object_handle);
554 }
555 assert(copy != VM_MAP_COPY_NULL);
556
557 user_entry = mach_memory_entry_allocate(object_handle);
558 user_entry->backing.copy = copy;
559 user_entry->internal = FALSE;
560 user_entry->is_sub_map = FALSE;
561 user_entry->is_copy = TRUE;
562 user_entry->offset = 0;
563 user_entry->protection = protections;
564 user_entry->size = map_size;
565 user_entry->data_offset = offset_in_page;
566
567 /* is all memory in this named entry "owned"? */
568 vm_map_entry_t entry;
569 user_entry->is_fully_owned = TRUE;
570 for (entry = vm_map_copy_first_entry(copy);
571 entry != vm_map_copy_to_entry(copy);
572 entry = entry->vme_next) {
573 if (entry->is_sub_map ||
574 VME_OBJECT(entry) == VM_OBJECT_NULL ||
575 VM_OBJECT_OWNER(VME_OBJECT(entry)) == TASK_NULL) {
576 /* this memory is not "owned" */
577 user_entry->is_fully_owned = FALSE;
578 break;
579 }
580 }
581
582 *size_u = vm_sanitize_wrap_size(user_entry->size - user_entry->data_offset);
583 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> "
584 "entry %p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
585 permission, user_entry, KERN_SUCCESS);
586 return KERN_SUCCESS;
587 }
588
589 static kern_return_t
590 mach_make_memory_entry_share(
591 vm_map_t target_map,
592 memory_object_size_ut *size_u,
593 vm_map_offset_ut offset_u,
594 vm_prot_t permission,
595 __unused vm_named_entry_kernel_flags_t vmne_kflags,
596 ipc_port_t *object_handle,
597 ipc_port_t parent_handle,
598 vm_named_entry_t parent_entry)
599 {
600 vm_object_t object;
601 unsigned int access;
602 vm_prot_t protections;
603 bool mask_protections;
604 bool use_data_addr;
605 bool use_4K_compat;
606 vm_named_entry_t user_entry = NULL;
607 vm_map_copy_t copy;
608 vm_prot_t cur_prot, max_prot;
609 vm_map_kernel_flags_t vmk_flags;
610 vm_map_entry_t parent_copy_entry;
611 /*
612 * Stash the offset in the page for use by vm_map_enter_mem_object()
613 * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
614 */
615 vm_object_offset_t offset_in_page;
616 unsigned int wimg_mode;
617 kern_return_t kr;
618 vm_map_size_t map_size;
619 vm_map_offset_t map_start, map_end, offset;
620
621 vmlp_api_start(MACH_MAKE_MEMORY_ENTRY_SHARE);
622
623 if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
624 kr = mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
625 size_u, offset_u, permission, user_entry, object_handle);
626 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
627 return kr;
628 }
629
630 /*
631 * Sanitize addr and size. Permissions have been sanitized prior to
632 * dispatch.
633 */
634 kr = mach_make_memory_entry_generic_sanitize(target_map,
635 *size_u,
636 offset_u,
637 &map_start,
638 &map_end,
639 &map_size,
640 &offset);
641 if (__improbable(kr != KERN_SUCCESS)) {
642 kr = mach_make_memory_entry_cleanup(kr, target_map,
643 size_u, offset_u, permission, user_entry, object_handle);
644 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
645 return kr;
646 }
647
648 assert(map_size != 0);
649
650 vm_memory_entry_decode_perm(permission, &access, &protections,
651 &mask_protections, &use_data_addr, &use_4K_compat);
652
653 if (target_map == VM_MAP_NULL) {
654 kr = mach_make_memory_entry_cleanup(KERN_INVALID_TASK, target_map,
655 size_u, offset_u, permission, user_entry, object_handle);
656 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
657 return kr;
658 }
659
660 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
661 vmk_flags.vmkf_range_id = kmem_needs_data_share_range() ?
662 KMEM_RANGE_ID_DATA_SHARED : KMEM_RANGE_ID_DATA;
663
664 parent_copy_entry = VM_MAP_ENTRY_NULL;
665 if (!(permission & MAP_MEM_VM_SHARE)) {
666 vm_map_t tmp_map, real_map;
667 vm_map_version_t version;
668 vm_object_t tmp_object;
669 vm_object_offset_t obj_off;
670 vm_prot_t prot;
671 boolean_t wired;
672 bool contended;
673
674 /* resolve any pending submap copy-on-write... */
675 if (protections & VM_PROT_WRITE) {
676 tmp_map = target_map;
677 vm_map_lock_read(tmp_map);
678 kr = vm_map_lookup_and_lock_object(&tmp_map,
679 map_start,
680 protections | (mask_protections ? VM_PROT_IS_MASK : 0),
681 OBJECT_LOCK_EXCLUSIVE,
682 &version,
683 &tmp_object,
684 &obj_off,
685 &prot,
686 &wired,
687 NULL, /* fault_info */
688 &real_map,
689 &contended);
690 if (kr != KERN_SUCCESS) {
691 vm_map_unlock_read(tmp_map);
692 } else {
693 vm_object_unlock(tmp_object);
694 vm_map_unlock_read(tmp_map);
695 if (real_map != tmp_map) {
696 vm_map_unlock_read(real_map);
697 }
698 }
699 }
700 /* ... and carry on */
701
702 /* stop extracting if VM object changes */
703 vmk_flags.vmkf_copy_single_object = TRUE;
704 if ((permission & MAP_MEM_NAMED_REUSE) &&
705 parent_entry != NULL &&
706 parent_entry->is_object) {
707 vm_map_copy_t parent_copy;
708 parent_copy = parent_entry->backing.copy;
709 /*
710 * Assert that the vm_map_copy is coming from the right
711 * zone and hasn't been forged
712 */
713 vm_map_copy_require(parent_copy);
714 assert(parent_copy->cpy_hdr.nentries == 1);
715 parent_copy_entry = vm_map_copy_first_entry(parent_copy);
716 assert(!parent_copy_entry->is_sub_map);
717 }
718 }
719
720 offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
721 use_data_addr, use_4K_compat);
722
723 if (mask_protections) {
724 /*
725 * caller is asking for whichever protections are
726 * available: no required protections.
727 */
728 cur_prot = VM_PROT_NONE;
729 max_prot = VM_PROT_NONE;
730 vmk_flags.vmkf_remap_legacy_mode = true;
731 } else {
732 /*
733 * Caller wants a memory entry with "protections".
734 * Make sure we extract only memory that matches that.
735 */
736 cur_prot = protections;
737 max_prot = protections;
738 }
739 if (target_map->pmap == kernel_pmap) {
740 /*
741 * Get "reserved" map entries to avoid deadlocking
742 * on the kernel map or a kernel submap if we
743 * run out of VM map entries and need to refill that
744 * zone.
745 */
746 vmk_flags.vmkf_copy_pageable = FALSE;
747 } else {
748 vmk_flags.vmkf_copy_pageable = TRUE;
749 }
750 vmk_flags.vmkf_copy_same_map = FALSE;
751 #if HAS_MTE
752 vmk_flags.vmkf_is_iokit = vmne_kflags.vmnekf_is_iokit;
753 vmk_flags.vmkf_copy_dest = VM_COPY_DESTINATION_UNKNOWN;
754 #endif /* HAS_MTE */
755 assert(map_size != 0);
756 kr = vm_map_copy_extract(target_map,
757 map_start,
758 map_size,
759 FALSE, /* copy */
760 &copy,
761 &cur_prot,
762 &max_prot,
763 VM_INHERIT_SHARE,
764 vmk_flags);
765 if (kr != KERN_SUCCESS) {
766 kr = mach_make_memory_entry_cleanup(kr, target_map,
767 size_u, offset_u, permission, user_entry, object_handle);
768 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
769 return kr;
770 }
771 assert(copy != VM_MAP_COPY_NULL);
772
773 if (mask_protections) {
774 /*
775 * We just want as much of "original_protections"
776 * as we can get out of the actual "cur_prot".
777 */
778 protections &= cur_prot;
779 if (protections == VM_PROT_NONE) {
780 /* no access at all: fail */
781 vm_map_copy_discard(copy);
782 kr = mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
783 target_map, size_u, offset_u, permission, user_entry,
784 object_handle);
785 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
786 return kr;
787 }
788 } else {
789 /*
790 * We want exactly "original_protections"
791 * out of "cur_prot".
792 */
793 assert((cur_prot & protections) == protections);
794 assert((max_prot & protections) == protections);
795 /* XXX FBDP TODO: no longer needed? */
796 if ((cur_prot & protections) != protections) {
797 vm_map_copy_discard(copy);
798 kr = mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
799 target_map, size_u, offset_u, permission, user_entry,
800 object_handle);
801 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
802 return kr;
803 }
804 }
805
806 if (!(permission & MAP_MEM_VM_SHARE)) {
807 vm_map_entry_t copy_entry;
808
809 /* limit size to what's actually covered by "copy" */
810 assert(copy->cpy_hdr.nentries == 1);
811 copy_entry = vm_map_copy_first_entry(copy);
812 map_size = copy_entry->vme_end - copy_entry->vme_start;
813
814 if ((permission & MAP_MEM_NAMED_REUSE) &&
815 parent_copy_entry != VM_MAP_ENTRY_NULL &&
816 VME_OBJECT(copy_entry) == VME_OBJECT(parent_copy_entry) &&
817 VME_OFFSET(copy_entry) == VME_OFFSET(parent_copy_entry) &&
818 parent_entry->offset == 0 &&
819 parent_entry->size == map_size &&
820 (parent_entry->data_offset == offset_in_page)) {
821 /* we have a match: re-use "parent_entry" */
822
823 /* release our new "copy" */
824 vm_map_copy_discard(copy);
825 /* get extra send right on handle */
826 parent_handle = ipc_port_copy_send_any(parent_handle);
827
828 *size_u = vm_sanitize_wrap_size(parent_entry->size -
829 parent_entry->data_offset);
830 *object_handle = parent_handle;
831 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> "
832 "entry %p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
833 permission, user_entry, KERN_SUCCESS);
834 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, KERN_SUCCESS);
835 return KERN_SUCCESS;
836 }
837
838 /* no match: we need to create a new entry */
839 object = VME_OBJECT(copy_entry);
840
841 if (object == VM_OBJECT_NULL) {
842 /* The object can be null when protection == max_protection == VM_PROT_NONE.
843 * Return a failure because the code that follows, and other APIs that consume
844 * a named entry, expect a non-null object. */
845 vm_map_copy_discard(copy);
846 kr = mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
847 target_map, size_u, offset_u, permission, user_entry,
848 object_handle);
849 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
850 return kr;
851 }
852
853 vm_object_lock(object);
854 wimg_mode = object->wimg_bits;
855 if (!(object->nophyscache)) {
856 vm_prot_to_wimg(access, &wimg_mode);
857 }
858 if (object->wimg_bits != wimg_mode) {
859 #if HAS_MTE
860 if (vm_object_is_mte_mappable(object)) {
861 vm_object_unlock(object);
862 vm_map_copy_discard(copy);
863 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT,
864 target_map, size_u, offset_u, permission, user_entry,
865 object_handle);
866 }
867 #endif /* HAS_MTE */
868 vm_object_change_wimg_mode(object, wimg_mode);
869 }
870 vm_object_unlock(object);
871 }
872
873 user_entry = mach_memory_entry_allocate(object_handle);
874 user_entry->backing.copy = copy;
875 user_entry->is_sub_map = FALSE;
876 user_entry->is_object = FALSE;
877 user_entry->internal = FALSE;
878 user_entry->protection = protections;
879 user_entry->size = map_size;
880 user_entry->data_offset = offset_in_page;
881
882 if (permission & MAP_MEM_VM_SHARE) {
883 vm_map_entry_t copy_entry;
884
885 user_entry->is_copy = TRUE;
886 user_entry->offset = 0;
887
888 /* is all memory in this named entry "owned"? */
889 user_entry->is_fully_owned = TRUE;
890 for (copy_entry = vm_map_copy_first_entry(copy);
891 copy_entry != vm_map_copy_to_entry(copy);
892 copy_entry = copy_entry->vme_next) {
893 if (copy_entry->is_sub_map) {
894 /* submaps can't be owned */
895 user_entry->is_fully_owned = FALSE;
896 break;
897 }
898 if (VM_OBJECT_OWNER(VME_OBJECT(copy_entry)) == TASK_NULL) {
899 object = VME_OBJECT(copy_entry);
900 if (object && !object->internal) {
901 /* external objects can be "owned",
902 * is_fully_owned remains TRUE as far as this entry is concerned */
903 continue;
904 }
905 /* this memory is not "owned" */
906 user_entry->is_fully_owned = FALSE;
907 break;
908 }
909 }
910 } else {
911 assert3p(object, !=, VM_OBJECT_NULL); /* Sanity, this was set above */
912 user_entry->is_object = TRUE;
913 assert3p(object, ==, vm_named_entry_to_vm_object(user_entry)); /* Sanity, this was set above */
914 user_entry->internal = object->internal;
915 user_entry->offset = VME_OFFSET(vm_map_copy_first_entry(copy));
916 user_entry->access = GET_MAP_MEM(permission);
917 /* is all memory in this named entry "owned"? */
918 user_entry->is_fully_owned = FALSE;
919 if (VM_OBJECT_OWNER(object) != TASK_NULL) {
920 /* object is owned */
921 user_entry->is_fully_owned = TRUE;
922 } else if (!object->internal) {
923 /* external objects can become "owned" */
924 user_entry->is_fully_owned = TRUE;
925 }
926 }
927
928 *size_u = vm_sanitize_wrap_size(user_entry->size -
929 user_entry->data_offset);
930 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
931 "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
932 permission, user_entry, KERN_SUCCESS);
933
934 vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, KERN_SUCCESS);
935 return KERN_SUCCESS;
936 }
937
938 static __attribute__((always_inline, warn_unused_result))
939 kern_return_t
940 mach_make_memory_entry_from_parent_entry_sanitize(
941 vm_map_t target_map,
942 memory_object_size_ut size_u,
943 vm_map_offset_ut offset_u,
944 vm_prot_t permission,
945 vm_named_entry_t parent_entry,
946 vm_map_offset_t *map_start,
947 vm_map_offset_t *map_end,
948 vm_map_size_t *map_size,
949 vm_map_offset_t *offset,
950 vm_map_offset_t *user_entry_offset)
951 {
952 bool mask_protections;
953 unsigned int access;
954 vm_prot_t protections;
955 bool use_data_addr;
956 bool use_4K_compat;
957 vm_map_offset_t start_mask = vm_map_page_mask(target_map);
958 kern_return_t kr;
959
960 vm_memory_entry_decode_perm(permission, &access, &protections,
961 &mask_protections, &use_data_addr, &use_4K_compat);
962
963 if (use_data_addr || use_4K_compat) {
964 /*
965 * Validate offset doesn't overflow when added to parent entry's offset
966 */
967 if (vm_sanitize_add_overflow(offset_u, parent_entry->data_offset,
968 &offset_u)) {
969 return KERN_INVALID_ARGUMENT;
970 }
971 start_mask = PAGE_MASK;
972 }
973
974 /*
975 * Currently map_start is truncated using the page mask from target_map
976 * when use_data_addr || use_4K_compat is false, while map_end uses
977 * PAGE_MASK. In order to maintain that behavior, we
978 * request unaligned values and perform the truncation/rounding
979 * explicitly.
980 */
981 kr = vm_sanitize_addr_size(offset_u, size_u,
982 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY, PAGE_MASK,
983 VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES,
984 map_start, map_end, map_size);
985 if (__improbable(kr != KERN_SUCCESS)) {
986 return kr;
987 }
988
989 *map_start = vm_map_trunc_page_mask(*map_start, start_mask);
990 *map_end = vm_map_round_page_mask(*map_end, PAGE_MASK);
991 *map_size = *map_end - *map_start;
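/*
 * Illustrative example (hypothetical values): with a 4K target_map page
 * mask (0xFFF), a 16K kernel PAGE_MASK (0x3FFF), offset 0x1800 and size
 * 0x800, the unaligned range is [0x1800, 0x2000); map_start truncates to
 * 0x1000, map_end rounds up to 0x4000, and map_size becomes 0x3000.
 */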
992
993 /*
994 * Additional checks to make sure explicitly computed aligned start and end
995 * still make sense.
996 */
997 if (__improbable(*map_end <= *map_start) || (*map_end > parent_entry->size)) {
998 return KERN_INVALID_ARGUMENT;
999 }
1000
1001 /*
1002 * Validate offset
1003 */
1004 kr = vm_sanitize_offset(offset_u, VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
1005 *map_start, *map_end, offset);
1006 if (__improbable(kr != KERN_SUCCESS)) {
1007 return kr;
1008 }
1009
1010 if (__improbable(os_add_overflow(parent_entry->offset, *map_start,
1011 user_entry_offset))) {
1012 return KERN_INVALID_ARGUMENT;
1013 }
1014
1015 return KERN_SUCCESS;
1016 }
1017
1018 static kern_return_t
1019 mach_make_memory_entry_from_parent_entry(
1020 vm_map_t target_map,
1021 memory_object_size_ut *size_u,
1022 vm_map_offset_ut offset_u,
1023 vm_prot_t permission,
1024 ipc_port_t *object_handle,
1025 vm_named_entry_t parent_entry)
1026 {
1027 vm_object_t object;
1028 unsigned int access;
1029 vm_prot_t protections;
1030 bool mask_protections;
1031 bool use_data_addr;
1032 bool use_4K_compat;
1033 vm_named_entry_t user_entry = NULL;
1034 kern_return_t kr;
1035 /*
1036 * Stash the offset in the page for use by vm_map_enter_mem_object()
1037 * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
1038 */
1039 vm_object_offset_t offset_in_page;
1040 vm_map_offset_t map_start, map_end;
1041 vm_map_size_t map_size;
1042 vm_map_offset_t user_entry_offset, offset;
1043
1044 vm_memory_entry_decode_perm(permission, &access, &protections,
1045 &mask_protections, &use_data_addr, &use_4K_compat);
1046
1047 /*
1048 * Sanitize addr and size. Permissions have been sanitized prior to
1049 * dispatch.
1050 */
1051 kr = mach_make_memory_entry_from_parent_entry_sanitize(target_map,
1052 *size_u,
1053 offset_u,
1054 permission,
1055 parent_entry,
1056 &map_start,
1057 &map_end,
1058 &map_size,
1059 &offset,
1060 &user_entry_offset);
1061 if (__improbable(kr != KERN_SUCCESS)) {
1062 return mach_make_memory_entry_cleanup(kr, target_map,
1063 size_u, offset_u, permission, user_entry, object_handle);
1064 }
1065
1066 if (use_data_addr || use_4K_compat) {
1067 /*
1068 * Submaps and pagers should only be accessible from within
1069 * the kernel, which shouldn't use the data address flag, so it is safe to fail here.
1070 */
1071 if (parent_entry->is_sub_map) {
1072 panic("Shouldn't be using data address with a parent entry that is a submap.");
1073 }
1074 }
1075
1076 if (mask_protections) {
1077 /*
1078 * The caller asked us to use the "protections" as
1079 * a mask, so restrict "protections" to what this
1080 * mapping actually allows.
1081 */
1082 protections &= parent_entry->protection;
1083 }
1084 if ((protections & parent_entry->protection) != protections) {
1085 return mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE, target_map,
1086 size_u, offset_u, permission, user_entry, object_handle);
1087 }
1088
1089 offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
1090 use_data_addr, use_4K_compat);
1091
1092 user_entry = mach_memory_entry_allocate(object_handle);
1093 user_entry->size = map_size;
1094 user_entry->offset = user_entry_offset;
1095 user_entry->data_offset = offset_in_page;
1096 user_entry->is_sub_map = parent_entry->is_sub_map;
1097 user_entry->is_copy = parent_entry->is_copy;
1098 user_entry->protection = protections;
1099
1100 if (access != MAP_MEM_NOOP) {
1101 user_entry->access = access;
1102 }
1103
1104 if (parent_entry->is_sub_map) {
1105 vm_map_t map = parent_entry->backing.map;
1106 vm_map_reference(map);
1107 user_entry->backing.map = map;
1108 } else {
1109 object = vm_named_entry_to_vm_object(parent_entry);
1110 assert(object != VM_OBJECT_NULL);
1111 assert(object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC);
1112 vm_named_entry_associate_vm_object(
1113 user_entry,
1114 object,
1115 user_entry->offset,
1116 user_entry->size,
1117 (user_entry->protection & VM_PROT_ALL));
1118 assert(user_entry->is_object);
1119 /* we now point to this object, hold on */
1120 vm_object_lock(object);
1121 vm_object_reference_locked(object);
1122 #if VM_OBJECT_TRACKING_OP_TRUESHARE
1123 if (!object->true_share &&
1124 vm_object_tracking_btlog) {
1125 btlog_record(vm_object_tracking_btlog, object,
1126 VM_OBJECT_TRACKING_OP_TRUESHARE,
1127 btref_get(__builtin_frame_address(0), 0));
1128 }
1129 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
1130
1131 VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
1132 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
1133 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1134 }
1135 vm_object_unlock(object);
1136 }
1137 *size_u = vm_sanitize_wrap_size(user_entry->size -
1138 user_entry->data_offset);
1139 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
1140 "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
1141 permission, user_entry, KERN_SUCCESS);
1142 return KERN_SUCCESS;
1143 }
1144
1145 static inline kern_return_t
1146 mach_make_memory_entry_sanitize_perm(
1147 vm_prot_ut permission_u,
1148 vm_prot_t *permission)
1149 {
1150 return vm_sanitize_memory_entry_perm(permission_u,
1151 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
1152 VM_SANITIZE_FLAGS_CHECK_USER_MEM_MAP_FLAGS,
1153 VM_PROT_IS_MASK, permission);
1154 }
1155
1156 kern_return_t
1157 mach_make_memory_entry_internal(
1158 vm_map_t target_map,
1159 memory_object_size_ut *size_u,
1160 memory_object_offset_ut offset_u,
1161 vm_prot_ut permission_u,
1162 vm_named_entry_kernel_flags_t vmne_kflags,
1163 ipc_port_t *object_handle,
1164 ipc_port_t parent_handle)
1165 {
1166 vm_named_entry_t user_entry __unused = NULL;
1167 vm_named_entry_t parent_entry;
1168 kern_return_t kr;
1169 vm_prot_t permission;
1170
1171 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x\n",
1172 target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u), VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
1173 VM_SANITIZE_UNSAFE_UNWRAP(permission_u));
1174
1175 /*
1176 * Validate permissions as we need to dispatch the corresponding flavor
1177 */
1178 kr = mach_make_memory_entry_sanitize_perm(permission_u, &permission);
1179 if (__improbable(kr != KERN_SUCCESS)) {
1180 return mach_make_memory_entry_cleanup(kr, target_map,
1181 size_u, offset_u, permission, user_entry, object_handle);
1182 }
1183
1184 if (permission & MAP_MEM_LEDGER_TAGGED) {
1185 vmne_kflags.vmnekf_ledger_tag = VM_LEDGER_TAG_DEFAULT;
1186 }
1187
1188 parent_entry = mach_memory_entry_from_port(parent_handle);
1189 if (parent_entry && parent_entry->is_copy) {
1190 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
1191 size_u, offset_u, permission, user_entry, object_handle);
1192 }
1193
1194 if (permission & MAP_MEM_ONLY) {
1195 return mach_make_memory_entry_mem_only(target_map, size_u, offset_u,
1196 permission, object_handle, parent_entry);
1197 }
1198
1199 if (permission & MAP_MEM_NAMED_CREATE) {
1200 return mach_make_memory_entry_named_create(target_map, size_u, offset_u,
1201 permission, vmne_kflags, object_handle);
1202 }
1203
1204 if (permission & MAP_MEM_VM_COPY) {
1205 return mach_make_memory_entry_copy(target_map, size_u, offset_u,
1206 permission, vmne_kflags, object_handle);
1207 }
1208
1209 if ((permission & MAP_MEM_VM_SHARE)
1210 || parent_entry == NULL
1211 || (permission & MAP_MEM_NAMED_REUSE)) {
1212 return mach_make_memory_entry_share(target_map, size_u, offset_u,
1213 permission, vmne_kflags, object_handle, parent_handle,
1214 parent_entry);
1215 }
1216
1217 /*
1218 * This function will compute map start, end and size by including the
1219 * parent entry's offset. Therefore redo validation.
1220 */
1221 return mach_make_memory_entry_from_parent_entry(target_map, size_u,
1222 offset_u, permission, object_handle, parent_entry);
1223 }
1224
1225 kern_return_t
1226 _mach_make_memory_entry(
1227 vm_map_t target_map,
1228 memory_object_size_ut *size_u,
1229 memory_object_offset_ut offset_u,
1230 vm_prot_ut permission_u,
1231 ipc_port_t *object_handle,
1232 ipc_port_t parent_entry)
1233 {
1234 return mach_make_memory_entry_64(target_map, size_u,
1235 offset_u, permission_u, object_handle, parent_entry);
1236 }
1237
1238 kern_return_t
1239 mach_make_memory_entry(
1240 vm_map_t target_map,
1241 vm_size_ut *size_u,
1242 vm_offset_ut offset_u,
1243 vm_prot_ut permission_u,
1244 ipc_port_t *object_handle,
1245 ipc_port_t parent_entry)
1246 {
1247 kern_return_t kr;
1248
1249 kr = mach_make_memory_entry_64(target_map, size_u,
1250 offset_u, permission_u, object_handle, parent_entry);
1251 return kr;
1252 }
1253
1254 __private_extern__ vm_named_entry_t
1255 mach_memory_entry_allocate(ipc_port_t *user_handle_p)
1256 {
1257 vm_named_entry_t user_entry;
1258
1259 user_entry = kalloc_type(struct vm_named_entry,
1260 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1261 named_entry_lock_init(user_entry);
1262
1263 *user_handle_p = ipc_kobject_alloc_port(user_entry, IKOT_NAMED_ENTRY,
1264 IPC_KOBJECT_ALLOC_MAKE_SEND);
1265
1266 #if VM_NAMED_ENTRY_DEBUG
1267 /* backtrace at allocation time, for debugging only */
1268 user_entry->named_entry_bt = btref_get(__builtin_frame_address(0), 0);
1269 #endif /* VM_NAMED_ENTRY_DEBUG */
1270 return user_entry;
1271 }
1272
1273 static __attribute__((always_inline, warn_unused_result))
1274 kern_return_t
1275 mach_memory_object_memory_entry_64_sanitize(
1276 vm_object_size_ut size_u,
1277 vm_prot_ut permission_u,
1278 vm_object_size_t *size,
1279 vm_prot_t *permission)
1280 {
1281 kern_return_t kr;
1282
1283 kr = vm_sanitize_object_size(size_u,
1284 VM_SANITIZE_CALLER_MACH_MEMORY_OBJECT_MEMORY_ENTRY,
1285 VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS, size);
1286 if (__improbable(kr != KERN_SUCCESS)) {
1287 return kr;
1288 }
1289 kr = vm_sanitize_memory_entry_perm(permission_u,
1290 VM_SANITIZE_CALLER_MACH_MEMORY_OBJECT_MEMORY_ENTRY,
1291 VM_SANITIZE_FLAGS_NONE, VM_PROT_NONE,
1292 permission);
1293 if (__improbable(kr != KERN_SUCCESS)) {
1294 return kr;
1295 }
1296
1297 return KERN_SUCCESS;
1298 }
1299
1300 /*
1301 * mach_memory_object_memory_entry_64
1302 *
1303 * Create a named entry backed by the provided pager.
1304 *
1305 */
1306 kern_return_t
1307 mach_memory_object_memory_entry_64(
1308 host_t host,
1309 boolean_t internal,
1310 vm_object_size_ut size_u,
1311 vm_prot_ut permission_u,
1312 memory_object_t pager,
1313 ipc_port_t *entry_handle)
1314 {
1315 vm_named_entry_t user_entry;
1316 ipc_port_t user_handle;
1317 vm_object_t object;
1318 vm_object_size_t size;
1319 vm_prot_t permission;
1320 kern_return_t kr;
1321
1322 if (host == HOST_NULL) {
1323 return KERN_INVALID_HOST;
1324 }
1325
1326 /*
1327 * Validate size and permission
1328 */
1329 kr = mach_memory_object_memory_entry_64_sanitize(size_u,
1330 permission_u,
1331 &size,
1332 &permission);
1333 if (__improbable(kr != KERN_SUCCESS)) {
1334 return vm_sanitize_get_kr(kr);
1335 }
1336
1337 if (pager == MEMORY_OBJECT_NULL && internal) {
1338 object = vm_object_allocate(size, VM_MAP_SERIAL_NONE);
1339 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
1340 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1341 }
1342 } else {
1343 object = memory_object_to_vm_object(pager);
1344 if (object != VM_OBJECT_NULL) {
1345 vm_object_reference(object);
1346 }
1347 }
1348 if (object == VM_OBJECT_NULL) {
1349 return KERN_INVALID_ARGUMENT;
1350 }
1351
1352 user_entry = mach_memory_entry_allocate(&user_handle);
1353 user_entry->size = size;
1354 user_entry->offset = 0;
1355 user_entry->protection = permission & VM_PROT_ALL;
1356 user_entry->access = GET_MAP_MEM(permission);
1357 user_entry->is_sub_map = FALSE;
1358
1359 vm_named_entry_associate_vm_object(user_entry, object, 0, size,
1360 (user_entry->protection & VM_PROT_ALL));
1361 user_entry->internal = object->internal;
1362 assert(object->internal == internal);
1363 if (VM_OBJECT_OWNER(object) != TASK_NULL) {
1364 /* all memory in this entry is "owned" */
1365 user_entry->is_fully_owned = TRUE;
1366 } else if (object && !object->internal) {
1367 /* external objects can become "owned" */
1368 user_entry->is_fully_owned = TRUE;
1369 }
1370
1371 *entry_handle = user_handle;
1372 return KERN_SUCCESS;
1373 }
1374
1375 kern_return_t
1376 mach_memory_object_memory_entry(
1377 host_t host,
1378 boolean_t internal,
1379 vm_size_ut size_u,
1380 vm_prot_ut permission_u,
1381 memory_object_t pager,
1382 ipc_port_t *entry_handle)
1383 {
1384 return mach_memory_object_memory_entry_64( host, internal,
1385 size_u, permission_u, pager, entry_handle);
1386 }
1387
1388 kern_return_t
1389 mach_memory_entry_purgable_control(
1390 ipc_port_t entry_port,
1391 vm_purgable_t control,
1392 int *state)
1393 {
1394 if (control == VM_PURGABLE_SET_STATE_FROM_KERNEL) {
1395 /* not allowed from user-space */
1396 return KERN_INVALID_ARGUMENT;
1397 }
1398
1399 return memory_entry_purgeable_control_internal(entry_port, control, state);
1400 }
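/*
 * Hedged usage sketch (user-space view, illustrative only): a purgeable
 * named entry is typically created via mach_make_memory_entry_64() with
 * MAP_MEM_NAMED_CREATE | MAP_MEM_PURGABLE and then toggled through this
 * routine, e.g.:
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kr = mach_memory_entry_purgable_control(entry_port,
 *	    VM_PURGABLE_SET_STATE, &state);
 *	...
 *	state = VM_PURGABLE_NONVOLATILE;
 *	kr = mach_memory_entry_purgable_control(entry_port,
 *	    VM_PURGABLE_SET_STATE, &state);
 *	// on return, "state" reflects the previous purgeability state.
 */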
1401
1402 kern_return_t
1403 memory_entry_purgeable_control_internal(
1404 ipc_port_t entry_port,
1405 vm_purgable_t control,
1406 int *state)
1407 {
1408 kern_return_t kr;
1409 vm_named_entry_t mem_entry;
1410 vm_object_t object;
1411
1412 mem_entry = mach_memory_entry_from_port(entry_port);
1413 if (mem_entry == NULL) {
1414 return KERN_INVALID_ARGUMENT;
1415 }
1416
1417 if (control != VM_PURGABLE_SET_STATE &&
1418 control != VM_PURGABLE_GET_STATE &&
1419 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
1420 return KERN_INVALID_ARGUMENT;
1421 }
1422
1423 if ((control == VM_PURGABLE_SET_STATE ||
1424 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
1425 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
1426 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
1427 return KERN_INVALID_ARGUMENT;
1428 }
1429
1430 named_entry_lock(mem_entry);
1431
1432 if (mem_entry->is_sub_map ||
1433 mem_entry->is_copy) {
1434 named_entry_unlock(mem_entry);
1435 return KERN_INVALID_ARGUMENT;
1436 }
1437
1438 assert(mem_entry->is_object);
1439 object = vm_named_entry_to_vm_object(mem_entry);
1440 if (object == VM_OBJECT_NULL) {
1441 named_entry_unlock(mem_entry);
1442 return KERN_INVALID_ARGUMENT;
1443 }
1444
1445 vm_object_lock(object);
1446
1447 /* check that named entry covers entire object ? */
1448 if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) {
1449 vm_object_unlock(object);
1450 named_entry_unlock(mem_entry);
1451 return KERN_INVALID_ARGUMENT;
1452 }
1453
1454 named_entry_unlock(mem_entry);
1455
1456 kr = vm_object_purgable_control(object, control, state);
1457
1458 vm_object_unlock(object);
1459
1460 return kr;
1461 }
1462
1463 static kern_return_t
1464 memory_entry_access_tracking_internal(
1465 ipc_port_t entry_port,
1466 int *access_tracking,
1467 uint32_t *access_tracking_reads,
1468 uint32_t *access_tracking_writes)
1469 {
1470 vm_named_entry_t mem_entry;
1471 vm_object_t object;
1472 kern_return_t kr;
1473
1474 mem_entry = mach_memory_entry_from_port(entry_port);
1475 if (mem_entry == NULL) {
1476 return KERN_INVALID_ARGUMENT;
1477 }
1478
1479 named_entry_lock(mem_entry);
1480
1481 if (mem_entry->is_sub_map ||
1482 mem_entry->is_copy) {
1483 named_entry_unlock(mem_entry);
1484 return KERN_INVALID_ARGUMENT;
1485 }
1486
1487 assert(mem_entry->is_object);
1488 object = vm_named_entry_to_vm_object(mem_entry);
1489 if (object == VM_OBJECT_NULL) {
1490 named_entry_unlock(mem_entry);
1491 return KERN_INVALID_ARGUMENT;
1492 }
1493
1494 #if VM_OBJECT_ACCESS_TRACKING
1495 vm_object_access_tracking(object,
1496 access_tracking,
1497 access_tracking_reads,
1498 access_tracking_writes);
1499 kr = KERN_SUCCESS;
1500 #else /* VM_OBJECT_ACCESS_TRACKING */
1501 (void) access_tracking;
1502 (void) access_tracking_reads;
1503 (void) access_tracking_writes;
1504 kr = KERN_NOT_SUPPORTED;
1505 #endif /* VM_OBJECT_ACCESS_TRACKING */
1506
1507 named_entry_unlock(mem_entry);
1508
1509 return kr;
1510 }
1511
1512 kern_return_t
1513 mach_memory_entry_access_tracking(
1514 ipc_port_t entry_port,
1515 int *access_tracking,
1516 uint32_t *access_tracking_reads,
1517 uint32_t *access_tracking_writes)
1518 {
1519 return memory_entry_access_tracking_internal(entry_port,
1520 access_tracking,
1521 access_tracking_reads,
1522 access_tracking_writes);
1523 }
1524
1525 #if DEVELOPMENT || DEBUG
1526 /* For dtrace probe in mach_memory_entry_ownership */
1527 extern int proc_selfpid(void);
1528 extern char *proc_name_address(void *p);
1529 #endif /* DEVELOPMENT || DEBUG */
1530
1531 /* Kernel call only, MIG uses *_from_user() below */
1532 kern_return_t
1533 mach_memory_entry_ownership(
1534 ipc_port_t entry_port,
1535 task_t owner,
1536 int ledger_tag,
1537 int ledger_flags)
1538 {
1539 task_t cur_task;
1540 kern_return_t kr;
1541 vm_named_entry_t mem_entry;
1542 vm_object_t object;
1543
1544 if (ledger_flags & ~VM_LEDGER_FLAGS_ALL) {
1545 /* reject unexpected flags */
1546 return KERN_INVALID_ARGUMENT;
1547 }
1548
1549 cur_task = current_task();
1550 if (cur_task == kernel_task) {
1551 /* kernel thread: no entitlement needed */
1552 } else if (ledger_flags & VM_LEDGER_FLAG_FROM_KERNEL) {
1553 /* call is from trusted kernel code: no entitlement needed */
1554 } else if ((owner != cur_task && owner != TASK_NULL) ||
1555 (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT) ||
1556 (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) ||
1557 ledger_tag == VM_LEDGER_TAG_NETWORK) {
1558 bool transfer_ok = false;
1559
1560 /*
1561 * An entitlement is required to:
1562 * + transfer memory ownership to someone else,
1563 * + request that the memory not count against the footprint,
1564 * + tag as "network" (since that implies "no footprint")
1565 *
1566 * Exception: task with task_no_footprint_for_debug == 1 on internal build
1567 */
1568 if (!cur_task->task_can_transfer_memory_ownership &&
1569 IOCurrentTaskHasEntitlement("com.apple.private.memory.ownership_transfer")) {
1570 cur_task->task_can_transfer_memory_ownership = TRUE;
1571 }
1572 if (cur_task->task_can_transfer_memory_ownership) {
1573 /* we're allowed to transfer ownership to any task */
1574 transfer_ok = true;
1575 }
1576 #if DEVELOPMENT || DEBUG
1577 if (!transfer_ok &&
1578 ledger_tag == VM_LEDGER_TAG_DEFAULT &&
1579 (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) &&
1580 cur_task->task_no_footprint_for_debug) {
1581 int to_panic = 0;
1582 static bool init_bootarg = false;
1583
1584 /*
1585 * Allow performance tools running on internal builds to hide memory usage from phys_footprint even
1586 * WITHOUT an entitlement. This can be enabled by per task sysctl vm.task_no_footprint_for_debug=1
1587 * with the ledger tag VM_LEDGER_TAG_DEFAULT and flag VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG.
1588 *
1589 * If the boot-arg "panic_on_no_footprint_for_debug" is set, the kernel will
1590 * panic here in order to detect any abuse of this feature, which is intended solely for
1591 * memory debugging purposes.
1592 */
1593 if (!init_bootarg) {
1594 PE_parse_boot_argn("panic_on_no_footprint_for_debug", &to_panic, sizeof(to_panic));
1595 init_bootarg = true;
1596 }
1597 if (to_panic) {
1598 panic("%s: panic_on_no_footprint_for_debug is triggered by pid %d procname %s", __func__, proc_selfpid(), get_bsdtask_info(cur_task)? proc_name_address(get_bsdtask_info(cur_task)) : "?");
1599 }
1600
1601 /*
1602 * Flushing out user space processes using this interface:
1603 * $ dtrace -n 'task_no_footprint_for_debug {printf("%d[%s]\n", pid, execname); stack(); ustack();}'
1604 */
1605 DTRACE_VM(task_no_footprint_for_debug);
1606 transfer_ok = true;
1607 }
1608 #endif /* DEVELOPMENT || DEBUG */
1609 if (!transfer_ok) {
1610 char *our_id, *their_id;
1611 our_id = IOTaskGetEntitlement(current_task(), "com.apple.developer.memory.transfer-send");
1612 their_id = IOTaskGetEntitlement(owner, "com.apple.developer.memory.transfer-accept");
1613 if (our_id && their_id &&
1614 !strcmp(our_id, their_id)) { /* These are guaranteed to be null-terminated */
1615 /* allow transfer between tasks that have matching entitlements */
1616 transfer_ok = true;
1617 }
1618 if (our_id) {
1619 kfree_data_addr(our_id);
1620 }
1621 if (their_id) {
1622 kfree_data_addr(their_id);
1623 }
1624 }
1625 if (!transfer_ok) {
1626 /* transfer denied */
1627 return KERN_NO_ACCESS;
1628 }
1629
1630 if (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) {
1631 /*
1632 * We've made it past the checks above, so we either
1633 * have the entitlement or the sysctl.
1634 * Convert to VM_LEDGER_FLAG_NO_FOOTPRINT.
1635 */
1636 ledger_flags &= ~VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG;
1637 ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
1638 }
1639 }
1640
1641 if (ledger_tag == VM_LEDGER_TAG_UNCHANGED) {
1642 /* leave "ledger_tag" unchanged */
1643 } else if (ledger_tag < 0 ||
1644 ledger_tag > VM_LEDGER_TAG_MAX) {
1645 return KERN_INVALID_ARGUMENT;
1646 }
1647 if (owner == TASK_NULL) {
1648 /* leave "owner" unchanged */
1649 owner = VM_OBJECT_OWNER_UNCHANGED;
1650 }
1651
1652 mem_entry = mach_memory_entry_from_port(entry_port);
1653 if (mem_entry == NULL) {
1654 return KERN_INVALID_ARGUMENT;
1655 }
1656
1657 named_entry_lock(mem_entry);
1658
1659 if (mem_entry->is_sub_map ||
1660 !mem_entry->is_fully_owned) {
1661 named_entry_unlock(mem_entry);
1662 return KERN_INVALID_ARGUMENT;
1663 }
1664
1665 if (mem_entry->is_object) {
1666 object = vm_named_entry_to_vm_object(mem_entry);
1667 if (object == VM_OBJECT_NULL) {
1668 named_entry_unlock(mem_entry);
1669 return KERN_INVALID_ARGUMENT;
1670 }
1671 vm_object_lock(object);
1672 if (object->internal) {
1673 /* check that named entry covers entire object ? */
1674 if (mem_entry->offset != 0 ||
1675 object->vo_size != mem_entry->size) {
1676 vm_object_unlock(object);
1677 named_entry_unlock(mem_entry);
1678 return KERN_INVALID_ARGUMENT;
1679 }
1680 }
1681 named_entry_unlock(mem_entry);
1682 kr = vm_object_ownership_change(object,
1683 ledger_tag,
1684 owner,
1685 ledger_flags,
1686 FALSE); /* task_objq_locked */
1687 vm_object_unlock(object);
1688 } else if (mem_entry->is_copy) {
1689 vm_map_copy_t copy;
1690 vm_map_entry_t entry;
1691
1692 copy = mem_entry->backing.copy;
1693 named_entry_unlock(mem_entry);
1694 for (entry = vm_map_copy_first_entry(copy);
1695 entry != vm_map_copy_to_entry(copy);
1696 entry = entry->vme_next) {
1697 object = VME_OBJECT(entry);
1698 if (entry->is_sub_map ||
1699 object == VM_OBJECT_NULL) {
1700 kr = KERN_INVALID_ARGUMENT;
1701 break;
1702 }
1703 vm_object_lock(object);
1704 if (object->internal) {
1705 if (VME_OFFSET(entry) != 0 ||
1706 entry->vme_end - entry->vme_start != object->vo_size) {
1707 vm_object_unlock(object);
1708 kr = KERN_INVALID_ARGUMENT;
1709 break;
1710 }
1711 }
1712 kr = vm_object_ownership_change(object,
1713 ledger_tag,
1714 owner,
1715 ledger_flags,
1716 FALSE); /* task_objq_locked */
1717 vm_object_unlock(object);
1718 if (kr != KERN_SUCCESS) {
1719 kr = KERN_INVALID_ARGUMENT;
1720 break;
1721 }
1722 }
1723 } else {
1724 named_entry_unlock(mem_entry);
1725 return KERN_INVALID_ARGUMENT;
1726 }
1727
1728 return kr;
1729 }
1730
1731 /* MIG call from userspace */
1732 kern_return_t
1733 mach_memory_entry_ownership_from_user(
1734 ipc_port_t entry_port,
1735 mach_port_t owner_port,
1736 int ledger_tag,
1737 int ledger_flags)
1738 {
1739 task_t owner = TASK_NULL;
1740 kern_return_t kr;
1741
1742 if (ledger_flags & ~VM_LEDGER_FLAGS_USER) {
1743 return KERN_INVALID_ARGUMENT;
1744 }
1745
1746 if (IP_VALID(owner_port)) {
1747 if (ip_type(owner_port) == IKOT_TASK_ID_TOKEN) {
1748 task_id_token_t token = convert_port_to_task_id_token(owner_port);
1749 (void)task_identity_token_get_task_grp(token, &owner, TASK_GRP_MIG);
1750 task_id_token_release(token);
1751 /* token ref released */
1752 } else {
1753 owner = convert_port_to_task_mig(owner_port);
1754 }
1755 }
1756 /* hold task ref on owner (Nullable) */
1757
1758 if (owner && task_is_a_corpse(owner)) {
1759 /* identity token can represent a corpse, disallow it */
1760 task_deallocate_mig(owner);
1761 owner = TASK_NULL;
1762 }
1763
1764 /* mach_memory_entry_ownership() will handle TASK_NULL owner */
1765 kr = mach_memory_entry_ownership(entry_port, owner, /* Nullable */
1766 ledger_tag, ledger_flags);
1767
1768 if (owner) {
1769 task_deallocate_mig(owner);
1770 }
1771
1772 if (kr == KERN_SUCCESS) {
1773 /* MIG rule, consume port right on success */
1774 ipc_port_release_send(owner_port);
1775 }
1776 return kr;
1777 }
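/*
 * Illustrative userspace sketch (assumptions: "mem_entry_port" and
 * "owner_task_port" are hypothetical caller variables; the MIG routine names
 * and argument order follow <mach/memory_entry.h> and <mach/task.h>).
 * Transferring ownership to another task requires the
 * "com.apple.private.memory.ownership_transfer" entitlement or matching
 * transfer-send/transfer-accept entitlements, as checked above.
 *
 *	mach_port_t token = MACH_PORT_NULL;
 *	kern_return_t kr = task_create_identity_token(owner_task_port, &token);
 *	if (kr == KERN_SUCCESS) {
 *		kr = mach_memory_entry_ownership(mem_entry_port, token,
 *		    VM_LEDGER_TAG_DEFAULT, 0);
 *	}
 */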
1778
1779 kern_return_t
1780 mach_memory_entry_get_page_counts(
1781 ipc_port_t entry_port,
1782 uint64_t *resident_page_count,
1783 uint64_t *dirty_page_count,
1784 uint64_t *swapped_page_count)
1785 {
1786 kern_return_t kr;
1787 vm_named_entry_t mem_entry;
1788 vm_object_t object;
1789 vm_object_offset_t offset;
1790 vm_object_size_t size;
1791
1792 mem_entry = mach_memory_entry_from_port(entry_port);
1793 if (mem_entry == NULL) {
1794 return KERN_INVALID_ARGUMENT;
1795 }
1796
1797 named_entry_lock(mem_entry);
1798
1799 if (mem_entry->is_sub_map ||
1800 mem_entry->is_copy) {
1801 named_entry_unlock(mem_entry);
1802 return KERN_INVALID_ARGUMENT;
1803 }
1804
1805 assert(mem_entry->is_object);
1806 object = vm_named_entry_to_vm_object(mem_entry);
1807 if (object == VM_OBJECT_NULL) {
1808 named_entry_unlock(mem_entry);
1809 return KERN_INVALID_ARGUMENT;
1810 }
1811
1812 vm_object_lock(object);
1813
1814 offset = mem_entry->offset;
1815 size = mem_entry->size;
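/*
 * Expand to whole-page bounds so the counts below cover every page the entry
 * touches: e.g. with 4K pages, offset 0x1800 and size 0x1000 become
 * offset 0x1000 and size 0x2000 (two pages).
 */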
1816 size = vm_object_round_page(offset + size) - vm_object_trunc_page(offset);
1817 offset = vm_object_trunc_page(offset);
1818
1819 named_entry_unlock(mem_entry);
1820
1821 kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count, swapped_page_count);
1822
1823 vm_object_unlock(object);
1824
1825 return kr;
1826 }
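/*
 * Illustrative userspace sketch ("mem_entry_port" is a hypothetical caller
 * variable; the MIG routine and argument order exported through
 * <mach/memory_entry.h> are assumed):
 *
 *	uint64_t resident = 0, dirty = 0, swapped = 0;
 *	kern_return_t kr = mach_memory_entry_get_page_counts(mem_entry_port,
 *	    &resident, &dirty, &swapped);
 */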
1827
1828 kern_return_t
1829 mach_memory_entry_phys_page_offset(
1830 ipc_port_t entry_port,
1831 vm_object_offset_t *offset_p)
1832 {
1833 vm_named_entry_t mem_entry;
1834 vm_object_t object;
1835 vm_object_offset_t offset;
1836 vm_object_offset_t data_offset;
1837
1838 mem_entry = mach_memory_entry_from_port(entry_port);
1839 if (mem_entry == NULL) {
1840 return KERN_INVALID_ARGUMENT;
1841 }
1842
1843 named_entry_lock(mem_entry);
1844
1845 if (mem_entry->is_sub_map ||
1846 mem_entry->is_copy) {
1847 named_entry_unlock(mem_entry);
1848 return KERN_INVALID_ARGUMENT;
1849 }
1850
1851 assert(mem_entry->is_object);
1852 object = vm_named_entry_to_vm_object(mem_entry);
1853 if (object == VM_OBJECT_NULL) {
1854 named_entry_unlock(mem_entry);
1855 return KERN_INVALID_ARGUMENT;
1856 }
1857
1858 offset = mem_entry->offset;
1859 data_offset = mem_entry->data_offset;
1860
1861 named_entry_unlock(mem_entry);
1862
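/*
 * Report the entry's offset within its first physical page, plus any extra
 * data_offset recorded at creation time. For example, with 4K pages,
 * offset 0x1a80 and data_offset 0x40 yield 0x1a80 - 0x1000 + 0x40 = 0xac0.
 */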
1863 *offset_p = offset - vm_object_trunc_page(offset) + data_offset;
1864 return KERN_SUCCESS;
1865 }
1866
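/*
 * Fold the named entry's own offset (for objects, or for copies whose page
 * size matches the target map) and its data_offset into the caller-supplied
 * offset, rejecting arithmetic overflow, then sanitize the resulting range
 * against the target map's page size.
 */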
1867 static inline kern_return_t
1868 mach_memory_entry_map_size_sanitize_locked(
1869 vm_map_t map,
1870 memory_object_offset_ut *offset_u,
1871 memory_object_size_ut size_u,
1872 vm_named_entry_t mem_entry,
1873 memory_object_offset_t *offset,
1874 memory_object_offset_t *end,
1875 mach_vm_size_t *map_size)
1876 {
1877 kern_return_t kr;
1878
1879 if (mem_entry->is_object ||
1880 (mem_entry->is_copy &&
1881 (VM_MAP_COPY_PAGE_MASK(mem_entry->backing.copy) ==
1882 VM_MAP_PAGE_MASK(map)))) {
1883 if (__improbable(vm_sanitize_add_overflow(*offset_u, mem_entry->offset,
1884 offset_u))) {
1885 return KERN_INVALID_ARGUMENT;
1886 }
1887 }
1888
1889 if (__improbable(vm_sanitize_add_overflow(*offset_u, mem_entry->data_offset,
1890 offset_u))) {
1891 return KERN_INVALID_ARGUMENT;
1892 }
1893
1894 kr = vm_sanitize_addr_size(*offset_u, size_u,
1895 VM_SANITIZE_CALLER_MACH_MEMORY_ENTRY_MAP_SIZE, map,
1896 VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH, offset, end, map_size);
1897 if (__improbable(kr != KERN_SUCCESS)) {
1898 return vm_sanitize_get_kr(kr);
1899 }
1900
1901 return KERN_SUCCESS;
1902 }
1903
1904 kern_return_t
1905 mach_memory_entry_map_size(
1906 ipc_port_t entry_port,
1907 vm_map_t map,
1908 memory_object_offset_ut offset_u,
1909 memory_object_size_ut size_u,
1910 mach_vm_size_t *map_size_out)
1911 {
1912 vm_named_entry_t mem_entry;
1913 vm_object_t object;
1914 vm_map_copy_t copy_map, target_copy_map;
1915 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
1916 kern_return_t kr;
1917 memory_object_offset_t offset;
1918 memory_object_offset_t end;
1919 mach_vm_size_t map_size;
1920
1921 *map_size_out = 0;
1922
1923 mem_entry = mach_memory_entry_from_port(entry_port);
1924 if (mem_entry == NULL) {
1925 return KERN_INVALID_ARGUMENT;
1926 }
1927
1928 named_entry_lock(mem_entry);
1929
1930 if (mem_entry->is_sub_map) {
1931 named_entry_unlock(mem_entry);
1932 return KERN_INVALID_ARGUMENT;
1933 }
1934
1935 /*
1936 * Sanitize offset and size before use
1937 */
1938 kr = mach_memory_entry_map_size_sanitize_locked(map,
1939 &offset_u,
1940 size_u,
1941 mem_entry,
1942 &offset,
1943 &end,
1944 &map_size);
1945 if (__improbable(kr != KERN_SUCCESS)) {
1946 named_entry_unlock(mem_entry);
1947 return kr;
1948 }
1949
1950 if (mem_entry->is_object) {
1951 object = vm_named_entry_to_vm_object(mem_entry);
1952 if (object == VM_OBJECT_NULL) {
1953 named_entry_unlock(mem_entry);
1954 return KERN_INVALID_ARGUMENT;
1955 }
1956
1957 named_entry_unlock(mem_entry);
1958 *map_size_out = map_size;
1959 return KERN_SUCCESS;
1960 }
1961
1962 if (!mem_entry->is_copy) {
1963 panic("unsupported type of mem_entry %p", mem_entry);
1964 }
1965
1966 assert(mem_entry->is_copy);
1967 if (VM_MAP_COPY_PAGE_MASK(mem_entry->backing.copy) == VM_MAP_PAGE_MASK(map)) {
1968 DEBUG4K_SHARE("map %p (%d) mem_entry %p offset 0x%llx + 0x%llx + 0x%llx size 0x%llx -> map_size 0x%llx\n", map, VM_MAP_PAGE_MASK(map), mem_entry, mem_entry->offset, mem_entry->data_offset, offset, VM_SANITIZE_UNSAFE_UNWRAP(size_u), map_size);
1969 named_entry_unlock(mem_entry);
1970 *map_size_out = map_size;
1971 return KERN_SUCCESS;
1972 }
1973
1974 DEBUG4K_SHARE("mem_entry %p copy %p (%d) map %p (%d) offset 0x%llx size 0x%llx\n", mem_entry, mem_entry->backing.copy, VM_MAP_COPY_PAGE_SHIFT(mem_entry->backing.copy), map, VM_MAP_PAGE_SHIFT(map), offset, VM_SANITIZE_UNSAFE_UNWRAP(size_u));
1975 copy_map = mem_entry->backing.copy;
1976 target_copy_map = VM_MAP_COPY_NULL;
1977 DEBUG4K_ADJUST("adjusting...\n");
1978 kr = vm_map_copy_adjust_to_target(copy_map,
1979 offset_u,
1980 size_u,
1981 map,
1982 FALSE,
1983 &target_copy_map,
1984 &overmap_start,
1985 &overmap_end,
1986 &trimmed_start);
1987 if (kr == KERN_SUCCESS) {
1988 if (target_copy_map->size != copy_map->size) {
1989 DEBUG4K_ADJUST("copy %p (%d) map %p (%d) offset 0x%llx size 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx map_size 0x%llx -> 0x%llx\n", copy_map, VM_MAP_COPY_PAGE_SHIFT(copy_map), map, VM_MAP_PAGE_SHIFT(map), (uint64_t)offset, (uint64_t)VM_SANITIZE_UNSAFE_UNWRAP(size_u), (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)copy_map->size, (uint64_t)target_copy_map->size);
1990 }
1991 *map_size_out = target_copy_map->size;
1992 if (target_copy_map != copy_map) {
1993 vm_map_copy_discard(target_copy_map);
1994 }
1995 target_copy_map = VM_MAP_COPY_NULL;
1996 }
1997 named_entry_unlock(mem_entry);
1998 return kr;
1999 }
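/*
 * Note on the mismatched-page-size case above: when a 4K-page copy is mapped
 * into a 16K-page map, vm_map_copy_adjust_to_target() rounds the result to the
 * target page size, so (for example) a 0x3000-byte copy would report a map
 * size of 0x4000. The figures are illustrative, not taken from a trace.
 */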
2000
2001 /*
2002 * mach_memory_entry_port_release:
2003 *
2004 * Release a send right on a named entry port. This is the correct
2005 * way to destroy a named entry. When the last right on the port is
2006 * released, mach_memory_entry_no_senders() will be called.
2007 */
2008 void
2009 mach_memory_entry_port_release(
2010 ipc_port_t port)
2011 {
2012 assert(ip_type(port) == IKOT_NAMED_ENTRY);
2013 ipc_port_release_send(port);
2014 }
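/*
 * Typical usage sketch for a kernel client holding a named-entry send right
 * (hypothetical variable names; the right might have come from
 * mach_make_memory_entry_64() or another producer):
 *
 *	ipc_port_t entry_port = ...;   // send right on an IKOT_NAMED_ENTRY port
 *	... use the entry ...
 *	mach_memory_entry_port_release(entry_port);
 */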
2015
2016 vm_named_entry_t
2017 mach_memory_entry_from_port(ipc_port_t port)
2018 {
2019 if (IP_VALID(port)) {
2020 return ipc_kobject_get_stable(port, IKOT_NAMED_ENTRY);
2021 }
2022 return NULL;
2023 }
2024
2025 void
2026 mach_memory_entry_describe(
2027 vm_named_entry_t named_entry,
2028 kobject_description_t desc)
2029 {
2030 vm_object_t vm_object;
2031 if (named_entry->is_object) {
2032 vm_object = vm_named_entry_to_vm_object(named_entry);
2033 vm_object_size_t size = vm_object->internal ?
2034 vm_object->vo_un1.vou_size : 0;
2035 snprintf(desc, KOBJECT_DESCRIPTION_LENGTH,
2036 "VM-OBJECT(0x%x, %lluKiB)",
2037 VM_OBJECT_ID(vm_object),
2038 BtoKiB(size));
2039 } else if (named_entry->is_copy) {
2040 vm_map_copy_t copy_map = named_entry->backing.copy;
2041 snprintf(desc, KOBJECT_DESCRIPTION_LENGTH,
2042 "VM-MAP-COPY(0x%lx, %lluKiB)",
2043 VM_KERNEL_ADDRHASH(copy_map),
2044 BtoKiB(copy_map->size));
2045 } else if (named_entry->is_sub_map) {
2046 vm_map_t submap = named_entry->backing.map;
2047 snprintf(desc, KOBJECT_DESCRIPTION_LENGTH,
2048 "VM-SUB-MAP(0x%lx, %lluKiB)",
2049 VM_KERNEL_ADDRHASH(submap),
2050 BtoKiB(submap->size));
2051 }
2052 }
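/*
 * Example descriptions produced above (values are illustrative):
 *	"VM-OBJECT(0x2a914f3, 128KiB)"
 *	"VM-MAP-COPY(0x93acc1d2e4b7f015, 64KiB)"
 *	"VM-SUB-MAP(0x93acc1d2e4b7f016, 16384KiB)"
 */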
2053
2054 /*
2055 * mach_memory_entry_no_senders:
2056 *
2057 * Destroys the memory entry associated with a mach port.
2058 * Memory entries have the exact same lifetime as their owning port.
2059 *
2060 * Releasing a memory entry is done by calling
2061 * mach_memory_entry_port_release() on its owning port.
2062 */
2063 static void
2064 mach_memory_entry_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
2065 {
2066 vm_named_entry_t named_entry;
2067
2068 named_entry = ipc_kobject_dealloc_port(port, mscount, IKOT_NAMED_ENTRY);
2069
2070 if (named_entry->is_sub_map) {
2071 vm_map_deallocate(named_entry->backing.map);
2072 } else if (named_entry->is_copy) {
2073 vm_map_copy_discard(named_entry->backing.copy);
2074 } else if (named_entry->is_object) {
2075 assert(named_entry->backing.copy->cpy_hdr.nentries == 1);
2076 vm_map_copy_discard(named_entry->backing.copy);
2077 } else {
2078 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
2079 }
2080
2081 #if VM_NAMED_ENTRY_DEBUG
2082 btref_put(named_entry->named_entry_bt);
2083 #endif /* VM_NAMED_ENTRY_DEBUG */
2084
2085 named_entry_lock_destroy(named_entry);
2086 kfree_type(struct vm_named_entry, named_entry);
2087 }
2088
2089 #if XNU_PLATFORM_MacOSX
2090 /* Allow manipulation of individual page state. This is actually part of */
2091 /* the UPL regimen but takes place on the memory entry rather than on a UPL */
2092
2093 kern_return_t
2094 mach_memory_entry_page_op(
2095 ipc_port_t entry_port,
2096 vm_object_offset_ut offset_u,
2097 int ops,
2098 ppnum_t *phys_entry,
2099 int *flags)
2100 {
2101 vm_named_entry_t mem_entry;
2102 vm_object_t object;
2103 kern_return_t kr;
2104 /*
2105 * Unwrap offset as no mathematical operations are
2106 * performed on it.
2107 */
2108 vm_object_offset_t offset = VM_SANITIZE_UNSAFE_UNWRAP(offset_u);
2109
2110 mem_entry = mach_memory_entry_from_port(entry_port);
2111 if (mem_entry == NULL) {
2112 return KERN_INVALID_ARGUMENT;
2113 }
2114
2115 named_entry_lock(mem_entry);
2116
2117 if (mem_entry->is_sub_map ||
2118 mem_entry->is_copy) {
2119 named_entry_unlock(mem_entry);
2120 return KERN_INVALID_ARGUMENT;
2121 }
2122
2123 assert(mem_entry->is_object);
2124 object = vm_named_entry_to_vm_object(mem_entry);
2125 if (object == VM_OBJECT_NULL) {
2126 named_entry_unlock(mem_entry);
2127 return KERN_INVALID_ARGUMENT;
2128 }
2129
2130 vm_object_reference(object);
2131 named_entry_unlock(mem_entry);
2132
2133 kr = vm_object_page_op(object, offset, ops, phys_entry, flags);
2134
2135 vm_object_deallocate(object);
2136
2137 return kr;
2138 }
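/*
 * Illustrative sketch ("mem_entry_port" is a hypothetical caller variable;
 * the UPL_POP_* op names from <mach/memory_object_types.h> are assumed):
 * discard the first page backing a named entry without building a UPL.
 *
 *	ppnum_t phys = 0;
 *	int flags = 0;
 *	kern_return_t kr = mach_memory_entry_page_op(mem_entry_port,
 *	    0, UPL_POP_DUMP, &phys, &flags);
 */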
2139
2140 /*
2141 * mach_memory_entry_range_op offers performance enhancement over
2142 * mach_memory_entry_page_op for page_op functions which do not require page
2143 * level state to be returned from the call. Page_op was created to provide
2144 * a low-cost alternative to page manipulation via UPLs when only a single
2145 * page was involved. The range_op call establishes the ability in the _op
2146 * family of functions to work on multiple pages where the lack of page level
2147 * state handling allows the caller to avoid the overhead of the upl structures.
2148 */
2149
2150 kern_return_t
2151 mach_memory_entry_range_op(
2152 ipc_port_t entry_port,
2153 vm_object_offset_ut offset_beg_u,
2154 vm_object_offset_ut offset_end_u,
2155 int ops,
2156 int *range)
2157 {
2158 vm_named_entry_t mem_entry;
2159 vm_object_t object;
2160 kern_return_t kr;
2161 vm_object_offset_t offset_range;
2162 /*
2163 * Unwrap offset beginning and end as no mathematical operations are
2164 * performed on these quantities.
2165 */
2166 vm_object_offset_t offset_beg = VM_SANITIZE_UNSAFE_UNWRAP(offset_beg_u);
2167 vm_object_offset_t offset_end = VM_SANITIZE_UNSAFE_UNWRAP(offset_end_u);
2168
2169 mem_entry = mach_memory_entry_from_port(entry_port);
2170 if (mem_entry == NULL) {
2171 return KERN_INVALID_ARGUMENT;
2172 }
2173
2174 named_entry_lock(mem_entry);
2175
2176 if (__improbable(os_sub_overflow(offset_end, offset_beg, &offset_range) ||
2177 (offset_range > (uint32_t) -1))) {
2178 /* range is too big and would overflow "*range" */
2179 named_entry_unlock(mem_entry);
2180 return KERN_INVALID_ARGUMENT;
2181 }
2182
2183 if (mem_entry->is_sub_map ||
2184 mem_entry->is_copy) {
2185 named_entry_unlock(mem_entry);
2186 return KERN_INVALID_ARGUMENT;
2187 }
2188
2189 assert(mem_entry->is_object);
2190 object = vm_named_entry_to_vm_object(mem_entry);
2191 if (object == VM_OBJECT_NULL) {
2192 named_entry_unlock(mem_entry);
2193 return KERN_INVALID_ARGUMENT;
2194 }
2195
2196 vm_object_reference(object);
2197 named_entry_unlock(mem_entry);
2198
2199 kr = vm_object_range_op(object,
2200 offset_beg,
2201 offset_end,
2202 ops,
2203 (uint32_t *) range);
2204
2205 vm_object_deallocate(object);
2206
2207 return kr;
2208 }
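/*
 * Illustrative sketch ("mem_entry_port" and "entry_size" are hypothetical
 * caller variables; the semantics of UPL_ROP_PRESENT as documented in
 * <mach/memory_object_types.h> are assumed): measure how much of the leading
 * part of the entry is currently resident, without per-page state.
 *
 *	int range = 0;
 *	kern_return_t kr = mach_memory_entry_range_op(mem_entry_port,
 *	    0, entry_size, UPL_ROP_PRESENT, &range);
 */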
2209 #endif /* XNU_PLATFORM_MacOSX */
2210
2211 kern_return_t
2212 memory_entry_check_for_adjustment(
2213 vm_map_t src_map,
2214 ipc_port_t port,
2215 vm_map_offset_t *overmap_start,
2216 vm_map_offset_t *overmap_end)
2217 {
2218 kern_return_t kr = KERN_SUCCESS;
2219 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
2220
2221 assert(port);
2222 assertf(ip_type(port) == IKOT_NAMED_ENTRY,
2223 "Port Type expected: %d...received:%d\n",
2224 IKOT_NAMED_ENTRY, ip_type(port));
2225
2226 vm_named_entry_t named_entry;
2227
2228 named_entry = mach_memory_entry_from_port(port);
2229 named_entry_lock(named_entry);
2230 copy_map = named_entry->backing.copy;
2231 target_copy_map = copy_map;
2232
2233 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
2234 vm_map_offset_t trimmed_start;
2235
2236 trimmed_start = 0;
2237 DEBUG4K_ADJUST("adjusting...\n");
2238 kr = vm_map_copy_adjust_to_target(
2239 copy_map,
2240 vm_sanitize_wrap_addr(0), /* offset */
2241 vm_sanitize_wrap_size(copy_map->size), /* size */
2242 src_map,
2243 FALSE, /* copy */
2244 &target_copy_map,
2245 overmap_start,
2246 overmap_end,
2247 &trimmed_start);
2248 assert(trimmed_start == 0);
2249 }
2250 named_entry_unlock(named_entry);
2251
2252 return kr;
2253 }
2254
2255 vm_named_entry_t
2256 vm_convert_port_to_named_entry(
2257 ipc_port_t port)
2258 {
2259 /* Invalid / wrong port type? */
2260 if (!IP_VALID(port) || ip_type(port) != IKOT_NAMED_ENTRY) {
2261 return NULL;
2262 }
2263
2264 vm_named_entry_t named_entry = mach_memory_entry_from_port(port);
2265
2266 /* This is a no-op; it's here for reader clarity */
2267 if (!named_entry) {
2268 return NULL;
2269 }
2270
2271 return named_entry;
2272 }
2273
2274 vm_object_t
2275 vm_convert_port_to_copy_object(
2276 ipc_port_t port)
2277 {
2278 vm_named_entry_t named_entry = vm_convert_port_to_named_entry(port);
2279 /* We expect the named entry to point to an object. */
2280 if (!named_entry || !named_entry->is_object) {
2281 return NULL;
2282 }
2283 /* Pull out the copy map object... */
2284 return vm_named_entry_to_vm_object(named_entry);
2285 }
2286