/*
 * Copyright (c) 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/memory_entry.h>
#include <mach/memory_entry_server.h>
#include <mach/vm_map_server.h>
#include <mach/mach_vm_server.h>
#include <vm/vm_purgeable_internal.h>
#include <mach/mach_host_server.h>
#include <IOKit/IOBSD.h>
#include <vm/vm_memory_entry_xnu.h>
#include <vm/vm_map_internal.h>
#include <vm/memory_object_internal.h>
#include <vm/vm_protos_internal.h>
#include <vm/vm_object_internal.h>
#include <vm/vm_iokit.h>

static void mach_memory_entry_no_senders(ipc_port_t, mach_port_mscount_t);

IPC_KOBJECT_DEFINE(IKOT_NAMED_ENTRY,
    .iko_op_movable_send = true,
    .iko_op_stable = true,
    .iko_op_no_senders = mach_memory_entry_no_senders);

/*
 * mach_make_memory_entry_64
 *
 * Think of it as a two-stage vm_remap() operation: first you get a
 * handle, then you map that handle somewhere else, rather than doing
 * it all at once (and without needing access to the whole other map).
 */
kern_return_t
mach_make_memory_entry_64(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    memory_object_offset_ut offset_u,
    vm_prot_ut permission_u,
    ipc_port_t *object_handle,
    ipc_port_t parent_handle)
{
    return mach_make_memory_entry_internal(target_map,
        size_u,
        offset_u,
        permission_u,
        VM_NAMED_ENTRY_KERNEL_FLAGS_NONE,
        object_handle,
        parent_handle);
}
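
/*
 * Illustrative user-space sketch of the two-stage operation described
 * above (not part of this file; names and error handling are simplified,
 * and the user-space prototype takes plain, unwrapped arguments):
 *
 *     memory_object_size_t size = 16 * 1024;
 *     mach_port_t entry = MACH_PORT_NULL;
 *
 *     // Stage 1: get a handle on [addr, addr + size) in our own map.
 *     kr = mach_make_memory_entry_64(mach_task_self(), &size,
 *         (memory_object_offset_t)addr,
 *         VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
 *
 *     // Stage 2: map that handle, here back into our own map.
 *     mach_vm_address_t where = 0;
 *     kr = mach_vm_map(mach_task_self(), &where, size, 0,
 *         VM_FLAGS_ANYWHERE, entry, 0, FALSE,
 *         VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE,
 *         VM_INHERIT_NONE);
 */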

static inline void
vm_memory_entry_decode_perm(
    vm_prot_t permission,
    unsigned int *access,
    vm_prot_t *protections,
    bool *mask_protections,
    bool *use_data_addr,
    bool *use_4K_compat)
{
    *protections = permission & VM_PROT_ALL;
    *mask_protections = permission & VM_PROT_IS_MASK;
    *access = GET_MAP_MEM(permission);
    *use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0);
    *use_4K_compat = ((permission & MAP_MEM_4K_DATA_ADDR) != 0);
}
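
/*
 * Example decode (illustrative): a permission word of
 * (VM_PROT_READ | VM_PROT_WRITE | MAP_MEM_WCOMB | MAP_MEM_USE_DATA_ADDR)
 * yields protections = VM_PROT_READ | VM_PROT_WRITE, access =
 * MAP_MEM_WCOMB (the cache-attribute bits extracted by GET_MAP_MEM()),
 * use_data_addr = true, and mask_protections / use_4K_compat = false.
 */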

static inline vm_map_offset_t
vm_memory_entry_get_offset_in_page(
    vm_map_offset_t offset,
    vm_map_offset_t map_start,
    bool use_data_addr,
    bool use_4K_compat)
{
    vm_map_offset_t offset_in_page;

    if (use_data_addr || use_4K_compat) {
        offset_in_page = offset - map_start;
        if (use_4K_compat) {
            offset_in_page &= ~((signed)(0xFFF));
        }
    } else {
        offset_in_page = 0;
    }

    return offset_in_page;
}
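
/*
 * Worked example (illustrative): with use_data_addr set, an offset of
 * 0x5803 against a truncated map_start of 0x4000 gives offset_in_page =
 * 0x1803; with use_4K_compat also set, the low 12 bits are cleared and
 * offset_in_page = 0x1000, i.e. the offset is reported at 4K granularity
 * for legacy clients.
 */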

static inline kern_return_t
mach_make_memory_entry_cleanup(
    kern_return_t kr,
    vm_map_t target_map __unused,
    memory_object_size_ut *size_u,
    vm_map_offset_ut offset_u __unused,
    vm_prot_t permission __unused,
    vm_named_entry_t user_entry __unused,
    ipc_port_t *object_handle)
{
    DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
        "%p kr 0x%x\n", target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u),
        VM_SANITIZE_UNSAFE_UNWRAP(*size_u), permission, user_entry,
        vm_sanitize_get_kr(kr));
    /*
     * Set a safe size and object_handle value on the failed return.
     */
    *size_u = vm_sanitize_wrap_size(0);
    *object_handle = IPC_PORT_NULL;
    return vm_sanitize_get_kr(kr);
}

static __attribute__((always_inline, warn_unused_result))
kern_return_t
mach_make_memory_entry_mem_only_sanitize(
    vm_map_t target_map,
    memory_object_size_ut size_u,
    vm_map_offset_ut offset_u,
    vm_map_offset_t *map_start,
    vm_map_offset_t *map_end,
    vm_map_size_t *map_size)
{
    /*
     * This code path doesn't use offset and size, so they don't need to
     * be validated. However, in order to maintain backward compatibility,
     * some checks on offset and size have been kept.
     */
    return vm_sanitize_addr_size(offset_u, size_u,
        VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
        target_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
        map_start, map_end, map_size);
}

static kern_return_t
mach_make_memory_entry_mem_only(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    memory_object_offset_ut offset_u,
    vm_prot_t permission,
    ipc_port_t *object_handle,
    vm_named_entry_t parent_entry)
{
    boolean_t parent_is_object;
    vm_object_t object;
    unsigned int access;
    vm_prot_t protections;
    bool mask_protections;
    unsigned int wimg_mode;
    bool use_data_addr;
    bool use_4K_compat;
    vm_named_entry_t user_entry __unused = NULL;
    kern_return_t kr;
    vm_map_size_t map_size;
    vm_map_offset_t map_start, map_end;

    /*
     * Sanitize addr and size. Permissions have been sanitized prior to
     * dispatch.
     */
    kr = mach_make_memory_entry_mem_only_sanitize(target_map,
        *size_u,
        offset_u,
        &map_start,
        &map_end,
        &map_size);
    if (__improbable(kr != KERN_SUCCESS)) {
        return mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    vm_memory_entry_decode_perm(permission, &access, &protections,
        &mask_protections, &use_data_addr, &use_4K_compat);

    if (use_data_addr || use_4K_compat || parent_entry == NULL) {
        return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    parent_is_object = parent_entry->is_object;
    if (!parent_is_object) {
        return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    if ((access != parent_entry->access) &&
        !(parent_entry->protection & VM_PROT_WRITE)) {
        return mach_make_memory_entry_cleanup(KERN_INVALID_RIGHT, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    object = vm_named_entry_to_vm_object(parent_entry);
    if (parent_is_object && object != VM_OBJECT_NULL) {
        wimg_mode = object->wimg_bits;
    } else {
        wimg_mode = VM_WIMG_USE_DEFAULT;
    }
    vm_prot_to_wimg(access, &wimg_mode);
    if (parent_is_object && object &&
        (access != MAP_MEM_NOOP) &&
        (!(object->nophyscache))) {
        if (object->wimg_bits != wimg_mode) {
            vm_object_lock(object);
            vm_object_change_wimg_mode(object, wimg_mode);
            vm_object_unlock(object);
        }
    }
    if (access != MAP_MEM_NOOP) {
        parent_entry->access = access;
    }
    if (object_handle) {
        *object_handle = IP_NULL;
    }
    DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
        "%p kr 0x%x\n", target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u),
        VM_SANITIZE_UNSAFE_UNWRAP(*size_u), permission, user_entry, KERN_SUCCESS);
    /*
     * TODO: Size isn't being set in this path.
     */
    return KERN_SUCCESS;
}

static __attribute__((always_inline, warn_unused_result))
kern_return_t
mach_make_memory_entry_generic_sanitize(
    vm_map_t target_map,
    memory_object_size_ut size_u,
    vm_map_offset_ut offset_u,
    vm_map_offset_t *map_start,
    vm_map_offset_t *map_end,
    vm_map_size_t *map_size,
    vm_map_offset_t *offset)
{
    kern_return_t kr;

    /*
     * Validate start and end
     */
    kr = vm_sanitize_addr_size(offset_u, size_u,
        VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
        target_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
        map_start, map_end, map_size);
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }
    /*
     * Validate offset
     */
    kr = vm_sanitize_offset(offset_u, VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
        *map_start, *map_end, offset);
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }

    return KERN_SUCCESS;
}

static kern_return_t
mach_make_memory_entry_named_create(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    vm_map_offset_ut offset_u,
    vm_prot_t permission,
    vm_named_entry_kernel_flags_t vmne_kflags,
    ipc_port_t *object_handle)
{
    vm_object_t object;
    unsigned int access;
    vm_prot_t protections;
    bool mask_protections;
    unsigned int wimg_mode;
    bool use_data_addr;
    bool use_4K_compat;
    int ledger_flags = 0;
    task_t owner;
    bool fully_owned = false;
    vm_named_entry_t user_entry = NULL;
    kern_return_t kr;
    vm_map_size_t map_size;
    vm_map_offset_t map_start, map_end, offset;

    if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
        return mach_make_memory_entry_cleanup(KERN_SUCCESS, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    /*
     * Sanitize addr and size. Permissions have been sanitized prior to
     * dispatch.
     */
    kr = mach_make_memory_entry_generic_sanitize(target_map,
        *size_u,
        offset_u,
        &map_start,
        &map_end,
        &map_size,
        &offset);
    if (__improbable(kr != KERN_SUCCESS)) {
        return mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    assert(map_size != 0);

    vm_memory_entry_decode_perm(permission, &access, &protections,
        &mask_protections, &use_data_addr, &use_4K_compat);

    if (use_data_addr || use_4K_compat) {
        return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    /*
     * Force the creation of the VM object now.
     */
#if __LP64__
    if (map_size > ANON_MAX_SIZE) {
        return mach_make_memory_entry_cleanup(KERN_FAILURE, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }
#endif /* __LP64__ */

    object = vm_object_allocate(map_size, vm_map_maybe_serial_id(target_map));
    assert(object != VM_OBJECT_NULL);
    vm_object_lock(object);

    /*
     * XXX
     * We use this path when we want to make sure that
     * nobody messes with the object (coalesce, for
     * example) before we map it.
     * We might want to use these objects for transposition via
     * vm_object_transpose() too, so we don't want any copy or
     * shadow objects either...
     */
    object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
    VM_OBJECT_SET_TRUE_SHARE(object, TRUE);

    owner = current_task();
    if ((permission & MAP_MEM_PURGABLE) ||
        vmne_kflags.vmnekf_ledger_tag) {
        assert(object->vo_owner == NULL);
        assert(object->resident_page_count == 0);
        assert(object->wired_page_count == 0);
        assert(owner != TASK_NULL);
        if (vmne_kflags.vmnekf_ledger_no_footprint) {
            ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
            object->vo_no_footprint = TRUE;
        }
        if (permission & MAP_MEM_PURGABLE) {
            if (!(permission & VM_PROT_WRITE)) {
                /* if we can't write, we can't purge */
                vm_object_unlock(object);
                vm_object_deallocate(object);
                return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT,
                    target_map, size_u, offset_u, permission, user_entry,
                    object_handle);
            }
            VM_OBJECT_SET_PURGABLE(object, VM_PURGABLE_NONVOLATILE);
            if (permission & MAP_MEM_PURGABLE_KERNEL_ONLY) {
                VM_OBJECT_SET_PURGEABLE_ONLY_BY_KERNEL(object, TRUE);
            }
#if __arm64__
            if (owner->task_legacy_footprint) {
                /*
                 * For iOS 11, we failed to account for
                 * this memory. Keep doing that for
                 * legacy apps (built before iOS 12),
                 * for backwards compatibility's sake...
                 */
                owner = kernel_task;
            }
#endif /* __arm64__ */
            vm_purgeable_nonvolatile_enqueue(object, owner);
            /* all memory in this named entry is "owned" */
            fully_owned = true;
        }
    }

    if (vmne_kflags.vmnekf_ledger_tag) {
        /*
         * Bill this object to the current task's
         * ledgers for the given tag.
         */
        if (vmne_kflags.vmnekf_ledger_no_footprint) {
            ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
        }
        kr = vm_object_ownership_change(
            object,
            vmne_kflags.vmnekf_ledger_tag,
            owner, /* new owner */
            ledger_flags,
            FALSE); /* task_objq locked? */
        if (kr != KERN_SUCCESS) {
            vm_object_unlock(object);
            vm_object_deallocate(object);
            return mach_make_memory_entry_cleanup(kr, target_map,
                size_u, offset_u, permission, user_entry, object_handle);
        }
        /* all memory in this named entry is "owned" */
        fully_owned = true;
    }

#if CONFIG_SECLUDED_MEMORY
    if (secluded_for_iokit && /* global boot-arg */
        ((permission & MAP_MEM_GRAB_SECLUDED))) {
        object->can_grab_secluded = TRUE;
        assert(!object->eligible_for_secluded);
    }
#endif /* CONFIG_SECLUDED_MEMORY */

    /*
     * The VM object is brand new and nobody else knows about it,
     * so we don't need to lock it.
     */

    wimg_mode = object->wimg_bits;
    vm_prot_to_wimg(access, &wimg_mode);
    if (access != MAP_MEM_NOOP) {
        object->wimg_bits = wimg_mode;
    }

    vm_object_unlock(object);

    /* the object has no pages, so no WIMG bits to update here */

    user_entry = mach_memory_entry_allocate(object_handle);
    vm_named_entry_associate_vm_object(
        user_entry,
        object,
        0,
        map_size,
        (protections & VM_PROT_ALL));
    user_entry->internal = TRUE;
    user_entry->is_sub_map = FALSE;
    user_entry->offset = 0;
    user_entry->data_offset = 0;
    user_entry->protection = protections;
    user_entry->access = access;
    user_entry->size = map_size;
    user_entry->is_fully_owned = fully_owned;

    /* user_object pager and internal fields are not used */
    /* when the object field is filled in. */

    *size_u = vm_sanitize_wrap_size(user_entry->size - user_entry->data_offset);
    DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
        "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
        permission, user_entry, KERN_SUCCESS);
    return KERN_SUCCESS;
}
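
/*
 * Illustrative user-space sketch of the MAP_MEM_NAMED_CREATE path above
 * (names simplified, error checking omitted): creating a fresh purgeable
 * region backed by a named entry. Note that MAP_MEM_PURGABLE requires
 * VM_PROT_WRITE, as enforced above.
 *
 *     memory_object_size_t size = 256 * 1024;
 *     mach_port_t entry = MACH_PORT_NULL;
 *
 *     kr = mach_make_memory_entry_64(mach_task_self(), &size, 0,
 *         MAP_MEM_NAMED_CREATE | MAP_MEM_PURGABLE |
 *         VM_PROT_READ | VM_PROT_WRITE,
 *         &entry, MACH_PORT_NULL);
 *     // The entry can then be mapped with mach_vm_map() and its
 *     // purgeability driven via mach_memory_entry_purgable_control().
 */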

static kern_return_t
mach_make_memory_entry_copy(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    vm_map_offset_ut offset_u,
    vm_prot_t permission,
    __unused vm_named_entry_kernel_flags_t vmne_kflags,
    ipc_port_t *object_handle)
{
    unsigned int access;
    vm_prot_t protections;
    bool mask_protections;
    bool use_data_addr;
    bool use_4K_compat;
    vm_named_entry_t user_entry = NULL;
    vm_map_copy_t copy;
    /*
     * Stash the offset in the page for use by vm_map_enter_mem_object()
     * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
     */
    vm_object_offset_t offset_in_page;
    kern_return_t kr;
    vm_map_size_t map_size;
    vm_map_offset_t map_start, map_end, offset;

    if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
        return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    /*
     * Sanitize addr and size. Permissions have been sanitized prior to
     * dispatch.
     */
    kr = mach_make_memory_entry_generic_sanitize(target_map,
        *size_u,
        offset_u,
        &map_start,
        &map_end,
        &map_size,
        &offset);
    if (__improbable(kr != KERN_SUCCESS)) {
        return mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    assert(map_size != 0);

    vm_memory_entry_decode_perm(permission, &access, &protections,
        &mask_protections, &use_data_addr, &use_4K_compat);

    if (target_map == VM_MAP_NULL) {
        return mach_make_memory_entry_cleanup(KERN_INVALID_TASK, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
        use_data_addr, use_4K_compat);

    int copyin_flags = VM_MAP_COPYIN_ENTRY_LIST;
    kr = vm_map_copyin_internal(target_map,
        map_start,
        map_size,
        copyin_flags,
        &copy);
    if (kr != KERN_SUCCESS) {
        return mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }
    assert(copy != VM_MAP_COPY_NULL);

    user_entry = mach_memory_entry_allocate(object_handle);
    user_entry->backing.copy = copy;
    user_entry->internal = FALSE;
    user_entry->is_sub_map = FALSE;
    user_entry->is_copy = TRUE;
    user_entry->offset = 0;
    user_entry->protection = protections;
    user_entry->size = map_size;
    user_entry->data_offset = offset_in_page;

    /* is all memory in this named entry "owned"? */
    vm_map_entry_t entry;
    user_entry->is_fully_owned = TRUE;
    for (entry = vm_map_copy_first_entry(copy);
        entry != vm_map_copy_to_entry(copy);
        entry = entry->vme_next) {
        if (entry->is_sub_map ||
            VME_OBJECT(entry) == VM_OBJECT_NULL ||
            VM_OBJECT_OWNER(VME_OBJECT(entry)) == TASK_NULL) {
            /* this memory is not "owned" */
            user_entry->is_fully_owned = FALSE;
            break;
        }
    }

    *size_u = vm_sanitize_wrap_size(user_entry->size - user_entry->data_offset);
    DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> "
        "entry %p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
        permission, user_entry, KERN_SUCCESS);
    return KERN_SUCCESS;
}
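
/*
 * Note: unlike the share path below, the entry created here wraps a
 * vm_map_copy_t snapshot of the source range taken at copyin time, so
 * later changes to the source mappings are not reflected through the
 * named entry.
 */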

static kern_return_t
mach_make_memory_entry_share(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    vm_map_offset_ut offset_u,
    vm_prot_t permission,
    __unused vm_named_entry_kernel_flags_t vmne_kflags,
    ipc_port_t *object_handle,
    ipc_port_t parent_handle,
    vm_named_entry_t parent_entry)
{
    vm_object_t object;
    unsigned int access;
    vm_prot_t protections;
    bool mask_protections;
    bool use_data_addr;
    bool use_4K_compat;
    vm_named_entry_t user_entry = NULL;
    vm_map_copy_t copy;
    vm_prot_t cur_prot, max_prot;
    vm_map_kernel_flags_t vmk_flags;
    vm_map_entry_t parent_copy_entry;
    /*
     * Stash the offset in the page for use by vm_map_enter_mem_object()
     * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
     */
    vm_object_offset_t offset_in_page;
    unsigned int wimg_mode;
    kern_return_t kr;
    vm_map_size_t map_size;
    vm_map_offset_t map_start, map_end, offset;

    vmlp_api_start(MACH_MAKE_MEMORY_ENTRY_SHARE);

    if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
        kr = mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
        vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
        return kr;
    }

    /*
     * Sanitize addr and size. Permissions have been sanitized prior to
     * dispatch.
     */
    kr = mach_make_memory_entry_generic_sanitize(target_map,
        *size_u,
        offset_u,
        &map_start,
        &map_end,
        &map_size,
        &offset);
    if (__improbable(kr != KERN_SUCCESS)) {
        kr = mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
        vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
        return kr;
    }

    assert(map_size != 0);

    vm_memory_entry_decode_perm(permission, &access, &protections,
        &mask_protections, &use_data_addr, &use_4K_compat);

    if (target_map == VM_MAP_NULL) {
        kr = mach_make_memory_entry_cleanup(KERN_INVALID_TASK, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
        vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
        return kr;
    }

    vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    vmk_flags.vmkf_range_id = kmem_needs_data_share_range() ?
        KMEM_RANGE_ID_DATA_SHARED : KMEM_RANGE_ID_DATA;

    parent_copy_entry = VM_MAP_ENTRY_NULL;
    if (!(permission & MAP_MEM_VM_SHARE)) {
        vm_map_t tmp_map, real_map;
        vm_map_version_t version;
        vm_object_t tmp_object;
        vm_object_offset_t obj_off;
        vm_prot_t prot;
        boolean_t wired;
        bool contended;

        /* resolve any pending submap copy-on-write... */
        if (protections & VM_PROT_WRITE) {
            tmp_map = target_map;
            vm_map_lock_read(tmp_map);
            kr = vm_map_lookup_and_lock_object(&tmp_map,
                map_start,
                protections | (mask_protections ? VM_PROT_IS_MASK : 0),
                OBJECT_LOCK_EXCLUSIVE,
                &version,
                &tmp_object,
                &obj_off,
                &prot,
                &wired,
                NULL, /* fault_info */
                &real_map,
                &contended);
            if (kr != KERN_SUCCESS) {
                vm_map_unlock_read(tmp_map);
            } else {
                vm_object_unlock(tmp_object);
                vm_map_unlock_read(tmp_map);
                if (real_map != tmp_map) {
                    vm_map_unlock_read(real_map);
                }
            }
        }
        /* ... and carry on */

        /* stop extracting if VM object changes */
        vmk_flags.vmkf_copy_single_object = TRUE;
        if ((permission & MAP_MEM_NAMED_REUSE) &&
            parent_entry != NULL &&
            parent_entry->is_object) {
            vm_map_copy_t parent_copy;
            parent_copy = parent_entry->backing.copy;
            /*
             * Assert that the vm_map_copy is coming from the right
             * zone and hasn't been forged.
             */
            vm_map_copy_require(parent_copy);
            assert(parent_copy->cpy_hdr.nentries == 1);
            parent_copy_entry = vm_map_copy_first_entry(parent_copy);
            assert(!parent_copy_entry->is_sub_map);
        }
    }

    offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
        use_data_addr, use_4K_compat);

    if (mask_protections) {
        /*
         * The caller is asking for whichever protections are
         * available: no required protections.
         */
        cur_prot = VM_PROT_NONE;
        max_prot = VM_PROT_NONE;
        vmk_flags.vmkf_remap_legacy_mode = true;
    } else {
        /*
         * Caller wants a memory entry with "protections".
         * Make sure we extract only memory that matches that.
         */
        cur_prot = protections;
        max_prot = protections;
    }
    if (target_map->pmap == kernel_pmap) {
        /*
         * Get "reserved" map entries to avoid deadlocking
         * on the kernel map or a kernel submap if we
         * run out of VM map entries and need to refill that
         * zone.
         */
        vmk_flags.vmkf_copy_pageable = FALSE;
    } else {
        vmk_flags.vmkf_copy_pageable = TRUE;
    }
    vmk_flags.vmkf_copy_same_map = FALSE;
    assert(map_size != 0);
    kr = vm_map_copy_extract(target_map,
        map_start,
        map_size,
        FALSE, /* copy */
        &copy,
        &cur_prot,
        &max_prot,
        VM_INHERIT_SHARE,
        vmk_flags);
    if (kr != KERN_SUCCESS) {
        kr = mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
        vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
        return kr;
    }
    assert(copy != VM_MAP_COPY_NULL);

    if (mask_protections) {
        /*
         * We just want as much of "original_protections"
         * as we can get out of the actual "cur_prot".
         */
        protections &= cur_prot;
        if (protections == VM_PROT_NONE) {
            /* no access at all: fail */
            vm_map_copy_discard(copy);
            kr = mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
                target_map, size_u, offset_u, permission, user_entry,
                object_handle);
            vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
            return kr;
        }
    } else {
        /*
         * We want exactly "original_protections"
         * out of "cur_prot".
         */
        assert((cur_prot & protections) == protections);
        assert((max_prot & protections) == protections);
        /* XXX FBDP TODO: no longer needed? */
        if ((cur_prot & protections) != protections) {
            vm_map_copy_discard(copy);
            kr = mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
                target_map, size_u, offset_u, permission, user_entry,
                object_handle);
            vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
            return kr;
        }
    }

    if (!(permission & MAP_MEM_VM_SHARE)) {
        vm_map_entry_t copy_entry;

        /* limit size to what's actually covered by "copy" */
        assert(copy->cpy_hdr.nentries == 1);
        copy_entry = vm_map_copy_first_entry(copy);
        map_size = copy_entry->vme_end - copy_entry->vme_start;

        if ((permission & MAP_MEM_NAMED_REUSE) &&
            parent_copy_entry != VM_MAP_ENTRY_NULL &&
            VME_OBJECT(copy_entry) == VME_OBJECT(parent_copy_entry) &&
            VME_OFFSET(copy_entry) == VME_OFFSET(parent_copy_entry) &&
            parent_entry->offset == 0 &&
            parent_entry->size == map_size &&
            (parent_entry->data_offset == offset_in_page)) {
            /* we have a match: re-use "parent_entry" */

            /* release our new "copy" */
            vm_map_copy_discard(copy);
            /* get extra send right on handle */
            parent_handle = ipc_port_copy_send_any(parent_handle);

            *size_u = vm_sanitize_wrap_size(parent_entry->size -
                parent_entry->data_offset);
            *object_handle = parent_handle;
            DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> "
                "entry %p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
                permission, user_entry, KERN_SUCCESS);
            vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, KERN_SUCCESS);
            return KERN_SUCCESS;
        }

        /* no match: we need to create a new entry */
        object = VME_OBJECT(copy_entry);

        if (object == VM_OBJECT_NULL) {
            /*
             * The object can be NULL when protection == max_protection ==
             * VM_PROT_NONE. Return a failure because the code that follows
             * and other APIs that consume a named entry expect a non-NULL
             * object.
             */
            vm_map_copy_discard(copy);
            kr = mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
                target_map, size_u, offset_u, permission, user_entry,
                object_handle);
            vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, kr);
            return kr;
        }

        vm_object_lock(object);
        wimg_mode = object->wimg_bits;
        if (!(object->nophyscache)) {
            vm_prot_to_wimg(access, &wimg_mode);
        }
        if (object->wimg_bits != wimg_mode) {
            vm_object_change_wimg_mode(object, wimg_mode);
        }
        vm_object_unlock(object);
    }

    user_entry = mach_memory_entry_allocate(object_handle);
    user_entry->backing.copy = copy;
    user_entry->is_sub_map = FALSE;
    user_entry->is_object = FALSE;
    user_entry->internal = FALSE;
    user_entry->protection = protections;
    user_entry->size = map_size;
    user_entry->data_offset = offset_in_page;

    if (permission & MAP_MEM_VM_SHARE) {
        vm_map_entry_t copy_entry;

        user_entry->is_copy = TRUE;
        user_entry->offset = 0;

        /* is all memory in this named entry "owned"? */
        user_entry->is_fully_owned = TRUE;
        for (copy_entry = vm_map_copy_first_entry(copy);
            copy_entry != vm_map_copy_to_entry(copy);
            copy_entry = copy_entry->vme_next) {
            if (copy_entry->is_sub_map) {
                /* submaps can't be owned */
                user_entry->is_fully_owned = FALSE;
                break;
            }
            if (VM_OBJECT_OWNER(VME_OBJECT(copy_entry)) == TASK_NULL) {
                object = VME_OBJECT(copy_entry);
                if (object && !object->internal) {
                    /*
                     * External objects can be "owned"; is_fully_owned
                     * remains TRUE as far as this entry is concerned.
                     */
                    continue;
                }
                /* this memory is not "owned" */
                user_entry->is_fully_owned = FALSE;
                break;
            }
        }
    } else {
        assert3p(object, !=, VM_OBJECT_NULL); /* Sanity, this was set above */
        user_entry->is_object = TRUE;
        assert3p(object, ==, vm_named_entry_to_vm_object(user_entry)); /* Sanity, this was set above */
        user_entry->internal = object->internal;
        user_entry->offset = VME_OFFSET(vm_map_copy_first_entry(copy));
        user_entry->access = GET_MAP_MEM(permission);
        /* is all memory in this named entry "owned"? */
        user_entry->is_fully_owned = FALSE;
        if (VM_OBJECT_OWNER(object) != TASK_NULL) {
            /* object is owned */
            user_entry->is_fully_owned = TRUE;
        } else if (!object->internal) {
            /* external objects can become "owned" */
            user_entry->is_fully_owned = TRUE;
        }
    }

    *size_u = vm_sanitize_wrap_size(user_entry->size -
        user_entry->data_offset);
    DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
        "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
        permission, user_entry, KERN_SUCCESS);

    vmlp_api_end(MACH_MAKE_MEMORY_ENTRY_SHARE, KERN_SUCCESS);
    return KERN_SUCCESS;
}
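
/*
 * Note: the share path extracts the existing mappings (with
 * VM_INHERIT_SHARE semantics) rather than snapshotting them, so the
 * resulting named entry aliases the same VM objects as the source range.
 * With MAP_MEM_NAMED_REUSE, a parent entry that already describes exactly
 * the requested object, offset, size and data offset is re-used instead
 * of allocating a new one.
 */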

static __attribute__((always_inline, warn_unused_result))
kern_return_t
mach_make_memory_entry_from_parent_entry_sanitize(
    vm_map_t target_map,
    memory_object_size_ut size_u,
    vm_map_offset_ut offset_u,
    vm_prot_t permission,
    vm_named_entry_t parent_entry,
    vm_map_offset_t *map_start,
    vm_map_offset_t *map_end,
    vm_map_size_t *map_size,
    vm_map_offset_t *offset,
    vm_map_offset_t *user_entry_offset)
{
    bool mask_protections;
    unsigned int access;
    vm_prot_t protections;
    bool use_data_addr;
    bool use_4K_compat;
    vm_map_offset_t start_mask = vm_map_page_mask(target_map);
    kern_return_t kr;

    vm_memory_entry_decode_perm(permission, &access, &protections,
        &mask_protections, &use_data_addr, &use_4K_compat);

    if (use_data_addr || use_4K_compat) {
        /*
         * Validate that the offset doesn't overflow when added to the
         * parent entry's offset.
         */
        if (vm_sanitize_add_overflow(offset_u, parent_entry->data_offset,
            &offset_u)) {
            return KERN_INVALID_ARGUMENT;
        }
        start_mask = PAGE_MASK;
    }

    /*
     * Currently map_start is truncated using the page mask from target_map
     * when use_data_addr || use_4K_compat is false, while map_end uses
     * PAGE_MASK. In order to maintain that behavior, we request
     * unaligned values and perform the truncation/rounding explicitly.
     */
    kr = vm_sanitize_addr_size(offset_u, size_u,
        VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY, PAGE_MASK,
        VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES,
        map_start, map_end, map_size);
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }

    *map_start = vm_map_trunc_page_mask(*map_start, start_mask);
    *map_end = vm_map_round_page_mask(*map_end, PAGE_MASK);
    *map_size = *map_end - *map_start;

    /*
     * Additional checks to make sure the explicitly computed aligned
     * start and end still make sense.
     */
    if (__improbable(*map_end <= *map_start) || (*map_end > parent_entry->size)) {
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * Validate offset
     */
    kr = vm_sanitize_offset(offset_u, VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
        *map_start, *map_end, offset);
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }

    if (__improbable(os_add_overflow(parent_entry->offset, *map_start,
        user_entry_offset))) {
        return KERN_INVALID_ARGUMENT;
    }

    return KERN_SUCCESS;
}

static kern_return_t
mach_make_memory_entry_from_parent_entry(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    vm_map_offset_ut offset_u,
    vm_prot_t permission,
    ipc_port_t *object_handle,
    vm_named_entry_t parent_entry)
{
    vm_object_t object;
    unsigned int access;
    vm_prot_t protections;
    bool mask_protections;
    bool use_data_addr;
    bool use_4K_compat;
    vm_named_entry_t user_entry = NULL;
    kern_return_t kr;
    /*
     * Stash the offset in the page for use by vm_map_enter_mem_object()
     * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
     */
    vm_object_offset_t offset_in_page;
    vm_map_offset_t map_start, map_end;
    vm_map_size_t map_size;
    vm_map_offset_t user_entry_offset, offset;

    vm_memory_entry_decode_perm(permission, &access, &protections,
        &mask_protections, &use_data_addr, &use_4K_compat);

    /*
     * Sanitize addr and size. Permissions have been sanitized prior to
     * dispatch.
     */
    kr = mach_make_memory_entry_from_parent_entry_sanitize(target_map,
        *size_u,
        offset_u,
        permission,
        parent_entry,
        &map_start,
        &map_end,
        &map_size,
        &offset,
        &user_entry_offset);
    if (__improbable(kr != KERN_SUCCESS)) {
        return mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    if (use_data_addr || use_4K_compat) {
        /*
         * Submaps and pagers should only be accessible from within
         * the kernel, which shouldn't use the data address flag, so we
         * can fail here.
         */
        if (parent_entry->is_sub_map) {
            panic("Shouldn't be using data address with a parent entry that is a submap.");
        }
    }

    if (mask_protections) {
        /*
         * The caller asked us to use the "protections" as
         * a mask, so restrict "protections" to what this
         * mapping actually allows.
         */
        protections &= parent_entry->protection;
    }
    if ((protections & parent_entry->protection) != protections) {
        return mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
        use_data_addr, use_4K_compat);

    user_entry = mach_memory_entry_allocate(object_handle);
    user_entry->size = map_size;
    user_entry->offset = user_entry_offset;
    user_entry->data_offset = offset_in_page;
    user_entry->is_sub_map = parent_entry->is_sub_map;
    user_entry->is_copy = parent_entry->is_copy;
    user_entry->protection = protections;

    if (access != MAP_MEM_NOOP) {
        user_entry->access = access;
    }

    if (parent_entry->is_sub_map) {
        vm_map_t map = parent_entry->backing.map;
        vm_map_reference(map);
        user_entry->backing.map = map;
    } else {
        object = vm_named_entry_to_vm_object(parent_entry);
        assert(object != VM_OBJECT_NULL);
        assert(object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC);
        vm_named_entry_associate_vm_object(
            user_entry,
            object,
            user_entry->offset,
            user_entry->size,
            (user_entry->protection & VM_PROT_ALL));
        assert(user_entry->is_object);
        /* we now point to this object, hold on */
        vm_object_lock(object);
        vm_object_reference_locked(object);
#if VM_OBJECT_TRACKING_OP_TRUESHARE
        if (!object->true_share &&
            vm_object_tracking_btlog) {
            btlog_record(vm_object_tracking_btlog, object,
                VM_OBJECT_TRACKING_OP_TRUESHARE,
                btref_get(__builtin_frame_address(0), 0));
        }
#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */

        VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
        if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
            object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
        }
        vm_object_unlock(object);
    }
    *size_u = vm_sanitize_wrap_size(user_entry->size -
        user_entry->data_offset);
    DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
        "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
        permission, user_entry, KERN_SUCCESS);
    return KERN_SUCCESS;
}

static inline kern_return_t
mach_make_memory_entry_sanitize_perm(
    vm_prot_ut permission_u,
    vm_prot_t *permission)
{
    return vm_sanitize_memory_entry_perm(permission_u,
        VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
        VM_SANITIZE_FLAGS_CHECK_USER_MEM_MAP_FLAGS,
        VM_PROT_IS_MASK, permission);
}

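/*
 * mach_make_memory_entry_internal
 *
 * Sanitizes the caller-supplied permissions, then dispatches to one of
 * the flavors above based on the MAP_MEM_* bits in "permission":
 * MAP_MEM_ONLY -> mach_make_memory_entry_mem_only(),
 * MAP_MEM_NAMED_CREATE -> mach_make_memory_entry_named_create(),
 * MAP_MEM_VM_COPY -> mach_make_memory_entry_copy(),
 * MAP_MEM_VM_SHARE / MAP_MEM_NAMED_REUSE / no parent entry ->
 * mach_make_memory_entry_share(), and otherwise
 * mach_make_memory_entry_from_parent_entry().
 */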
kern_return_t
mach_make_memory_entry_internal(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    memory_object_offset_ut offset_u,
    vm_prot_ut permission_u,
    vm_named_entry_kernel_flags_t vmne_kflags,
    ipc_port_t *object_handle,
    ipc_port_t parent_handle)
{
    vm_named_entry_t user_entry __unused = NULL;
    vm_named_entry_t parent_entry;
    kern_return_t kr;
    vm_prot_t permission;

    DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x\n",
        target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u), VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
        VM_SANITIZE_UNSAFE_UNWRAP(permission_u));

    /*
     * Validate permissions, as we need to dispatch to the corresponding
     * flavor.
     */
    kr = mach_make_memory_entry_sanitize_perm(permission_u, &permission);
    if (__improbable(kr != KERN_SUCCESS)) {
        return mach_make_memory_entry_cleanup(kr, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    if (permission & MAP_MEM_LEDGER_TAGGED) {
        vmne_kflags.vmnekf_ledger_tag = VM_LEDGER_TAG_DEFAULT;
    }

    parent_entry = mach_memory_entry_from_port(parent_handle);
    if (parent_entry && parent_entry->is_copy) {
        return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
            size_u, offset_u, permission, user_entry, object_handle);
    }

    if (permission & MAP_MEM_ONLY) {
        return mach_make_memory_entry_mem_only(target_map, size_u, offset_u,
            permission, object_handle, parent_entry);
    }

    if (permission & MAP_MEM_NAMED_CREATE) {
        return mach_make_memory_entry_named_create(target_map, size_u, offset_u,
            permission, vmne_kflags, object_handle);
    }

    if (permission & MAP_MEM_VM_COPY) {
        return mach_make_memory_entry_copy(target_map, size_u, offset_u,
            permission, vmne_kflags, object_handle);
    }

    if ((permission & MAP_MEM_VM_SHARE)
        || parent_entry == NULL
        || (permission & MAP_MEM_NAMED_REUSE)) {
        return mach_make_memory_entry_share(target_map, size_u, offset_u,
            permission, vmne_kflags, object_handle, parent_handle,
            parent_entry);
    }

    /*
     * This function will compute map start, end and size by including the
     * parent entry's offset. Therefore redo validation.
     */
    return mach_make_memory_entry_from_parent_entry(target_map, size_u,
        offset_u, permission, object_handle, parent_entry);
}

kern_return_t
_mach_make_memory_entry(
    vm_map_t target_map,
    memory_object_size_ut *size_u,
    memory_object_offset_ut offset_u,
    vm_prot_ut permission_u,
    ipc_port_t *object_handle,
    ipc_port_t parent_entry)
{
    return mach_make_memory_entry_64(target_map, size_u,
        offset_u, permission_u, object_handle, parent_entry);
}

kern_return_t
mach_make_memory_entry(
    vm_map_t target_map,
    vm_size_ut *size_u,
    vm_offset_ut offset_u,
    vm_prot_ut permission_u,
    ipc_port_t *object_handle,
    ipc_port_t parent_entry)
{
    kern_return_t kr;

    kr = mach_make_memory_entry_64(target_map, size_u,
        offset_u, permission_u, object_handle, parent_entry);
    return kr;
}

__private_extern__ vm_named_entry_t
mach_memory_entry_allocate(ipc_port_t *user_handle_p)
{
    vm_named_entry_t user_entry;

    user_entry = kalloc_type(struct vm_named_entry,
        Z_WAITOK | Z_ZERO | Z_NOFAIL);
    named_entry_lock_init(user_entry);

    *user_handle_p = ipc_kobject_alloc_port(user_entry, IKOT_NAMED_ENTRY,
        IPC_KOBJECT_ALLOC_MAKE_SEND);

#if VM_NAMED_ENTRY_DEBUG
    /* backtrace at allocation time, for debugging only */
    user_entry->named_entry_bt = btref_get(__builtin_frame_address(0), 0);
#endif /* VM_NAMED_ENTRY_DEBUG */
    return user_entry;
}

static __attribute__((always_inline, warn_unused_result))
kern_return_t
mach_memory_object_memory_entry_64_sanitize(
    vm_object_size_ut size_u,
    vm_prot_ut permission_u,
    vm_object_size_t *size,
    vm_prot_t *permission)
{
    kern_return_t kr;

    kr = vm_sanitize_object_size(size_u,
        VM_SANITIZE_CALLER_MACH_MEMORY_OBJECT_MEMORY_ENTRY,
        VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS, size);
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }
    kr = vm_sanitize_memory_entry_perm(permission_u,
        VM_SANITIZE_CALLER_MACH_MEMORY_OBJECT_MEMORY_ENTRY,
        VM_SANITIZE_FLAGS_NONE, VM_PROT_NONE,
        permission);
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }

    return KERN_SUCCESS;
}

/*
 * mach_memory_object_memory_entry_64
 *
 * Create a named entry backed by the provided pager.
 */
kern_return_t
mach_memory_object_memory_entry_64(
    host_t host,
    boolean_t internal,
    vm_object_size_ut size_u,
    vm_prot_ut permission_u,
    memory_object_t pager,
    ipc_port_t *entry_handle)
{
    vm_named_entry_t user_entry;
    ipc_port_t user_handle;
    vm_object_t object;
    vm_object_size_t size;
    vm_prot_t permission;
    kern_return_t kr;

    if (host == HOST_NULL) {
        return KERN_INVALID_HOST;
    }

    /*
     * Validate size and permission
     */
    kr = mach_memory_object_memory_entry_64_sanitize(size_u,
        permission_u,
        &size,
        &permission);
    if (__improbable(kr != KERN_SUCCESS)) {
        return vm_sanitize_get_kr(kr);
    }

    if (pager == MEMORY_OBJECT_NULL && internal) {
        object = vm_object_allocate(size, VM_MAP_SERIAL_NONE);
        if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
            object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
        }
    } else {
        object = memory_object_to_vm_object(pager);
        if (object != VM_OBJECT_NULL) {
            vm_object_reference(object);
        }
    }
    if (object == VM_OBJECT_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    user_entry = mach_memory_entry_allocate(&user_handle);
    user_entry->size = size;
    user_entry->offset = 0;
    user_entry->protection = permission & VM_PROT_ALL;
    user_entry->access = GET_MAP_MEM(permission);
    user_entry->is_sub_map = FALSE;

    vm_named_entry_associate_vm_object(user_entry, object, 0, size,
        (user_entry->protection & VM_PROT_ALL));
    user_entry->internal = object->internal;
    assert(object->internal == internal);
    if (VM_OBJECT_OWNER(object) != TASK_NULL) {
        /* all memory in this entry is "owned" */
        user_entry->is_fully_owned = TRUE;
    } else if (object && !object->internal) {
        /* external objects can become "owned" */
        user_entry->is_fully_owned = TRUE;
    }

    *entry_handle = user_handle;
    return KERN_SUCCESS;
}

kern_return_t
mach_memory_object_memory_entry(
    host_t host,
    boolean_t internal,
    vm_size_ut size_u,
    vm_prot_ut permission_u,
    memory_object_t pager,
    ipc_port_t *entry_handle)
{
    return mach_memory_object_memory_entry_64(host, internal,
        size_u, permission_u, pager, entry_handle);
}

kern_return_t
mach_memory_entry_purgable_control(
    ipc_port_t entry_port,
    vm_purgable_t control,
    int *state)
{
    if (control == VM_PURGABLE_SET_STATE_FROM_KERNEL) {
        /* not allowed from user-space */
        return KERN_INVALID_ARGUMENT;
    }

    return memory_entry_purgeable_control_internal(entry_port, control, state);
}

kern_return_t
memory_entry_purgeable_control_internal(
    ipc_port_t entry_port,
    vm_purgable_t control,
    int *state)
{
    kern_return_t kr;
    vm_named_entry_t mem_entry;
    vm_object_t object;

    mem_entry = mach_memory_entry_from_port(entry_port);
    if (mem_entry == NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (control != VM_PURGABLE_SET_STATE &&
        control != VM_PURGABLE_GET_STATE &&
        control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
        return KERN_INVALID_ARGUMENT;
    }

    if ((control == VM_PURGABLE_SET_STATE ||
        control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
        (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
        ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
        return KERN_INVALID_ARGUMENT;
    }

    named_entry_lock(mem_entry);

    if (mem_entry->is_sub_map ||
        mem_entry->is_copy) {
        named_entry_unlock(mem_entry);
        return KERN_INVALID_ARGUMENT;
    }

    assert(mem_entry->is_object);
    object = vm_named_entry_to_vm_object(mem_entry);
    if (object == VM_OBJECT_NULL) {
        named_entry_unlock(mem_entry);
        return KERN_INVALID_ARGUMENT;
    }

    vm_object_lock(object);

    /* check that named entry covers entire object ? */
    if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) {
        vm_object_unlock(object);
        named_entry_unlock(mem_entry);
        return KERN_INVALID_ARGUMENT;
    }

    named_entry_unlock(mem_entry);

    kr = vm_object_purgable_control(object, control, state);

    vm_object_unlock(object);

    return kr;
}
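
/*
 * Illustrative user-space sketch (names simplified, error checks
 * omitted): marking a purgeable named entry volatile and later checking
 * whether its contents were purged.
 *
 *     int state = VM_PURGABLE_VOLATILE;
 *     kr = mach_memory_entry_purgable_control(entry,
 *         VM_PURGABLE_SET_STATE, &state);
 *     ...
 *     state = 0;
 *     kr = mach_memory_entry_purgable_control(entry,
 *         VM_PURGABLE_GET_STATE, &state);
 *     if ((state & VM_PURGABLE_STATE_MASK) == VM_PURGABLE_EMPTY) {
 *         // contents were reclaimed; reinitialize before reuse
 *     }
 */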

static kern_return_t
memory_entry_access_tracking_internal(
    ipc_port_t entry_port,
    int *access_tracking,
    uint32_t *access_tracking_reads,
    uint32_t *access_tracking_writes)
{
    vm_named_entry_t mem_entry;
    vm_object_t object;
    kern_return_t kr;

    mem_entry = mach_memory_entry_from_port(entry_port);
    if (mem_entry == NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    named_entry_lock(mem_entry);

    if (mem_entry->is_sub_map ||
        mem_entry->is_copy) {
        named_entry_unlock(mem_entry);
        return KERN_INVALID_ARGUMENT;
    }

    assert(mem_entry->is_object);
    object = vm_named_entry_to_vm_object(mem_entry);
    if (object == VM_OBJECT_NULL) {
        named_entry_unlock(mem_entry);
        return KERN_INVALID_ARGUMENT;
    }

#if VM_OBJECT_ACCESS_TRACKING
    vm_object_access_tracking(object,
        access_tracking,
        access_tracking_reads,
        access_tracking_writes);
    kr = KERN_SUCCESS;
#else /* VM_OBJECT_ACCESS_TRACKING */
    (void) access_tracking;
    (void) access_tracking_reads;
    (void) access_tracking_writes;
    kr = KERN_NOT_SUPPORTED;
#endif /* VM_OBJECT_ACCESS_TRACKING */

    named_entry_unlock(mem_entry);

    return kr;
}

kern_return_t
mach_memory_entry_access_tracking(
    ipc_port_t entry_port,
    int *access_tracking,
    uint32_t *access_tracking_reads,
    uint32_t *access_tracking_writes)
{
    return memory_entry_access_tracking_internal(entry_port,
        access_tracking,
        access_tracking_reads,
        access_tracking_writes);
}

#if DEVELOPMENT || DEBUG
/* For the dtrace probe in mach_memory_entry_ownership */
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
#endif /* DEVELOPMENT || DEBUG */

/* Kernel call only; MIG uses *_from_user() below */
kern_return_t
mach_memory_entry_ownership(
    ipc_port_t entry_port,
    task_t owner,
    int ledger_tag,
    int ledger_flags)
{
    task_t cur_task;
    kern_return_t kr;
    vm_named_entry_t mem_entry;
    vm_object_t object;

    if (ledger_flags & ~VM_LEDGER_FLAGS_ALL) {
        /* reject unexpected flags */
        return KERN_INVALID_ARGUMENT;
    }

    cur_task = current_task();
    if (cur_task == kernel_task) {
        /* kernel thread: no entitlement needed */
    } else if (ledger_flags & VM_LEDGER_FLAG_FROM_KERNEL) {
        /* call is from trusted kernel code: no entitlement needed */
    } else if ((owner != cur_task && owner != TASK_NULL) ||
        (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT) ||
        (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) ||
        ledger_tag == VM_LEDGER_TAG_NETWORK) {
        bool transfer_ok = false;

        /*
         * An entitlement is required to:
         * + transfer memory ownership to someone else,
         * + request that the memory not count against the footprint,
         * + tag as "network" (since that implies "no footprint").
         *
         * Exception: a task with task_no_footprint_for_debug == 1 on an
         * internal build.
         */
        if (!cur_task->task_can_transfer_memory_ownership &&
            IOCurrentTaskHasEntitlement("com.apple.private.memory.ownership_transfer")) {
            cur_task->task_can_transfer_memory_ownership = TRUE;
        }
        if (cur_task->task_can_transfer_memory_ownership) {
            /* we're allowed to transfer ownership to any task */
            transfer_ok = true;
        }
#if DEVELOPMENT || DEBUG
        if (!transfer_ok &&
            ledger_tag == VM_LEDGER_TAG_DEFAULT &&
            (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) &&
            cur_task->task_no_footprint_for_debug) {
            int to_panic = 0;
            static bool init_bootarg = false;

            /*
             * Allow performance tools running on internal builds to hide
             * memory usage from phys_footprint even WITHOUT an
             * entitlement. This can be enabled by the per-task sysctl
             * vm.task_no_footprint_for_debug=1 with the ledger tag
             * VM_LEDGER_TAG_DEFAULT and the flag
             * VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG.
             *
             * If the boot-arg "panic_on_no_footprint_for_debug" is set,
             * the kernel will panic here in order to detect any abuse of
             * this feature, which is intended solely for memory debugging
             * purposes.
             */
            if (!init_bootarg) {
                PE_parse_boot_argn("panic_on_no_footprint_for_debug", &to_panic, sizeof(to_panic));
                init_bootarg = true;
            }
            if (to_panic) {
                panic("%s: panic_on_no_footprint_for_debug is triggered by pid %d procname %s", __func__, proc_selfpid(), get_bsdtask_info(cur_task) ? proc_name_address(get_bsdtask_info(cur_task)) : "?");
            }

            /*
             * Flushing out user space processes using this interface:
             * $ dtrace -n 'task_no_footprint_for_debug {printf("%d[%s]\n", pid, execname); stack(); ustack();}'
             */
            DTRACE_VM(task_no_footprint_for_debug);
            transfer_ok = true;
        }
#endif /* DEVELOPMENT || DEBUG */
        if (!transfer_ok) {
            char *our_id, *their_id;
            our_id = IOTaskGetEntitlement(current_task(), "com.apple.developer.memory.transfer-send");
            their_id = IOTaskGetEntitlement(owner, "com.apple.developer.memory.transfer-accept");
            if (our_id && their_id &&
                !strcmp(our_id, their_id)) { /* These are guaranteed to be null-terminated */
                /* allow transfer between tasks that have matching entitlements */
                transfer_ok = true;
            }
            if (our_id) {
                kfree_data_addr(our_id);
            }
            if (their_id) {
                kfree_data_addr(their_id);
            }
        }
        if (!transfer_ok) {
            /* transfer denied */
            return KERN_NO_ACCESS;
        }

        if (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) {
            /*
             * We've made it past the checks above, so we either
             * have the entitlement or the sysctl.
             * Convert to VM_LEDGER_FLAG_NO_FOOTPRINT.
             */
            ledger_flags &= ~VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG;
            ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
        }
    }
1615
1616 if (ledger_tag == VM_LEDGER_TAG_UNCHANGED) {
1617 /* leave "ledger_tag" unchanged */
1618 } else if (ledger_tag < 0 ||
1619 ledger_tag > VM_LEDGER_TAG_MAX) {
1620 return KERN_INVALID_ARGUMENT;
1621 }
1622 if (owner == TASK_NULL) {
1623 /* leave "owner" unchanged */
1624 owner = VM_OBJECT_OWNER_UNCHANGED;
1625 }
1626
1627 mem_entry = mach_memory_entry_from_port(entry_port);
1628 if (mem_entry == NULL) {
1629 return KERN_INVALID_ARGUMENT;
1630 }
1631
1632 named_entry_lock(mem_entry);
1633
1634 if (mem_entry->is_sub_map ||
1635 !mem_entry->is_fully_owned) {
1636 named_entry_unlock(mem_entry);
1637 return KERN_INVALID_ARGUMENT;
1638 }
1639
1640 if (mem_entry->is_object) {
1641 object = vm_named_entry_to_vm_object(mem_entry);
1642 if (object == VM_OBJECT_NULL) {
1643 named_entry_unlock(mem_entry);
1644 return KERN_INVALID_ARGUMENT;
1645 }
1646 vm_object_lock(object);
1647 if (object->internal) {
1648 /* check that named entry covers entire object ? */
1649 if (mem_entry->offset != 0 ||
1650 object->vo_size != mem_entry->size) {
1651 vm_object_unlock(object);
1652 named_entry_unlock(mem_entry);
1653 return KERN_INVALID_ARGUMENT;
1654 }
1655 }
1656 named_entry_unlock(mem_entry);
1657 kr = vm_object_ownership_change(object,
1658 ledger_tag,
1659 owner,
1660 ledger_flags,
1661 FALSE); /* task_objq_locked */
1662 vm_object_unlock(object);
1663 } else if (mem_entry->is_copy) {
1664 vm_map_copy_t copy;
1665 vm_map_entry_t entry;
1666
1667 copy = mem_entry->backing.copy;
1668 named_entry_unlock(mem_entry);
1669 for (entry = vm_map_copy_first_entry(copy);
1670 entry != vm_map_copy_to_entry(copy);
1671 entry = entry->vme_next) {
1672 object = VME_OBJECT(entry);
1673 if (entry->is_sub_map ||
1674 object == VM_OBJECT_NULL) {
1675 kr = KERN_INVALID_ARGUMENT;
1676 break;
1677 }
1678 vm_object_lock(object);
1679 if (object->internal) {
1680 if (VME_OFFSET(entry) != 0 ||
1681 entry->vme_end - entry->vme_start != object->vo_size) {
1682 vm_object_unlock(object);
1683 kr = KERN_INVALID_ARGUMENT;
1684 break;
1685 }
1686 }
1687 kr = vm_object_ownership_change(object,
1688 ledger_tag,
1689 owner,
1690 ledger_flags,
1691 FALSE); /* task_objq_locked */
1692 vm_object_unlock(object);
1693 if (kr != KERN_SUCCESS) {
1694 kr = KERN_INVALID_ARGUMENT;
1695 break;
1696 }
1697 }
1698 } else {
1699 named_entry_unlock(mem_entry);
1700 return KERN_INVALID_ARGUMENT;
1701 }
1702
1703 return kr;
1704 }
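
/*
 * Illustrative sketch (not part of this file; names are hypothetical):
 * a caller holding a fully-owned, non-submap named entry "entry_port"
 * could move its pages onto another task's ledger like so:
 *
 *	kr = mach_memory_entry_ownership(entry_port,
 *	    new_owner_task,            (or TASK_NULL to keep the owner)
 *	    VM_LEDGER_TAG_DEFAULT,     (or VM_LEDGER_TAG_UNCHANGED)
 *	    0);                        (no VM_LEDGER_FLAG_* modifiers)
 *
 * KERN_NO_ACCESS means the caller had neither the ownership-transfer
 * entitlements nor (on DEVELOPMENT/DEBUG) the debug sysctl;
 * KERN_INVALID_ARGUMENT covers submaps, partially-owned entries and
 * out-of-range ledger tags, as checked above.
 */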

/* MIG call from userspace */
kern_return_t
mach_memory_entry_ownership_from_user(
	ipc_port_t              entry_port,
	mach_port_t             owner_port,
	int                     ledger_tag,
	int                     ledger_flags)
{
	task_t owner = TASK_NULL;
	kern_return_t kr;

	if (ledger_flags & ~VM_LEDGER_FLAGS_USER) {
		return KERN_INVALID_ARGUMENT;
	}

	if (IP_VALID(owner_port)) {
		if (ip_type(owner_port) == IKOT_TASK_ID_TOKEN) {
			task_id_token_t token = convert_port_to_task_id_token(owner_port);
			(void)task_identity_token_get_task_grp(token, &owner, TASK_GRP_MIG);
			task_id_token_release(token);
			/* token ref released */
		} else {
			owner = convert_port_to_task_mig(owner_port);
		}
	}
	/* we now hold a task ref on "owner" (nullable) */

	if (owner && task_is_a_corpse(owner)) {
		/* an identity token can represent a corpse: disallow it */
		task_deallocate_mig(owner);
		owner = TASK_NULL;
	}

	/* mach_memory_entry_ownership() will handle a TASK_NULL owner */
	kr = mach_memory_entry_ownership(entry_port, owner, /* nullable */
	    ledger_tag, ledger_flags);

	if (owner) {
		task_deallocate_mig(owner);
	}

	if (kr == KERN_SUCCESS) {
		/* MIG rule: consume the port right on success */
		ipc_port_release_send(owner_port);
	}
	return kr;
}
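
/*
 * Userspace reaches the routine above through the mach_memory_entry_ownership()
 * MIG call. A minimal sketch (hypothetical userspace code; assumes the caller
 * holds the required entitlement, or is targeting itself):
 *
 *	kern_return_t kr;
 *
 *	kr = mach_memory_entry_ownership(mem_port,   (named entry send right)
 *	    mach_task_self(),                        (new owner)
 *	    VM_LEDGER_TAG_DEFAULT, 0);
 *
 * A task identity token port (IKOT_TASK_ID_TOKEN, handled above) may be
 * passed in place of the task port; per the MIG rule above, the owner port
 * right is consumed on success.
 */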

kern_return_t
mach_memory_entry_get_page_counts(
	ipc_port_t              entry_port,
	uint64_t                *resident_page_count,
	uint64_t                *dirty_page_count,
	uint64_t                *swapped_page_count)
{
	kern_return_t kr;
	vm_named_entry_t mem_entry;
	vm_object_t object;
	vm_object_offset_t offset;
	vm_object_size_t size;

	mem_entry = mach_memory_entry_from_port(entry_port);
	if (mem_entry == NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	named_entry_lock(mem_entry);

	if (mem_entry->is_sub_map ||
	    mem_entry->is_copy) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	assert(mem_entry->is_object);
	object = vm_named_entry_to_vm_object(mem_entry);
	if (object == VM_OBJECT_NULL) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

	/* round the entry's range out to whole pages before counting */
	offset = mem_entry->offset;
	size = mem_entry->size;
	size = vm_object_round_page(offset + size) - vm_object_trunc_page(offset);
	offset = vm_object_trunc_page(offset);

	named_entry_unlock(mem_entry);

	kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count, swapped_page_count);

	vm_object_unlock(object);

	return kr;
}
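
/*
 * Usage sketch (hypothetical caller; "mem_port" is assumed to be a named
 * entry port backed by a VM object):
 *
 *	uint64_t resident, dirty, swapped;
 *	kern_return_t kr;
 *
 *	kr = mach_memory_entry_get_page_counts(mem_port,
 *	    &resident, &dirty, &swapped);
 *
 * On KERN_SUCCESS the counts describe the page-rounded range computed
 * above, not the byte-exact range of the entry.
 */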

kern_return_t
mach_memory_entry_phys_page_offset(
	ipc_port_t              entry_port,
	vm_object_offset_t      *offset_p)
{
	vm_named_entry_t mem_entry;
	vm_object_t object;
	vm_object_offset_t offset;
	vm_object_offset_t data_offset;

	mem_entry = mach_memory_entry_from_port(entry_port);
	if (mem_entry == NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	named_entry_lock(mem_entry);

	if (mem_entry->is_sub_map ||
	    mem_entry->is_copy) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	assert(mem_entry->is_object);
	object = vm_named_entry_to_vm_object(mem_entry);
	if (object == VM_OBJECT_NULL) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	offset = mem_entry->offset;
	data_offset = mem_entry->data_offset;

	named_entry_unlock(mem_entry);

	/* report the entry's offset within its first physical page */
	*offset_p = offset - vm_object_trunc_page(offset) + data_offset;
	return KERN_SUCCESS;
}
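
/*
 * Worked example for the computation above (hypothetical values, assuming
 * 4 KiB object pages): with mem_entry->offset == 0x5200 and
 * mem_entry->data_offset == 0x80, the reported value is
 * 0x5200 - 0x5000 + 0x80 == 0x280, i.e. where the entry's data starts
 * within its first physical page.
 */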

static inline kern_return_t
mach_memory_entry_map_size_sanitize_locked(
	vm_map_t                map,
	memory_object_offset_ut *offset_u,
	memory_object_size_ut   size_u,
	vm_named_entry_t        mem_entry,
	memory_object_offset_t  *offset,
	memory_object_offset_t  *end,
	mach_vm_size_t          *map_size)
{
	kern_return_t kr;

	/*
	 * For objects, and for copies whose page size matches the target
	 * map's, the caller's offset is relative to the start of the named
	 * entry, so fold in the entry's own offset first.
	 */
	if (mem_entry->is_object ||
	    (mem_entry->is_copy &&
	    (VM_MAP_COPY_PAGE_MASK(mem_entry->backing.copy) ==
	    VM_MAP_PAGE_MASK(map)))) {
		if (__improbable(vm_sanitize_add_overflow(*offset_u, mem_entry->offset,
		    offset_u))) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	if (__improbable(vm_sanitize_add_overflow(*offset_u, mem_entry->data_offset,
	    offset_u))) {
		return KERN_INVALID_ARGUMENT;
	}

	kr = vm_sanitize_addr_size(*offset_u, size_u,
	    VM_SANITIZE_CALLER_MACH_MEMORY_ENTRY_MAP_SIZE, map,
	    VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH, offset, end, map_size);
	if (__improbable(kr != KERN_SUCCESS)) {
		return vm_sanitize_get_kr(kr);
	}

	return KERN_SUCCESS;
}
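
/*
 * Worked example for the helper above (hypothetical values): a caller
 * offset of 0x1000 into an object-backed entry with
 * mem_entry->offset == 0x4000 and mem_entry->data_offset == 0x200 is
 * rewritten to 0x1000 + 0x4000 + 0x200 == 0x5200 before
 * vm_sanitize_addr_size() validates and page-rounds the resulting
 * [offset, end) range.
 */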

kern_return_t
mach_memory_entry_map_size(
	ipc_port_t              entry_port,
	vm_map_t                map,
	memory_object_offset_ut offset_u,
	memory_object_size_ut   size_u,
	mach_vm_size_t          *map_size_out)
{
	vm_named_entry_t mem_entry;
	vm_object_t object;
	vm_map_copy_t copy_map, target_copy_map;
	vm_map_offset_t overmap_start, overmap_end, trimmed_start;
	kern_return_t kr;
	memory_object_offset_t offset;
	memory_object_offset_t end;
	mach_vm_size_t map_size;

	*map_size_out = 0;

	mem_entry = mach_memory_entry_from_port(entry_port);
	if (mem_entry == NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	named_entry_lock(mem_entry);

	if (mem_entry->is_sub_map) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Sanitize offset and size before use
	 */
	kr = mach_memory_entry_map_size_sanitize_locked(map,
	    &offset_u,
	    size_u,
	    mem_entry,
	    &offset,
	    &end,
	    &map_size);
	if (__improbable(kr != KERN_SUCCESS)) {
		named_entry_unlock(mem_entry);
		return kr;
	}

	if (mem_entry->is_object) {
		object = vm_named_entry_to_vm_object(mem_entry);
		if (object == VM_OBJECT_NULL) {
			named_entry_unlock(mem_entry);
			return KERN_INVALID_ARGUMENT;
		}

		named_entry_unlock(mem_entry);
		*map_size_out = map_size;
		return KERN_SUCCESS;
	}

	if (!mem_entry->is_copy) {
		panic("unsupported type of mem_entry %p", mem_entry);
	}

	assert(mem_entry->is_copy);
	if (VM_MAP_COPY_PAGE_MASK(mem_entry->backing.copy) == VM_MAP_PAGE_MASK(map)) {
		/* same page size as the target map: no adjustment needed */
		DEBUG4K_SHARE("map %p (%d) mem_entry %p offset 0x%llx + 0x%llx + 0x%llx size 0x%llx -> map_size 0x%llx\n", map, VM_MAP_PAGE_MASK(map), mem_entry, mem_entry->offset, mem_entry->data_offset, offset, VM_SANITIZE_UNSAFE_UNWRAP(size_u), map_size);
		named_entry_unlock(mem_entry);
		*map_size_out = map_size;
		return KERN_SUCCESS;
	}

	DEBUG4K_SHARE("mem_entry %p copy %p (%d) map %p (%d) offset 0x%llx size 0x%llx\n", mem_entry, mem_entry->backing.copy, VM_MAP_COPY_PAGE_SHIFT(mem_entry->backing.copy), map, VM_MAP_PAGE_SHIFT(map), offset, VM_SANITIZE_UNSAFE_UNWRAP(size_u));
	copy_map = mem_entry->backing.copy;
	target_copy_map = VM_MAP_COPY_NULL;
	DEBUG4K_ADJUST("adjusting...\n");
	kr = vm_map_copy_adjust_to_target(copy_map,
	    offset_u,
	    size_u,
	    map,
	    FALSE,
	    &target_copy_map,
	    &overmap_start,
	    &overmap_end,
	    &trimmed_start);
	if (kr == KERN_SUCCESS) {
		if (target_copy_map->size != copy_map->size) {
			DEBUG4K_ADJUST("copy %p (%d) map %p (%d) offset 0x%llx size 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx map_size 0x%llx -> 0x%llx\n", copy_map, VM_MAP_COPY_PAGE_SHIFT(copy_map), map, VM_MAP_PAGE_SHIFT(map), (uint64_t)offset, (uint64_t)VM_SANITIZE_UNSAFE_UNWRAP(size_u), (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)copy_map->size, (uint64_t)target_copy_map->size);
		}
		*map_size_out = target_copy_map->size;
		if (target_copy_map != copy_map) {
			vm_map_copy_discard(target_copy_map);
		}
		target_copy_map = VM_MAP_COPY_NULL;
	}
	named_entry_unlock(mem_entry);
	return kr;
}
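
/*
 * In-kernel usage sketch (hypothetical caller): the offset and size are
 * passed in their unsafe wrapped form and validated inside:
 *
 *	mach_vm_size_t map_size;
 *
 *	kr = mach_memory_entry_map_size(entry_port, target_map,
 *	    vm_sanitize_wrap_addr(0),          (offset into the entry)
 *	    vm_sanitize_wrap_size(len),        (caller-supplied size)
 *	    &map_size);
 *
 * When a 4K-page copy entry is sized against a 16K-page map, the returned
 * map_size can exceed "len" because of the over-mapping computed by
 * vm_map_copy_adjust_to_target() above.
 */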

/*
 * mach_memory_entry_port_release:
 *
 * Release a send right on a named entry port. This is the correct
 * way to destroy a named entry. When the last right on the port is
 * released, mach_memory_entry_no_senders() will be called.
 */
void
mach_memory_entry_port_release(
	ipc_port_t      port)
{
	assert(ip_type(port) == IKOT_NAMED_ENTRY);
	ipc_port_release_send(port);
}
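
/*
 * Typical pairing (sketch; "mem_port" is hypothetical): a named entry
 * obtained from mach_make_memory_entry_64() is destroyed by dropping
 * its send right:
 *
 *	ipc_port_t mem_port;
 *	... create the entry, map or inspect it ...
 *	mach_memory_entry_port_release(mem_port);
 *
 * This is preferable to calling ipc_port_release_send() directly because
 * it asserts that the port really is a named entry.
 */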

vm_named_entry_t
mach_memory_entry_from_port(ipc_port_t port)
{
	if (IP_VALID(port)) {
		return ipc_kobject_get_stable(port, IKOT_NAMED_ENTRY);
	}
	return NULL;
}

void
mach_memory_entry_describe(
	vm_named_entry_t        named_entry,
	kobject_description_t   desc)
{
	vm_object_t vm_object;
	if (named_entry->is_object) {
		vm_object = vm_named_entry_to_vm_object(named_entry);
		vm_object_size_t size = vm_object->internal ?
		    vm_object->vo_un1.vou_size : 0;
		snprintf(desc, KOBJECT_DESCRIPTION_LENGTH,
		    "VM-OBJECT(0x%x, %lluKiB)",
		    VM_OBJECT_ID(vm_object),
		    BtoKiB(size));
	} else if (named_entry->is_copy) {
		vm_map_copy_t copy_map = named_entry->backing.copy;
		snprintf(desc, KOBJECT_DESCRIPTION_LENGTH,
		    "VM-MAP-COPY(0x%lx, %lluKiB)",
		    VM_KERNEL_ADDRHASH(copy_map),
		    BtoKiB(copy_map->size));
	} else if (named_entry->is_sub_map) {
		vm_map_t submap = named_entry->backing.map;
		snprintf(desc, KOBJECT_DESCRIPTION_LENGTH,
		    "VM-SUB-MAP(0x%lx, %lluKiB)",
		    VM_KERNEL_ADDRHASH(submap),
		    BtoKiB(submap->size));
	}
}

/*
 * mach_memory_entry_no_senders:
 *
 * Destroys the memory entry associated with a mach port.
 * Memory entries have the exact same lifetime as their owning port.
 *
 * Releasing a memory entry is done by calling
 * mach_memory_entry_port_release() on its owning port.
 */
static void
mach_memory_entry_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
{
	vm_named_entry_t named_entry;

	named_entry = ipc_kobject_dealloc_port(port, mscount, IKOT_NAMED_ENTRY);

	if (named_entry->is_sub_map) {
		vm_map_deallocate(named_entry->backing.map);
	} else if (named_entry->is_copy) {
		vm_map_copy_discard(named_entry->backing.copy);
	} else if (named_entry->is_object) {
		assert(named_entry->backing.copy->cpy_hdr.nentries == 1);
		vm_map_copy_discard(named_entry->backing.copy);
	} else {
		assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
	}

#if VM_NAMED_ENTRY_DEBUG
	btref_put(named_entry->named_entry_bt);
#endif /* VM_NAMED_ENTRY_DEBUG */

	named_entry_lock_destroy(named_entry);
	kfree_type(struct vm_named_entry, named_entry);
}

#if XNU_PLATFORM_MacOSX
/*
 * Allow manipulation of individual page state. This is actually part of
 * the UPL regimen, but takes place on the memory entry rather than on a UPL.
 */

kern_return_t
mach_memory_entry_page_op(
	ipc_port_t              entry_port,
	vm_object_offset_ut     offset_u,
	int                     ops,
	ppnum_t                 *phys_entry,
	int                     *flags)
{
	vm_named_entry_t mem_entry;
	vm_object_t object;
	kern_return_t kr;
	/*
	 * Unwrap offset as no mathematical operations are
	 * performed on it.
	 */
	vm_object_offset_t offset = VM_SANITIZE_UNSAFE_UNWRAP(offset_u);

	mem_entry = mach_memory_entry_from_port(entry_port);
	if (mem_entry == NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	named_entry_lock(mem_entry);

	if (mem_entry->is_sub_map ||
	    mem_entry->is_copy) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	assert(mem_entry->is_object);
	object = vm_named_entry_to_vm_object(mem_entry);
	if (object == VM_OBJECT_NULL) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_reference(object);
	named_entry_unlock(mem_entry);

	kr = vm_object_page_op(object, offset, ops, phys_entry, flags);

	vm_object_deallocate(object);

	return kr;
}
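
/*
 * Usage sketch (hypothetical caller): "ops" is a UPL_POP_* request, e.g.
 * UPL_POP_DUMP to discard a resident page, or UPL_POP_SET/UPL_POP_CLR
 * combined with state bits such as UPL_POP_DIRTY. Assuming we want the
 * physical page and state backing one page of the entry:
 *
 *	ppnum_t pn;
 *	int pflags;
 *
 *	kr = mach_memory_entry_page_op(entry_port, offset_u,
 *	    UPL_POP_PHYSICAL, &pn, &pflags);
 */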

/*
 * mach_memory_entry_range_op offers a performance enhancement over
 * mach_memory_entry_page_op for page_op functions which do not require page
 * level state to be returned from the call. Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved. The range_op call extends the _op family of functions
 * to multiple pages: because no per-page state is handled, the caller
 * avoids the overhead of the UPL structures.
 */

kern_return_t
mach_memory_entry_range_op(
	ipc_port_t              entry_port,
	vm_object_offset_ut     offset_beg_u,
	vm_object_offset_ut     offset_end_u,
	int                     ops,
	int                     *range)
{
	vm_named_entry_t mem_entry;
	vm_object_t object;
	kern_return_t kr;
	vm_object_offset_t offset_range;
	/*
	 * Unwrap the begin and end offsets; the only arithmetic performed
	 * on them below is the overflow-checked range computation.
	 */
	vm_object_offset_t offset_beg = VM_SANITIZE_UNSAFE_UNWRAP(offset_beg_u);
	vm_object_offset_t offset_end = VM_SANITIZE_UNSAFE_UNWRAP(offset_end_u);

	mem_entry = mach_memory_entry_from_port(entry_port);
	if (mem_entry == NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	named_entry_lock(mem_entry);

	if (__improbable(os_sub_overflow(offset_end, offset_beg, &offset_range) ||
	    (offset_range > (uint32_t) -1))) {
		/* range is too big and would overflow "*range" */
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	if (mem_entry->is_sub_map ||
	    mem_entry->is_copy) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	assert(mem_entry->is_object);
	object = vm_named_entry_to_vm_object(mem_entry);
	if (object == VM_OBJECT_NULL) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_reference(object);
	named_entry_unlock(mem_entry);

	kr = vm_object_range_op(object,
	    offset_beg,
	    offset_end,
	    ops,
	    (uint32_t *) range);

	vm_object_deallocate(object);

	return kr;
}
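
/*
 * Usage sketch (hypothetical caller): measure how much of a range is
 * resident without building a UPL. UPL_ROP_PRESENT scans until the first
 * non-resident page and reports, via "range", the extent found present:
 *
 *	int range = 0;
 *
 *	kr = mach_memory_entry_range_op(entry_port, beg_u, end_u,
 *	    UPL_ROP_PRESENT, &range);
 */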
#endif /* XNU_PLATFORM_MacOSX */

kern_return_t
memory_entry_check_for_adjustment(
	vm_map_t                src_map,
	ipc_port_t              port,
	vm_map_offset_t         *overmap_start,
	vm_map_offset_t         *overmap_end)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;

	assert(port);
	assertf(ip_type(port) == IKOT_NAMED_ENTRY,
	    "port type expected: %d, received: %d\n",
	    IKOT_NAMED_ENTRY, ip_type(port));

	vm_named_entry_t named_entry;

	named_entry = mach_memory_entry_from_port(port);
	named_entry_lock(named_entry);
	copy_map = named_entry->backing.copy;
	target_copy_map = copy_map;

	if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
		vm_map_offset_t trimmed_start;

		trimmed_start = 0;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy_map,
			vm_sanitize_wrap_addr(0), /* offset */
			vm_sanitize_wrap_size(copy_map->size), /* size */
			src_map,
			FALSE, /* copy */
			&target_copy_map,
			overmap_start,
			overmap_end,
			&trimmed_start);
		assert(trimmed_start == 0);
	}
	named_entry_unlock(named_entry);

	return kr;
}

vm_named_entry_t
vm_convert_port_to_named_entry(
	ipc_port_t      port)
{
	/* invalid or wrong port type? */
	if (!IP_VALID(port) || ip_type(port) != IKOT_NAMED_ENTRY) {
		return NULL;
	}

	vm_named_entry_t named_entry = mach_memory_entry_from_port(port);

	/* this check is a no-op; it is here for reader clarity */
	if (!named_entry) {
		return NULL;
	}

	return named_entry;
}

vm_object_t
vm_convert_port_to_copy_object(
	ipc_port_t      port)
{
	vm_named_entry_t named_entry = vm_convert_port_to_named_entry(port);
	/* we expect the named entry to point to a VM object */
	if (!named_entry || !named_entry->is_object) {
		return NULL;
	}
	/* pull out the copy map's object... */
	return vm_named_entry_to_vm_object(named_entry);
}