1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_compressor.h>
75 #include <vm/vm_pageout.h>
76 #include <kern/misc_protos.h>
77 #include <vm/cpm.h>
78 #include <kern/ledger.h>
79 #include <kern/bits.h>
80 #include <kern/startup.h>
81
82 #include <string.h>
83
84 #include <libkern/OSDebug.h>
85 #include <libkern/crypto/sha2.h>
86 #include <libkern/section_keywords.h>
87 #include <sys/kdebug.h>
88
89 #include <san/kasan.h>
90
91 /*
92 * Variables exported by this module.
93 */
94
95 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
96 vm_map_t kernel_pageable_map;
97
98 /*
99 * Forward declarations for internal functions.
100 */
101 extern kern_return_t kmem_alloc_pages(
102 vm_object_t object,
103 vm_object_offset_t offset,
104 vm_object_size_t size);
105
106 kern_return_t
107 kmem_alloc_contig(
108 vm_map_t map,
109 vm_offset_t *addrp,
110 vm_size_t size,
111 vm_offset_t mask,
112 ppnum_t max_pnum,
113 ppnum_t pnum_mask,
114 kma_flags_t flags,
115 vm_tag_t tag)
116 {
117 vm_object_t object;
118 vm_object_offset_t offset;
119 vm_map_offset_t map_addr;
120 vm_map_offset_t map_mask;
121 vm_map_size_t map_size, i;
122 vm_map_entry_t entry;
123 vm_page_t m, pages;
124 kern_return_t kr;
125
126 assert(VM_KERN_MEMORY_NONE != tag);
127
128 if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
129 return KERN_INVALID_ARGUMENT;
130 }
131
132 map_size = vm_map_round_page(size,
133 VM_MAP_PAGE_MASK(map));
134 map_mask = (vm_map_offset_t)mask;
135
136 /* Check for zero allocation size (either directly or via overflow) */
137 if (map_size == 0) {
138 *addrp = 0;
139 return KERN_INVALID_ARGUMENT;
140 }
141
142 /*
143 * Allocate a new object (if necessary) and the reference we
144 * will be donating to the map entry. We must do this before
145 * locking the map, or risk deadlock with the default pager.
146 */
147 if ((flags & KMA_KOBJECT) != 0) {
148 object = kernel_object;
149 vm_object_reference(object);
150 } else {
151 object = vm_object_allocate(map_size);
152 }
153
154 kr = vm_map_find_space(map, &map_addr, map_size, map_mask,
155 VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
156 if (KERN_SUCCESS != kr) {
157 vm_object_deallocate(object);
158 return kr;
159 }
160
161 if (object == kernel_object) {
162 offset = map_addr;
163 } else {
164 offset = 0;
165 }
166 VME_OBJECT_SET(entry, object);
167 VME_OFFSET_SET(entry, offset);
168
169 /* Take an extra object ref in case the map entry gets deleted */
170 vm_object_reference(object);
171 vm_map_unlock(map);
172
173 kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
174
175 if (kr != KERN_SUCCESS) {
176 vm_map_remove(map,
177 vm_map_trunc_page(map_addr,
178 VM_MAP_PAGE_MASK(map)),
179 vm_map_round_page(map_addr + map_size,
180 VM_MAP_PAGE_MASK(map)),
181 VM_MAP_REMOVE_NO_FLAGS);
182 vm_object_deallocate(object);
183 *addrp = 0;
184 return kr;
185 }
186
187 vm_object_lock(object);
188 for (i = 0; i < map_size; i += PAGE_SIZE) {
189 m = pages;
190 pages = NEXT_PAGE(m);
191 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
192 m->vmp_busy = FALSE;
193 vm_page_insert(m, object, offset + i);
194 }
195 vm_object_unlock(object);
196
197 kr = vm_map_wire_kernel(map,
198 vm_map_trunc_page(map_addr,
199 VM_MAP_PAGE_MASK(map)),
200 vm_map_round_page(map_addr + map_size,
201 VM_MAP_PAGE_MASK(map)),
202 VM_PROT_DEFAULT, tag,
203 FALSE);
204
205 if (kr != KERN_SUCCESS) {
206 if (object == kernel_object) {
207 vm_object_lock(object);
208 vm_object_page_remove(object, offset, offset + map_size);
209 vm_object_unlock(object);
210 }
211 vm_map_remove(map,
212 vm_map_trunc_page(map_addr,
213 VM_MAP_PAGE_MASK(map)),
214 vm_map_round_page(map_addr + map_size,
215 VM_MAP_PAGE_MASK(map)),
216 VM_MAP_REMOVE_NO_FLAGS);
217 vm_object_deallocate(object);
218 return kr;
219 }
220 vm_object_deallocate(object);
221
222 if (object == kernel_object) {
223 vm_map_simplify(map, map_addr);
224 vm_tag_update_size(tag, map_size);
225 }
226 *addrp = (vm_offset_t) map_addr;
227 assert((vm_map_offset_t) *addrp == map_addr);
228
229 return KERN_SUCCESS;
230 }
231
232 /*
233 * Master entry point for allocating kernel memory.
234 * NOTE: this routine is _never_ interrupt safe.
235 *
236 * map : map to allocate into
237 * addrp : pointer to start address of new memory
238 * size : size of memory requested
239 * flags : options
240 * KMA_NOPAGEWAIT don't wait for pages if unavailable
241 * KMA_KOBJECT use kernel_object
242 * KMA_LOMEM support for 32 bit devices in a 64 bit world
243 * if set and a lomemory pool is available
244 * grab pages from it... this also implies
245 * KMA_NOPAGEWAIT
246 */
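/*
 * Illustrative call sketch (assumed, not taken from a caller in this
 * file): a wired, zero-filled allocation of one page backed by the
 * kernel object might look like
 *
 *	vm_offset_t addr;
 *	kr = kernel_memory_allocate(kernel_map, &addr, PAGE_SIZE, 0,
 *	    KMA_KOBJECT | KMA_ZERO, VM_KERN_MEMORY_KALLOC);
 *
 * The flag combination and the VM_KERN_MEMORY_KALLOC tag are chosen
 * purely for illustration.
 */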
247
248 kern_return_t
249 kernel_memory_allocate(
250 vm_map_t map,
251 vm_offset_t *addrp,
252 vm_size_t size,
253 vm_offset_t mask,
254 kma_flags_t flags,
255 vm_tag_t tag)
256 {
257 vm_object_t object;
258 vm_object_offset_t offset;
259 vm_object_offset_t pg_offset;
260 vm_map_entry_t entry = NULL;
261 vm_map_offset_t map_addr, fill_start;
262 vm_map_offset_t map_mask;
263 vm_map_size_t map_size, fill_size;
264 kern_return_t kr, pe_result;
265 vm_page_t mem;
266 vm_page_t guard_left = VM_PAGE_NULL;
267 vm_page_t guard_right = VM_PAGE_NULL;
268 vm_page_t wired_page_list = VM_PAGE_NULL;
269 int wired_page_count = 0;
270 vm_map_kernel_flags_t vmk_flags;
271
272 if (kernel_map == VM_MAP_NULL) {
273 panic("kernel_memory_allocate: VM is not ready");
274 }
275 if (map->pmap != kernel_pmap) {
276 panic("kernel_memory_allocate: %p is not a kernel map", map);
277 }
278
279 #if DEBUG || DEVELOPMENT
280 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
281 size, 0, 0, 0);
282 #endif
283
284 map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
285 map_mask = (vm_map_offset_t) mask;
286
287 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
288
289 /* Check for zero allocation size (either directly or via overflow) */
290 if (__improbable(map_size == 0)) {
291 kr = KERN_INVALID_ARGUMENT;
292 goto out;
293 }
294
295 /*
296 * limit the size of a single extent of wired memory
297 * to try and limit the damage to the system if
298 * too many pages get wired down
299 * limit raised to 2GB with 128GB max physical limit,
300 * but scaled by installed memory above this
301 */
302 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
303 map_size > MAX(1ULL << 31, sane_size / 64)) {
304 kr = KERN_RESOURCE_SHORTAGE;
305 goto out;
306 }
307
308 /*
309 * Guard pages:
310 *
311 * Guard pages are implemented as fictitious pages.
312 *
313 * By placing guard pages on either end of a stack,
314 * they can help detect cases where a thread walks
315 * off either end of its stack.
316 *
317 * They are allocated and set up here and attempts
318 * to access those pages are trapped in vm_fault_page().
319 *
320 * The map_size we were passed may include extra space for
321 * guard pages. fill_size represents the actual size to populate.
322 * Similarly, fill_start indicates where the actual pages
323 * will begin in the range.
324 */
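/*
 * For example, with both KMA_GUARD_FIRST and KMA_GUARD_LAST set and a
 * map_size of four pages, fill_start becomes PAGE_SIZE and fill_size
 * becomes two pages: the first and last pages are fictitious guard
 * pages and only the two middle pages are populated and wired.
 */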
325
326 fill_start = 0;
327 fill_size = map_size;
328
329 if (flags & KMA_GUARD_FIRST) {
330 vmk_flags.vmkf_guard_before = TRUE;
331 fill_start += PAGE_SIZE_64;
332 if (os_sub_overflow(fill_size, PAGE_SIZE_64, &fill_size)) {
333 /* no space for a guard page */
334 kr = KERN_INVALID_ARGUMENT;
335 goto out;
336 }
337 if (!map->never_faults) {
338 guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
339 if (guard_left == VM_PAGE_NULL) {
340 kr = KERN_RESOURCE_SHORTAGE;
341 goto out;
342 }
343 }
344 }
345 if (flags & KMA_GUARD_LAST) {
346 vmk_flags.vmkf_guard_after = TRUE;
347 if (os_sub_overflow(fill_size, PAGE_SIZE_64, &fill_size)) {
348 /* no space for a guard page */
349 kr = KERN_INVALID_ARGUMENT;
350 goto out;
351 }
352 if (!map->never_faults) {
353 guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
354 if (guard_right == VM_PAGE_NULL) {
355 kr = KERN_RESOURCE_SHORTAGE;
356 goto out;
357 }
358 }
359 }
360
361 wired_page_count = (int)atop(fill_size);
362 assert(ptoa_64(wired_page_count) == fill_size);
363
364 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
365 kr = vm_page_alloc_list(wired_page_count, flags,
366 &wired_page_list);
367 if (kr != KERN_SUCCESS) {
368 goto out;
369 }
370 }
371
372 /*
373 * Allocate a new object (if necessary). We must do this before
374 * locking the map, or risk deadlock with the default pager.
375 */
376 if ((flags & KMA_KOBJECT) != 0) {
377 object = kernel_object;
378 vm_object_reference(object);
379 } else if ((flags & KMA_COMPRESSOR) != 0) {
380 object = compressor_object;
381 vm_object_reference(object);
382 } else {
383 object = vm_object_allocate(map_size);
384 }
385
386 if (flags & KMA_ATOMIC) {
387 vmk_flags.vmkf_atomic_entry = TRUE;
388 }
389
390 if (flags & KMA_LAST_FREE) {
391 vmk_flags.vmkf_last_free = true;
392 }
393
394 kr = vm_map_find_space(map, &map_addr,
395 fill_size, map_mask, vmk_flags, tag, &entry);
396
397 if (KERN_SUCCESS != kr) {
398 vm_object_deallocate(object);
399 goto out;
400 }
401
402 if (object == kernel_object || object == compressor_object) {
403 offset = map_addr;
404 } else {
405 offset = 0;
406 }
407 VME_OBJECT_SET(entry, object);
408 VME_OFFSET_SET(entry, offset);
409
410 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
411 entry->wired_count++;
412 }
413
414 if (flags & KMA_PERMANENT) {
415 entry->permanent = TRUE;
416 }
417
418 if (object != kernel_object && object != compressor_object) {
419 vm_object_reference(object);
420 }
421
422 vm_object_lock(object);
423 vm_map_unlock(map);
424
425 if (guard_left) {
426 vm_page_insert(guard_left, object, offset);
427 guard_left->vmp_busy = FALSE;
428 guard_left = VM_PAGE_NULL;
429 } else {
430 assert(fill_start == 0 || map->never_faults);
431 }
432
433 #if KASAN
434 if (!(flags & KMA_VAONLY)) {
435 /* for VAONLY mappings we notify in populate only */
436 kasan_notify_address(map_addr, size);
437 }
438 #endif
439
440 if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
441 pg_offset = fill_start + fill_size;
442 } else {
443 for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
444 if (wired_page_list == NULL) {
445 panic("kernel_memory_allocate: wired_page_list == NULL");
446 }
447
448 mem = wired_page_list;
449 wired_page_list = mem->vmp_snext;
450 mem->vmp_snext = NULL;
451
452 assert(mem->vmp_wire_count == 0);
453 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
454
455 mem->vmp_q_state = VM_PAGE_IS_WIRED;
456 mem->vmp_wire_count++;
457 if (__improbable(mem->vmp_wire_count == 0)) {
458 panic("kernel_memory_allocate(%p): wire_count overflow",
459 mem);
460 }
461
462 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
463
464 mem->vmp_busy = FALSE;
465 mem->vmp_pmapped = TRUE;
466 mem->vmp_wpmapped = TRUE;
467
468 PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset,
469 /* fault_phys_offset */ 0, mem,
470 VM_PROT_DEFAULT, VM_PROT_NONE,
471 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
472 PMAP_OPTIONS_NOWAIT, pe_result);
473
474 if (pe_result == KERN_RESOURCE_SHORTAGE) {
475 vm_object_unlock(object);
476
477 PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
478 VM_PROT_DEFAULT, VM_PROT_NONE,
479 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
480 pe_result);
481
482 vm_object_lock(object);
483 }
484
485 assert(pe_result == KERN_SUCCESS);
486
487 if (flags & KMA_NOENCRYPT) {
488 bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
489
490 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
491 }
492 }
493 if (kernel_object == object) {
494 vm_tag_update_size(tag, fill_size);
495 }
496 }
497
498 if (guard_right) {
499 vm_page_insert(guard_right, object, offset + pg_offset);
500 guard_right->vmp_busy = FALSE;
501 guard_right = VM_PAGE_NULL;
502 } else {
503 assert(fill_start + fill_size == map_size || map->never_faults);
504 }
505
506 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
507 vm_page_lockspin_queues();
508 vm_page_wire_count += wired_page_count;
509 vm_page_unlock_queues();
510 }
511
512 vm_object_unlock(object);
513
514 /*
515 * now that the pages are wired, we no longer have to fear coalescing
516 */
517 if (object == kernel_object || object == compressor_object) {
518 vm_map_simplify(map, map_addr);
519 } else {
520 vm_object_deallocate(object);
521 }
522
523 #if DEBUG || DEVELOPMENT
524 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
525 wired_page_count, 0, 0, 0);
526 #endif
527
528 *addrp = CAST_DOWN(vm_offset_t, map_addr);
529 return KERN_SUCCESS;
530
531 out:
532 if (guard_left) {
533 guard_left->vmp_snext = wired_page_list;
534 wired_page_list = guard_left;
535 }
536 if (guard_right) {
537 guard_right->vmp_snext = wired_page_list;
538 wired_page_list = guard_right;
539 }
540 if (wired_page_list) {
541 vm_page_free_list(wired_page_list, FALSE);
542 }
543 *addrp = 0;
544
545 #if DEBUG || DEVELOPMENT
546 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
547 wired_page_count, 0, 0, 0);
548 #endif
549 return kr;
550 }
551
552 void
553 kernel_memory_populate_with_pages(
554 vm_map_t map,
555 vm_offset_t addr,
556 vm_size_t size,
557 vm_page_t page_list,
558 kma_flags_t flags,
559 vm_tag_t tag,
560 vm_prot_t prot)
561 {
562 vm_object_t object;
563 kern_return_t pe_result;
564 vm_page_t mem;
565 int page_count = atop_64(size);
566
567 if (flags & KMA_COMPRESSOR) {
568 panic("%s(%p,0x%llx,0x%llx,0x%x): KMA_COMPRESSOR", __func__,
569 map, (uint64_t) addr, (uint64_t) size, flags);
570 }
571
572 if (flags & KMA_KOBJECT) {
573 object = kernel_object;
574
575 vm_object_lock(object);
576 } else {
577 /*
578 * If it's not the kernel object, we need to:
579 * lock map;
580 * lookup entry;
581 * lock object;
582 * take reference on object;
583 * unlock map;
584 */
585 panic("%s(%p,0x%llx,0x%llx,0x%x): !KMA_KOBJECT", __func__,
586 map, (uint64_t) addr, (uint64_t) size, flags);
587 }
588
589 for (vm_object_offset_t pg_offset = 0;
590 pg_offset < size;
591 pg_offset += PAGE_SIZE_64) {
592 if (page_list == NULL) {
593 panic("%s: page_list too short", __func__);
594 }
595
596 mem = page_list;
597 page_list = mem->vmp_snext;
598 mem->vmp_snext = NULL;
599
600 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
601 mem->vmp_q_state = VM_PAGE_IS_WIRED;
602 mem->vmp_wire_count++;
603 if (mem->vmp_wire_count == 0) {
604 panic("%s(%p): wire_count overflow", __func__, mem);
605 }
606
607 vm_page_insert_wired(mem, object, addr + pg_offset, tag);
608
609 mem->vmp_busy = FALSE;
610 mem->vmp_pmapped = TRUE;
611 mem->vmp_wpmapped = TRUE;
612
613 PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset,
614 0, /* fault_phys_offset */
615 mem,
616 prot, VM_PROT_NONE,
617 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
618 PMAP_OPTIONS_NOWAIT, pe_result);
619
620 if (pe_result == KERN_RESOURCE_SHORTAGE) {
621 vm_object_unlock(object);
622
623 PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
624 prot, VM_PROT_NONE,
625 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
626 pe_result);
627
628 vm_object_lock(object);
629 }
630
631 assert(pe_result == KERN_SUCCESS);
632
633 if (flags & KMA_NOENCRYPT) {
634 __nosan_bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
635 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
636 }
637 }
638 if (page_list) {
639 panic("%s: page_list too long", __func__);
640 }
641 vm_object_unlock(object);
642
643 vm_page_lockspin_queues();
644 vm_page_wire_count += page_count;
645 vm_page_unlock_queues();
646 vm_tag_update_size(tag, size);
647
648 #if KASAN
649 if (map == compressor_map) {
650 kasan_notify_address_nopoison(addr, size);
651 } else {
652 kasan_notify_address(addr, size);
653 }
654 #endif
655 }
656
657 kern_return_t
658 kernel_memory_populate(
659 vm_map_t map,
660 vm_offset_t addr,
661 vm_size_t size,
662 kma_flags_t flags,
663 vm_tag_t tag)
664 {
665 vm_object_t object;
666 vm_object_offset_t offset, pg_offset;
667 kern_return_t kr = KERN_SUCCESS;
668 vm_page_t mem;
669 vm_page_t page_list = NULL;
670 int page_count = atop_64(size);
671
672 #if DEBUG || DEVELOPMENT
673 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
674 size, 0, 0, 0);
675 #endif
676
677 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
678
679 if (flags & KMA_COMPRESSOR) {
680 pg_offset = page_count * PAGE_SIZE_64;
681
682 do {
683 for (;;) {
684 mem = vm_page_grab();
685
686 if (mem != VM_PAGE_NULL) {
687 break;
688 }
689
690 VM_PAGE_WAIT();
691 }
692 if (KMA_ZERO & flags) {
693 vm_page_zero_fill(mem);
694 }
695 mem->vmp_snext = page_list;
696 page_list = mem;
697
698 pg_offset -= PAGE_SIZE_64;
699
700 kr = pmap_enter_options(kernel_pmap,
701 addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
702 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
703 PMAP_OPTIONS_INTERNAL, NULL);
704 assert(kr == KERN_SUCCESS);
705 } while (pg_offset);
706
707 offset = addr;
708 object = compressor_object;
709
710 vm_object_lock(object);
711
712 for (pg_offset = 0;
713 pg_offset < size;
714 pg_offset += PAGE_SIZE_64) {
715 mem = page_list;
716 page_list = mem->vmp_snext;
717 mem->vmp_snext = NULL;
718
719 vm_page_insert(mem, object, offset + pg_offset);
720 assert(mem->vmp_busy);
721
722 mem->vmp_busy = FALSE;
723 mem->vmp_pmapped = TRUE;
724 mem->vmp_wpmapped = TRUE;
725 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
726 }
727 vm_object_unlock(object);
728
729 #if KASAN
730 if (map == compressor_map) {
731 kasan_notify_address_nopoison(addr, size);
732 } else {
733 kasan_notify_address(addr, size);
734 }
735 #endif
736
737 #if DEBUG || DEVELOPMENT
738 task_t task = current_task();
739 if (task != NULL) {
740 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_count);
741 }
742 #endif
743 } else {
744 kr = vm_page_alloc_list(page_count, flags, &page_list);
745 if (kr == KERN_SUCCESS) {
746 kernel_memory_populate_with_pages(map, addr, size,
747 page_list, flags, tag, VM_PROT_READ | VM_PROT_WRITE);
748 }
749 }
750
751 #if DEBUG || DEVELOPMENT
752 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
753 page_count, 0, 0, 0);
754 #endif
755 return kr;
756 }
757
758
759 void
760 kernel_memory_depopulate(
761 vm_map_t map,
762 vm_offset_t addr,
763 vm_size_t size,
764 kma_flags_t flags,
765 vm_tag_t tag)
766 {
767 vm_object_t object;
768 vm_object_offset_t offset, pg_offset;
769 vm_page_t mem;
770 vm_page_t local_freeq = NULL;
771 unsigned int pages_unwired;
772
773 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
774
775 if (flags & KMA_COMPRESSOR) {
776 offset = addr;
777 object = compressor_object;
778
779 vm_object_lock(object);
780 } else if (flags & KMA_KOBJECT) {
781 offset = addr;
782 object = kernel_object;
783 vm_object_lock(object);
784 } else {
785 offset = 0;
786 object = NULL;
787 /*
788 * If it's not the kernel object, we need to:
789 * lock map;
790 * lookup entry;
791 * lock object;
792 * unlock map;
793 */
794 panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
795 "!KMA_KOBJECT",
796 map, (uint64_t) addr, (uint64_t) size, flags);
797 }
798 pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);
799
800 for (pg_offset = 0, pages_unwired = 0;
801 pg_offset < size;
802 pg_offset += PAGE_SIZE_64) {
803 mem = vm_page_lookup(object, offset + pg_offset);
804
805 assert(mem);
806
807 if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
808 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
809 pages_unwired++;
810 }
811
812 mem->vmp_busy = TRUE;
813
814 assert(mem->vmp_tabled);
815 vm_page_remove(mem, TRUE);
816 assert(mem->vmp_busy);
817
818 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
819 assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
820 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
821
822 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
823 mem->vmp_snext = local_freeq;
824 local_freeq = mem;
825 }
826 vm_object_unlock(object);
827
828
829 if (local_freeq) {
830 vm_page_free_list(local_freeq, TRUE);
831 if (pages_unwired != 0) {
832 vm_page_lockspin_queues();
833 vm_page_wire_count -= pages_unwired;
834 vm_page_unlock_queues();
835 vm_tag_update_size(tag, -ptoa_64(pages_unwired));
836 }
837 }
838 }
839
840 /*
841 * kmem_realloc:
842 *
843 * Reallocate wired-down memory in the kernel's address map
844 * or a submap. Newly allocated pages are not zeroed.
845 * This can only be used on regions allocated with kmem_alloc.
846 *
847 * If successful, the pages in the old region are mapped twice.
848 * The old region is unchanged. Use kmem_free to get rid of it.
849 */
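/*
 * Illustrative usage sketch (assumed, not from this file): since the old
 * region remains mapped on success, a typical caller frees it explicitly:
 *
 *	kr = kmem_realloc(kernel_map, oldaddr, oldsize, &newaddr, newsize, tag);
 *	if (kr == KERN_SUCCESS) {
 *		kmem_free(kernel_map, oldaddr, oldsize);
 *	}
 */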
850 kern_return_t
851 kmem_realloc(
852 vm_map_t map,
853 vm_offset_t oldaddr,
854 vm_size_t oldsize,
855 vm_offset_t *newaddrp,
856 vm_size_t newsize,
857 vm_tag_t tag)
858 {
859 vm_object_t object;
860 vm_object_offset_t offset;
861 vm_map_offset_t oldmapmin;
862 vm_map_offset_t oldmapmax;
863 vm_map_offset_t newmapaddr;
864 vm_map_size_t oldmapsize;
865 vm_map_size_t newmapsize;
866 vm_map_entry_t oldentry;
867 vm_map_entry_t newentry;
868 vm_page_t mem;
869 kern_return_t kr;
870 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
871
872 oldmapmin = vm_map_trunc_page(oldaddr,
873 VM_MAP_PAGE_MASK(map));
874 oldmapmax = vm_map_round_page(oldaddr + oldsize,
875 VM_MAP_PAGE_MASK(map));
876 oldmapsize = oldmapmax - oldmapmin;
877 newmapsize = vm_map_round_page(newsize,
878 VM_MAP_PAGE_MASK(map));
879 if (newmapsize < newsize) {
880 /* overflow */
881 *newaddrp = 0;
882 return KERN_INVALID_ARGUMENT;
883 }
884
885 /*
886 * Find the VM object backing the old region.
887 */
888
889 vm_map_lock(map);
890
891 if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
892 panic("kmem_realloc");
893 }
894 if (oldentry->vme_atomic) {
895 vmk_flags.vmkf_atomic_entry = TRUE;
896 }
897 object = VME_OBJECT(oldentry);
898
899 /*
900 * Increase the size of the object and
901 * fill in the new region.
902 */
903
904 vm_object_reference(object);
905 /* by grabbing the object lock before unlocking the map */
906 /* we guarantee that we will panic if more than one */
907 /* attempt is made to realloc a kmem_alloc'd area */
908 vm_object_lock(object);
909 vm_map_unlock(map);
910 if (object->vo_size != oldmapsize) {
911 panic("kmem_realloc");
912 }
913 object->vo_size = newmapsize;
914 vm_object_unlock(object);
915
916 /* allocate the new pages while expanded portion of the */
917 /* object is still not mapped */
918 kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
919 vm_object_round_page(newmapsize - oldmapsize));
920
921 /*
922 * Find space for the new region.
923 */
924
925 kr = vm_map_find_space(map, &newmapaddr, newmapsize,
926 (vm_map_offset_t)0, vmk_flags, tag, &newentry);
927 if (kr != KERN_SUCCESS) {
928 vm_object_lock(object);
929 for (offset = oldmapsize;
930 offset < newmapsize; offset += PAGE_SIZE) {
931 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
932 VM_PAGE_FREE(mem);
933 }
934 }
935 object->vo_size = oldmapsize;
936 vm_object_unlock(object);
937 vm_object_deallocate(object);
938 return kr;
939 }
940 VME_OBJECT_SET(newentry, object);
941 VME_OFFSET_SET(newentry, 0);
942 assert(newentry->wired_count == 0);
943
944
945 /* add an extra reference in case we have someone doing an */
946 /* unexpected deallocate */
947 vm_object_reference(object);
948 vm_map_unlock(map);
949
950 kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
951 VM_PROT_DEFAULT, tag, FALSE);
952 if (KERN_SUCCESS != kr) {
953 vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
954 vm_object_lock(object);
955 for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
956 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
957 VM_PAGE_FREE(mem);
958 }
959 }
960 object->vo_size = oldmapsize;
961 vm_object_unlock(object);
962 vm_object_deallocate(object);
963 return kr;
964 }
965 vm_object_deallocate(object);
966
967 if (kernel_object == object) {
968 vm_tag_update_size(tag, newmapsize);
969 }
970
971 *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
972 return KERN_SUCCESS;
973 }
974
975 /*
976 * kmem_alloc:
977 *
978 * Allocate wired-down memory in the kernel's address map
979 * or a submap. The memory is not zero-filled.
980 */
981
982 kern_return_t
983 kmem_alloc_external(
984 vm_map_t map,
985 vm_offset_t *addrp,
986 vm_size_t size)
987 {
988 return kmem_alloc(map, addrp, size, vm_tag_bt());
989 }
990
991
992 /*
993 * kmem_alloc_kobject:
994 *
995 * Allocate wired-down memory in the kernel's address map
996 * or a submap. The memory is not zero-filled.
997 *
998 * The memory is allocated in the kernel_object.
999 * It may not be copied with vm_map_copy, and
1000 * it may not be reallocated with kmem_realloc.
1001 */
1002
1003 kern_return_t
1004 kmem_alloc_kobject_external(
1005 vm_map_t map,
1006 vm_offset_t *addrp,
1007 vm_size_t size)
1008 {
1009 return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
1010 }
1011
1012 /*
1013 * kmem_alloc_pageable:
1014 *
1015 * Allocate pageable memory in the kernel's address map.
1016 */
1017
1018 kern_return_t
1019 kmem_alloc_pageable_external(
1020 vm_map_t map,
1021 vm_offset_t *addrp,
1022 vm_size_t size)
1023 {
1024 return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
1025 }
1026
1027 /*
1028 * kmem_free:
1029 *
1030 * Release a region of kernel virtual memory allocated
1031 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1032 * and return the physical pages associated with that region.
1033 */
1034
1035 void
1036 kmem_free(
1037 vm_map_t map,
1038 vm_offset_t addr,
1039 vm_size_t size)
1040 {
1041 kern_return_t kr;
1042
1043 assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1044
1045 if (size == 0) {
1046 #if MACH_ASSERT
1047 printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
1048 #endif
1049 return;
1050 }
1051
1052 kr = vm_map_remove(map,
1053 vm_map_trunc_page(addr,
1054 VM_MAP_PAGE_MASK(map)),
1055 vm_map_round_page(addr + size,
1056 VM_MAP_PAGE_MASK(map)),
1057 VM_MAP_REMOVE_KUNWIRE);
1058 if (kr != KERN_SUCCESS) {
1059 panic("kmem_free");
1060 }
1061 }
1062
1063 /*
1064 * Allocate new pages in an object.
1065 */
1066
1067 kern_return_t
1068 kmem_alloc_pages(
1069 vm_object_t object,
1070 vm_object_offset_t offset,
1071 vm_object_size_t size)
1072 {
1073 vm_object_size_t alloc_size;
1074
1075 alloc_size = vm_object_round_page(size);
1076 vm_object_lock(object);
1077 while (alloc_size) {
1078 vm_page_t mem;
1079
1080
1081 /*
1082 * Allocate a page
1083 */
1084 while (VM_PAGE_NULL ==
1085 (mem = vm_page_alloc(object, offset))) {
1086 vm_object_unlock(object);
1087 VM_PAGE_WAIT();
1088 vm_object_lock(object);
1089 }
1090 mem->vmp_busy = FALSE;
1091
1092 alloc_size -= PAGE_SIZE;
1093 offset += PAGE_SIZE;
1094 }
1095 vm_object_unlock(object);
1096 return KERN_SUCCESS;
1097 }
1098
1099 /*
1100 * kmem_suballoc:
1101 *
1102 * Allocates a map to manage a subrange
1103 * of the kernel virtual address space.
1104 *
1105 * Arguments are as follows:
1106 *
1107 * parent Map to take range from
1108 * addr Address of start of range (IN/OUT)
1109 * size Size of range to find
1110 * pageable Can region be paged
1111 * anywhere Can region be located anywhere in map
1112 * new_map Pointer to new submap
1113 */
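/*
 * Illustrative call sketch (argument values assumed for illustration):
 *
 *	vm_offset_t base = 0;
 *	vm_map_t submap;
 *	kr = kmem_suballoc(kernel_map, &base, size,
 *	    VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, &submap);
 */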
1114 kern_return_t
1115 kmem_suballoc(
1116 vm_map_t parent,
1117 vm_offset_t *addr,
1118 vm_size_t size,
1119 vm_map_create_options_t vmc_options,
1120 int flags,
1121 vm_map_kernel_flags_t vmk_flags,
1122 vm_tag_t tag,
1123 vm_map_t *new_map)
1124 {
1125 vm_map_t map;
1126 vm_map_offset_t map_addr;
1127 vm_map_size_t map_size;
1128 kern_return_t kr;
1129
1130 map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(parent));
1131 if (map_size < size) {
1132 /* overflow */
1133 *addr = 0;
1134 return KERN_INVALID_ARGUMENT;
1135 }
1136
1137 /*
1138 * Need reference on submap object because it is internal
1139 * to the vm_system. vm_object_enter will never be called
1140 * on it (usual source of reference for vm_map_enter).
1141 */
1142 vm_object_reference(vm_submap_object);
1143
1144 map_addr = ((flags & VM_FLAGS_ANYWHERE)
1145 ? vm_map_min(parent)
1146 : vm_map_trunc_page(*addr,
1147 VM_MAP_PAGE_MASK(parent)));
1148
1149 kr = vm_map_enter(parent, &map_addr, map_size,
1150 (vm_map_offset_t) 0, flags, vmk_flags, tag,
1151 vm_submap_object, (vm_object_offset_t) 0, FALSE,
1152 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1153 if (kr != KERN_SUCCESS) {
1154 vm_object_deallocate(vm_submap_object);
1155 return kr;
1156 }
1157
1158 pmap_reference(vm_map_pmap(parent));
1159 map = vm_map_create_options(vm_map_pmap(parent), map_addr,
1160 map_addr + map_size, vmc_options);
1161 /* inherit the parent map's page size */
1162 vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));
1163
1164 kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
1165 if (kr != KERN_SUCCESS) {
1166 /*
1167 * See comment preceding vm_map_submap().
1168 */
1169 vm_map_remove(parent, map_addr, map_addr + map_size,
1170 VM_MAP_REMOVE_NO_FLAGS);
1171 vm_map_deallocate(map); /* also removes ref to pmap */
1172 vm_object_deallocate(vm_submap_object);
1173 return kr;
1174 }
1175 *addr = CAST_DOWN(vm_offset_t, map_addr);
1176 *new_map = map;
1177 return KERN_SUCCESS;
1178 }
1179 /*
1180 * The default percentage of memory that can be mlocked is scaled based on the total
1181 * amount of memory in the system. These percentages are calculated
1182 * offline and stored in this table. We index this table by
1183 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1184 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1185 *
1186 * Note that these values were picked for mac.
1187 * If we ever have very large memory config arm devices, we may want to revisit
1188 * since the kernel overhead is smaller there due to the larger page size.
1189 */
1190
1191 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1192 #define VM_USER_WIREABLE_MIN_CONFIG 32
1193 #if CONFIG_JETSAM
1194 /* Systems with jetsam can wire a bit more b/c the system can relieve wired
1195 * pressure.
1196 */
1197 static vm_map_size_t wire_limit_percents[] =
1198 { 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
1199 #else
1200 static vm_map_size_t wire_limit_percents[] =
1201 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1202 #endif /* CONFIG_JETSAM */
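/*
 * Worked example (figures assumed for illustration): on a 16GB
 * configuration, log2(config_memsize) is 34, so the table index is
 * 34 - VM_USER_WIREABLE_MIN_CONFIG = 2, selecting 76% (80% with
 * CONFIG_JETSAM). The resulting limit is roughly 0.76 * 16GB, subject
 * to the VM_NOT_USER_WIREABLE_MAX cap applied below.
 */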
1203
1204 /*
1205 * Sets the default global user wire limit which limits the amount of
1206 * memory that can be locked via mlock() based on the above algorithm.
1207 * This can be overridden via a sysctl.
1208 */
1209 static void
1210 kmem_set_user_wire_limits(void)
1211 {
1212 uint64_t available_mem_log;
1213 uint64_t max_wire_percent;
1214 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1215 sizeof(vm_map_size_t);
1216 vm_map_size_t limit;
1217 uint64_t config_memsize = max_mem;
1218 #if defined(XNU_TARGET_OS_OSX)
1219 config_memsize = max_mem_actual;
1220 #endif /* defined(XNU_TARGET_OS_OSX) */
1221
1222 available_mem_log = bit_floor(config_memsize);
1223
1224 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1225 available_mem_log = 0;
1226 } else {
1227 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1228 }
1229 if (available_mem_log >= wire_limit_percents_length) {
1230 available_mem_log = wire_limit_percents_length - 1;
1231 }
1232 max_wire_percent = wire_limit_percents[available_mem_log];
1233
1234 limit = config_memsize * max_wire_percent / 100;
1235 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1236 if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1237 limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
1238 }
1239
1240 vm_global_user_wire_limit = limit;
1241 /* the default per task limit is the same as the global limit */
1242 vm_per_task_user_wire_limit = limit;
1243 vm_add_wire_count_over_global_limit = 0;
1244 vm_add_wire_count_over_user_limit = 0;
1245 }
1246
1247
1248 /*
1249 * kmem_init:
1250 *
1251 * Initialize the kernel's virtual memory map, taking
1252 * into account all memory allocated up to this time.
1253 */
1254 __startup_func
1255 void
1256 kmem_init(
1257 vm_offset_t start,
1258 vm_offset_t end)
1259 {
1260 vm_map_offset_t map_start;
1261 vm_map_offset_t map_end;
1262 vm_map_kernel_flags_t vmk_flags;
1263
1264 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1265 vmk_flags.vmkf_permanent = TRUE;
1266 vmk_flags.vmkf_no_pmap_check = TRUE;
1267
1268 map_start = vm_map_trunc_page(start,
1269 VM_MAP_PAGE_MASK(kernel_map));
1270 map_end = vm_map_round_page(end,
1271 VM_MAP_PAGE_MASK(kernel_map));
1272
1273 #if defined(__arm__) || defined(__arm64__)
1274 kernel_map = vm_map_create_options(pmap_kernel(),
1275 VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1276 VM_MAX_KERNEL_ADDRESS,
1277 VM_MAP_CREATE_DEFAULT);
1278 /*
1279 * Reserve virtual memory allocated up to this time.
1280 */
1281 {
1282 unsigned int region_select = 0;
1283 vm_map_offset_t region_start;
1284 vm_map_size_t region_size;
1285 vm_map_offset_t map_addr;
1286 kern_return_t kr;
1287
1288 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
1289 map_addr = region_start;
1290 kr = vm_map_enter(kernel_map, &map_addr,
1291 vm_map_round_page(region_size,
1292 VM_MAP_PAGE_MASK(kernel_map)),
1293 (vm_map_offset_t) 0,
1294 VM_FLAGS_FIXED,
1295 vmk_flags,
1296 VM_KERN_MEMORY_NONE,
1297 VM_OBJECT_NULL,
1298 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1299 VM_INHERIT_DEFAULT);
1300
1301 if (kr != KERN_SUCCESS) {
1302 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1303 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1304 (uint64_t) region_size, kr);
1305 }
1306
1307 region_select++;
1308 }
1309 }
1310 #else
1311 kernel_map = vm_map_create_options(pmap_kernel(),
1312 VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
1313 VM_MAP_CREATE_DEFAULT);
1314 /*
1315 * Reserve virtual memory allocated up to this time.
1316 */
1317 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
1318 vm_map_offset_t map_addr;
1319 kern_return_t kr;
1320
1321 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1322 vmk_flags.vmkf_no_pmap_check = TRUE;
1323
1324 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1325 kr = vm_map_enter(kernel_map,
1326 &map_addr,
1327 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1328 (vm_map_offset_t) 0,
1329 VM_FLAGS_FIXED,
1330 vmk_flags,
1331 VM_KERN_MEMORY_NONE,
1332 VM_OBJECT_NULL,
1333 (vm_object_offset_t) 0, FALSE,
1334 VM_PROT_NONE, VM_PROT_NONE,
1335 VM_INHERIT_DEFAULT);
1336
1337 if (kr != KERN_SUCCESS) {
1338 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
1339 (uint64_t) start, (uint64_t) end,
1340 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1341 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1342 kr);
1343 }
1344 }
1345 #endif
1346
1347 kmem_set_user_wire_limits();
1348 }
1349
1350 /*
1351 * Routine: copyinmap
1352 * Purpose:
1353 * Like copyin, except that fromaddr is an address
1354 * in the specified VM map. This implementation
1355 * is incomplete; it handles the current user map
1356 * and the kernel map/submaps.
1357 */
1358 kern_return_t
1359 copyinmap(
1360 vm_map_t map,
1361 vm_map_offset_t fromaddr,
1362 void *todata,
1363 vm_size_t length)
1364 {
1365 kern_return_t kr = KERN_SUCCESS;
1366 vm_map_t oldmap;
1367
1368 if (vm_map_pmap(map) == pmap_kernel()) {
1369 /* assume a correct copy */
1370 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
1371 } else if (current_map() == map) {
1372 if (copyin(fromaddr, todata, length) != 0) {
1373 kr = KERN_INVALID_ADDRESS;
1374 }
1375 } else {
1376 vm_map_reference(map);
1377 oldmap = vm_map_switch(map);
1378 if (copyin(fromaddr, todata, length) != 0) {
1379 kr = KERN_INVALID_ADDRESS;
1380 }
1381 vm_map_switch(oldmap);
1382 vm_map_deallocate(map);
1383 }
1384 return kr;
1385 }
1386
1387 /*
1388 * Routine: copyoutmap
1389 * Purpose:
1390 * Like copyout, except that toaddr is an address
1391 * in the specified VM map.
1392 */
1393 kern_return_t
1394 copyoutmap(
1395 vm_map_t map,
1396 void *fromdata,
1397 vm_map_address_t toaddr,
1398 vm_size_t length)
1399 {
1400 kern_return_t kr = KERN_SUCCESS;
1401 vm_map_t oldmap;
1402
1403 if (vm_map_pmap(map) == pmap_kernel()) {
1404 /* assume a correct copy */
1405 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1406 } else if (current_map() == map) {
1407 if (copyout(fromdata, toaddr, length) != 0) {
1408 kr = KERN_INVALID_ADDRESS;
1409 }
1410 } else {
1411 vm_map_reference(map);
1412 oldmap = vm_map_switch(map);
1413 if (copyout(fromdata, toaddr, length) != 0) {
1414 kr = KERN_INVALID_ADDRESS;
1415 }
1416 vm_map_switch(oldmap);
1417 vm_map_deallocate(map);
1418 }
1419 return kr;
1420 }
1421
1422 /*
1423 * Routine: copyoutmap_atomic{32, 64}
1424 * Purpose:
1425 * Like copyoutmap, except that the operation is atomic.
1426 * Takes in value rather than *fromdata pointer.
1427 */
1428 kern_return_t
1429 copyoutmap_atomic32(
1430 vm_map_t map,
1431 uint32_t value,
1432 vm_map_address_t toaddr)
1433 {
1434 kern_return_t kr = KERN_SUCCESS;
1435 vm_map_t oldmap;
1436
1437 if (vm_map_pmap(map) == pmap_kernel()) {
1438 /* assume a correct toaddr */
1439 *(uint32_t *)toaddr = value;
1440 } else if (current_map() == map) {
1441 if (copyout_atomic32(value, toaddr) != 0) {
1442 kr = KERN_INVALID_ADDRESS;
1443 }
1444 } else {
1445 vm_map_reference(map);
1446 oldmap = vm_map_switch(map);
1447 if (copyout_atomic32(value, toaddr) != 0) {
1448 kr = KERN_INVALID_ADDRESS;
1449 }
1450 vm_map_switch(oldmap);
1451 vm_map_deallocate(map);
1452 }
1453 return kr;
1454 }
1455
1456 kern_return_t
1457 copyoutmap_atomic64(
1458 vm_map_t map,
1459 uint64_t value,
1460 vm_map_address_t toaddr)
1461 {
1462 kern_return_t kr = KERN_SUCCESS;
1463 vm_map_t oldmap;
1464
1465 if (vm_map_pmap(map) == pmap_kernel()) {
1466 /* assume a correct toaddr */
1467 *(uint64_t *)toaddr = value;
1468 } else if (current_map() == map) {
1469 if (copyout_atomic64(value, toaddr) != 0) {
1470 kr = KERN_INVALID_ADDRESS;
1471 }
1472 } else {
1473 vm_map_reference(map);
1474 oldmap = vm_map_switch(map);
1475 if (copyout_atomic64(value, toaddr) != 0) {
1476 kr = KERN_INVALID_ADDRESS;
1477 }
1478 vm_map_switch(oldmap);
1479 vm_map_deallocate(map);
1480 }
1481 return kr;
1482 }
1483
1484 /*
1485 *
1486 * The following two functions are to be used when exposing kernel
1487 * addresses to userspace via any of the various debug or info
1488 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1489 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1490 * are exported to KEXTs.
1491 *
1492 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1493 */
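/*
 * Behaviour sketch (derived from the code below): a slid kernel text or
 * data address is simply unslid; any other non-zero address is replaced
 * by the first vm_offset_t worth of SHA256(salt || addr).
 */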
1494
1495 static void
1496 vm_kernel_addrhash_internal(
1497 vm_offset_t addr,
1498 vm_offset_t *hash_addr,
1499 uint64_t salt)
1500 {
1501 assert(salt != 0);
1502
1503 if (addr == 0) {
1504 *hash_addr = 0;
1505 return;
1506 }
1507
1508 if (VM_KERNEL_IS_SLID(addr)) {
1509 *hash_addr = VM_KERNEL_UNSLIDE(addr);
1510 return;
1511 }
1512
1513 vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
1514 SHA256_CTX sha_ctx;
1515
1516 SHA256_Init(&sha_ctx);
1517 SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1518 SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1519 SHA256_Final(sha_digest, &sha_ctx);
1520
1521 *hash_addr = sha_digest[0];
1522 }
1523
1524 void
1525 vm_kernel_addrhash_external(
1526 vm_offset_t addr,
1527 vm_offset_t *hash_addr)
1528 {
1529 return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
1530 }
1531
1532 vm_offset_t
1533 vm_kernel_addrhash(vm_offset_t addr)
1534 {
1535 vm_offset_t hash_addr;
1536 vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
1537 return hash_addr;
1538 }
1539
1540 void
1541 vm_kernel_addrhide(
1542 vm_offset_t addr,
1543 vm_offset_t *hide_addr)
1544 {
1545 *hide_addr = VM_KERNEL_ADDRHIDE(addr);
1546 }
1547
1548 /*
1549 * vm_kernel_addrperm_external:
1550 * vm_kernel_unslide_or_perm_external:
1551 *
1552 * Use these macros when exposing an address to userspace that could come from
1553 * either kernel text/data *or* the heap.
1554 */
1555 void
1556 vm_kernel_addrperm_external(
1557 vm_offset_t addr,
1558 vm_offset_t *perm_addr)
1559 {
1560 if (VM_KERNEL_IS_SLID(addr)) {
1561 *perm_addr = VM_KERNEL_UNSLIDE(addr);
1562 } else if (VM_KERNEL_ADDRESS(addr)) {
1563 *perm_addr = addr + vm_kernel_addrperm_ext;
1564 } else {
1565 *perm_addr = addr;
1566 }
1567 }
1568
1569 void
1570 vm_kernel_unslide_or_perm_external(
1571 vm_offset_t addr,
1572 vm_offset_t *up_addr)
1573 {
1574 vm_kernel_addrperm_external(addr, up_addr);
1575 }
1576
1577 void
1578 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
1579 {
1580 if (ptr & ((1ul << params.vmpp_shift) - 1)) {
1581 panic("pointer %p can't be packed: low %d bits aren't 0",
1582 (void *)ptr, params.vmpp_shift);
1583 } else if (ptr <= params.vmpp_base) {
1584 panic("pointer %p can't be packed: below base %p",
1585 (void *)ptr, (void *)params.vmpp_base);
1586 } else {
1587 panic("pointer %p can't be packed: maximum encodable pointer is %p",
1588 (void *)ptr, (void *)vm_packing_max_packable(params));
1589 }
1590 }
1591
1592 void
1593 vm_packing_verify_range(
1594 const char *subsystem,
1595 vm_offset_t min_address,
1596 vm_offset_t max_address,
1597 vm_packing_params_t params)
1598 {
1599 if (min_address > max_address) {
1600 panic("%s: %s range invalid min:%p > max:%p",
1601 __func__, subsystem, (void *)min_address, (void *)max_address);
1602 }
1603
1604 if (!params.vmpp_base_relative) {
1605 return;
1606 }
1607
1608 if (min_address <= params.vmpp_base) {
1609 panic("%s: %s range invalid min:%p <= base:%p",
1610 __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
1611 }
1612
1613 if (max_address > vm_packing_max_packable(params)) {
1614 panic("%s: %s range invalid max:%p >= max packable:%p",
1615 __func__, subsystem, (void *)max_address,
1616 (void *)vm_packing_max_packable(params));
1617 }
1618 }
1619