/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *  File:   vm/vm_kern.c
 *  Author: Avadis Tevanian, Jr., Michael Wayne Young
 *  Date:   1985
 *
 *  Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_compressor.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>
#include <kern/ledger.h>
#include <kern/bits.h>
#include <kern/startup.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <libkern/crypto/sha2.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>

#include <san/kasan.h>

/*
 *  Variables exported by this module.
 */

SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
vm_map_t kernel_pageable_map;

/*
 * Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
    vm_object_t object,
    vm_object_offset_t offset,
    vm_object_size_t size);
kern_return_t
kmem_alloc_contig(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size,
    vm_offset_t mask,
    ppnum_t max_pnum,
    ppnum_t pnum_mask,
    kma_flags_t flags,
    vm_tag_t tag)
{
    vm_object_t object;
    vm_object_offset_t offset;
    vm_map_offset_t map_addr;
    vm_map_offset_t map_mask;
    vm_map_size_t map_size, i;
    vm_map_entry_t entry;
    vm_page_t m, pages;
    kern_return_t kr;

    assert(VM_KERN_MEMORY_NONE != tag);

    if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
        return KERN_INVALID_ARGUMENT;
    }

    map_size = vm_map_round_page(size,
        VM_MAP_PAGE_MASK(map));
    map_mask = (vm_map_offset_t)mask;

    /* Check for zero allocation size (either directly or via overflow) */
    if (map_size == 0) {
        *addrp = 0;
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * Allocate a new object (if necessary) and the reference we
     * will be donating to the map entry.  We must do this before
     * locking the map, or risk deadlock with the default pager.
     */
    if ((flags & KMA_KOBJECT) != 0) {
        object = kernel_object;
        vm_object_reference(object);
    } else {
        object = vm_object_allocate(map_size);
    }

    kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0,
        VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
    if (KERN_SUCCESS != kr) {
        vm_object_deallocate(object);
        return kr;
    }

    if (object == kernel_object) {
        offset = map_addr;
    } else {
        offset = 0;
    }
    VME_OBJECT_SET(entry, object);
    VME_OFFSET_SET(entry, offset);

    /* Take an extra object ref in case the map entry gets deleted */
    vm_object_reference(object);
    vm_map_unlock(map);

    kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

    if (kr != KERN_SUCCESS) {
        vm_map_remove(map,
            vm_map_trunc_page(map_addr,
            VM_MAP_PAGE_MASK(map)),
            vm_map_round_page(map_addr + map_size,
            VM_MAP_PAGE_MASK(map)),
            VM_MAP_REMOVE_NO_FLAGS);
        vm_object_deallocate(object);
        *addrp = 0;
        return kr;
    }

    vm_object_lock(object);
    for (i = 0; i < map_size; i += PAGE_SIZE) {
        m = pages;
        pages = NEXT_PAGE(m);
        *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
        m->vmp_busy = FALSE;
        vm_page_insert(m, object, offset + i);
    }
    vm_object_unlock(object);

    kr = vm_map_wire_kernel(map,
        vm_map_trunc_page(map_addr,
        VM_MAP_PAGE_MASK(map)),
        vm_map_round_page(map_addr + map_size,
        VM_MAP_PAGE_MASK(map)),
        VM_PROT_DEFAULT, tag,
        FALSE);

    if (kr != KERN_SUCCESS) {
        if (object == kernel_object) {
            vm_object_lock(object);
            vm_object_page_remove(object, offset, offset + map_size);
            vm_object_unlock(object);
        }
        vm_map_remove(map,
            vm_map_trunc_page(map_addr,
            VM_MAP_PAGE_MASK(map)),
            vm_map_round_page(map_addr + map_size,
            VM_MAP_PAGE_MASK(map)),
            VM_MAP_REMOVE_NO_FLAGS);
        vm_object_deallocate(object);
        return kr;
    }
    vm_object_deallocate(object);

    if (object == kernel_object) {
        vm_map_simplify(map, map_addr);
        vm_tag_update_size(tag, map_size);
    }
    *addrp = (vm_offset_t) map_addr;
    assert((vm_map_offset_t) *addrp == map_addr);

    return KERN_SUCCESS;
}

/*
 *  Master entry point for allocating kernel memory.
 *  NOTE: this routine is _never_ interrupt safe.
 *
 *  map     : map to allocate into
 *  addrp   : pointer to start address of new memory
 *  size    : size of memory requested
 *  flags   : options
 *            KMA_HERE          *addrp is base address, else "anywhere"
 *            KMA_NOPAGEWAIT    don't wait for pages if unavailable
 *            KMA_KOBJECT       use kernel_object
 *            KMA_LOMEM         support for 32 bit devices in a 64 bit world
 *                              if set and a lomemory pool is available
 *                              grab pages from it... this also implies
 *                              KMA_NOPAGEWAIT
 */

kern_return_t
kernel_memory_allocate(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size,
    vm_offset_t mask,
    kma_flags_t flags,
    vm_tag_t tag)
{
    return kernel_memory_allocate_prot(map, addrp, size, mask, flags, tag,
        VM_PROT_DEFAULT, VM_PROT_ALL);
}
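
/*
 * Illustrative usage (a minimal sketch, not part of this file's API
 * surface; the size and tag are assumptions chosen for the example):
 *
 *      vm_offset_t stack;
 *      kern_return_t kr;
 *
 *      // wired kernel_object memory with guard pages on both ends,
 *      // e.g. for a kernel stack; the size passed in includes the
 *      // extra space for the two guard pages
 *      kr = kernel_memory_allocate(kernel_map, &stack,
 *          kernel_stack_size + 2 * PAGE_SIZE, 0,
 *          KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST | KMA_KSTACK,
 *          VM_KERN_MEMORY_STACK);
 *      if (kr == KERN_SUCCESS) {
 *          ...
 *          kmem_free(kernel_map, stack, kernel_stack_size + 2 * PAGE_SIZE);
 *      }
 */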

kern_return_t
kernel_memory_allocate_prot(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size,
    vm_offset_t mask,
    kma_flags_t flags,
    vm_tag_t tag,
    vm_prot_t protection,
    vm_prot_t max_protection)
{
    vm_object_t object;
    vm_object_offset_t offset;
    vm_object_offset_t pg_offset;
    vm_map_entry_t entry = NULL;
    vm_map_offset_t map_addr, fill_start;
    vm_map_offset_t map_mask;
    vm_map_size_t map_size, fill_size;
    kern_return_t kr, pe_result;
    vm_page_t mem;
    vm_page_t guard_page_list = NULL;
    vm_page_t wired_page_list = NULL;
    int guard_page_count = 0;
    int wired_page_count = 0;
    int vm_alloc_flags;
    vm_map_kernel_flags_t vmk_flags;
    vm_prot_t kma_prot;

    if (startup_phase < STARTUP_SUB_KMEM) {
        panic("kernel_memory_allocate: VM is not ready");
    }

    map_size = vm_map_round_page(size,
        VM_MAP_PAGE_MASK(map));
    map_mask = (vm_map_offset_t) mask;

    vm_alloc_flags = 0; //VM_MAKE_TAG(tag);
    vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

    /* Check for zero allocation size (either directly or via overflow) */
    if (map_size == 0) {
        *addrp = 0;
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * Limit the size of a single extent of wired memory, to try to
     * limit the damage to the system if too many pages get wired down.
     * The limit is raised to 2GB with a 128GB max physical limit,
     * but scaled by installed memory above this.
     */
    if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
        map_size > MAX(1ULL << 31, sane_size / 64)) {
        return KERN_RESOURCE_SHORTAGE;
    }

    /*
     * Guard pages:
     *
     * Guard pages are implemented as fictitious pages.  By placing guard pages
     * on either end of a stack, they can help detect cases where a thread walks
     * off either end of its stack.  They are allocated and set up here and attempts
     * to access those pages are trapped in vm_fault_page().
     *
     * The map_size we were passed may include extra space for
     * guard pages.  If those were requested, then back it out of fill_size
     * since vm_map_find_space() takes just the actual size not including
     * guard pages.  Similarly, fill_start indicates where the actual pages
     * will begin in the range.
     */

    fill_start = 0;
    fill_size = map_size;

    if (flags & KMA_GUARD_FIRST) {
        vmk_flags.vmkf_guard_before = TRUE;
        fill_start += PAGE_SIZE_64;
        fill_size -= PAGE_SIZE_64;
        if (map_size < fill_start + fill_size) {
            /* no space for a guard page */
            *addrp = 0;
            return KERN_INVALID_ARGUMENT;
        }
        guard_page_count++;
    }
    if (flags & KMA_GUARD_LAST) {
        vmk_flags.vmkf_guard_after = TRUE;
        fill_size -= PAGE_SIZE_64;
        if (map_size <= fill_start + fill_size) {
            /* no space for a guard page */
            *addrp = 0;
            return KERN_INVALID_ARGUMENT;
        }
        guard_page_count++;
    }
    wired_page_count = (int) (fill_size / PAGE_SIZE_64);
    assert(wired_page_count * PAGE_SIZE_64 == fill_size);

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
        size, 0, 0, 0);
#endif

    for (int i = 0; i < guard_page_count; i++) {
        mem = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
        if (mem == VM_PAGE_NULL) {
            kr = KERN_RESOURCE_SHORTAGE;
            goto out;
        }
        mem->vmp_snext = guard_page_list;
        guard_page_list = mem;
    }

    if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
        kr = vm_page_alloc_list(wired_page_count, flags,
            &wired_page_list);
        if (kr != KERN_SUCCESS) {
            goto out;
        }
    }

    /*
     * Allocate a new object (if necessary).  We must do this before
     * locking the map, or risk deadlock with the default pager.
     */
    if ((flags & KMA_KOBJECT) != 0) {
        object = kernel_object;
        vm_object_reference(object);
    } else if ((flags & KMA_COMPRESSOR) != 0) {
        object = compressor_object;
        vm_object_reference(object);
    } else {
        object = vm_object_allocate(map_size);
    }

    if (flags & KMA_ATOMIC) {
        vmk_flags.vmkf_atomic_entry = TRUE;
    }

    if (flags & KMA_LAST_FREE) {
        vm_alloc_flags |= VM_MAP_FIND_LAST_FREE;
    }

    kr = vm_map_find_space(map, &map_addr,
        fill_size, map_mask,
        vm_alloc_flags, vmk_flags, tag, &entry);

    if (KERN_SUCCESS != kr) {
        vm_object_deallocate(object);
        goto out;
    }

    entry->protection = protection;
    entry->max_protection = max_protection;

    if (object == kernel_object || object == compressor_object) {
        offset = map_addr;
    } else {
        offset = 0;
    }
    VME_OBJECT_SET(entry, object);
    VME_OFFSET_SET(entry, offset);

    if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
        entry->wired_count++;
    }

    if (flags & KMA_PERMANENT) {
        entry->permanent = TRUE;
    }

    if (object != kernel_object && object != compressor_object) {
        vm_object_reference(object);
    }

    vm_object_lock(object);
    vm_map_unlock(map);

    pg_offset = 0;

    if (fill_start) {
        if (guard_page_list == NULL) {
            panic("kernel_memory_allocate: guard_page_list == NULL");
        }

        mem = guard_page_list;
        guard_page_list = mem->vmp_snext;
        mem->vmp_snext = NULL;

        vm_page_insert(mem, object, offset + pg_offset);

        mem->vmp_busy = FALSE;
        pg_offset += PAGE_SIZE_64;
    }

    kma_prot = VM_PROT_READ | VM_PROT_WRITE;

#if KASAN
    if (!(flags & KMA_VAONLY)) {
        /* for VAONLY mappings we notify in populate only */
        kasan_notify_address(map_addr, size);
    }
#endif

    if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
        pg_offset = fill_start + fill_size;
    } else {
        for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
            if (wired_page_list == NULL) {
                panic("kernel_memory_allocate: wired_page_list == NULL");
            }

            mem = wired_page_list;
            wired_page_list = mem->vmp_snext;
            mem->vmp_snext = NULL;

            assert(mem->vmp_wire_count == 0);
            assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);

            mem->vmp_q_state = VM_PAGE_IS_WIRED;
            mem->vmp_wire_count++;
            if (__improbable(mem->vmp_wire_count == 0)) {
                panic("kernel_memory_allocate(%p): wire_count overflow",
                    mem);
            }

            vm_page_insert_wired(mem, object, offset + pg_offset, tag);

            mem->vmp_busy = FALSE;
            mem->vmp_pmapped = TRUE;
            mem->vmp_wpmapped = TRUE;

            PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset,
                0, /* fault_phys_offset */
                mem,
                kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
                PMAP_OPTIONS_NOWAIT, pe_result);

            if (pe_result == KERN_RESOURCE_SHORTAGE) {
                vm_object_unlock(object);

                PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
                    kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
                    pe_result);

                vm_object_lock(object);
            }

            assert(pe_result == KERN_SUCCESS);

            if (flags & KMA_NOENCRYPT) {
                bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

                pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
            }
        }
        if (kernel_object == object) {
            vm_tag_update_size(tag, fill_size);
        }
    }
    if ((fill_start + fill_size) < map_size) {
        if (guard_page_list == NULL) {
            panic("kernel_memory_allocate: guard_page_list == NULL");
        }

        mem = guard_page_list;
        guard_page_list = mem->vmp_snext;
        mem->vmp_snext = NULL;

        vm_page_insert(mem, object, offset + pg_offset);

        mem->vmp_busy = FALSE;
    }
    if (guard_page_list || wired_page_list) {
        panic("kernel_memory_allocate: non empty list");
    }

    if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
        vm_page_lockspin_queues();
        vm_page_wire_count += wired_page_count;
        vm_page_unlock_queues();
    }

    vm_object_unlock(object);

    /*
     * Now that the pages are wired, we no longer have to fear coalescing.
     */
    if (object == kernel_object || object == compressor_object) {
        vm_map_simplify(map, map_addr);
    } else {
        vm_object_deallocate(object);
    }

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
        wired_page_count, 0, 0, 0);
#endif
    /*
     * Return the memory, not zeroed.
     */
    *addrp = CAST_DOWN(vm_offset_t, map_addr);
    return KERN_SUCCESS;

out:
    if (guard_page_list) {
        vm_page_free_list(guard_page_list, FALSE);
    }

    if (wired_page_list) {
        vm_page_free_list(wired_page_list, FALSE);
    }

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
        wired_page_count, 0, 0, 0);
#endif
    return kr;
}

void
kernel_memory_populate_with_pages(
    vm_map_t map,
    vm_offset_t addr,
    vm_size_t size,
    vm_page_t page_list,
    kma_flags_t flags,
    vm_tag_t tag,
    vm_prot_t prot)
{
    vm_object_t object;
    kern_return_t pe_result;
    vm_page_t mem;
    int page_count = atop_64(size);

    if (flags & KMA_COMPRESSOR) {
        panic("%s(%p,0x%llx,0x%llx,0x%x): KMA_COMPRESSOR", __func__,
            map, (uint64_t) addr, (uint64_t) size, flags);
    }

    if (flags & KMA_KOBJECT) {
        object = kernel_object;

        vm_object_lock(object);
    } else {
        /*
         * If it's not the kernel object, we need to:
         *      lock map;
         *      lookup entry;
         *      lock object;
         *      take reference on object;
         *      unlock map;
         */
        panic("%s(%p,0x%llx,0x%llx,0x%x): !KMA_KOBJECT", __func__,
            map, (uint64_t) addr, (uint64_t) size, flags);
    }

    for (vm_object_offset_t pg_offset = 0;
        pg_offset < size;
        pg_offset += PAGE_SIZE_64) {
        if (page_list == NULL) {
            panic("%s: page_list too short", __func__);
        }

        mem = page_list;
        page_list = mem->vmp_snext;
        mem->vmp_snext = NULL;

        assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
        mem->vmp_q_state = VM_PAGE_IS_WIRED;
        mem->vmp_wire_count++;
        if (mem->vmp_wire_count == 0) {
            panic("%s(%p): wire_count overflow", __func__, mem);
        }

        vm_page_insert_wired(mem, object, addr + pg_offset, tag);

        mem->vmp_busy = FALSE;
        mem->vmp_pmapped = TRUE;
        mem->vmp_wpmapped = TRUE;

        PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset,
            0, /* fault_phys_offset */
            mem,
            prot, VM_PROT_NONE,
            ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
            PMAP_OPTIONS_NOWAIT, pe_result);

        if (pe_result == KERN_RESOURCE_SHORTAGE) {
            vm_object_unlock(object);

            PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
                prot, VM_PROT_NONE,
                ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
                pe_result);

            vm_object_lock(object);
        }

        assert(pe_result == KERN_SUCCESS);

        if (flags & KMA_NOENCRYPT) {
            __nosan_bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
            pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
        }
    }
    if (page_list) {
        panic("%s: page_list too long", __func__);
    }
    vm_object_unlock(object);

    vm_page_lockspin_queues();
    vm_page_wire_count += page_count;
    vm_page_unlock_queues();
    vm_tag_update_size(tag, size);

#if KASAN
    if (map == compressor_map) {
        kasan_notify_address_nopoison(addr, size);
    } else {
        kasan_notify_address(addr, size);
    }
#endif
}

kern_return_t
kernel_memory_populate(
    vm_map_t map,
    vm_offset_t addr,
    vm_size_t size,
    kma_flags_t flags,
    vm_tag_t tag)
{
    vm_object_t object;
    vm_object_offset_t offset, pg_offset;
    kern_return_t kr = KERN_SUCCESS;
    vm_page_t mem;
    vm_page_t page_list = NULL;
    int page_count = atop_64(size);

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
        size, 0, 0, 0);
#endif

    assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));

    if (flags & KMA_COMPRESSOR) {
        pg_offset = page_count * PAGE_SIZE_64;

        do {
            for (;;) {
                mem = vm_page_grab();

                if (mem != VM_PAGE_NULL) {
                    break;
                }

                VM_PAGE_WAIT();
            }
            if (KMA_ZERO & flags) {
                vm_page_zero_fill(mem);
            }
            mem->vmp_snext = page_list;
            page_list = mem;

            pg_offset -= PAGE_SIZE_64;

            kr = pmap_enter_options(kernel_pmap,
                addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
                VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
                PMAP_OPTIONS_INTERNAL, NULL);
            assert(kr == KERN_SUCCESS);
        } while (pg_offset);

        offset = addr;
        object = compressor_object;

        vm_object_lock(object);

        for (pg_offset = 0;
            pg_offset < size;
            pg_offset += PAGE_SIZE_64) {
            mem = page_list;
            page_list = mem->vmp_snext;
            mem->vmp_snext = NULL;

            vm_page_insert(mem, object, offset + pg_offset);
            assert(mem->vmp_busy);

            mem->vmp_busy = FALSE;
            mem->vmp_pmapped = TRUE;
            mem->vmp_wpmapped = TRUE;
            mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
        }
        vm_object_unlock(object);

#if KASAN
        if (map == compressor_map) {
            kasan_notify_address_nopoison(addr, size);
        } else {
            kasan_notify_address(addr, size);
        }
#endif

#if DEBUG || DEVELOPMENT
        task_t task = current_task();
        if (task != NULL) {
            ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_count);
        }
#endif
    } else {
        kr = vm_page_alloc_list(page_count, flags, &page_list);
        if (kr == KERN_SUCCESS) {
            kernel_memory_populate_with_pages(map, addr, size,
                page_list, flags, tag, VM_PROT_READ | VM_PROT_WRITE);
        }
    }

#if DEBUG || DEVELOPMENT
    VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
        page_count, 0, 0, 0);
#endif
    return kr;
}
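
/*
 * Illustrative usage (a minimal sketch; flags and sizes are assumptions
 * chosen for the example).  A VA-only range can be populated lazily and
 * later depopulated without giving up the virtual range:
 *
 *      vm_offset_t va;
 *
 *      if (kernel_memory_allocate(kernel_map, &va, 4 * PAGE_SIZE, 0,
 *          KMA_KOBJECT | KMA_VAONLY, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
 *          kernel_memory_populate(kernel_map, va, PAGE_SIZE,
 *              KMA_KOBJECT, VM_KERN_MEMORY_DIAG);
 *          ...
 *          kernel_memory_depopulate(kernel_map, va, PAGE_SIZE,
 *              KMA_KOBJECT, VM_KERN_MEMORY_DIAG);
 *      }
 */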


void
kernel_memory_depopulate(
    vm_map_t map,
    vm_offset_t addr,
    vm_size_t size,
    kma_flags_t flags,
    vm_tag_t tag)
{
    vm_object_t object;
    vm_object_offset_t offset, pg_offset;
    vm_page_t mem;
    vm_page_t local_freeq = NULL;
    unsigned int pages_unwired;

    assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));

    if (flags & KMA_COMPRESSOR) {
        offset = addr;
        object = compressor_object;

        vm_object_lock(object);
    } else if (flags & KMA_KOBJECT) {
        offset = addr;
        object = kernel_object;
        vm_object_lock(object);
    } else {
        offset = 0;
        object = NULL;
        /*
         * If it's not the kernel object, we need to:
         *      lock map;
         *      lookup entry;
         *      lock object;
         *      unlock map;
         */
        panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
            "!KMA_KOBJECT",
            map, (uint64_t) addr, (uint64_t) size, flags);
    }
    pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);

    for (pg_offset = 0, pages_unwired = 0;
        pg_offset < size;
        pg_offset += PAGE_SIZE_64) {
        mem = vm_page_lookup(object, offset + pg_offset);

        assert(mem);

        if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
            pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
            pages_unwired++;
        }

        mem->vmp_busy = TRUE;

        assert(mem->vmp_tabled);
        vm_page_remove(mem, TRUE);
        assert(mem->vmp_busy);

        assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
        assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
            (mem->vmp_q_state == VM_PAGE_IS_WIRED));

        mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
        mem->vmp_snext = local_freeq;
        local_freeq = mem;
    }
    vm_object_unlock(object);


    if (local_freeq) {
        vm_page_free_list(local_freeq, TRUE);
        if (pages_unwired != 0) {
            vm_page_lockspin_queues();
            vm_page_wire_count -= pages_unwired;
            vm_page_unlock_queues();
            vm_tag_update_size(tag, -ptoa_64(pages_unwired));
        }
    }
}
870
871 /*
872 * kmem_alloc:
873 *
874 * Allocate wired-down memory in the kernel's address map
875 * or a submap. The memory is not zero-filled.
876 */
877
878 kern_return_t
kmem_alloc_external(vm_map_t map,vm_offset_t * addrp,vm_size_t size)879 kmem_alloc_external(
880 vm_map_t map,
881 vm_offset_t *addrp,
882 vm_size_t size)
883 {
884 return kmem_alloc(map, addrp, size, vm_tag_bt());
885 }
886
887
888 kern_return_t
kmem_alloc(vm_map_t map,vm_offset_t * addrp,vm_size_t size,vm_tag_t tag)889 kmem_alloc(
890 vm_map_t map,
891 vm_offset_t *addrp,
892 vm_size_t size,
893 vm_tag_t tag)
894 {
895 return kmem_alloc_flags(map, addrp, size, tag, 0);
896 }
897
898 kern_return_t
kmem_alloc_flags(vm_map_t map,vm_offset_t * addrp,vm_size_t size,vm_tag_t tag,kma_flags_t flags)899 kmem_alloc_flags(
900 vm_map_t map,
901 vm_offset_t *addrp,
902 vm_size_t size,
903 vm_tag_t tag,
904 kma_flags_t flags)
905 {
906 kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
907 if (kr == KERN_SUCCESS) {
908 TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
909 }
910 return kr;
911 }
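
/*
 * Illustrative usage (a minimal sketch; the two-page size and the IOKit
 * tag are assumptions chosen for the example):
 *
 *      vm_offset_t buf;
 *      kern_return_t kr;
 *
 *      kr = kmem_alloc(kernel_map, &buf, 2 * PAGE_SIZE, VM_KERN_MEMORY_IOKIT);
 *      if (kr == KERN_SUCCESS) {
 *          bzero((void *)buf, 2 * PAGE_SIZE);  // memory is not zero-filled
 *          ...
 *          kmem_free(kernel_map, buf, 2 * PAGE_SIZE);
 *      }
 */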

/*
 *  kmem_realloc:
 *
 *  Reallocate wired-down memory in the kernel's address map
 *  or a submap.  Newly allocated pages are not zeroed.
 *  This can only be used on regions allocated with kmem_alloc.
 *
 *  If successful, the pages in the old region are mapped twice.
 *  The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
    vm_map_t map,
    vm_offset_t oldaddr,
    vm_size_t oldsize,
    vm_offset_t *newaddrp,
    vm_size_t newsize,
    vm_tag_t tag)
{
    vm_object_t object;
    vm_object_offset_t offset;
    vm_map_offset_t oldmapmin;
    vm_map_offset_t oldmapmax;
    vm_map_offset_t newmapaddr;
    vm_map_size_t oldmapsize;
    vm_map_size_t newmapsize;
    vm_map_entry_t oldentry;
    vm_map_entry_t newentry;
    vm_page_t mem;
    kern_return_t kr;
    vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

    oldmapmin = vm_map_trunc_page(oldaddr,
        VM_MAP_PAGE_MASK(map));
    oldmapmax = vm_map_round_page(oldaddr + oldsize,
        VM_MAP_PAGE_MASK(map));
    oldmapsize = oldmapmax - oldmapmin;
    newmapsize = vm_map_round_page(newsize,
        VM_MAP_PAGE_MASK(map));
    if (newmapsize < newsize) {
        /* overflow */
        *newaddrp = 0;
        return KERN_INVALID_ARGUMENT;
    }

    /*
     *  Find the VM object backing the old region.
     */

    vm_map_lock(map);

    if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
        panic("kmem_realloc");
    }
    if (oldentry->vme_atomic) {
        vmk_flags.vmkf_atomic_entry = TRUE;
    }
    object = VME_OBJECT(oldentry);

    /*
     *  Increase the size of the object and
     *  fill in the new region.
     */

    vm_object_reference(object);
    /* by grabbing the object lock before unlocking the map */
    /* we guarantee that we will panic if more than one     */
    /* attempt is made to realloc a kmem_alloc'd area       */
    vm_object_lock(object);
    vm_map_unlock(map);
    if (object->vo_size != oldmapsize) {
        panic("kmem_realloc");
    }
    object->vo_size = newmapsize;
    vm_object_unlock(object);

    /* allocate the new pages while expanded portion of the */
    /* object is still not mapped */
    kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
        vm_object_round_page(newmapsize - oldmapsize));

    /*
     *  Find space for the new region.
     */

    kr = vm_map_find_space(map, &newmapaddr, newmapsize,
        (vm_map_offset_t) 0, 0,
        vmk_flags,
        tag,
        &newentry);
    if (kr != KERN_SUCCESS) {
        vm_object_lock(object);
        for (offset = oldmapsize;
            offset < newmapsize; offset += PAGE_SIZE) {
            if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
                VM_PAGE_FREE(mem);
            }
        }
        object->vo_size = oldmapsize;
        vm_object_unlock(object);
        vm_object_deallocate(object);
        return kr;
    }
    VME_OBJECT_SET(newentry, object);
    VME_OFFSET_SET(newentry, 0);
    assert(newentry->wired_count == 0);


    /* add an extra reference in case we have someone doing an */
    /* unexpected deallocate */
    vm_object_reference(object);
    vm_map_unlock(map);

    kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
        VM_PROT_DEFAULT, tag, FALSE);
    if (KERN_SUCCESS != kr) {
        vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
        vm_object_lock(object);
        for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
            if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
                VM_PAGE_FREE(mem);
            }
        }
        object->vo_size = oldmapsize;
        vm_object_unlock(object);
        vm_object_deallocate(object);
        return kr;
    }
    vm_object_deallocate(object);

    if (kernel_object == object) {
        vm_tag_update_size(tag, newmapsize);
    }

    *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
    return KERN_SUCCESS;
}
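
/*
 * Illustrative usage (a minimal sketch; sizes and tag are assumptions
 * chosen for the example).  On success the old region remains mapped,
 * so it must be released separately:
 *
 *      vm_offset_t old_buf, new_buf;
 *      kern_return_t kr;
 *
 *      kr = kmem_alloc(kernel_map, &old_buf, PAGE_SIZE, VM_KERN_MEMORY_OSFMK);
 *      ...
 *      kr = kmem_realloc(kernel_map, old_buf, PAGE_SIZE,
 *          &new_buf, 2 * PAGE_SIZE, VM_KERN_MEMORY_OSFMK);
 *      if (kr == KERN_SUCCESS) {
 *          // the pages are now mapped at both old_buf and new_buf
 *          kmem_free(kernel_map, old_buf, PAGE_SIZE);
 *      }
 */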

/*
 *  kmem_alloc_kobject:
 *
 *  Allocate wired-down memory in the kernel's address map
 *  or a submap.  The memory is not zero-filled.
 *
 *  The memory is allocated in the kernel_object.
 *  It may not be copied with vm_map_copy, and
 *  it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size)
{
    return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
}

kern_return_t
kmem_alloc_kobject(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size,
    vm_tag_t tag)
{
    return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
}

/*
 *  kmem_alloc_aligned:
 *
 *  Like kmem_alloc_kobject, except that the memory is aligned.
 *  The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size,
    vm_tag_t tag)
{
    if ((size & (size - 1)) != 0) {
        panic("kmem_alloc_aligned: size not aligned");
    }
    return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
}
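
/*
 * Illustrative usage (a minimal sketch; the 64 KB size and the tag are
 * assumptions chosen for the example).  Passing size - 1 as the mask
 * above is what yields an allocation aligned to its own size:
 *
 *      vm_offset_t ring;
 *
 *      // 64 KB allocation whose start address is 64 KB aligned
 *      if (kmem_alloc_aligned(kernel_map, &ring, 64 * 1024,
 *          VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
 *          assert((ring & (64 * 1024 - 1)) == 0);
 *          ...
 *      }
 */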

/*
 *  kmem_alloc_pageable:
 *
 *  Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable_external(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size)
{
    return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
}

kern_return_t
kmem_alloc_pageable(
    vm_map_t map,
    vm_offset_t *addrp,
    vm_size_t size,
    vm_tag_t tag)
{
    vm_map_offset_t map_addr;
    vm_map_size_t map_size;
    kern_return_t kr;

#ifndef normal
    map_addr = (vm_map_min(map)) + PAGE_SIZE;
#else
    map_addr = vm_map_min(map);
#endif
    map_size = vm_map_round_page(size,
        VM_MAP_PAGE_MASK(map));
    if (map_size < size) {
        /* overflow */
        *addrp = 0;
        return KERN_INVALID_ARGUMENT;
    }

    kr = vm_map_enter(map, &map_addr, map_size,
        (vm_map_offset_t) 0,
        VM_FLAGS_ANYWHERE,
        VM_MAP_KERNEL_FLAGS_NONE,
        tag,
        VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
        VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

    if (kr != KERN_SUCCESS) {
        return kr;
    }

#if KASAN
    kasan_notify_address(map_addr, map_size);
#endif
    *addrp = CAST_DOWN(vm_offset_t, map_addr);
    return KERN_SUCCESS;
}

/*
 *  kmem_free:
 *
 *  Release a region of kernel virtual memory allocated
 *  with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 *  and return the physical pages associated with that region.
 */

void
kmem_free(
    vm_map_t map,
    vm_offset_t addr,
    vm_size_t size)
{
    kern_return_t kr;

    assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

    TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

    if (size == 0) {
#if MACH_ASSERT
        printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
#endif
        return;
    }

    kr = vm_map_remove(map,
        vm_map_trunc_page(addr,
        VM_MAP_PAGE_MASK(map)),
        vm_map_round_page(addr + size,
        VM_MAP_PAGE_MASK(map)),
        VM_MAP_REMOVE_KUNWIRE);
    if (kr != KERN_SUCCESS) {
        panic("kmem_free");
    }
}

/*
 *  Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
    vm_object_t object,
    vm_object_offset_t offset,
    vm_object_size_t size)
{
    vm_object_size_t alloc_size;

    alloc_size = vm_object_round_page(size);
    vm_object_lock(object);
    while (alloc_size) {
        vm_page_t mem;


        /*
         *  Allocate a page
         */
        while (VM_PAGE_NULL ==
            (mem = vm_page_alloc(object, offset))) {
            vm_object_unlock(object);
            VM_PAGE_WAIT();
            vm_object_lock(object);
        }
        mem->vmp_busy = FALSE;

        alloc_size -= PAGE_SIZE;
        offset += PAGE_SIZE;
    }
    vm_object_unlock(object);
    return KERN_SUCCESS;
}

/*
 *  kmem_suballoc:
 *
 *  Allocates a map to manage a subrange
 *  of the kernel virtual address space.
 *
 *  Arguments are as follows:
 *
 *  parent      Map to take range from
 *  addr        Address of start of range (IN/OUT)
 *  size        Size of range to find
 *  pageable    Can region be paged
 *  anywhere    Can region be located anywhere in map
 *  new_map     Pointer to new submap
 */
kern_return_t
kmem_suballoc(
    vm_map_t parent,
    vm_offset_t *addr,
    vm_size_t size,
    boolean_t pageable,
    int flags,
    vm_map_kernel_flags_t vmk_flags,
    vm_tag_t tag,
    vm_map_t *new_map)
{
    vm_map_t map;
    vm_map_offset_t map_addr;
    vm_map_size_t map_size;
    kern_return_t kr;

    map_size = vm_map_round_page(size,
        VM_MAP_PAGE_MASK(parent));
    if (map_size < size) {
        /* overflow */
        *addr = 0;
        return KERN_INVALID_ARGUMENT;
    }

    /*
     *  Need reference on submap object because it is internal
     *  to the vm_system.  vm_object_enter will never be called
     *  on it (usual source of reference for vm_map_enter).
     */
    vm_object_reference(vm_submap_object);

    map_addr = ((flags & VM_FLAGS_ANYWHERE)
        ? vm_map_min(parent)
        : vm_map_trunc_page(*addr,
        VM_MAP_PAGE_MASK(parent)));

    kr = vm_map_enter(parent, &map_addr, map_size,
        (vm_map_offset_t) 0, flags, vmk_flags, tag,
        vm_submap_object, (vm_object_offset_t) 0, FALSE,
        VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
    if (kr != KERN_SUCCESS) {
        vm_object_deallocate(vm_submap_object);
        return kr;
    }

    pmap_reference(vm_map_pmap(parent));
    map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
    if (map == VM_MAP_NULL) {
        panic("kmem_suballoc: vm_map_create failed");   /* "can't happen" */
    }
    /* inherit the parent map's page size */
    vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));

    kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
    if (kr != KERN_SUCCESS) {
        /*
         * See comment preceding vm_map_submap().
         */
        vm_map_remove(parent, map_addr, map_addr + map_size,
            VM_MAP_REMOVE_NO_FLAGS);
        vm_map_deallocate(map); /* also removes ref to pmap */
        vm_object_deallocate(vm_submap_object);
        return kr;
    }
    *addr = CAST_DOWN(vm_offset_t, map_addr);
    *new_map = map;
    return KERN_SUCCESS;
}
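
/*
 * Illustrative usage (a minimal sketch; the submap size, flags, and tag
 * are assumptions chosen for the example):
 *
 *      vm_offset_t base = 0;
 *      vm_map_t submap;
 *      kern_return_t kr;
 *
 *      kr = kmem_suballoc(kernel_map, &base, 128 * 1024 * 1024,
 *          FALSE,                      // wired, not pageable
 *          VM_FLAGS_ANYWHERE,
 *          VM_MAP_KERNEL_FLAGS_NONE,
 *          VM_KERN_MEMORY_KALLOC,
 *          &submap);
 *      if (kr == KERN_SUCCESS) {
 *          // subsequent kmem_alloc() calls may now target submap
 *      }
 */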
/*
 * The default percentage of memory that can be mlocked is scaled based on the total
 * amount of memory in the system.  These percentages are calculated
 * offline and stored in this table.  We index this table by
 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG.  We clamp this index in the range
 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
 *
 * Note that these values were picked for mac.
 * If we ever have very large memory config arm devices, we may want to revisit
 * since the kernel overhead is smaller there due to the larger page size.
 */

/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
#define VM_USER_WIREABLE_MIN_CONFIG 32
#if CONFIG_JETSAM
/* Systems with jetsam can wire a bit more b/c the system can relieve wired
 * pressure.
 */
static vm_map_size_t wire_limit_percents[] =
{ 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
#else
static vm_map_size_t wire_limit_percents[] =
{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
#endif /* CONFIG_JETSAM */
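
/*
 * Worked example (assuming a 16 GB non-jetsam config): bit_floor(16 GB)
 * is 34, so the table index is 34 - VM_USER_WIREABLE_MIN_CONFIG = 2 and
 * the wire limit is 76% of 16 GB, roughly 12.2 GB, before the
 * VM_NOT_USER_WIREABLE_MAX cap below is applied.
 */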

/*
 * Sets the default global user wire limit which limits the amount of
 * memory that can be locked via mlock() based on the above algorithm.
 * This can be overridden via a sysctl.
 */
static void
kmem_set_user_wire_limits(void)
{
    uint64_t available_mem_log;
    uint64_t max_wire_percent;
    size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
        sizeof(vm_map_size_t);
    vm_map_size_t limit;
    uint64_t config_memsize = max_mem;
#if defined(XNU_TARGET_OS_OSX)
    config_memsize = max_mem_actual;
#endif /* defined(XNU_TARGET_OS_OSX) */

    available_mem_log = bit_floor(config_memsize);

    if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
        available_mem_log = 0;
    } else {
        available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
    }
    if (available_mem_log >= wire_limit_percents_length) {
        available_mem_log = wire_limit_percents_length - 1;
    }
    max_wire_percent = wire_limit_percents[available_mem_log];

    limit = config_memsize * max_wire_percent / 100;
    /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
    if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
        limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
    }

    vm_global_user_wire_limit = limit;
    /* the default per task limit is the same as the global limit */
    vm_per_task_user_wire_limit = limit;
    vm_add_wire_count_over_global_limit = 0;
    vm_add_wire_count_over_user_limit = 0;
}


/*
 *  kmem_init:
 *
 *  Initialize the kernel's virtual memory map, taking
 *  into account all memory allocated up to this time.
 */
__startup_func
void
kmem_init(
    vm_offset_t start,
    vm_offset_t end)
{
    vm_map_offset_t map_start;
    vm_map_offset_t map_end;
    vm_map_kernel_flags_t vmk_flags;

    vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    vmk_flags.vmkf_permanent = TRUE;
    vmk_flags.vmkf_no_pmap_check = TRUE;

    map_start = vm_map_trunc_page(start,
        VM_MAP_PAGE_MASK(kernel_map));
    map_end = vm_map_round_page(end,
        VM_MAP_PAGE_MASK(kernel_map));

#if defined(__arm__) || defined(__arm64__)
    kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
        VM_MAX_KERNEL_ADDRESS, FALSE);
    /*
     *  Reserve virtual memory allocated up to this time.
     */
    {
        unsigned int region_select = 0;
        vm_map_offset_t region_start;
        vm_map_size_t region_size;
        vm_map_offset_t map_addr;
        kern_return_t kr;

        while (pmap_virtual_region(region_select, &region_start, &region_size)) {
            map_addr = region_start;
            kr = vm_map_enter(kernel_map, &map_addr,
                vm_map_round_page(region_size,
                VM_MAP_PAGE_MASK(kernel_map)),
                (vm_map_offset_t) 0,
                VM_FLAGS_FIXED,
                vmk_flags,
                VM_KERN_MEMORY_NONE,
                VM_OBJECT_NULL,
                (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
                VM_INHERIT_DEFAULT);

            if (kr != KERN_SUCCESS) {
                panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
                    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
                    (uint64_t) region_size, kr);
            }

            region_select++;
        }
    }
#else
    kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
        map_end, FALSE);
    /*
     *  Reserve virtual memory allocated up to this time.
     */
    if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
        vm_map_offset_t map_addr;
        kern_return_t kr;

        vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
        vmk_flags.vmkf_no_pmap_check = TRUE;

        map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
        kr = vm_map_enter(kernel_map,
            &map_addr,
            (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
            (vm_map_offset_t) 0,
            VM_FLAGS_FIXED,
            vmk_flags,
            VM_KERN_MEMORY_NONE,
            VM_OBJECT_NULL,
            (vm_object_offset_t) 0, FALSE,
            VM_PROT_NONE, VM_PROT_NONE,
            VM_INHERIT_DEFAULT);

        if (kr != KERN_SUCCESS) {
            panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
                (uint64_t) start, (uint64_t) end,
                (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
                (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
                kr);
        }
    }
#endif

    kmem_set_user_wire_limits();
}

/*
 *  Routine:    copyinmap
 *  Purpose:
 *      Like copyin, except that fromaddr is an address
 *      in the specified VM map.  This implementation
 *      is incomplete; it handles the current user map
 *      and the kernel map/submaps.
 */
kern_return_t
copyinmap(
    vm_map_t map,
    vm_map_offset_t fromaddr,
    void *todata,
    vm_size_t length)
{
    kern_return_t kr = KERN_SUCCESS;
    vm_map_t oldmap;

    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct copy */
        memcpy(todata, CAST_DOWN(void *, fromaddr), length);
    } else if (current_map() == map) {
        if (copyin(fromaddr, todata, length) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyin(fromaddr, todata, length) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}

/*
 *  Routine:    copyoutmap
 *  Purpose:
 *      Like copyout, except that toaddr is an address
 *      in the specified VM map.
 */
kern_return_t
copyoutmap(
    vm_map_t map,
    void *fromdata,
    vm_map_address_t toaddr,
    vm_size_t length)
{
    kern_return_t kr = KERN_SUCCESS;
    vm_map_t oldmap;

    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct copy */
        memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
    } else if (current_map() == map) {
        if (copyout(fromdata, toaddr, length) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyout(fromdata, toaddr, length) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}
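
/*
 * Illustrative usage (a minimal sketch; task_map and uaddr are
 * hypothetical names for some task's map and a user address in it):
 *
 *      uint32_t word;
 *
 *      if (copyinmap(task_map, uaddr, &word, sizeof(word)) == KERN_SUCCESS) {
 *          // write it back, e.g. after modification
 *          (void) copyoutmap(task_map, &word, uaddr, sizeof(word));
 *      }
 */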

/*
 *  Routine:    copyoutmap_atomic{32, 64}
 *  Purpose:
 *      Like copyoutmap, except that the operation is atomic.
 *      Takes in value rather than *fromdata pointer.
 */
kern_return_t
copyoutmap_atomic32(
    vm_map_t map,
    uint32_t value,
    vm_map_address_t toaddr)
{
    kern_return_t kr = KERN_SUCCESS;
    vm_map_t oldmap;

    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct toaddr */
        *(uint32_t *)toaddr = value;
    } else if (current_map() == map) {
        if (copyout_atomic32(value, toaddr) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyout_atomic32(value, toaddr) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}

kern_return_t
copyoutmap_atomic64(
    vm_map_t map,
    uint64_t value,
    vm_map_address_t toaddr)
{
    kern_return_t kr = KERN_SUCCESS;
    vm_map_t oldmap;

    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct toaddr */
        *(uint64_t *)toaddr = value;
    } else if (current_map() == map) {
        if (copyout_atomic64(value, toaddr) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyout_atomic64(value, toaddr) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}

/*
 *
 *  The following two functions are to be used when exposing kernel
 *  addresses to userspace via any of the various debug or info
 *  facilities that exist.  These are basically the same as VM_KERNEL_ADDRPERM()
 *  and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
 *  are exported to KEXTs.
 *
 *  NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
 */

static void
vm_kernel_addrhash_internal(
    vm_offset_t addr,
    vm_offset_t *hash_addr,
    uint64_t salt)
{
    assert(salt != 0);

    if (addr == 0) {
        *hash_addr = 0;
        return;
    }

    if (VM_KERNEL_IS_SLID(addr)) {
        *hash_addr = VM_KERNEL_UNSLIDE(addr);
        return;
    }

    vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
    SHA256_CTX sha_ctx;

    SHA256_Init(&sha_ctx);
    SHA256_Update(&sha_ctx, &salt, sizeof(salt));
    SHA256_Update(&sha_ctx, &addr, sizeof(addr));
    SHA256_Final(sha_digest, &sha_ctx);

    *hash_addr = sha_digest[0];
}

void
vm_kernel_addrhash_external(
    vm_offset_t addr,
    vm_offset_t *hash_addr)
{
    return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
}

vm_offset_t
vm_kernel_addrhash(vm_offset_t addr)
{
    vm_offset_t hash_addr;
    vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
    return hash_addr;
}
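
/*
 * Illustrative usage (a minimal sketch): when logging a heap pointer,
 * hash it first so the log never leaks the real kernel address:
 *
 *      printf("object at %p\n",
 *          (void *)vm_kernel_addrhash((vm_offset_t)obj));
 *
 * A slid text/data address would instead be reported unslid, per the
 * VM_KERNEL_IS_SLID() check above.
 */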

void
vm_kernel_addrhide(
    vm_offset_t addr,
    vm_offset_t *hide_addr)
{
    *hide_addr = VM_KERNEL_ADDRHIDE(addr);
}

/*
 *  vm_kernel_addrperm_external:
 *  vm_kernel_unslide_or_perm_external:
 *
 *  Use these macros when exposing an address to userspace that could come from
 *  either kernel text/data *or* the heap.
 */
void
vm_kernel_addrperm_external(
    vm_offset_t addr,
    vm_offset_t *perm_addr)
{
    if (VM_KERNEL_IS_SLID(addr)) {
        *perm_addr = VM_KERNEL_UNSLIDE(addr);
    } else if (VM_KERNEL_ADDRESS(addr)) {
        *perm_addr = addr + vm_kernel_addrperm_ext;
    } else {
        *perm_addr = addr;
    }
}

void
vm_kernel_unslide_or_perm_external(
    vm_offset_t addr,
    vm_offset_t *up_addr)
{
    vm_kernel_addrperm_external(addr, up_addr);
}

void
vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
{
    if (ptr & ((1ul << params.vmpp_shift) - 1)) {
        panic("pointer %p can't be packed: low %d bits aren't 0",
            (void *)ptr, params.vmpp_shift);
    } else if (ptr <= params.vmpp_base) {
        panic("pointer %p can't be packed: below base %p",
            (void *)ptr, (void *)params.vmpp_base);
    } else {
        panic("pointer %p can't be packed: maximum encodable pointer is %p",
            (void *)ptr, (void *)vm_packing_max_packable(params));
    }
}

void
vm_packing_verify_range(
    const char *subsystem,
    vm_offset_t min_address,
    vm_offset_t max_address,
    vm_packing_params_t params)
{
    if (min_address > max_address) {
        panic("%s: %s range invalid min:%p > max:%p",
            __func__, subsystem, (void *)min_address, (void *)max_address);
    }

    if (!params.vmpp_base_relative) {
        return;
    }

    if (min_address <= params.vmpp_base) {
        panic("%s: %s range invalid min:%p <= base:%p",
            __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
    }

    if (max_address > vm_packing_max_packable(params)) {
        panic("%s: %s range invalid max:%p >= max packable:%p",
            __func__, subsystem, (void *)max_address,
            (void *)vm_packing_max_packable(params));
    }
}