/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>

/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h. Basically, we attempt to answer the question: "was this code
 * compiled with an SDK from fall 2020 or later?"
 */
static bool
proc_2020_fall_os_sdk_or_later(void)
{
    const uint32_t proc_sdk_ver = proc_sdk(current_proc());

    switch (proc_platform(current_proc())) {
    case PLATFORM_MACOS:
        return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
    case PLATFORM_IOS:
    case PLATFORM_IOSSIMULATOR:
    case PLATFORM_MACCATALYST:
        return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
    case PLATFORM_BRIDGEOS:
        return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
    case PLATFORM_TVOS:
    case PLATFORM_TVOSSIMULATOR:
        return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
    case PLATFORM_WATCHOS:
    case PLATFORM_WATCHOSSIMULATOR:
        return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
    default:
        /*
         * Tough call, but let's give new platforms the benefit of the
         * doubt to avoid a recurrence of rdar://89843927.
         */
        return true;
    }
}
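
/*
 * For reference: these constants use dyld's packed version encoding,
 * (major << 16) | (minor << 8) | patch. A minimal sketch of that packing
 * (illustrative only; the macro name is hypothetical and not part of
 * this file):
 *
 *     #define PACKED_OS_VERSION(maj, min, rev) \
 *         ((uint32_t)(((maj) << 16) | ((min) << 8) | (rev)))
 *
 *     PACKED_OS_VERSION(10, 16, 0) == 0x000a1000   // macOS 10.16
 *     PACKED_OS_VERSION(14,  0, 0) == 0x000e0000   // iOS/tvOS 14.0
 */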

#if MACH_ASSERT
vnode_t fbdp_vp = NULL;
#endif /* MACH_ASSERT */

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
    /*
     * Map in special device (must be SHARED) or file
     */
    struct fileproc *fp;
    struct vnode *vp;
    int flags;
    int prot;
    int err = 0;
    vm_map_t user_map;
    kern_return_t result;
    vm_map_offset_t user_addr;
    vm_map_offset_t sum;
    vm_map_size_t user_size;
    vm_object_offset_t pageoff;
    vm_object_offset_t file_pos;
    vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    boolean_t docow;
    vm_prot_t maxprot;
    void *handle;
    memory_object_t pager = MEMORY_OBJECT_NULL;
    memory_object_control_t control;
    int mapanon = 0;
    int fpref = 0;
    int error = 0;
    int fd = uap->fd;
    int num_retries = 0;

    /*
     * Note that for UNIX03 conformance, there is additional parameter checking for
     * mmap() system call in libsyscall prior to entering the kernel.  The sanity
     * checks and argument validation done in this function are not the only places
     * one can get returned errnos.
     */

    user_map = current_map();
    user_addr = (vm_map_offset_t)uap->addr;
    user_size = (vm_map_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);
    AUDIT_ARG(fd, uap->fd);

    if (vm_map_range_overflows(user_addr, user_size)) {
        return EINVAL;
    }
    prot = (uap->prot & VM_PROT_ALL);
#if 3777787
    /*
     * Since the hardware currently does not support writing without
     * read-before-write, or execution-without-read, if the request is
     * for write or execute access, we must imply read access as well;
     * otherwise programs expecting this to work will fail to operate.
     */
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
    }
#endif  /* radar 3777787 */
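
    /*
     * Illustrative consequence of the above (userspace sketch, not
     * compiled here): a mapping requested with PROT_WRITE alone behaves
     * as if PROT_READ | PROT_WRITE had been requested, so this does not
     * fault:
     *
     *     char *q = mmap(NULL, 4096, PROT_WRITE,
     *         MAP_ANON | MAP_PRIVATE, -1, 0);
     *     q[0] = 1;
     *     char c = q[0];   // read succeeds: read access was implied
     */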

    flags = uap->flags;
    vp = NULLVP;

    /*
     * verify no unknown flags are passed in, and if any are,
     * fail out early to make sure the logic below never has to deal
     * with invalid flag values. only do so for processes compiled
     * with Fall 2020 or later SDK, which is where we drew this
     * line and documented it as such.
     */
    if (flags & ~(MAP_SHARED |
        MAP_PRIVATE |
        MAP_COPY |
        MAP_FIXED |
        MAP_RENAME |
        MAP_NORESERVE |
        MAP_RESERVED0080 |           // grandfathered in as accepted and ignored
        MAP_NOEXTEND |
        MAP_HASSEMAPHORE |
        MAP_NOCACHE |
        MAP_JIT |
        MAP_TPRO |
        MAP_FILE |
        MAP_ANON |
        MAP_RESILIENT_CODESIGN |
        MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
        MAP_32BIT |
#endif
        MAP_TRANSLATED_ALLOW_EXECUTE |
        MAP_UNIX03)) {
        if (proc_2020_fall_os_sdk_or_later()) {
            return EINVAL;
        }
    }
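
    /*
     * Userspace sketch of the rule above (assumes a binary built with a
     * fall-2020 or later SDK; the stray bit value is purely illustrative):
     *
     *     void *q = mmap(NULL, 4096, PROT_READ,
     *         MAP_ANON | MAP_PRIVATE | 0x08000000, -1, 0);
     *     // q == MAP_FAILED, errno == EINVAL
     *
     * Older binaries fall through and the unknown bit is ignored.
     */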


    /*
     * The VM code does not have prototypes, and the compiler doesn't do
     * the right thing when you cast a 64-bit value and pass it in a
     * function call. So here it is.
     */
    file_pos = (vm_object_offset_t)uap->pos;


    /* make sure mapping fits into numeric range etc */
    if (os_add3_overflow(file_pos, user_size, vm_map_page_size(user_map) - 1, &sum)) {
        return EINVAL;
    }

    if (flags & MAP_UNIX03) {
        vm_map_offset_t offset_alignment_mask;

        /*
         * Enforce UNIX03 compliance.
         */

        if (vm_map_is_exotic(current_map())) {
            offset_alignment_mask = 0xFFF;
        } else {
            offset_alignment_mask = vm_map_page_mask(current_map());
        }
        if (file_pos & offset_alignment_mask) {
            /* file offset should be page-aligned */
            return EINVAL;
        }
        if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
            /* need either MAP_PRIVATE or MAP_SHARED */
            return EINVAL;
        }
        if (user_size == 0) {
            /* mapping length should not be 0 */
            return EINVAL;
        }
    }

    /*
     * Align the file position to a page boundary,
     * and save its page offset component.
     */
    pageoff = (file_pos & vm_map_page_mask(user_map));
    file_pos -= (vm_object_offset_t)pageoff;


    /* Adjust size for rounding (on both ends). */
    user_size += pageoff;               /* low end... */
    user_size = vm_map_round_page(user_size,
        vm_map_page_mask(user_map));    /* hi end */

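    /*
     * Worked example (4K pages): file_pos = 0x12345 yields
     * pageoff = 0x345 and file_pos = 0x12000; a requested length of
     * 0x1000 then becomes user_size = 0x1345, rounded up to 0x2000, so
     * the mapping covers every page touched by the original byte range.
     */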

    if (flags & MAP_JIT) {
        if ((flags & MAP_FIXED) ||
            (flags & MAP_SHARED) ||
            !(flags & MAP_ANON) ||
            (flags & MAP_RESILIENT_CODESIGN) ||
            (flags & MAP_RESILIENT_MEDIA) ||
            (flags & MAP_TPRO)) {
            return EINVAL;
        }
    }
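
    /*
     * The only accepted shape for MAP_JIT, as a userspace sketch (on
     * hardened runtimes the process also needs the appropriate JIT
     * entitlement, which is outside the scope of this check):
     *
     *     void *jit = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
     *         MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
     *
     * Combining MAP_JIT with MAP_FIXED, MAP_SHARED, a file mapping,
     * either resilient flag, or MAP_TPRO fails with EINVAL above.
     */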

    if ((flags & MAP_RESILIENT_CODESIGN) ||
        (flags & MAP_RESILIENT_MEDIA)) {
        if ((flags & MAP_ANON) ||
            (flags & MAP_JIT) ||
            (flags & MAP_TPRO)) {
            return EINVAL;
        }
    }
    if (flags & MAP_RESILIENT_CODESIGN) {
        int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
        if (prot & reject_prot) {
            /*
             * Quick sanity check. maxprot is calculated below and
             * we will test it again.
             */
            return EPERM;
        }
    }
    if (flags & MAP_SHARED) {
        /*
         * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
         * there is no place to inject zero-filled pages without
         * actually adding them to the file.
         * Since we didn't reject that combination before, there might
         * already be callers using it and getting a valid MAP_SHARED
         * mapping but without the resilience.
         * For backwards compatibility's sake, let's keep ignoring
         * MAP_RESILIENT_MEDIA in that case.
         */
        flags &= ~MAP_RESILIENT_MEDIA;
    }
    if (flags & MAP_RESILIENT_MEDIA) {
        if ((flags & MAP_ANON) ||
            (flags & MAP_SHARED)) {
            return EINVAL;
        }
    }
    if (flags & MAP_TPRO) {
        if ((prot & VM_PROT_EXECUTE) ||
            !(prot & VM_PROT_WRITE) ||
            (flags & MAP_SHARED) ||
            !(flags & MAP_ANON)) {
            return EPERM;
        }
    }

    /*
     * Check for illegal addresses.  Watch out for address wrap... Note
     * that VM_*_ADDRESS are not constants due to casts (argh).
     */
    if (flags & MAP_FIXED) {
        /*
         * The specified address must have the same remainder
         * as the file offset taken modulo PAGE_SIZE, so it
         * should be aligned after adjustment by pageoff.
         */
        user_addr -= pageoff;
        if (user_addr & vm_map_page_mask(user_map)) {
            return EINVAL;
        }
    }
#ifdef notyet
    /* We do not have APIs to get this info; need to wait until then. */
    /*
     * XXX for non-fixed mappings where no hint is provided or
     * the hint would fall in the potential heap space,
     * place it after the end of the largest possible heap.
     *
     * There should really be a pmap call to determine a reasonable
     * location.
     */
    else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
        vm_map_page_mask(user_map))) {
        addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
            vm_map_page_mask(user_map));
    }

#endif

#if CONFIG_MAP_RANGES
    /* default to placing mappings in the heap range. */
    vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#endif /* CONFIG_MAP_RANGES */


    if (flags & MAP_ANON) {
        maxprot = VM_PROT_ALL;
#if CONFIG_MACF
        /*
         * Entitlement check.
         */
        error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
        if (error) {
            return EINVAL;
        }
#endif /* MAC */

        /*
         * Mapping blank space is trivial.  Use positive fds as the alias
         * value for memory tracking.
         */
        if (fd != -1) {
            /*
             * Use "fd" to pass (some) Mach VM allocation flags,
             * (see the VM_FLAGS_* definitions).
             */
            int vm_flags = fd & (VM_FLAGS_ALIAS_MASK |
                VM_FLAGS_SUPERPAGE_MASK |
                VM_FLAGS_PURGABLE |
                VM_FLAGS_4GB_CHUNK);

            if (vm_flags != fd) {
                /* reject if there are any extra flags */
                return EINVAL;
            }

            /*
             * vm_map_kernel_flags_set_vmflags() will assume that
             * the full set of VM flags are passed, which is
             * problematic for FIXED/ANYWHERE.
             *
             * The block handling MAP_FIXED below will do the same
             * thing again which is fine because it's idempotent.
             */
            if (flags & MAP_FIXED) {
                vm_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
            } else {
                vm_flags |= VM_FLAGS_ANYWHERE;
            }
            vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags);
            if (vm_flags & VM_FLAGS_ALIAS_MASK) {
                /*
                 * if the client specified a tag,
                 * let the system policy apply.
                 */
                vmk_flags.vmkf_range_id = UMEM_RANGE_ID_DEFAULT;
                vm_map_kernel_flags_update_range_id(&vmk_flags, user_map);
            }
        }
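
        /*
         * Userspace sketch of passing Mach VM flags through "fd" (the
         * tag constant is just an example; VM_MAKE_TAG and
         * VM_FLAGS_PURGABLE come from <mach/vm_statistics.h>):
         *
         *     void *q = mmap(NULL, size, PROT_READ | PROT_WRITE,
         *         MAP_ANON | MAP_PRIVATE,
         *         VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1) | VM_FLAGS_PURGABLE,
         *         0);
         *
         * Any bit outside the alias/superpage/purgable/4GB-chunk set
         * above is rejected with EINVAL.
         */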

        handle = NULL;
        file_pos = 0;
        pageoff = 0;
        mapanon = 1;
    } else {
        struct vnode_attr va;
        vfs_context_t ctx = vfs_context_current();

        if (flags & MAP_JIT) {
            return EINVAL;
        }

        /*
         * Mapping file, get fp for validation. Obtain vnode and make
         * sure it is of appropriate type.
         */
        err = fp_lookup(p, fd, &fp, 0);
        if (err) {
            return err;
        }
        fpref = 1;
        switch (FILEGLOB_DTYPE(fp->fp_glob)) {
        case DTYPE_PSXSHM:
            uap->addr = (user_addr_t)user_addr;
            uap->len = (user_size_t)user_size;
            uap->prot = prot;
            uap->flags = flags;
            uap->pos = file_pos;
            error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
            goto bad;
        case DTYPE_VNODE:
            break;
        default:
            error = EINVAL;
            goto bad;
        }
        vp = (struct vnode *)fp_get_data(fp);
        error = vnode_getwithref(vp);
        if (error != 0) {
            goto bad;
        }

        if (vp->v_type != VREG && vp->v_type != VCHR) {
            (void)vnode_put(vp);
            error = EINVAL;
            goto bad;
        }

        AUDIT_ARG(vnpath, vp, ARG_VNODE1);

        /*
         * POSIX: mmap needs to update access time for mapped files
         */
        if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
            VATTR_INIT(&va);
            nanotime(&va.va_access_time);
            VATTR_SET_ACTIVE(&va, va_access_time);
            vnode_setattr(vp, &va, ctx);
        }

        /*
         * XXX hack to handle use of /dev/zero to map anon memory (a la
         * SunOS).
         */
        if (vp->v_type == VCHR || vp->v_type == VSTR) {
            (void)vnode_put(vp);
            error = ENODEV;
            goto bad;
        } else {
            /*
             * Ensure that file and memory protections are
             * compatible.  Note that we only worry about
             * writability if mapping is shared; in this case,
             * current and max prot are dictated by the open file.
             * XXX use the vnode instead?  Problem is: what
             * credentials do we use for determination? What if
             * proc does a setuid?
             */
            maxprot = VM_PROT_EXECUTE;  /* TODO: Remove this and restrict maxprot? */
            if (fp->fp_glob->fg_flag & FREAD) {
                maxprot |= VM_PROT_READ;
            } else if (prot & PROT_READ) {
                (void)vnode_put(vp);
                error = EACCES;
                goto bad;
            }
            /*
             * If we are sharing potential changes (either via
             * MAP_SHARED or via the implicit sharing of character
             * device mappings), and we are trying to get write
             * permission although we opened it without asking
             * for it, bail out.
             */

            if ((flags & MAP_SHARED) != 0) {
                if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
                    /*
                     * Do not allow writable mappings of
                     * swap files (see vm_swapfile_pager.c).
                     */
                    !vnode_isswap(vp)) {
                    /*
                     * check for write access
                     *
                     * Note that we already made this check when granting FWRITE
                     * against the file, so it seems redundant here.
                     */
                    error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

                    /* if not granted for any reason, but we wanted it, bad */
                    if ((prot & PROT_WRITE) && (error != 0)) {
                        vnode_put(vp);
                        goto bad;
                    }

                    /* if writable, remember */
                    if (error == 0) {
                        maxprot |= VM_PROT_WRITE;
                    }
                } else if ((prot & PROT_WRITE) != 0) {
                    (void)vnode_put(vp);
                    error = EACCES;
                    goto bad;
                }
            } else {
                maxprot |= VM_PROT_WRITE;
            }
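
            /*
             * Net effect of the checks above, as a userspace sketch
             * (file name hypothetical): a file opened O_RDONLY can be
             * mapped MAP_SHARED only without write access:
             *
             *     int fd = open("data.bin", O_RDONLY);
             *     void *ro = mmap(NULL, len, PROT_READ,
             *         MAP_SHARED, fd, 0);   // OK
             *     void *rw = mmap(NULL, len, PROT_READ | PROT_WRITE,
             *         MAP_SHARED, fd, 0);   // MAP_FAILED, errno == EACCES
             */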

            handle = (void *)vp;
#if CONFIG_MACF
            error = mac_file_check_mmap(vfs_context_ucred(ctx),
                fp->fp_glob, prot, flags, file_pos + pageoff,
                &maxprot);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
#endif /* MAC */
            /*
             * Consult the file system to determine if this
             * particular file object can be mapped.
             *
             * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
             * then we don't check for writability on the file
             * object, because it will only ever see reads.
             */
            error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
                (prot & ~PROT_WRITE) : prot, ctx);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
        }

        /*
         * No copy-on-read for mmap() mappings themselves.
         */
        vmk_flags.vmkf_no_copy_on_read = 1;
    }

    if (user_size == 0) {
        if (!mapanon) {
            (void)vnode_put(vp);
        }
        error = 0;
        goto bad;
    }

    /*
     * We bend a little - round the start and end addresses
     * to the nearest page boundary.
     */
    user_size = vm_map_round_page(user_size,
        vm_map_page_mask(user_map));

    if (file_pos & vm_map_page_mask(user_map)) {
        if (!mapanon) {
            (void)vnode_put(vp);
        }
        error = EINVAL;
        goto bad;
    }

    if ((flags & MAP_FIXED) == 0) {
        user_addr = vm_map_round_page(user_addr,
            vm_map_page_mask(user_map));
    } else {
        if (user_addr != vm_map_trunc_page(user_addr,
            vm_map_page_mask(user_map))) {
            if (!mapanon) {
                (void)vnode_put(vp);
            }
            error = EINVAL;
            goto bad;
        }
        /*
         * mmap(MAP_FIXED) will replace any existing mappings in the
         * specified range, if the new mapping is successful.
         * If we just deallocate the specified address range here,
         * another thread might jump in and allocate memory in that
         * range before we get a chance to establish the new mapping,
         * and we won't have a chance to restore the old mappings.
         * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
         * has to deallocate the existing mappings and establish the
         * new ones atomically.
         */
        vmk_flags.vmf_fixed = true;
        vmk_flags.vmf_overwrite = true;
    }
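
    /*
     * MAP_FIXED consequence, as a userspace sketch: mapping over an
     * existing region atomically replaces it rather than failing:
     *
     *     void *a = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
     *         MAP_ANON | MAP_PRIVATE, -1, 0);
     *     void *b = mmap(a, PAGE_SIZE, PROT_READ,
     *         MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
     *     // b == a; the old first page was replaced with no window in
     *     // which the range was unmapped.
     */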

    if (flags & MAP_NOCACHE) {
        vmk_flags.vmf_no_cache = true;
    }

    if (flags & MAP_JIT) {
        vmk_flags.vmkf_map_jit = TRUE;
    }

    if (flags & MAP_TPRO) {
        vmk_flags.vmf_tpro = true;
    }

#if CONFIG_ROSETTA
    if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
        if (!proc_is_translated(p)) {
            if (!mapanon) {
                (void)vnode_put(vp);
            }
            error = EINVAL;
            goto bad;
        }
        vmk_flags.vmkf_translated_allow_execute = TRUE;
    }
#endif

    if (flags & MAP_RESILIENT_CODESIGN) {
        vmk_flags.vmf_resilient_codesign = true;
    }
    if (flags & MAP_RESILIENT_MEDIA) {
        vmk_flags.vmf_resilient_media = true;
    }

#if XNU_TARGET_OS_OSX
    /* macOS-specific MAP_32BIT flag handling */
    if (flags & MAP_32BIT) {
        vmk_flags.vmkf_32bit_map_va = TRUE;
    }
#endif

    /*
     * Lookup/allocate object.
     */
    if (handle == NULL) {
        control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        }
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;
        }
#endif
#endif

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        }
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
        }
#endif  /* radar 3777787 */
map_anon_retry:

        result = vm_map_enter_mem_object(user_map,
            &user_addr, user_size,
            0, vmk_flags,
            IPC_PORT_NULL, 0, FALSE,
            prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        /*
         * If a non-binding address was specified for this anonymous
         * mapping, retry the mapping with a zero base
         * in the event the mapping operation failed due to
         * lack of space between the address and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);
            goto map_anon_retry;
        }
    } else {
        if (vnode_isswap(vp)) {
            /*
             * Map swap files with a special pager
             * that returns obfuscated contents.
             */
            control = NULL;
            pager = swapfile_pager_setup(vp);
            if (pager != MEMORY_OBJECT_NULL) {
                control = swapfile_pager_control(pager);
            }
        } else {
            control = ubc_getobject(vp, UBC_FLAGS_NONE);
        }

        if (control == NULL) {
            (void)vnode_put(vp);
            error = ENOMEM;
            goto bad;
        }

#if MACH_ASSERT
#define FBDP_PATH_NAME "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_FILE_NAME "icutz44l.dat"
        if (fbdp_vp == NULL &&
            !strncmp(vp->v_name, FBDP_FILE_NAME, strlen(FBDP_FILE_NAME))) {
            char *path;
            int len;
            len = MAXPATHLEN;
            path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
            vn_getpath(vp, path, &len);
            if (!strncmp(path, FBDP_PATH_NAME, strlen(FBDP_PATH_NAME))) {
                fbdp_vp = vp;
                memory_object_mark_for_fbdp(control);
                printf("FBDP %s:%d marked vp %p \"%s\" moc %p as 'fbdp'\n", __FUNCTION__, __LINE__, vp, path, control);
            }
            zfree(ZV_NAMEI, path);
        }
#endif /* MACH_ASSERT */

        /*
         * Set credentials:
         * FIXME: if we're writing the file we need a way to
         * ensure that someone doesn't replace our R/W creds
         * with ones that only work for read.
         */

        ubc_setthreadcred(vp, p, current_thread());
        docow = FALSE;
        if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
            docow = TRUE;
        }

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        }
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;
        }
#endif
#endif /* notyet */

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        }
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
        }
#endif  /* radar 3777787 */

map_file_retry:
        if (flags & MAP_RESILIENT_CODESIGN) {
            int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
            if (prot & reject_prot) {
                /*
                 * Would like to use (prot | maxprot) here
                 * but the assignment of VM_PROT_EXECUTE
                 * to maxprot above would always fail the test.
                 *
                 * Skipping the check is ok, however, because we
                 * restrict maxprot to prot just below in this
                 * block.
                 */
                assert(!mapanon);
                vnode_put(vp);
                error = EPERM;
                goto bad;
            }
            /* strictly limit access to "prot" */
            maxprot &= prot;
        }

        vm_object_offset_t end_pos = 0;
        if (os_add_overflow(user_size, file_pos, &end_pos)) {
            vnode_put(vp);
            error = EINVAL;
            goto bad;
        }

        result = vm_map_enter_mem_object_control(user_map,
            &user_addr, user_size,
            0, vmk_flags,
            control, file_pos,
            docow, prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        /*
         * If a non-binding address was specified for this file-backed
         * mapping, retry the mapping with a zero base
         * in the event the mapping operation failed due to
         * lack of space between the address and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);
            goto map_file_retry;
        }
    }

    if (!mapanon) {
        (void)vnode_put(vp);
    }

    switch (result) {
    case KERN_SUCCESS:
        *retval = user_addr + pageoff;
        error = 0;
        break;
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        error = ENOMEM;
        break;
    case KERN_PROTECTION_FAILURE:
        error = EACCES;
        break;
    default:
        error = EINVAL;
        break;
    }
bad:
    if (pager != MEMORY_OBJECT_NULL) {
        /*
         * Release the reference on the pager.
         * If the mapping was successful, it now holds
         * an extra reference.
         */
        memory_object_deallocate(pager);
    }
    if (fpref) {
        fp_drop(p, fd, fp, 0);
    }

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
        (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
    return error;
}

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    int flags;
    vm_map_t user_map;
    int rv;
    vm_sync_t sync_flags = 0;

    user_map = current_map();
    addr = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
    if (mach_vm_range_overflows(addr, size)) {
        return EINVAL;
    }
    if (addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }
    if (size == 0) {
        /*
         * We cannot support this properly without maintaining a list
         * of all the mmaps done. We cannot use vm_map_entry, as
         * entries could be split or coalesced by independent actions.
         * So instead of returning inaccurate results, just return an
         * error for an invalid size.
         */
        return EINVAL; /* XXX breaks posix apps */
    }

    flags = uap->flags;
    /* disallow contradictory flags */
    if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
        return EINVAL;
    }

    if (flags & MS_KILLPAGES) {
        sync_flags |= VM_SYNC_KILLPAGES;
    }
    if (flags & MS_DEACTIVATE) {
        sync_flags |= VM_SYNC_DEACTIVATE;
    }
    if (flags & MS_INVALIDATE) {
        sync_flags |= VM_SYNC_INVALIDATE;
    }

    if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
        if (flags & MS_ASYNC) {
            sync_flags |= VM_SYNC_ASYNCHRONOUS;
        } else {
            sync_flags |= VM_SYNC_SYNCHRONOUS;
        }
    }

    sync_flags |= VM_SYNC_CONTIGUOUS;   /* complain if holes */
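
    /*
     * Typical usage, as a userspace sketch: flush a writable shared
     * file mapping to disk and wait for completion:
     *
     *     char *q = mmap(NULL, len, PROT_READ | PROT_WRITE,
     *         MAP_SHARED, fd, 0);
     *     q[0] = 1;
     *     if (msync(q, len, MS_SYNC) == -1)
     *         err(1, "msync");
     *
     * MS_SYNC | MS_ASYNC together, or a zero length, fails with EINVAL
     * per the checks above.
     */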

    rv = mach_vm_msync(user_map, addr, size, sync_flags);

    switch (rv) {
    case KERN_SUCCESS:
        break;
    case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
        return ENOMEM;
    case KERN_FAILURE:
        return EIO;
    default:
        return EINVAL;
    }
    return 0;
}


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);

    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

    if (mach_vm_range_overflows(user_addr, user_size)) {
        return EINVAL;
    }

    if (user_size == 0) {
        /* UNIX SPEC: size is 0, return EINVAL */
        return EINVAL;
    }

    result = mach_vm_deallocate(user_map, user_addr, user_size);
    if (result != KERN_SUCCESS) {
        return EINVAL;
    }
    return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
    vm_prot_t prot;
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;
#if CONFIG_MACF
    int error;
#endif

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->prot);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;
    prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

    if (mach_vm_range_overflows(user_addr, user_size)) {
        return EINVAL;
    }
    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
    if (prot & VM_PROT_READ) {
        prot |= VM_PROT_EXECUTE;
    }
#endif
#endif /* notyet */

#if 3936456
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
    }
#endif /* 3936456 */

#if CONFIG_MACF
    /*
     * The MAC check for mprotect is of limited use for two reasons:
     * Without mmap revocation, the caller could have asked for the max
     * protections initially instead of a reduced set, so a mprotect
     * check would offer no new security.
     * It is not possible to extract the vnode from the pager object(s)
     * of the target memory range.
     * However, the MAC check may be used to prevent a process from,
     * e.g., making the stack executable.
     */
    error = mac_proc_check_mprotect(p, user_addr,
        user_size, prot);
    if (error) {
        return error;
    }
#endif

    if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
        /* CODE SIGNING ENFORCEMENT - JIT support */
        /*
         * The special protection value VM_PROT_TRUSTED requests that we
         * treat this page as if it had a valid code signature.
         * If this is enabled, there MUST be a MAC policy implementing
         * the mac_proc_check_mprotect() hook above. Otherwise, code
         * signing will be compromised, because the check would always
         * succeed and thus any process could sign code dynamically.
         */
        result = vm_map_sign(
            user_map,
            vm_map_trunc_page(user_addr,
            vm_map_page_mask(user_map)),
            vm_map_round_page(user_addr + user_size,
            vm_map_page_mask(user_map)));
        switch (result) {
        case KERN_SUCCESS:
            break;
        case KERN_INVALID_ADDRESS:
            /* UNIX SPEC: for an invalid address range, return ENOMEM */
            return ENOMEM;
        default:
            return EINVAL;
        }
#else
        return ENOTSUP;
#endif
    }
    prot &= ~VM_PROT_TRUSTED;

    result = mach_vm_protect(user_map, user_addr, user_size,
        FALSE, prot);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_PROTECTION_FAILURE:
        return EACCES;
    case KERN_INVALID_ADDRESS:
        /* UNIX SPEC: for an invalid address range, return ENOMEM */
        return ENOMEM;
    }
    return EINVAL;
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    vm_inherit_t inherit;
    vm_map_t user_map;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->inherit);

    addr = (mach_vm_offset_t)uap->addr;
    size = (mach_vm_size_t)uap->len;
    inherit = uap->inherit;
    if (mach_vm_range_overflows(addr, size)) {
        return EINVAL;
    }
    user_map = current_map();
    result = mach_vm_inherit(user_map, addr, size,
        inherit);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_PROTECTION_FAILURE:
        return EACCES;
    }
    return EINVAL;
}
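
/*
 * Userspace sketch (VM_INHERIT_SHARE comes from <mach/vm_inherit.h>):
 * mark a region so that a forked child shares it with the parent
 * instead of receiving a copy-on-write copy:
 *
 *     minherit(q, len, VM_INHERIT_SHARE);
 *     if (fork() == 0) {
 *         // writes by the child are now visible to the parent
 *     }
 */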

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
    vm_map_t user_map;
    mach_vm_offset_t start;
    mach_vm_size_t size;
    vm_behavior_t new_behavior;
    kern_return_t result;

    /*
     * Since this routine is only advisory, we default to conservative
     * behavior.
     */
    switch (uap->behav) {
    case MADV_RANDOM:
        new_behavior = VM_BEHAVIOR_RANDOM;
        break;
    case MADV_SEQUENTIAL:
        new_behavior = VM_BEHAVIOR_SEQUENTIAL;
        break;
    case MADV_NORMAL:
        new_behavior = VM_BEHAVIOR_DEFAULT;
        break;
    case MADV_WILLNEED:
        new_behavior = VM_BEHAVIOR_WILLNEED;
        break;
    case MADV_DONTNEED:
        new_behavior = VM_BEHAVIOR_DONTNEED;
        break;
    case MADV_FREE:
        new_behavior = VM_BEHAVIOR_FREE;
        break;
    case MADV_ZERO_WIRED_PAGES:
        new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
        break;
    case MADV_FREE_REUSABLE:
        new_behavior = VM_BEHAVIOR_REUSABLE;
        break;
    case MADV_FREE_REUSE:
        new_behavior = VM_BEHAVIOR_REUSE;
        break;
    case MADV_CAN_REUSE:
        new_behavior = VM_BEHAVIOR_CAN_REUSE;
        break;
    case MADV_PAGEOUT:
#if MACH_ASSERT
        new_behavior = VM_BEHAVIOR_PAGEOUT;
        break;
#else /* MACH_ASSERT */
        return ENOTSUP;
#endif /* MACH_ASSERT */
    default:
        return EINVAL;
    }
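
    /*
     * Userspace sketch of the reuse protocol above: return clean
     * anonymous pages to the system while keeping the range reserved,
     * then reclaim the range before touching the memory again:
     *
     *     madvise(q, len, MADV_FREE_REUSABLE);  // pages may be emptied
     *     ...
     *     madvise(q, len, MADV_FREE_REUSE);     // about to reuse them
     */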

    start = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
    if (mach_vm_range_overflows(start, size)) {
        return EINVAL;
    }
#if __arm64__
    if (start == 0 &&
        size != 0 &&
        (uap->behav == MADV_FREE ||
        uap->behav == MADV_FREE_REUSABLE)) {
        printf("** FOURK_COMPAT: %d[%s] "
            "failing madvise(0x%llx,0x%llx,%s)\n",
            proc_getpid(p), p->p_comm, start, size,
            ((uap->behav == MADV_FREE_REUSABLE)
            ? "MADV_FREE_REUSABLE"
            : "MADV_FREE"));
        DTRACE_VM3(fourk_compat_madvise,
            uint64_t, start,
            uint64_t, size,
            int, uap->behav);
        return EINVAL;
    }
#endif /* __arm64__ */

    user_map = current_map();

    result = mach_vm_behavior_set(user_map, start, size, new_behavior);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_INVALID_ADDRESS:
        return EINVAL;
    case KERN_NO_SPACE:
        return ENOMEM;
    }

    return EINVAL;
}

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
    vm_map_t map = VM_MAP_NULL;
    user_addr_t vec = 0;
    int error = 0;
    int64_t lastvecindex = 0;
    int mincoreinfo = 0;
    int pqueryinfo = 0;
    uint64_t pqueryinfo_vec_size = 0;
    vm_page_info_basic_t info = NULL;
    mach_msg_type_number_t count = 0;
    char *kernel_vec = NULL;
    uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
    kern_return_t kr = KERN_SUCCESS;
    int effective_page_shift, effective_page_size;

    map = current_map();

    /*
     * On systems with 4k kernel space and 16k user space, we will
     * use the kernel page size to report back the residency information.
     * This is for backwards compatibility since we already have
     * processes that depend on this behavior.
     */
    if (vm_map_page_shift(map) < PAGE_SHIFT) {
        effective_page_shift = vm_map_page_shift(map);
        effective_page_size = vm_map_page_size(map);
    } else {
        effective_page_shift = PAGE_SHIFT;
        effective_page_size = PAGE_SIZE;
    }

    /*
     * Make sure that the addresses presented are valid for user
     * mode.
     */
    first_addr = addr = vm_map_trunc_page(uap->addr,
        vm_map_page_mask(map));
    end = vm_map_round_page(uap->addr + uap->len,
        vm_map_page_mask(map));

    if (end < addr) {
        return EINVAL;
    }

    if (end == addr) {
        return 0;
    }

    /*
     * We are going to loop through the whole 'req_vec_size' pages
     * range in chunks of 'cur_vec_size'.
     */

    req_vec_size_pages = (end - addr) >> effective_page_shift;
    cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
    size_t kernel_vec_size = cur_vec_size_pages;

    kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

    if (kernel_vec == NULL) {
        return ENOMEM;
    }

    /*
     * Address of byte vector
     */
    vec = uap->vec;

    pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

    info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

    if (info == NULL) {
        kfree_data(kernel_vec, kernel_vec_size);
        return ENOMEM;
    }

    while (addr < end) {
        cur_end = addr + (cur_vec_size_pages * effective_page_size);

        count = VM_PAGE_INFO_BASIC_COUNT;
        kr = vm_map_page_range_info_internal(map,
            addr,
            cur_end,
            effective_page_shift,
            VM_PAGE_INFO_BASIC,
            (vm_page_info_t) info,
            &count);

        assert(kr == KERN_SUCCESS);

        /*
         * Do this on a map entry basis so that if the pages are not
         * in the current processes address space, we can easily look
         * up the pages elsewhere.
         */
        lastvecindex = -1;

        for (; addr < cur_end; addr += effective_page_size) {
            pqueryinfo = info[lastvecindex + 1].disposition;

            mincoreinfo = 0;

            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
                mincoreinfo |= MINCORE_INCORE;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
                mincoreinfo |= MINCORE_REFERENCED;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
                mincoreinfo |= MINCORE_MODIFIED;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
                mincoreinfo |= MINCORE_PAGED_OUT;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
                mincoreinfo |= MINCORE_COPIED;
            }
            if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
                mincoreinfo |= MINCORE_ANONYMOUS;
            }
            /*
             * calculate index into user supplied byte vector
             */
            vecindex = (addr - first_addr) >> effective_page_shift;
            kernel_vec[vecindex] = (char)mincoreinfo;
            lastvecindex = vecindex;
        }


        assert(vecindex == (cur_vec_size_pages - 1));

        error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

        if (error) {
            break;
        }

        /*
         * For the next chunk, we'll need:
         * - to bump the location in the user buffer for our next disposition,
         * - the new length,
         * - the new starting address.
         */
        vec += cur_vec_size_pages * sizeof(char);
        req_vec_size_pages = (end - addr) >> effective_page_shift;
        cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

        first_addr = addr;
    }

    kfree_data(info, pqueryinfo_vec_size);
    kfree_data(kernel_vec, kernel_vec_size);

    if (error) {
        return EFAULT;
    }

    return 0;
}
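
/*
 * Userspace sketch of the interface above: one status byte per page:
 *
 *     size_t pages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
 *     char *vec = malloc(pages);
 *     if (mincore(addr, len, vec) == 0 && (vec[0] & MINCORE_INCORE))
 *         printf("first page is resident\n");
 */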

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retval)
{
    vm_map_t user_map;
    vm_map_offset_t addr;
    vm_map_size_t size, pageoff;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    addr = (vm_map_offset_t) uap->addr;
    size = (vm_map_size_t)uap->len;

    if (vm_map_range_overflows(addr, size)) {
        return EINVAL;
    }

    if (size == 0) {
        return 0;
    }

    user_map = current_map();
    pageoff = (addr & vm_map_page_mask(user_map));
    addr -= pageoff;
    size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

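    /*
     * Worked example (4K pages): addr = 0x5234, len = 0x1000 gives
     * pageoff = 0x234, addr = 0x5000, and size = 0x1234 rounded up to
     * 0x2000, so both pages touched by the original range get wired.
     */
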
    /* have to call vm_map_wire directly to pass "I don't know" protections */
    result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);

    if (result == KERN_RESOURCE_SHORTAGE) {
        return EAGAIN;
    } else if (result == KERN_PROTECTION_FAILURE) {
        return EACCES;
    } else if (result != KERN_SUCCESS) {
        return ENOMEM;
    }

    return 0; /* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    vm_map_t user_map;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    addr = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t)uap->len;
    user_map = current_map();
    if (mach_vm_range_overflows(addr, size)) {
        return EINVAL;
    }
    /* JMM - need to remove all wirings by spec - this just removes one */
    result = mach_vm_wire_kernel(user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
    return result == KERN_SUCCESS ? 0 : ENOMEM;
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
    return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
    return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;
    uint32_t cryptid;
    cpu_type_t cputype;
    cpu_subtype_t cpusubtype;
    pager_crypt_info_t crypt_info;
    const char *cryptname = 0;
    char *vpath;
    int len, ret;
    struct proc_regioninfo_internal pinfo;
    vnode_t vp;
    uintptr_t vnodeaddr;
    uint32_t vid;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    cryptid = uap->cryptid;
    cputype = uap->cputype;
    cpusubtype = uap->cpusubtype;

    if (mach_vm_range_overflows(user_addr, user_size)) {
        return EINVAL;
    }
    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

    switch (cryptid) {
    case CRYPTID_NO_ENCRYPTION:
        /* not encrypted, just an empty load command */
        return 0;
    case CRYPTID_APP_ENCRYPTION:
    case CRYPTID_MODEL_ENCRYPTION:
        cryptname = "com.apple.unfree";
        break;
    case 0x10:
        /*
         * A random cryptid that you could manually put into
         * your binary if you want the null crypter.
         */
        cryptname = "com.apple.null";
        break;
    default:
        return EINVAL;
    }

    if (NULL == text_crypter_create) {
        return ENOTSUP;
    }

    ret = fill_procregioninfo_onlymappedvnodes(proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
    if (ret == 0 || !vnodeaddr) {
        /* No really, this returns 0 if the memory address is not backed by a file */
        return EINVAL;
    }

    vp = (vnode_t)vnodeaddr;
    if ((vnode_getwithvid(vp, vid)) == 0) {
        vpath = zalloc(ZV_NAMEI);

        len = MAXPATHLEN;
        ret = vn_getpath(vp, vpath, &len);
        if (ret) {
            zfree(ZV_NAMEI, vpath);
            vnode_put(vp);
            return ret;
        }

        vnode_put(vp);
    } else {
        return EINVAL;
    }

#if 0
    kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
        __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

    if (user_size == 0) {
        printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
        zfree(ZV_NAMEI, vpath);
        return 0;
    }

    /* set up decrypter first */
    crypt_file_data_t crypt_data = {
        .filename = vpath,
        .cputype = cputype,
        .cpusubtype = cpusubtype,
        .origin = CRYPT_ORIGIN_LIBRARY_LOAD,
    };
    result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
    if (vm_map_debug_apple_protect) {
        printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
            proc_getpid(p), p->p_comm,
            user_map,
            (uint64_t) user_addr,
            (uint64_t) (user_addr + user_size),
            __FUNCTION__, vpath, result);
    }
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
    zfree(ZV_NAMEI, vpath);

    if (result) {
        printf("%s: unable to create decrypter %s, kr=%d\n",
            __FUNCTION__, cryptname, result);
        if (result == kIOReturnNotPrivileged) {
            /* text encryption returned decryption failure */
            return EPERM;
        } else {
            return ENOMEM;
        }
    }

    /* now remap using the decrypter */
    vm_object_offset_t crypto_backing_offset;
    crypto_backing_offset = -1; /* i.e. use map entry's offset */
    result = vm_map_apple_protected(user_map,
        user_addr,
        user_addr + user_size,
        crypto_backing_offset,
        &crypt_info,
        cryptid);
    if (result) {
        printf("%s: mapping failed with %d\n", __FUNCTION__, result);
        return EPERM;
    }
    return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */