/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>
/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h. Basically, we attempt to draw the line of: "was this code
 * compiled with an SDK from fall of 2020 or later?"
 */
static bool
proc_2020_fall_os_sdk_or_later(void)
{
	const uint32_t proc_sdk_ver = proc_sdk(current_proc());

	switch (proc_platform(current_proc())) {
	case PLATFORM_MACOS:
		return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
	case PLATFORM_IOS:
	case PLATFORM_IOSSIMULATOR:
	case PLATFORM_MACCATALYST:
		return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
	case PLATFORM_BRIDGEOS:
		return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
	case PLATFORM_TVOS:
	case PLATFORM_TVOSSIMULATOR:
		return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
	case PLATFORM_WATCHOS:
	case PLATFORM_WATCHOSSIMULATOR:
		return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
	default:
		/*
		 * tough call, but let's give new platforms the benefit of the doubt
		 * to avoid a recurrence of rdar://89843927
		 */
		return true;
	}
}
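
/*
 * For reference, dyld packs these SDK version constants as 0xMMMMmmpp
 * (16-bit major, 8-bit minor, 8-bit patch), so 0x000a1000 is 10.16 and
 * 0x000e0000 is 14.0.  A sketch of the encoding (the helper name below
 * is illustrative, not part of dyld's API):
 *
 *	static uint32_t
 *	dyld_version(uint16_t major, uint8_t minor, uint8_t patch)
 *	{
 *		return ((uint32_t)major << 16) | ((uint32_t)minor << 8) | patch;
 *	}
 *
 *	dyld_version(10, 16, 0) == 0x000a1000
 */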

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	struct vnode *vp;
	int flags;
	int prot;
	int err = 0;
	vm_map_t user_map;
	kern_return_t result;
	vm_map_offset_t user_addr;
	vm_map_offset_t sum;
	vm_map_size_t user_size;
	vm_object_offset_t pageoff;
	vm_object_offset_t file_pos;
	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	boolean_t docow;
	vm_prot_t maxprot;
	void *handle;
	memory_object_t pager = MEMORY_OBJECT_NULL;
	memory_object_control_t control;
	int mapanon = 0;
	int fpref = 0;
	int error = 0;
	int fd = uap->fd;
	int num_retries = 0;

	/*
	 * Note that for UNIX03 conformance, there is additional parameter checking
	 * for the mmap() system call in libsyscall prior to entering the kernel.
	 * The sanity checks and argument validation done in this function are not
	 * the only places one can get returned errnos.
	 */

	user_map = current_map();
	user_addr = (vm_map_offset_t)uap->addr;
	user_size = (vm_map_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * verify no unknown flags are passed in, and if any are,
	 * fail out early to make sure the logic below never has to deal
	 * with invalid flag values.  only do so for processes compiled
	 * with Fall 2020 or later SDK, which is where we drew this
	 * line and documented it as such.
	 */
	if (flags & ~(MAP_SHARED |
	    MAP_PRIVATE |
	    MAP_COPY |
	    MAP_FIXED |
	    MAP_RENAME |
	    MAP_NORESERVE |
	    MAP_RESERVED0080 |          // grandfathered in as accepted and ignored
	    MAP_NOEXTEND |
	    MAP_HASSEMAPHORE |
	    MAP_NOCACHE |
	    MAP_JIT |
	    MAP_TPRO |
	    MAP_FILE |
	    MAP_ANON |
	    MAP_RESILIENT_CODESIGN |
	    MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
	    MAP_32BIT |
#endif
	    MAP_TRANSLATED_ALLOW_EXECUTE |
	    MAP_UNIX03)) {
		if (proc_2020_fall_os_sdk_or_later()) {
			return EINVAL;
		}
	}
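
	/*
	 * Illustrative userspace consequence of the check above (a sketch,
	 * not part of this file): on a process built with a fall-2020-or-later
	 * SDK, any undefined flag bit is rejected up front, e.g.
	 *
	 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	 *	    MAP_ANON | MAP_PRIVATE | 0x40000000, -1, 0);
	 *	// fails with EINVAL here, assuming 0x40000000 is not a
	 *	// defined MAP_* bit; older binaries fall through and the
	 *	// unknown bit is silently ignored.
	 */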


	/*
	 * The VM code does not have prototypes, and the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (os_add3_overflow(file_pos, user_size, vm_map_page_size(user_map) - 1, &sum)) {
		return EINVAL;
	}

	if (flags & MAP_UNIX03) {
		vm_map_offset_t offset_alignment_mask;

		/*
		 * Enforce UNIX03 compliance.
		 */

		if (vm_map_is_exotic(current_map())) {
			offset_alignment_mask = 0xFFF;
		} else {
			offset_alignment_mask = vm_map_page_mask(current_map());
		}
		if (file_pos & offset_alignment_mask) {
			/* file offset should be page-aligned */
			return EINVAL;
		}
		if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
			/* need either MAP_PRIVATE or MAP_SHARED */
			return EINVAL;
		}
		if (user_size == 0) {
			/* mapping length should not be 0 */
			return EINVAL;
		}
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & vm_map_page_mask(user_map));
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;                   /* low end... */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map));        /* hi end */
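
	/*
	 * Worked example of the rounding above (assuming 4 KB pages, so
	 * vm_map_page_mask() == 0xFFF): for mmap(..., len = 0x100, pos = 0x1234),
	 *
	 *	pageoff   = 0x1234 & 0xFFF        = 0x234
	 *	file_pos  = 0x1234 - 0x234        = 0x1000   (page-aligned)
	 *	user_size = round(0x100 + 0x234)  = 0x1000   (one full page)
	 *
	 * and on success the caller gets back user_addr + pageoff, so the
	 * returned pointer still refers to file offset 0x1234.
	 */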


	if (flags & MAP_JIT) {
		if ((flags & MAP_FIXED) ||
		    (flags & MAP_SHARED) ||
		    !(flags & MAP_ANON) ||
		    (flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}

	if ((flags & MAP_RESILIENT_CODESIGN) ||
	    (flags & MAP_RESILIENT_MEDIA)) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_JIT) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}
	if (flags & MAP_RESILIENT_CODESIGN) {
		int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
		if (prot & reject_prot) {
			/*
			 * Quick sanity check. maxprot is calculated below and
			 * we will test it again.
			 */
			return EPERM;
		}
	}
	if (flags & MAP_SHARED) {
		/*
		 * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
		 * there is no place to inject zero-filled pages without
		 * actually adding them to the file.
		 * Since we didn't reject that combination before, there might
		 * already be callers using it and getting a valid MAP_SHARED
		 * mapping but without the resilience.
		 * For backwards compatibility's sake, let's keep ignoring
		 * MAP_RESILIENT_MEDIA in that case.
		 */
		flags &= ~MAP_RESILIENT_MEDIA;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_SHARED)) {
			return EINVAL;
		}
	}
	if (flags & MAP_TPRO) {
		/*
		 * MAP_TPRO without VM_PROT_WRITE is not valid here because
		 * the TPRO mapping is handled at the PMAP layer with implicit RW
		 * protections.
		 *
		 * This would enable bypassing of file-based protections, i.e.
		 * a file open/mapped as read-only could be written to.
		 */
		if ((prot & VM_PROT_EXECUTE) ||
		    !(prot & VM_PROT_WRITE)) {
			return EPERM;
		}
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & vm_map_page_mask(user_map)) {
			return EINVAL;
		}
	}
#ifdef notyet
	/* We do not have APIs to get this info yet; revisit when we do. */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
	    vm_map_page_mask(user_map))) {
		addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
		    vm_map_page_mask(user_map));
	}

#endif

	/* Entitlement check against code signing monitor */
	if ((flags & MAP_JIT) && (vm_map_csm_allow_jit(user_map) != KERN_SUCCESS)) {
		printf("[%d] code signing monitor denies JIT mapping\n", proc_pid(p));
		return EPERM;
	}

	if (flags & MAP_ANON) {
		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			int vm_flags = fd & (VM_FLAGS_ALIAS_MASK |
			    VM_FLAGS_SUPERPAGE_MASK |
			    VM_FLAGS_PURGABLE |
			    VM_FLAGS_4GB_CHUNK);

			if (vm_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}

			/*
			 * vm_map_kernel_flags_set_vmflags() will assume that
			 * the full set of VM flags are passed, which is
			 * problematic for FIXED/ANYWHERE.
			 *
			 * The block handling MAP_FIXED below will do the same
			 * thing again which is fine because it's idempotent.
			 */
			if (flags & MAP_FIXED) {
				vm_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
			} else {
				vm_flags |= VM_FLAGS_ANYWHERE;
			}
			vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags);
		}
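
		/*
		 * Illustrative userspace usage of the fd-as-VM-flags convention
		 * above (a sketch, not part of this file): for anonymous
		 * mappings, the fd argument can carry a Mach VM tag and a few
		 * allocation flags, e.g.
		 *
		 *	#include <mach/vm_statistics.h>
		 *	#include <sys/mman.h>
		 *
		 *	void *p = mmap(NULL, 16384, PROT_READ | PROT_WRITE,
		 *	    MAP_ANON | MAP_PRIVATE,
		 *	    VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
		 *
		 * Any bit outside VM_FLAGS_ALIAS_MASK, VM_FLAGS_SUPERPAGE_MASK,
		 * VM_FLAGS_PURGABLE and VM_FLAGS_4GB_CHUNK makes the call fail
		 * with EINVAL.
		 */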

#if CONFIG_MAP_RANGES
		/*
		 * if the client specified a tag, let the system policy apply.
		 *
		 * otherwise, force the heap range.
		 */
		if (vmk_flags.vm_tag) {
			vm_map_kernel_flags_update_range_id(&vmk_flags, user_map);
		} else {
			vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
		}
#endif /* CONFIG_MAP_RANGES */

		handle = NULL;
		file_pos = 0;
		pageoff = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT) {
			return EINVAL;
		}

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err) {
			return err;
		}
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
		case DTYPE_PSXSHM:
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp_get_data(fp);
		error = vnode_getwithref(vp);
		if (error != 0) {
			goto bad;
		}

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;      /* TODO: Remove this and restrict maxprot? */
			if (fp->fp_glob->fg_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0) {
						maxprot |= VM_PROT_WRITE;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->fp_glob, prot, flags, file_pos + pageoff,
			    &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
			/*
			 * Consult the file system to determine if this
			 * particular file object can be mapped.
			 *
			 * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
			 * then we don't check for writeability on the file
			 * object, because it will only ever see reads.
			 */
			error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
			    (prot & ~PROT_WRITE) : prot, ctx);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
		}

		/*
		 * No copy-on-read for mmap() mappings themselves.
		 */
		vmk_flags.vmkf_no_copy_on_read = 1;
#if CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX
		/* force file ranges on !macOS */
		vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#endif /* CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX */
	}

	if (user_size == 0) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = 0;
		goto bad;
	}

	/*
	 * We bend a little - round the start and end addresses
	 * to the nearest page boundary.
	 */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map));

	if (file_pos & vm_map_page_mask(user_map)) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = EINVAL;
		goto bad;
	}

	if ((flags & MAP_FIXED) == 0) {
		user_addr = vm_map_round_page(user_addr,
		    vm_map_page_mask(user_map));
	} else {
		if (user_addr != vm_map_trunc_page(user_addr,
		    vm_map_page_mask(user_map))) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		vmk_flags.vmf_fixed = true;
		vmk_flags.vmf_overwrite = true;
	}
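
	/*
	 * Illustrative userspace consequence of the atomic-overwrite rule
	 * above (a sketch, not part of this file): replacing a live region
	 * is safe in the face of concurrent map/unmap activity because the
	 * old mapping is only torn down if the new one can be installed.
	 *
	 *	void *a = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
	 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
	 *	// Atomically replace the second page; there is no window
	 *	// where that address range is unmapped:
	 *	void *b = mmap((char *)a + PAGE_SIZE, PAGE_SIZE, PROT_READ,
	 *	    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	 */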

	if (flags & MAP_NOCACHE) {
		vmk_flags.vmf_no_cache = true;
	}

	if (flags & MAP_JIT) {
		vmk_flags.vmkf_map_jit = TRUE;
	}

	if (flags & MAP_TPRO) {
		vmk_flags.vmf_tpro = true;
	}

#if CONFIG_ROSETTA
	if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
		if (!proc_is_translated(p)) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		vmk_flags.vmkf_translated_allow_execute = TRUE;
	}
#endif

	if (flags & MAP_RESILIENT_CODESIGN) {
		vmk_flags.vmf_resilient_codesign = true;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		vmk_flags.vmf_resilient_media = true;
	}

#if XNU_TARGET_OS_OSX
	/* macOS-specific MAP_32BIT flag handling */
	if (flags & MAP_32BIT) {
		vmk_flags.vmkf_32bit_map_va = TRUE;
	}
#endif

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */
map_anon_retry:

		result = vm_map_enter_mem_object(user_map,
		    &user_addr, user_size,
		    0, vmk_flags,
		    IPC_PORT_NULL, 0, FALSE,
		    prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/*
		 * If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

#if FBDP_DEBUG_OBJECT_NO_PAGER
//#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/202"
#define FBDP_FILE_NAME1 "icutz44l.dat"
#define FBDP_PATH_NAME2 "/private/var/mobile/Containers/Data/InternalDaemon/"
#define FBDP_FILE_NAME_START2 "com.apple.LaunchServices-"
#define FBDP_FILE_NAME_END2 "-v2.csstore"
		if (!strncmp(vp->v_name, FBDP_FILE_NAME1, strlen(FBDP_FILE_NAME1))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME1, strlen(FBDP_PATH_NAME1))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		} else if (!strncmp(vp->v_name, FBDP_FILE_NAME_START2, strlen(FBDP_FILE_NAME_START2)) &&
		    strlen(vp->v_name) > strlen(FBDP_FILE_NAME_START2) + strlen(FBDP_FILE_NAME_END2) &&
		    !strncmp(vp->v_name + strlen(vp->v_name) - strlen(FBDP_FILE_NAME_END2),
		    FBDP_FILE_NAME_END2,
		    strlen(FBDP_FILE_NAME_END2))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME2, strlen(FBDP_PATH_NAME2))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		}
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

		/*
		 * Set credentials:
		 * FIXME: if we're writing the file we need a way to
		 * ensure that someone doesn't replace our R/W creds
		 * with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */

map_file_retry:
		if (flags & MAP_RESILIENT_CODESIGN) {
			int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
			if (prot & reject_prot) {
				/*
				 * Would like to use (prot | maxprot) here
				 * but the assignment of VM_PROT_EXECUTE
				 * to maxprot above would always fail the test.
				 *
				 * Skipping the check is ok, however, because we
				 * restrict maxprot to prot just below in this
				 * block.
				 */
				assert(!mapanon);
				vnode_put(vp);
				error = EPERM;
				goto bad;
			}
			/* strictly limit access to "prot" */
			maxprot &= prot;
		}

		vm_object_offset_t end_pos = 0;
		if (os_add_overflow(user_size, file_pos, &end_pos)) {
			vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		result = vm_map_enter_mem_object_control(user_map,
		    &user_addr, user_size,
		    0, vmk_flags,
		    control, file_pos,
		    docow, prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/*
		 * If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref) {
		fp_drop(p, fd, fp, 0);
	}

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
	    (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
	return error;
}

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	user_map = current_map();
	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	if (addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a
		 * list of all the mmaps done.  We cannot use vm_map_entry,
		 * as entries could be split or coalesced by independent
		 * actions.  So instead of returning inaccurate results,
		 * just return an error for an invalid size.
		 */
		return EINVAL; /* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
		return EINVAL;
	}

	if (flags & MS_KILLPAGES) {
		sync_flags |= VM_SYNC_KILLPAGES;
	}
	if (flags & MS_DEACTIVATE) {
		sync_flags |= VM_SYNC_DEACTIVATE;
	}
	if (flags & MS_INVALIDATE) {
		sync_flags |= VM_SYNC_INVALIDATE;
	}

	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC) {
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		} else {
			sync_flags |= VM_SYNC_SYNCHRONOUS;
		}
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */

	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
		return ENOMEM;
	case KERN_FAILURE:
		return EIO;
	default:
		return EINVAL;
	}
	return 0;
}
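
/*
 * Illustrative userspace usage (a sketch, not part of this file): flush
 * dirty pages of a shared file mapping back to disk and wait for the
 * write to complete.
 *
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	p[0] = 'x';
 *	if (msync(p, len, MS_SYNC) == -1) {
 *		// ENOMEM here means the range had a hole (VM_SYNC_CONTIGUOUS)
 *	}
 *
 * Passing MS_SYNC | MS_ASYNC together fails with EINVAL, per the check
 * above.
 */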


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(user_map, user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return EINVAL;
	}
	return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	vm_prot_t prot;
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ) {
		prot |= VM_PROT_EXECUTE;
	}
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif /* 3936456 */

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error) {
		return error;
	}
#endif

	if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/*
		 * The special protection value VM_PROT_TRUSTED requests that
		 * we treat this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing
		 * the mac_proc_check_mprotect() hook above.  Otherwise, code
		 * signing will be compromised because the check would always
		 * succeed and thus any process could sign dynamically.
		 */
		result = vm_map_sign(
			user_map,
			vm_map_trunc_page(user_addr,
			vm_map_page_mask(user_map)),
			vm_map_round_page(user_addr + user_size,
			vm_map_page_mask(user_map)));
		switch (result) {
		case KERN_SUCCESS:
			break;
		case KERN_INVALID_ADDRESS:
			/* UNIX SPEC: for an invalid address range, return ENOMEM */
			return ENOMEM;
		default:
			return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
	    FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return EINVAL;
}
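
/*
 * Illustrative userspace consequence (a sketch, not part of this file):
 * mprotect() cannot raise protections beyond the mapping's maximum
 * protection.  A MAP_PRIVATE mapping of a file opened O_RDONLY still has
 * VM_PROT_WRITE in maxprot (copy-on-write never modifies the file), but
 * a MAP_SHARED one does not:
 *
 *	int fd = open("/etc/hosts", O_RDONLY);
 *	char *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
 *	mprotect(p, 4096, PROT_READ | PROT_WRITE);  // fails with EACCES
 */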


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_inherit_t inherit;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->inherit);

	user_map = current_map();
	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	result = mach_vm_inherit(user_map, addr, size,
	    inherit);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	}
	return EINVAL;
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	case MADV_FREE:
		new_behavior = VM_BEHAVIOR_FREE;
		break;
	case MADV_ZERO_WIRED_PAGES:
		new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
		break;
	case MADV_FREE_REUSABLE:
		new_behavior = VM_BEHAVIOR_REUSABLE;
		break;
	case MADV_FREE_REUSE:
		new_behavior = VM_BEHAVIOR_REUSE;
		break;
	case MADV_CAN_REUSE:
		new_behavior = VM_BEHAVIOR_CAN_REUSE;
		break;
	case MADV_PAGEOUT:
#if MACH_ASSERT
		new_behavior = VM_BEHAVIOR_PAGEOUT;
		break;
#else /* MACH_ASSERT */
		return ENOTSUP;
#endif /* MACH_ASSERT */
	default:
		return EINVAL;
	}

	user_map = current_map();
	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
	if (vm_map_range_overflows(user_map, start, size)) {
		return EINVAL;
	}
#if __arm64__
	if (start == 0 &&
	    size != 0 &&
	    (uap->behav == MADV_FREE ||
	    uap->behav == MADV_FREE_REUSABLE)) {
		printf("** FOURK_COMPAT: %d[%s] "
		    "failing madvise(0x%llx,0x%llx,%s)\n",
		    proc_getpid(p), p->p_comm, start, size,
		    ((uap->behav == MADV_FREE_REUSABLE)
		    ? "MADV_FREE_REUSABLE"
		    : "MADV_FREE"));
		DTRACE_VM3(fourk_compat_madvise,
		    uint64_t, start,
		    uint64_t, size,
		    int, uap->behav);
		return EINVAL;
	}
#endif /* __arm64__ */

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	}

	return EINVAL;
}
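
/*
 * Illustrative userspace usage of the reuse protocol above (a sketch,
 * not part of this file): a memory allocator can return the physical
 * pages of a free block to the system without unmapping it, then take
 * them back on reuse.
 *
 *	madvise(block, block_size, MADV_FREE_REUSABLE); // pages reclaimable
 *	...
 *	madvise(block, block_size, MADV_FREE_REUSE);    // about to reuse
 */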

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
	vm_map_t map = VM_MAP_NULL;
	user_addr_t vec = 0;
	int error = 0;
	int64_t lastvecindex = 0;
	int mincoreinfo = 0;
	int pqueryinfo = 0;
	uint64_t pqueryinfo_vec_size = 0;
	vm_page_info_basic_t info = NULL;
	mach_msg_type_number_t count = 0;
	char *kernel_vec = NULL;
	uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
	kern_return_t kr = KERN_SUCCESS;
	int effective_page_shift, effective_page_size;

	map = current_map();

	/*
	 * On systems with 4k kernel space and 16k user space, we will
	 * use the kernel page size to report back the residency information.
	 * This is for backwards compatibility since we already have
	 * processes that depend on this behavior.
	 */
	if (vm_map_page_shift(map) < PAGE_SHIFT) {
		effective_page_shift = vm_map_page_shift(map);
		effective_page_size = vm_map_page_size(map);
	} else {
		effective_page_shift = PAGE_SHIFT;
		effective_page_size = PAGE_SIZE;
	}

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = vm_map_trunc_page(uap->addr,
	    vm_map_page_mask(map));
	end = vm_map_round_page(uap->addr + uap->len,
	    vm_map_page_mask(map));

	if (end < addr) {
		return EINVAL;
	}

	if (end == addr) {
		return 0;
	}

	/*
	 * We are going to loop through the whole 'req_vec_size' pages
	 * range in chunks of 'cur_vec_size'.
	 */

	req_vec_size_pages = (end - addr) >> effective_page_shift;
	cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
	size_t kernel_vec_size = cur_vec_size_pages;

	kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

	if (kernel_vec == NULL) {
		return ENOMEM;
	}

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

	info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

	if (info == NULL) {
		kfree_data(kernel_vec, kernel_vec_size);
		return ENOMEM;
	}

	while (addr < end) {
		cur_end = addr + (cur_vec_size_pages * effective_page_size);

		count = VM_PAGE_INFO_BASIC_COUNT;
		kr = vm_map_page_range_info_internal(map,
		    addr,
		    cur_end,
		    effective_page_shift,
		    VM_PAGE_INFO_BASIC,
		    (vm_page_info_t) info,
		    &count);

		assert(kr == KERN_SUCCESS);

		/*
		 * Do this on a map entry basis so that if the pages are not
		 * in the current processes address space, we can easily look
		 * up the pages elsewhere.
		 */
		lastvecindex = -1;

		for (; addr < cur_end; addr += effective_page_size) {
			pqueryinfo = info[lastvecindex + 1].disposition;

			mincoreinfo = 0;

			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
				mincoreinfo |= MINCORE_INCORE;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
				mincoreinfo |= MINCORE_REFERENCED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
				mincoreinfo |= MINCORE_MODIFIED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
				mincoreinfo |= MINCORE_PAGED_OUT;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
				mincoreinfo |= MINCORE_COPIED;
			}
			if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
				mincoreinfo |= MINCORE_ANONYMOUS;
			}
			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = (addr - first_addr) >> effective_page_shift;
			kernel_vec[vecindex] = (char)mincoreinfo;
			lastvecindex = vecindex;
		}


		assert(vecindex == (cur_vec_size_pages - 1));

		error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

		if (error) {
			break;
		}

		/*
		 * For the next chunk, we'll need:
		 * - bump the location in the user buffer for our next disposition.
		 * - new length
		 * - starting address
		 */
		vec += cur_vec_size_pages * sizeof(char);
		req_vec_size_pages = (end - addr) >> effective_page_shift;
		cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

		first_addr = addr;
	}

	kfree_data(info, pqueryinfo_vec_size);
	kfree_data(kernel_vec, kernel_vec_size);

	if (error) {
		return EFAULT;
	}

	return 0;
}
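
/*
 * Illustrative userspace usage (a sketch, not part of this file): the
 * caller supplies one byte per page of the range and tests the MINCORE_*
 * bits in each byte.
 *
 *	size_t pgsz = (size_t)getpagesize();
 *	size_t npages = (len + pgsz - 1) / pgsz;
 *	char *vec = malloc(npages);
 *	if (mincore(addr, len, vec) == 0) {
 *		for (size_t i = 0; i < npages; i++) {
 *			if (vec[i] & MINCORE_INCORE) {
 *				// page i is resident
 *			}
 *		}
 *	}
 *	free(vec);
 */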

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	user_map = current_map();
	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}

	if (size == 0) {
		return 0;
	}

	pageoff = (addr & vm_map_page_mask(user_map));
	addr -= pageoff;
	size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE) {
		return EAGAIN;
	} else if (result == KERN_PROTECTION_FAILURE) {
		return EACCES;
	} else if (result != KERN_SUCCESS) {
		return ENOMEM;
	}

	return 0;       /* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire_kernel(user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
	return result == KERN_SUCCESS ? 0 : ENOMEM;
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;
	uint32_t cryptid;
	cpu_type_t cputype;
	cpu_subtype_t cpusubtype;
	pager_crypt_info_t crypt_info;
	const char * cryptname = 0;
	char *vpath;
	int len, ret;
	struct proc_regioninfo_internal pinfo;
	vnode_t vp;
	uintptr_t vnodeaddr;
	uint32_t vid;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	cryptid = uap->cryptid;
	cputype = uap->cputype;
	cpusubtype = uap->cpusubtype;

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	switch (cryptid) {
	case CRYPTID_NO_ENCRYPTION:
		/* not encrypted, just an empty load command */
		return 0;
	case CRYPTID_APP_ENCRYPTION:
	case CRYPTID_MODEL_ENCRYPTION:
		cryptname = "com.apple.unfree";
		break;
	case 0x10:
		/* some random cryptid that you could manually put into
		 * your binary if you want NULL */
		cryptname = "com.apple.null";
		break;
	default:
		return EINVAL;
	}

	if (NULL == text_crypter_create) {
		return ENOTSUP;
	}

	ret = fill_procregioninfo_onlymappedvnodes(proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
	if (ret == 0 || !vnodeaddr) {
		/* No really, this returns 0 if the memory address is not backed by a file */
		return EINVAL;
	}

	vp = (vnode_t)vnodeaddr;
	if ((vnode_getwithvid(vp, vid)) == 0) {
		vpath = zalloc(ZV_NAMEI);

		len = MAXPATHLEN;
		ret = vn_getpath(vp, vpath, &len);
		if (ret) {
			zfree(ZV_NAMEI, vpath);
			vnode_put(vp);
			return ret;
		}

		vnode_put(vp);
	} else {
		return EINVAL;
	}

#if 0
	kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
	    __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

	if (user_size == 0) {
		printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
		zfree(ZV_NAMEI, vpath);
		return 0;
	}

	/* set up decrypter first */
	crypt_file_data_t crypt_data = {
		.filename = vpath,
		.cputype = cputype,
		.cpusubtype = cpusubtype,
		.origin = CRYPT_ORIGIN_LIBRARY_LOAD,
	};
	result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
	if (vm_map_debug_apple_protect) {
		printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
		    proc_getpid(p), p->p_comm,
		    user_map,
		    (uint64_t) user_addr,
		    (uint64_t) (user_addr + user_size),
		    __FUNCTION__, vpath, result);
	}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
	zfree(ZV_NAMEI, vpath);

	if (result) {
		printf("%s: unable to create decrypter %s, kr=%d\n",
		    __FUNCTION__, cryptname, result);
		if (result == kIOReturnNotPrivileged) {
			/* text encryption returned decryption failure */
			return EPERM;
		} else {
			return ENOMEM;
		}
	}

	/* now remap using the decrypter */
	vm_object_offset_t crypto_backing_offset;
	crypto_backing_offset = -1;     /* i.e. use map entry's offset */
	result = vm_map_apple_protected(user_map,
	    user_addr,
	    user_addr + user_size,
	    crypto_backing_offset,
	    &crypt_info,
	    cryptid);
	if (result) {
		printf("%s: mapping failed with %d\n", __FUNCTION__, result);
	}

	if (result) {
		return EPERM;
	}
	return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */
