/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>

/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h.  Basically, we attempt to draw the line of: "was this code
 * compiled with an SDK from fall of 2020 or later?"
 */
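/*
 * Illustrative note on the constants below (not from dyld_priv.h): dyld
 * packs versions as 0xMMMMmmpp, with the major version in the upper 16
 * bits, minor in the next 8, patch in the low 8.  For example, 0x000a1000
 * decodes as major 0x000a (10), minor 0x10 (16), patch 0, i.e. macOS
 * 10.16, and 0x000e0000 decodes as 14.0.
 */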
static bool
proc_2020_fall_os_sdk_or_later(void)
{
	const uint32_t proc_sdk_ver = proc_sdk(current_proc());

	switch (proc_platform(current_proc())) {
	case PLATFORM_MACOS:
		return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
	case PLATFORM_IOS:
	case PLATFORM_IOSSIMULATOR:
	case PLATFORM_MACCATALYST:
		return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
	case PLATFORM_BRIDGEOS:
		return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
	case PLATFORM_TVOS:
	case PLATFORM_TVOSSIMULATOR:
		return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
	case PLATFORM_WATCHOS:
	case PLATFORM_WATCHOSSIMULATOR:
		return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
	default:
		/*
		 * tough call, but let's give new platforms the benefit of the doubt
		 * to avoid a recurrence of rdar://89843927
		 */
		return true;
	}
}

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	struct                  vnode *vp;
	int                     flags;
	int                     prot;
	int                     err = 0;
	vm_map_t                user_map;
	kern_return_t           result;
	vm_map_offset_t         user_addr;
	vm_map_offset_t         sum;
	vm_map_size_t           user_size;
	vm_object_offset_t      pageoff;
	vm_object_offset_t      file_pos;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	boolean_t               docow;
	vm_prot_t               maxprot;
	void                    *handle;
	memory_object_t         pager = MEMORY_OBJECT_NULL;
	memory_object_control_t  control;
	int                     mapanon = 0;
	int                     fpref = 0;
	int error = 0;
	int fd = uap->fd;
	int num_retries = 0;

	/*
	 * Note that for UNIX03 conformance, there is additional parameter
	 * checking for the mmap() system call in libsyscall prior to entering
	 * the kernel.  The sanity checks and argument validation done in this
	 * function are not the only places errnos can be returned from.
	 */

	user_map = current_map();
	user_addr = (vm_map_offset_t)uap->addr;
	user_size = (vm_map_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * Verify that no unknown flags are passed in, and if any are,
	 * fail out early to make sure the logic below never has to deal
	 * with invalid flag values.  Only do so for processes compiled
	 * with a Fall 2020 or later SDK, which is where we drew this
	 * line and documented it as such.
	 */
	if (flags & ~(MAP_SHARED |
	    MAP_PRIVATE |
	    MAP_COPY |
	    MAP_FIXED |
	    MAP_RENAME |
	    MAP_NORESERVE |
	    MAP_RESERVED0080 |                                  // grandfathered in as accepted and ignored
	    MAP_NOEXTEND |
	    MAP_HASSEMAPHORE |
	    MAP_NOCACHE |
	    MAP_JIT |
	    MAP_TPRO |
	    MAP_FILE |
	    MAP_ANON |
	    MAP_RESILIENT_CODESIGN |
	    MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
	    MAP_32BIT |
#endif
	    MAP_TRANSLATED_ALLOW_EXECUTE |
	    MAP_UNIX03)) {
		if (proc_2020_fall_os_sdk_or_later()) {
			return EINVAL;
		}
	}
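	/*
	 * Illustrative example (hypothetical flag value): a call such as
	 *	mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_ANON | 0x01000000, -1, 0)
	 * fails here with EINVAL if 0x01000000 is not among the flags accepted
	 * above and the process was built against a Fall 2020 or later SDK;
	 * older binaries keep the historical behavior of having the unknown
	 * bit silently ignored.
	 */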


	/*
	 * The VM code does not have prototypes, and the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (os_add3_overflow(file_pos, user_size, vm_map_page_size(user_map) - 1, &sum)) {
		return EINVAL;
	}

	if (flags & MAP_UNIX03) {
		vm_map_offset_t offset_alignment_mask;

		/*
		 * Enforce UNIX03 compliance.
		 */

		if (vm_map_is_exotic(current_map())) {
			offset_alignment_mask = 0xFFF;
		} else {
			offset_alignment_mask = vm_map_page_mask(current_map());
		}
		if (file_pos & offset_alignment_mask) {
			/* file offset should be page-aligned */
			return EINVAL;
		}
		if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
			/* need either MAP_PRIVATE or MAP_SHARED */
			return EINVAL;
		}
		if (user_size == 0) {
			/* mapping length should not be 0 */
			return EINVAL;
		}
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & vm_map_page_mask(user_map));
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;   /* low end... */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map));                           /* hi end */
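	/*
	 * Worked example (illustrative, assuming 4K pages, i.e. mask 0xFFF):
	 * for uap->pos = 0x12345 and uap->len = 0x100, we get
	 * pageoff = 0x345, file_pos = 0x12000, and user_size is first bumped
	 * to 0x445 and then rounded up to 0x1000, so the mapping covers the
	 * whole page containing the requested byte range.
	 */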


	if (flags & MAP_JIT) {
		if ((flags & MAP_FIXED) ||
		    (flags & MAP_SHARED) ||
		    !(flags & MAP_ANON) ||
		    (flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}

	if ((flags & MAP_RESILIENT_CODESIGN) ||
	    (flags & MAP_RESILIENT_MEDIA)) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_JIT) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}
	if (flags & MAP_RESILIENT_CODESIGN) {
		int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
		if (prot & reject_prot) {
			/*
			 * Quick sanity check. maxprot is calculated below and
			 * we will test it again.
			 */
			return EPERM;
		}
	}
	if (flags & MAP_SHARED) {
		/*
		 * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
		 * there is no place to inject zero-filled pages without
		 * actually adding them to the file.
		 * Since we didn't reject that combination before, there might
		 * already be callers using it and getting a valid MAP_SHARED
		 * mapping but without the resilience.
		 * For backwards compatibility's sake, let's keep ignoring
		 * MAP_RESILIENT_MEDIA in that case.
		 */
		flags &= ~MAP_RESILIENT_MEDIA;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_SHARED)) {
			return EINVAL;
		}
	}
	if (flags & MAP_TPRO) {
		/*
		 * MAP_TPRO without VM_PROT_WRITE is not valid here because
		 * the TPRO mapping is handled at the PMAP layer with implicit RW
		 * protections.
		 *
		 * This would enable bypassing of file-based protections, i.e.
		 * a file open/mapped as read-only could be written to.
		 */
		if ((prot & VM_PROT_EXECUTE) ||
		    !(prot & VM_PROT_WRITE)) {
			return EPERM;
		}
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & vm_map_page_mask(user_map)) {
			return EINVAL;
		}
	}
#ifdef notyet
	/* Do not have APIs to get this info; need to wait till then. */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
	    vm_map_page_mask(user_map))) {
		addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
		    vm_map_page_mask(user_map));
	}

#endif

	/* Entitlement check against code signing monitor */
	if ((flags & MAP_JIT) && (vm_map_csm_allow_jit(user_map) != KERN_SUCCESS)) {
		printf("[%d] code signing monitor denies JIT mapping\n", proc_pid(p));
		return EPERM;
	}

	if (flags & MAP_ANON) {
		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, current_cached_proc_cred(p),
		    user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			int vm_flags = fd & (VM_FLAGS_ALIAS_MASK |
			    VM_FLAGS_SUPERPAGE_MASK |
			    VM_FLAGS_PURGABLE |
			    VM_FLAGS_4GB_CHUNK);

			if (vm_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}

			/*
			 * vm_map_kernel_flags_set_vmflags() will assume that
			 * the full set of VM flags are passed, which is
			 * problematic for FIXED/ANYWHERE.
			 *
			 * The block handling MAP_FIXED below will do the same
			 * thing again, which is fine because it's idempotent.
			 */
			if (flags & MAP_FIXED) {
				vm_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
			} else {
				vm_flags |= VM_FLAGS_ANYWHERE;
			}
			vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags);
		}
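		/*
		 * Illustrative userspace example (not part of this file): a
		 * caller can tag an anonymous mapping for vmmap/footprint
		 * accounting by passing a Mach VM tag in place of the fd:
		 *	mmap(NULL, len, PROT_READ | PROT_WRITE,
		 *	    MAP_ANON | MAP_PRIVATE,
		 *	    VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
		 * VM_MAKE_TAG() shifts the tag into VM_FLAGS_ALIAS_MASK, so
		 * the value survives the masking above.
		 */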

#if CONFIG_MAP_RANGES
		/*
		 * if the client specified a tag, let the system policy apply.
		 *
		 * otherwise, force the heap range.
		 */
		if (vmk_flags.vm_tag) {
			vm_map_kernel_flags_update_range_id(&vmk_flags, user_map);
		} else {
			vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
		}
#endif /* CONFIG_MAP_RANGES */

		handle = NULL;
		file_pos = 0;
		pageoff = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT) {
			return EINVAL;
		}

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err) {
			return err;
		}
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
		case DTYPE_PSXSHM:
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp_get_data(fp);
		error = vnode_getwithref(vp);
		if (error != 0) {
			goto bad;
		}

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;      /* TODO: Remove this and restrict maxprot? */
			if (fp->fp_glob->fg_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0) {
						maxprot |= VM_PROT_WRITE;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->fp_glob, prot, flags, file_pos + pageoff,
			    &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
			/*
			 * Consult the file system to determine if this
			 * particular file object can be mapped.
			 *
			 * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
			 * then we don't check for writability on the file
			 * object, because it will only ever see reads.
			 */
			error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
			    (prot & ~PROT_WRITE) : prot, ctx);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
		}

		/*
		 * No copy-on-read for mmap() mappings themselves.
		 */
		vmk_flags.vmkf_no_copy_on_read = 1;
#if CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX
		/* force file ranges on !macOS */
		vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#endif /* CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX */
	}

	if (user_size == 0) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map));

	if (file_pos & vm_map_page_mask(user_map)) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = EINVAL;
		goto bad;
	}

	if ((flags & MAP_FIXED) == 0) {
		user_addr = vm_map_round_page(user_addr,
		    vm_map_page_mask(user_map));
	} else {
		if (user_addr != vm_map_trunc_page(user_addr,
		    vm_map_page_mask(user_map))) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		vmk_flags.vmf_fixed = true;
		vmk_flags.vmf_overwrite = true;
	}
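	/*
	 * Illustrative consequence for userspace: with something like
	 *	mmap(addr, len, prot, MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0)
	 * any existing mappings in [addr, addr + len) are replaced atomically
	 * on success, while a failed attempt leaves them intact, per the
	 * VM_FLAGS_OVERWRITE semantics described above.
	 */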

	if (flags & MAP_NOCACHE) {
		vmk_flags.vmf_no_cache = true;
	}

	if (flags & MAP_JIT) {
		vmk_flags.vmkf_map_jit = TRUE;
	}

	if (flags & MAP_TPRO) {
		vmk_flags.vmf_tpro = true;
	}

#if CONFIG_ROSETTA
	if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
		if (!proc_is_translated(p)) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		vmk_flags.vmkf_translated_allow_execute = TRUE;
	}
#endif

	if (flags & MAP_RESILIENT_CODESIGN) {
		vmk_flags.vmf_resilient_codesign = true;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		vmk_flags.vmf_resilient_media = true;
	}

#if XNU_TARGET_OS_OSX
	/* macOS-specific MAP_32BIT flag handling */
	if (flags & MAP_32BIT) {
		vmk_flags.vmkf_32bit_map_va = TRUE;
	}
#endif

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */
map_anon_retry:

		result = vm_map_enter_mem_object(user_map,
		    &user_addr, user_size,
		    0, vmk_flags,
		    IPC_PORT_NULL, 0, FALSE,
		    prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/*
		 * If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

#if FBDP_DEBUG_OBJECT_NO_PAGER
//#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/202"
#define FBDP_FILE_NAME1 "icutz44l.dat"
#define FBDP_PATH_NAME2 "/private/var/mobile/Containers/Data/InternalDaemon/"
#define FBDP_FILE_NAME_START2 "com.apple.LaunchServices-"
#define FBDP_FILE_NAME_END2 "-v2.csstore"
		if (!strncmp(vp->v_name, FBDP_FILE_NAME1, strlen(FBDP_FILE_NAME1))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME1, strlen(FBDP_PATH_NAME1))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		} else if (!strncmp(vp->v_name, FBDP_FILE_NAME_START2, strlen(FBDP_FILE_NAME_START2)) &&
		    strlen(vp->v_name) > strlen(FBDP_FILE_NAME_START2) + strlen(FBDP_FILE_NAME_END2) &&
		    !strncmp(vp->v_name + strlen(vp->v_name) - strlen(FBDP_FILE_NAME_END2),
		    FBDP_FILE_NAME_END2,
		    strlen(FBDP_FILE_NAME_END2))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME2, strlen(FBDP_PATH_NAME2))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		}
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 *      with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */

map_file_retry:
		if (flags & MAP_RESILIENT_CODESIGN) {
			int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
			if (prot & reject_prot) {
				/*
				 * Would like to use (prot | maxprot) here
				 * but the assignment of VM_PROT_EXECUTE
				 * to maxprot above would always fail the test.
				 *
				 * Skipping the check is ok, however, because we
				 * restrict maxprot to prot just below in this
				 * block.
				 */
				assert(!mapanon);
				vnode_put(vp);
				error = EPERM;
				goto bad;
			}
			/* strictly limit access to "prot" */
			maxprot &= prot;
		}

		vm_object_offset_t end_pos = 0;
		if (os_add_overflow(user_size, file_pos, &end_pos)) {
			vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		result = vm_map_enter_mem_object_control(user_map,
		    &user_addr, user_size,
		    0, vmk_flags,
		    control, file_pos,
		    docow, prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/*
		 * If a non-binding address was specified for this file-backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref) {
		fp_drop(p, fd, fp, 0);
	}

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
	    (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
	return error;
}

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	user_map = current_map();
	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	if (addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a list
		 * of all mmaps done.  We cannot use vm_map_entry, as entries
		 * could be split or coalesced by independent actions.  So
		 * instead of returning inaccurate results, let's just return
		 * an error for an invalid size.
		 */
		return EINVAL; /* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
		return EINVAL;
	}

	if (flags & MS_KILLPAGES) {
		sync_flags |= VM_SYNC_KILLPAGES;
	}
	if (flags & MS_DEACTIVATE) {
		sync_flags |= VM_SYNC_DEACTIVATE;
	}
	if (flags & MS_INVALIDATE) {
		sync_flags |= VM_SYNC_INVALIDATE;
	}

	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC) {
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		} else {
			sync_flags |= VM_SYNC_SYNCHRONOUS;
		}
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */
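	/*
	 * Flag translation at a glance (illustrative): msync(addr, len, MS_SYNC)
	 * becomes VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS, while
	 * msync(addr, len, MS_ASYNC | MS_INVALIDATE) becomes
	 * VM_SYNC_ASYNCHRONOUS | VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS.
	 */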

	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
		return ENOMEM;
	case KERN_FAILURE:
		return EIO;
	default:
		return EINVAL;
	}
	return 0;
}


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t        user_addr;
	mach_vm_size_t          user_size;
	kern_return_t           result;
	vm_map_t                user_map;

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(user_map, user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return EINVAL;
	}
	return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	vm_prot_t prot;
	mach_vm_offset_t        user_addr;
	mach_vm_size_t  user_size;
	kern_return_t   result;
	vm_map_t        user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ) {
		prot |= VM_PROT_EXECUTE;
	}
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* 3936456 */

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for two reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so an mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error) {
		return error;
	}
#endif

	if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/* The special protection value VM_PROT_TRUSTED requests that we treat
		 * this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing the
		 * mac_proc_check_mprotect() hook above.  Otherwise, code signing will
		 * be compromised, because the check would always succeed and thus any
		 * process could sign dynamically. */
		result = vm_map_sign(
			user_map,
			vm_map_trunc_page(user_addr,
			vm_map_page_mask(user_map)),
			vm_map_round_page(user_addr + user_size,
			vm_map_page_mask(user_map)));
		switch (result) {
		case KERN_SUCCESS:
			break;
		case KERN_INVALID_ADDRESS:
			/* UNIX SPEC: for an invalid address range, return ENOMEM */
			return ENOMEM;
		default:
			return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
	    FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return EINVAL;
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_inherit_t inherit;
	vm_map_t        user_map;
	kern_return_t   result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->inherit);

	user_map = current_map();
	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	result = mach_vm_inherit(user_map, addr, size,
	    inherit);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	}
	return EINVAL;
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t   result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	case MADV_FREE:
		new_behavior = VM_BEHAVIOR_FREE;
		break;
	case MADV_ZERO_WIRED_PAGES:
		new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
		break;
	case MADV_FREE_REUSABLE:
		new_behavior = VM_BEHAVIOR_REUSABLE;
		break;
	case MADV_FREE_REUSE:
		new_behavior = VM_BEHAVIOR_REUSE;
		break;
	case MADV_CAN_REUSE:
		new_behavior = VM_BEHAVIOR_CAN_REUSE;
		break;
	case MADV_PAGEOUT:
#if MACH_ASSERT
		new_behavior = VM_BEHAVIOR_PAGEOUT;
		break;
#else /* MACH_ASSERT */
		return ENOTSUP;
#endif /* MACH_ASSERT */
	case MADV_ZERO:
		new_behavior = VM_BEHAVIOR_ZERO;
		break;
	default:
		return EINVAL;
	}
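	/*
	 * Illustrative flow (hypothetical call): madvise(addr, len, MADV_FREE)
	 * over a range that is not fully mapped can come back as
	 * KERN_INVALID_ADDRESS from mach_vm_behavior_set() and is reported
	 * to userspace as EINVAL, per the result switch at the end of this
	 * function.
	 */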

	user_map = current_map();
	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
	if (vm_map_range_overflows(user_map, start, size)) {
		return EINVAL;
	}
#if __arm64__
	if (start == 0 &&
	    size != 0 &&
	    (uap->behav == MADV_FREE ||
	    uap->behav == MADV_FREE_REUSABLE)) {
		printf("** FOURK_COMPAT: %d[%s] "
		    "failing madvise(0x%llx,0x%llx,%s)\n",
		    proc_getpid(p), p->p_comm, start, size,
		    ((uap->behav == MADV_FREE_REUSABLE)
		    ? "MADV_FREE_REUSABLE"
		    : "MADV_FREE"));
		DTRACE_VM3(fourk_compat_madvise,
		    uint64_t, start,
		    uint64_t, size,
		    int, uap->behav);
		return EINVAL;
	}
#endif /* __arm64__ */

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EPERM;
	case KERN_NO_ACCESS:
		return ENOTSUP;
	}

	return EINVAL;
}

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
	vm_map_t map = VM_MAP_NULL;
	user_addr_t vec = 0;
	int error = 0;
	int64_t lastvecindex = 0;
	int mincoreinfo = 0;
	int pqueryinfo = 0;
	uint64_t pqueryinfo_vec_size = 0;
	vm_page_info_basic_t info = NULL;
	mach_msg_type_number_t count = 0;
	char *kernel_vec = NULL;
	uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
	kern_return_t kr = KERN_SUCCESS;
	int effective_page_shift, effective_page_size;

	map = current_map();

	/*
	 * On systems with 4k kernel space and 16k user space, we will
	 * use the kernel page size to report back the residency information.
	 * This is for backwards compatibility since we already have
	 * processes that depend on this behavior.
	 */
	if (vm_map_page_shift(map) < PAGE_SHIFT) {
		effective_page_shift = vm_map_page_shift(map);
		effective_page_size = vm_map_page_size(map);
	} else {
		effective_page_shift = PAGE_SHIFT;
		effective_page_size = PAGE_SIZE;
	}

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = vm_map_trunc_page(uap->addr,
	    vm_map_page_mask(map));
	end = vm_map_round_page(uap->addr + uap->len,
	    vm_map_page_mask(map));

	if (end < addr) {
		return EINVAL;
	}

	if (end == addr) {
		return 0;
	}

	/*
	 * We are going to loop through the whole 'req_vec_size' pages
	 * range in chunks of 'cur_vec_size'.
	 */
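	/*
	 * Chunking example (illustrative): for a 3 GB request with 16K pages,
	 * req_vec_size_pages is 3 GB >> 14 = 196608 pages, but each pass
	 * handles at most MAX_PAGE_RANGE_QUERY >> 14 pages, so the loop below
	 * copies the dispositions out to userspace one chunk at a time
	 * instead of allocating one huge kernel buffer up front.
	 */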

	req_vec_size_pages = (end - addr) >> effective_page_shift;
	cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
	size_t kernel_vec_size = cur_vec_size_pages;

	kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

	if (kernel_vec == NULL) {
		return ENOMEM;
	}

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

	info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

	if (info == NULL) {
		kfree_data(kernel_vec, kernel_vec_size);
		return ENOMEM;
	}

	while (addr < end) {
		cur_end = addr + (cur_vec_size_pages * effective_page_size);

		count = VM_PAGE_INFO_BASIC_COUNT;
		kr = vm_map_page_range_info_internal(map,
		    addr,
		    cur_end,
		    effective_page_shift,
		    VM_PAGE_INFO_BASIC,
		    (vm_page_info_t) info,
		    &count);

		assert(kr == KERN_SUCCESS);

		/*
		 * Do this on a map entry basis so that if the pages are not
		 * in the current process's address space, we can easily look
		 * up the pages elsewhere.
		 */
		lastvecindex = -1;

		for (; addr < cur_end; addr += effective_page_size) {
			pqueryinfo = info[lastvecindex + 1].disposition;

			mincoreinfo = 0;

			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
				mincoreinfo |= MINCORE_INCORE;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
				mincoreinfo |= MINCORE_REFERENCED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
				mincoreinfo |= MINCORE_MODIFIED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
				mincoreinfo |= MINCORE_PAGED_OUT;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
				mincoreinfo |= MINCORE_COPIED;
			}
			if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
				mincoreinfo |= MINCORE_ANONYMOUS;
			}
			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = (addr - first_addr) >> effective_page_shift;
			kernel_vec[vecindex] = (char)mincoreinfo;
			lastvecindex = vecindex;
		}


		assert(vecindex == (cur_vec_size_pages - 1));

		error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

		if (error) {
			break;
		}

		/*
		 * For the next chunk, we'll need:
		 * - to bump the location in the user buffer for our next disposition
		 * - the new length
		 * - the new starting address
		 */
		vec += cur_vec_size_pages * sizeof(char);
		req_vec_size_pages = (end - addr) >> effective_page_shift;
		cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

		first_addr = addr;
	}

	kfree_data(info, pqueryinfo_vec_size);
	kfree_data(kernel_vec, kernel_vec_size);

	if (error) {
		return EFAULT;
	}

	return 0;
}

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t   result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	user_map = current_map();
	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}

	if (size == 0) {
		return 0;
	}

	pageoff = (addr & vm_map_page_mask(user_map));
	addr -= pageoff;
	size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE) {
		return EAGAIN;
	} else if (result == KERN_PROTECTION_FAILURE) {
		return EACCES;
	} else if (result != KERN_SUCCESS) {
		return ENOMEM;
	}

	return 0;       /* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t   result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire_kernel(user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
	return result == KERN_SUCCESS ? 0 : ENOMEM;
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t    user_addr;
	mach_vm_size_t      user_size;
	kern_return_t       result;
	vm_map_t    user_map;
	uint32_t    cryptid;
	cpu_type_t  cputype;
	cpu_subtype_t       cpusubtype;
	pager_crypt_info_t  crypt_info;
	const char * cryptname = 0;
	char *vpath;
	int len, ret;
	struct proc_regioninfo_internal pinfo;
	vnode_t vp;
	uintptr_t vnodeaddr;
	uint32_t vid;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	cryptid = uap->cryptid;
	cputype = uap->cputype;
	cpusubtype = uap->cpusubtype;

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	switch (cryptid) {
	case CRYPTID_NO_ENCRYPTION:
		/* not encrypted, just an empty load command */
		return 0;
	case CRYPTID_APP_ENCRYPTION:
	case CRYPTID_MODEL_ENCRYPTION:
		cryptname = "com.apple.unfree";
		break;
	case 0x10:
		/* some random cryptid that you could manually put into
		 * your binary if you want NULL */
		cryptname = "com.apple.null";
		break;
	default:
		return EINVAL;
	}

	if (NULL == text_crypter_create) {
		return ENOTSUP;
	}

	ret = fill_procregioninfo_onlymappedvnodes(proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
	if (ret == 0 || !vnodeaddr) {
		/* No, really: this returns 0 if the memory address is not backed by a file */
		return EINVAL;
	}

	vp = (vnode_t)vnodeaddr;
	if ((vnode_getwithvid(vp, vid)) == 0) {
		vpath = zalloc(ZV_NAMEI);

		len = MAXPATHLEN;
		ret = vn_getpath(vp, vpath, &len);
		if (ret) {
			zfree(ZV_NAMEI, vpath);
			vnode_put(vp);
			return ret;
		}

		vnode_put(vp);
	} else {
		return EINVAL;
	}

#if 0
	kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
	    __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

	if (user_size == 0) {
		printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
		zfree(ZV_NAMEI, vpath);
		return 0;
	}

	/* set up decrypter first */
	crypt_file_data_t crypt_data = {
		.filename = vpath,
		.cputype = cputype,
		.cpusubtype = cpusubtype,
		.origin = CRYPT_ORIGIN_LIBRARY_LOAD,
	};
	result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
	if (vm_map_debug_apple_protect) {
		printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
		    proc_getpid(p), p->p_comm,
		    user_map,
		    (uint64_t) user_addr,
		    (uint64_t) (user_addr + user_size),
		    __FUNCTION__, vpath, result);
	}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
	zfree(ZV_NAMEI, vpath);

	if (result) {
		printf("%s: unable to create decrypter %s, kr=%d\n",
		    __FUNCTION__, cryptname, result);
		if (result == kIOReturnNotPrivileged) {
			/* text encryption returned decryption failure */
			return EPERM;
		} else {
			return ENOMEM;
		}
	}

	/* now remap using the decrypter */
	vm_object_offset_t crypto_backing_offset;
	crypto_backing_offset = -1; /* i.e. use map entry's offset */
	result = vm_map_apple_protected(user_map,
	    user_addr,
	    user_addr + user_size,
	    crypto_backing_offset,
	    &crypt_info,
	    cryptid);
	if (result) {
		printf("%s: mapping failed with %d\n", __FUNCTION__, result);
		return EPERM;
	}
	return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */
1721