xref: /xnu-8020.121.3/bsd/kern/kern_mman.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>

/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h.  This way we can consistently deny / allow allocations based
 * on the SDK version at the fall 2020 level.  Compare the output to
 * proc_sdk(current_proc()).
 */
static uint32_t
proc_2020_fall_os_sdk(void)
{
	switch (proc_platform(current_proc())) {
	case PLATFORM_MACOS:
		return 0x000a1000; // DYLD_MACOSX_VERSION_10_16
	case PLATFORM_IOS:
	case PLATFORM_IOSSIMULATOR:
	case PLATFORM_MACCATALYST:
		return 0x000e0000; // DYLD_IOS_VERSION_14_0
	case PLATFORM_BRIDGEOS:
		return 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
	case PLATFORM_TVOS:
	case PLATFORM_TVOSSIMULATOR:
		return 0x000e0000; // DYLD_TVOS_VERSION_14_0
	case PLATFORM_WATCHOS:
	case PLATFORM_WATCHOSSIMULATOR:
		return 0x00070000; // DYLD_WATCHOS_VERSION_7_0
	default:
		return 0;
	}
}

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	struct                  vnode *vp;
	int                     flags;
	int                     prot;
	int                     err = 0;
	vm_map_t                user_map;
	kern_return_t           result;
	vm_map_offset_t         user_addr;
	vm_map_offset_t         sum;
	vm_map_size_t           user_size;
	vm_object_offset_t      pageoff;
	vm_object_offset_t      file_pos;
	int                     alloc_flags = 0;
	vm_tag_t                tag = VM_KERN_MEMORY_NONE;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	boolean_t               docow;
	vm_prot_t               maxprot;
	void                    *handle;
	memory_object_t         pager = MEMORY_OBJECT_NULL;
	memory_object_control_t  control;
	int                     mapanon = 0;
	int                     fpref = 0;
	int error = 0;
	int fd = uap->fd;
	int num_retries = 0;

	/*
	 * Note that for UNIX03 conformance, there is additional parameter checking
	 * for the mmap() system call in libsyscall prior to entering the kernel.
	 * The sanity checks and argument validation done in this function are not
	 * the only places one can get returned errnos.
	 */

	user_map = current_map();
	user_addr = (vm_map_offset_t)uap->addr;
	user_size = (vm_map_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	if (vm_map_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}
	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * verify no unknown flags are passed in, and if any are,
	 * fail out early to make sure the logic below never has to deal
	 * with invalid flag values
	 */
	if (flags & ~(MAP_SHARED |
	    MAP_PRIVATE |
	    MAP_COPY |
	    MAP_FIXED |
	    MAP_RENAME |
	    MAP_NORESERVE |
	    MAP_RESERVED0080 |                                  // grandfathered in as accepted and ignored
	    MAP_NOEXTEND |
	    MAP_HASSEMAPHORE |
	    MAP_NOCACHE |
	    MAP_JIT |
	    MAP_FILE |
	    MAP_ANON |
	    MAP_RESILIENT_CODESIGN |
	    MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
	    MAP_32BIT |
#endif
	    MAP_TRANSLATED_ALLOW_EXECUTE |
	    MAP_UNIX03)) {
		if (proc_sdk(current_proc()) >= proc_2020_fall_os_sdk()) {
			return EINVAL;
		}
	}


	/*
	 * The VM code does not have prototypes, and the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (os_add3_overflow(file_pos, user_size, vm_map_page_size(user_map) - 1, &sum)) {
		return EINVAL;
	}

	if (flags & MAP_UNIX03) {
		vm_map_offset_t offset_alignment_mask;

		/*
		 * Enforce UNIX03 compliance.
		 */

		if (vm_map_is_exotic(current_map())) {
			offset_alignment_mask = 0xFFF;
		} else {
			offset_alignment_mask = vm_map_page_mask(current_map());
		}
		if (file_pos & offset_alignment_mask) {
			/* file offset should be page-aligned */
			return EINVAL;
		}
		if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
			/* need either MAP_PRIVATE or MAP_SHARED */
			return EINVAL;
		}
		if (user_size == 0) {
			/* mapping length should not be 0 */
			return EINVAL;
		}
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & vm_map_page_mask(user_map));
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;   /* low end... */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map));                           /* hi end */


	if (flags & MAP_JIT) {
		if ((flags & MAP_FIXED) ||
		    (flags & MAP_SHARED) ||
		    !(flags & MAP_ANON) ||
		    (flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA)) {
			return EINVAL;
		}
	}

	if ((flags & MAP_RESILIENT_CODESIGN) ||
	    (flags & MAP_RESILIENT_MEDIA)) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_JIT)) {
			return EINVAL;
		}
	}
	if (flags & MAP_RESILIENT_CODESIGN) {
		int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
		if (prot & reject_prot) {
			/*
			 * Quick sanity check. maxprot is calculated below and
			 * we will test it again.
			 */
			return EPERM;
		}
	}
	if (flags & MAP_SHARED) {
		/*
		 * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
		 * there is no place to inject zero-filled pages without
		 * actually adding them to the file.
		 * Since we didn't reject that combination before, there might
		 * already be callers using it and getting a valid MAP_SHARED
		 * mapping but without the resilience.
		 * For backwards compatibility's sake, let's keep ignoring
		 * MAP_RESILIENT_MEDIA in that case.
		 */
		flags &= ~MAP_RESILIENT_MEDIA;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_SHARED)) {
			return EINVAL;
		}
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & vm_map_page_mask(user_map)) {
			return EINVAL;
		}
	}
#ifdef notyet
	/* Do not have APIs to get this info; need to wait till then */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
	    vm_map_page_mask(user_map))) {
		addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
		    vm_map_page_mask(user_map));
	}

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
			    VM_FLAGS_SUPERPAGE_MASK |
			    VM_FLAGS_PURGABLE |
			    VM_FLAGS_4GB_CHUNK);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
			VM_GET_FLAGS_ALIAS(alloc_flags, tag);
			alloc_flags &= ~VM_FLAGS_ALIAS_MASK;
		}

		handle = NULL;
		file_pos = 0;
		pageoff = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT) {
			return EINVAL;
		}

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err) {
			return err;
		}
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
		case DTYPE_PSXSHM:
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp_get_data(fp);
		error = vnode_getwithref(vp);
		if (error != 0) {
			goto bad;
		}

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;      /* TODO: Remove this and restrict maxprot? */
			if (fp->fp_glob->fg_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0) {
						maxprot |= VM_PROT_WRITE;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->fp_glob, prot, flags, file_pos + pageoff,
			    &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
			/*
			 * Consult the file system to determine if this
			 * particular file object can be mapped.
			 *
			 * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
			 * then we don't check for writeability on the file
			 * object, because it will only ever see reads.
			 */
			error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
			    (prot & ~PROT_WRITE) : prot, ctx);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
		}

		/*
		 * No copy-on-read for mmap() mappings themselves.
		 */
		vmk_flags.vmkf_no_copy_on_read = 1;
	}

	if (user_size == 0) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map));

	if (file_pos & vm_map_page_mask(user_map)) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = EINVAL;
		goto bad;
	}

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = vm_map_round_page(user_addr,
		    vm_map_page_mask(user_map));
	} else {
		if (user_addr != vm_map_trunc_page(user_addr,
		    vm_map_page_mask(user_map))) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE) {
		alloc_flags |= VM_FLAGS_NO_CACHE;
	}

	if (flags & MAP_JIT) {
		vmk_flags.vmkf_map_jit = TRUE;
	}


	if (flags & MAP_RESILIENT_CODESIGN) {
		alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		alloc_flags |= VM_FLAGS_RESILIENT_MEDIA;
	}

#if XNU_TARGET_OS_OSX
	/* macOS-specific MAP_32BIT flag handling */
	if (flags & MAP_32BIT) {
		vmk_flags.vmkf_32bit_map_va = TRUE;
	}
#endif

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */
map_anon_retry:

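		/*
		 * Enter the anonymous mapping into the task's VM map.  With
		 * IPC_PORT_NULL as the backing object, the VM layer provides
		 * zero-filled memory.
		 */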
		result = vm_map_enter_mem_object(user_map,
		    &user_addr, user_size,
		    0, alloc_flags, vmk_flags,
		    tag,
		    IPC_PORT_NULL, 0, FALSE,
		    prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 *      with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */

map_file_retry:
		if (flags & MAP_RESILIENT_CODESIGN) {
			int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
			if (prot & reject_prot) {
				/*
				 * Would like to use (prot | maxprot) here
				 * but the assignment of VM_PROT_EXECUTE
				 * to maxprot above would always fail the test.
				 *
				 * Skipping the check is ok, however, because we
				 * restrict maxprot to prot just below in this
				 * block.
				 */
				assert(!mapanon);
				vnode_put(vp);
				error = EPERM;
				goto bad;
			}
			/* strictly limit access to "prot" */
			maxprot &= prot;
		}

		vm_object_offset_t end_pos = 0;
		if (os_add_overflow(user_size, file_pos, &end_pos)) {
			vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		result = vm_map_enter_mem_object_control(user_map,
		    &user_addr, user_size,
		    0, alloc_flags, vmk_flags,
		    tag,
		    control, file_pos,
		    docow, prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

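	/* Map the Mach VM result to the errno that mmap() callers expect. */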
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref) {
		fp_drop(p, fd, fp, 0);
	}

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
	    (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
	return error;
}

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	user_map = current_map();
	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
	if (mach_vm_range_overflows(addr, size)) {
		return EINVAL;
	}
	if (addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a list
		 * of all mmaps done.  We cannot use vm_map_entry, as entries
		 * could be split or coalesced by independent actions.  So
		 * instead of returning inaccurate results, let's just return
		 * an error for the invalid size specified.
		 */
		return EINVAL; /* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
		return EINVAL;
	}

	if (flags & MS_KILLPAGES) {
		sync_flags |= VM_SYNC_KILLPAGES;
	}
	if (flags & MS_DEACTIVATE) {
		sync_flags |= VM_SYNC_DEACTIVATE;
	}
	if (flags & MS_INVALIDATE) {
		sync_flags |= VM_SYNC_INVALIDATE;
	}

	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC) {
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		} else {
			sync_flags |= VM_SYNC_SYNCHRONOUS;
		}
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */

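	/* Hand the whole range to the Mach VM layer to perform the sync. */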
	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
		return ENOMEM;
	case KERN_FAILURE:
		return EIO;
	default:
		return EINVAL;
	}
	return 0;
}


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t        user_addr;
	mach_vm_size_t          user_size;
	kern_return_t           result;
	vm_map_t                user_map;

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (mach_vm_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

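	/* All checks passed: remove any mappings in the range from the task's map. */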
	result = mach_vm_deallocate(user_map, user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return EINVAL;
	}
	return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	vm_prot_t prot;
	mach_vm_offset_t        user_addr;
	mach_vm_size_t  user_size;
	kern_return_t   result;
	vm_map_t        user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

	if (mach_vm_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ) {
		prot |= VM_PROT_EXECUTE;
	}
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* 3936456 */

#if defined(__arm64__)
	if (prot & VM_PROT_STRIP_READ) {
		prot &= ~(VM_PROT_READ | VM_PROT_STRIP_READ);
	}
#endif

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for two reasons:
	 * without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so an mprotect
	 * check would offer no new security; and it is not possible to
	 * extract the vnode from the pager object(s) of the target memory
	 * range.  However, the MAC check may be used to prevent a process
	 * from, e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error) {
		return error;
	}
#endif

	if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/* The special protection value VM_PROT_TRUSTED requests that we treat
		 * this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing the
		 * mac_proc_check_mprotect() hook above.  Otherwise, code signing will
		 * be compromised because the check would always succeed, and thus any
		 * process could sign dynamically. */
		result = vm_map_sign(
			user_map,
			vm_map_trunc_page(user_addr,
			vm_map_page_mask(user_map)),
			vm_map_round_page(user_addr + user_size,
			vm_map_page_mask(user_map)));
		switch (result) {
		case KERN_SUCCESS:
			break;
		case KERN_INVALID_ADDRESS:
			/* UNIX SPEC: for an invalid address range, return ENOMEM */
			return ENOMEM;
		default:
			return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	prot &= ~VM_PROT_TRUSTED;

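	/* Apply the new protection; FALSE sets the current, not the maximum, protection. */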
	result = mach_vm_protect(user_map, user_addr, user_size,
	    FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return EINVAL;
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_inherit_t inherit;
	vm_map_t        user_map;
	kern_return_t   result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->inherit);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;
	if (mach_vm_range_overflows(addr, size)) {
		return EINVAL;
	}
	user_map = current_map();
	result = mach_vm_inherit(user_map, addr, size,
	    inherit);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	}
	return EINVAL;
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t   result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	case MADV_FREE:
		new_behavior = VM_BEHAVIOR_FREE;
		break;
	case MADV_ZERO_WIRED_PAGES:
		new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
		break;
	case MADV_FREE_REUSABLE:
		new_behavior = VM_BEHAVIOR_REUSABLE;
		break;
	case MADV_FREE_REUSE:
		new_behavior = VM_BEHAVIOR_REUSE;
		break;
	case MADV_CAN_REUSE:
		new_behavior = VM_BEHAVIOR_CAN_REUSE;
		break;
	case MADV_PAGEOUT:
#if MACH_ASSERT
		new_behavior = VM_BEHAVIOR_PAGEOUT;
		break;
#else /* MACH_ASSERT */
		return ENOTSUP;
#endif /* MACH_ASSERT */
	default:
		return EINVAL;
	}

	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
	if (mach_vm_range_overflows(start, size)) {
		return EINVAL;
	}
#if __arm64__
	if (start == 0 &&
	    size != 0 &&
	    (uap->behav == MADV_FREE ||
	    uap->behav == MADV_FREE_REUSABLE)) {
		printf("** FOURK_COMPAT: %d[%s] "
		    "failing madvise(0x%llx,0x%llx,%s)\n",
		    proc_getpid(p), p->p_comm, start, size,
		    ((uap->behav == MADV_FREE_REUSABLE)
		    ? "MADV_FREE_REUSABLE"
		    : "MADV_FREE"));
		DTRACE_VM3(fourk_compat_madvise,
		    uint64_t, start,
		    uint64_t, size,
		    int, uap->behav);
		return EINVAL;
	}
#endif /* __arm64__ */

	user_map = current_map();

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	}

	return EINVAL;
}

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
	vm_map_t map = VM_MAP_NULL;
	user_addr_t vec = 0;
	int error = 0;
	int64_t lastvecindex = 0;
	int mincoreinfo = 0;
	int pqueryinfo = 0;
	uint64_t pqueryinfo_vec_size = 0;
	vm_page_info_basic_t info = NULL;
	mach_msg_type_number_t count = 0;
	char *kernel_vec = NULL;
	uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
	kern_return_t kr = KERN_SUCCESS;
	int effective_page_shift, effective_page_size;

	map = current_map();

	/*
	 * On systems with 4k kernel space and 16k user space, we will
	 * use the kernel page size to report back the residency information.
	 * This is for backwards compatibility since we already have
	 * processes that depend on this behavior.
	 */
	if (vm_map_page_shift(map) < PAGE_SHIFT) {
		effective_page_shift = vm_map_page_shift(map);
		effective_page_size = vm_map_page_size(map);
	} else {
		effective_page_shift = PAGE_SHIFT;
		effective_page_size = PAGE_SIZE;
	}

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = vm_map_trunc_page(uap->addr,
	    vm_map_page_mask(map));
	end = vm_map_round_page(uap->addr + uap->len,
	    vm_map_page_mask(map));

	if (end < addr) {
		return EINVAL;
	}

	if (end == addr) {
		return 0;
	}

	/*
	 * We are going to loop through the whole 'req_vec_size' pages
	 * range in chunks of 'cur_vec_size'.
	 */

	req_vec_size_pages = (end - addr) >> effective_page_shift;
	cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
	size_t kernel_vec_size = cur_vec_size_pages;

	kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

	if (kernel_vec == NULL) {
		return ENOMEM;
	}

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

	info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

	if (info == NULL) {
		kfree_data(kernel_vec, kernel_vec_size);
		return ENOMEM;
	}

	while (addr < end) {
		cur_end = addr + (cur_vec_size_pages * effective_page_size);

		count = VM_PAGE_INFO_BASIC_COUNT;
		kr = vm_map_page_range_info_internal(map,
		    addr,
		    cur_end,
		    effective_page_shift,
		    VM_PAGE_INFO_BASIC,
		    (vm_page_info_t) info,
		    &count);

		assert(kr == KERN_SUCCESS);

		/*
		 * Do this on a map entry basis so that if the pages are not
		 * in the current process's address space, we can easily look
		 * up the pages elsewhere.
		 */
		lastvecindex = -1;

		for (; addr < cur_end; addr += effective_page_size) {
			pqueryinfo = info[lastvecindex + 1].disposition;

			mincoreinfo = 0;

			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
				mincoreinfo |= MINCORE_INCORE;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
				mincoreinfo |= MINCORE_REFERENCED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
				mincoreinfo |= MINCORE_MODIFIED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
				mincoreinfo |= MINCORE_PAGED_OUT;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
				mincoreinfo |= MINCORE_COPIED;
			}
			if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
				mincoreinfo |= MINCORE_ANONYMOUS;
			}
			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = (addr - first_addr) >> effective_page_shift;
			kernel_vec[vecindex] = (char)mincoreinfo;
			lastvecindex = vecindex;
		}


		assert(vecindex == (cur_vec_size_pages - 1));

		error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

		if (error) {
			break;
		}

		/*
		 * For the next chunk, we'll need:
		 * - bump the location in the user buffer for our next disposition.
		 * - new length
		 * - starting address
		 */
		vec += cur_vec_size_pages * sizeof(char);
		req_vec_size_pages = (end - addr) >> effective_page_shift;
		cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

		first_addr = addr;
	}

	kfree_data(info, pqueryinfo_vec_size);
	kfree_data(kernel_vec, kernel_vec_size);

	if (error) {
		return EFAULT;
	}

	return 0;
}

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t   result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	if (vm_map_range_overflows(addr, size)) {
		return EINVAL;
	}

	if (size == 0) {
		return 0;
	}

	user_map = current_map();
	pageoff = (addr & vm_map_page_mask(user_map));
	addr -= pageoff;
	size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE) {
		return EAGAIN;
	} else if (result == KERN_PROTECTION_FAILURE) {
		return EACCES;
	} else if (result != KERN_SUCCESS) {
		return ENOMEM;
	}

	return 0;       /* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t   result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();
	if (mach_vm_range_overflows(addr, size)) {
		return EINVAL;
	}
	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire_kernel(host_priv_self(), user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
	return result == KERN_SUCCESS ? 0 : ENOMEM;
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t    user_addr;
	mach_vm_size_t      user_size;
	kern_return_t       result;
	vm_map_t    user_map;
	uint32_t    cryptid;
	cpu_type_t  cputype;
	cpu_subtype_t       cpusubtype;
	pager_crypt_info_t  crypt_info;
	const char * cryptname = 0;
	char *vpath;
	int len, ret;
	struct proc_regioninfo_internal pinfo;
	vnode_t vp;
	uintptr_t vnodeaddr;
	uint32_t vid;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	cryptid = uap->cryptid;
	cputype = uap->cputype;
	cpusubtype = uap->cpusubtype;

	if (mach_vm_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	switch (cryptid) {
	case CRYPTID_NO_ENCRYPTION:
		/* not encrypted, just an empty load command */
		return 0;
	case CRYPTID_APP_ENCRYPTION:
	case CRYPTID_MODEL_ENCRYPTION:
		cryptname = "com.apple.unfree";
		break;
	case 0x10:
		/* some random cryptid that you could manually put into
		 * your binary if you want NULL */
		cryptname = "com.apple.null";
		break;
	default:
		return EINVAL;
	}

	if (NULL == text_crypter_create) {
		return ENOTSUP;
	}

	ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
	if (ret == 0 || !vnodeaddr) {
		/* No really, this returns 0 if the memory address is not backed by a file */
		return EINVAL;
	}

	vp = (vnode_t)vnodeaddr;
	if ((vnode_getwithvid(vp, vid)) == 0) {
		vpath = zalloc(ZV_NAMEI);

		len = MAXPATHLEN;
		ret = vn_getpath(vp, vpath, &len);
		if (ret) {
			zfree(ZV_NAMEI, vpath);
			vnode_put(vp);
			return ret;
		}

		vnode_put(vp);
	} else {
		return EINVAL;
	}

#if 0
	kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
	    __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

	if (user_size == 0) {
		printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
		zfree(ZV_NAMEI, vpath);
		return 0;
	}

	/* set up decrypter first */
	crypt_file_data_t crypt_data = {
		.filename = vpath,
		.cputype = cputype,
		.cpusubtype = cpusubtype
	};
	result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
	if (vm_map_debug_apple_protect) {
		printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
		    proc_getpid(p), p->p_comm,
		    user_map,
		    (uint64_t) user_addr,
		    (uint64_t) (user_addr + user_size),
		    __FUNCTION__, vpath, result);
	}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
	zfree(ZV_NAMEI, vpath);

	if (result) {
		printf("%s: unable to create decrypter %s, kr=%d\n",
		    __FUNCTION__, cryptname, result);
		if (result == kIOReturnNotPrivileged) {
			/* text encryption returned decryption failure */
			return EPERM;
		} else {
			return ENOMEM;
		}
	}

	/* now remap using the decrypter */
	vm_object_offset_t crypto_backing_offset;
	crypto_backing_offset = -1; /* i.e. use map entry's offset */
	result = vm_map_apple_protected(user_map,
	    user_addr,
	    user_addr + user_size,
	    crypto_backing_offset,
	    &crypt_info,
	    cryptid);
	if (result) {
		printf("%s: mapping failed with %d\n", __FUNCTION__, result);
	}

	if (result) {
		return EPERM;
	}
	return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */