xref: /xnu-11417.140.69/bsd/kern/kern_mman.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1988 University of Utah.
30  * Copyright (c) 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * the Systems Programming Group of the University of Utah Computer
35  * Science Department.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
66  *
67  *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
68  */
69 /*
70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71  * support for mandatory and extensible security protections.  This notice
72  * is included in support of clause 2.2 (b) of the Apple Public License,
73  * Version 2.0.
74  */
75 
76 /*
77  * Mapped file (mmap) interface to VM
78  */
79 
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/filedesc.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/resourcevar.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/acct.h>
88 #include <sys/wait.h>
89 #include <sys/file_internal.h>
90 #include <sys/vadvise.h>
91 #include <sys/trace.h>
92 #include <sys/mman.h>
93 #include <sys/conf.h>
94 #include <sys/stat.h>
95 #include <sys/ubc.h>
96 #include <sys/ubc_internal.h>
97 #include <sys/sysproto.h>
98 
99 #include <sys/syscall.h>
100 #include <sys/kdebug.h>
101 #include <sys/bsdtask_info.h>
102 
103 #include <security/audit/audit.h>
104 #include <bsm/audit_kevents.h>
105 
106 #include <mach/mach_types.h>
107 #include <mach/mach_traps.h>
108 #include <mach/vm_sync.h>
109 #include <mach/vm_behavior.h>
110 #include <mach/vm_inherit.h>
111 #include <mach/vm_statistics.h>
112 #include <mach/mach_vm.h>
113 #include <mach/vm_map.h>
114 #include <mach/host_priv.h>
115 #include <mach/sdt.h>
116 #include <mach-o/loader.h>
117 #include <mach/vm_types_unsafe.h>
118 
119 #include <machine/machine_routines.h>
120 
121 #include <kern/cpu_number.h>
122 #include <kern/host.h>
123 #include <kern/task.h>
124 #include <kern/page_decrypt.h>
125 
126 #include <IOKit/IOReturn.h>
127 #include <IOKit/IOBSD.h>
128 
129 #include <vm/vm_kern_xnu.h>
130 #include <vm/vm_map_xnu.h>
131 #include <vm/vm_pager_xnu.h>
132 #include <vm/vm_sanitize_internal.h>
133 
134 #if CONFIG_MACF
135 #include <security/mac_framework.h>
136 #endif
137 #include <os/overflow.h>
138 
139 /*
140  * this function implements the same logic as dyld's "dyld_fall_2020_os_versions"
141  * from dyld_priv.h. Basically, we attempt to draw the line of: "was this code
 * compiled with an SDK from fall of 2020 or later?"
143  */
144 static bool
proc_2020_fall_os_sdk_or_later(void)145 proc_2020_fall_os_sdk_or_later(void)
146 {
147 	const uint32_t proc_sdk_ver = proc_sdk(current_proc());
148 
149 	switch (proc_platform(current_proc())) {
150 	case PLATFORM_MACOS:
151 		return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
152 	case PLATFORM_IOS:
153 	case PLATFORM_IOSSIMULATOR:
154 	case PLATFORM_MACCATALYST:
155 		return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
156 	case PLATFORM_BRIDGEOS:
157 		return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
158 	case PLATFORM_TVOS:
159 	case PLATFORM_TVOSSIMULATOR:
160 		return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
161 	case PLATFORM_WATCHOS:
162 	case PLATFORM_WATCHOSSIMULATOR:
163 		return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
164 	default:
165 		/*
166 		 * tough call, but let's give new platforms the benefit of the doubt
167 		 * to avoid a re-occurence of rdar://89843927
168 		 */
169 		return true;
170 	}
171 }
172 
/*
 * Sanitize the user-supplied mmap() arguments (address, length, file
 * offset, protections) before any of them are used.
 *
 * On KERN_SUCCESS the outputs are:
 *   *prot      - requested protections, masked to VM_PROT_ALL
 *   *file_pos/*file_end/*file_size - unaligned file-offset quantities,
 *                validated against overflow with the kernel PAGE_MASK
 *   *user_addr/*user_end/*user_size - map-aligned user quantities
 *                (user_end/user_size only fully set on the MAP_FIXED path;
 *                 the "anywhere" path sets user_addr and user_size)
 */
static __attribute__((always_inline, warn_unused_result))
kern_return_t
mmap_sanitize(
	vm_map_t                user_map,
	vm_prot_ut              prot_u,
	vm_addr_struct_t        pos_u,
	vm_size_struct_t        len_u,
	vm_addr_struct_t        addr_u,
	int                     flags,
	vm_prot_t              *prot,
	vm_object_offset_t     *file_pos,
	vm_object_offset_t     *file_end,
	vm_map_size_t          *file_size,
	vm_map_offset_t        *user_addr,
	vm_map_offset_t        *user_end,
	vm_map_size_t          *user_size)
{
	kern_return_t           kr;
	vm_map_offset_t         user_mask = vm_map_page_mask(user_map);
	vm_sanitize_flags_t     vm_sanitize_flags;

	*prot = vm_sanitize_prot_bsd(prot_u, VM_SANITIZE_CALLER_MMAP);
	*prot &= VM_PROT_ALL;

	/*
	 * Check file_pos doesn't overflow with PAGE_MASK since VM objects use
	 * this page mask internally, and it can be wider than the user_map's.
	 */
	if (flags & MAP_UNIX03) {
		/* UNIX03 requires len != 0 to fail with EINVAL */
		vm_sanitize_flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS;
	} else {
		vm_sanitize_flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH;
	}

	kr = vm_sanitize_addr_size(pos_u, len_u, VM_SANITIZE_CALLER_MMAP, PAGE_MASK,
	    vm_sanitize_flags | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES,
	    file_pos, file_end, file_size);
	if (__improbable(kr != KERN_SUCCESS)) {
		return kr;
	}

	/*
	 * Check that file_pos is page aligned for the user page size when
	 * UNIX03 compliance is requested.
	 * The user page size may be different from the kernel page size we
	 * use to check for overflows in the sanitizer call above).
	 */
	if ((flags & MAP_UNIX03) && (*file_pos & user_mask)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (flags & MAP_FIXED) {
		kr = vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MMAP,
		    user_map,
		    VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
		    user_addr, user_end, user_size);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}

		/*
		 * Further validation since we allowed a misaligned user_addr
		 * for fixed mappings.
		 *
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by (file_pos & user_mask).
		 */
		if (!VM_SANITIZE_UNSAFE_IS_EQUAL(
			    addr_u, *user_addr + (*file_pos & user_mask))) {
			return KERN_INVALID_ARGUMENT;
		}
	} else {
		/*
		 * For "anywhere" mappings, the address is only a hint,
		 * mach_vm_map_kernel() will fail with KERN_NO_SPACE
		 * if user_addr + user_size overflows,
		 * and mmap will start scanning again.
		 *
		 * Unlike Mach VM APIs, the hint is taken as a strict
		 * "start" which is why we round the sanitized address up,
		 * rather than truncate.
		 */
		*user_addr = vm_sanitize_addr(user_map,
		    vm_sanitize_compute_ut_end(addr_u, user_mask));
		kr = vm_sanitize_size(pos_u, len_u, VM_SANITIZE_CALLER_MMAP,
		    user_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
		    user_size);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}
	}

	return KERN_SUCCESS;
}
268 
269 /*
270  * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
271  * XXX usage is PROT_* from an interface perspective.  Thus the values of
272  * XXX VM_PROT_* and PROT_* need to correspond.
273  */
/*
 * mmap() system call: establish an anonymous, POSIX-shm, or vnode-backed
 * mapping in the calling process's address space.
 *
 * Returns a BSD errno; on success, *retval is the mapped address with the
 * sub-page file offset added back in so callers see the alignment they
 * asked for.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc        *fp;
	struct vnode           *vp = NULLVP;
	int                     flags;
	int                     prot;
	int                     err = 0;
	vm_map_t                user_map;
	kern_return_t           result;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	boolean_t               docow;
	vm_prot_t               maxprot;
	void                   *handle;
	memory_object_t         pager = MEMORY_OBJECT_NULL;
	memory_object_control_t control;
	int                     mapanon = 0;
	int                     fpref = 0;      /* nonzero once fp_lookup() took a ref we must drop */
	int                     error = 0;
	int                     fd = uap->fd;
	int                     num_retries = 0;
	kern_return_t           kr;
	/* page-aligned "user_map" quantities */
	vm_map_offset_t         user_addr, user_end, user_mask;
	vm_map_size_t           user_size;
	/* unaligned "file" quantities */
	vm_object_offset_t      file_pos, file_end;
	vm_map_size_t           file_size;

	/*
	 * Note that for UNIX03 conformance, there is additional parameter checking for
	 * mmap() system call in libsyscall prior to entering the kernel.  The sanity
	 * checks and argument validation done in this function are not the only places
	 * one can get returned errnos.
	 */

	user_map  = current_map();
	flags     = uap->flags;
	user_mask = vm_map_page_mask(user_map);

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
	AUDIT_ARG(fd, uap->fd);

	/*
	 * Sanitize any input parameters that are addr/size/protections
	 */
	kr = mmap_sanitize(user_map,
	    uap->prot,
	    uap->pos,
	    uap->len,
	    uap->addr,
	    flags,
	    &prot,
	    &file_pos,
	    &file_end,
	    &file_size,
	    &user_addr,
	    &user_end,
	    &user_size);
	if (__improbable(kr != KERN_SUCCESS)) {
		assert(vm_sanitize_get_kr(kr));
		return EINVAL;
	}

#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* radar 3777787 */

	/*
	 * verify no unknown flags are passed in, and if any are,
	 * fail out early to make sure the logic below never has to deal
	 * with invalid flag values. only do so for processes compiled
	 * with Fall 2020 or later SDK, which is where we drew this
	 * line and documented it as such.
	 */
	if (flags & ~(MAP_SHARED |
	    MAP_PRIVATE |
	    MAP_COPY |
	    MAP_FIXED |
	    MAP_RENAME |
	    MAP_NORESERVE |
	    MAP_RESERVED0080 |                                  //grandfathered in as accepted and ignored
	    MAP_NOEXTEND |
	    MAP_HASSEMAPHORE |
	    MAP_NOCACHE |
	    MAP_JIT |
	    MAP_TPRO |
	    MAP_FILE |
	    MAP_ANON |
	    MAP_RESILIENT_CODESIGN |
	    MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
	    MAP_32BIT |
#endif
	    MAP_TRANSLATED_ALLOW_EXECUTE |
	    MAP_UNIX03)) {
		if (proc_2020_fall_os_sdk_or_later()) {
			return EINVAL;
		}
	}


	if (flags & MAP_UNIX03) {
		/*
		 * Enforce UNIX03 compliance.
		 */
		if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
			/* need either MAP_PRIVATE or MAP_SHARED */
			return EINVAL;
		}
	}


	/* MAP_JIT is anonymous, private, non-fixed only */
	if (flags & MAP_JIT) {
		if ((flags & MAP_FIXED) ||
		    (flags & MAP_SHARED) ||
		    !(flags & MAP_ANON) ||
		    (flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}

	if ((flags & MAP_RESILIENT_CODESIGN) ||
	    (flags & MAP_RESILIENT_MEDIA)) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_JIT) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}
	if (flags & MAP_RESILIENT_CODESIGN) {
		int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
		if (prot & reject_prot) {
			/*
			 * Quick sanity check. maxprot is calculated below and
			 * we will test it again.
			 */
			return EPERM;
		}
	}
	if (flags & MAP_SHARED) {
		/*
		 * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
		 * there is no place to inject zero-filled pages without
		 * actually adding them to the file.
		 * Since we didn't reject that combination before, there might
		 * already be callers using it and getting a valid MAP_SHARED
		 * mapping but without the resilience.
		 * For backwards compatibility's sake, let's keep ignoring
		 * MAP_RESILIENT_MEDIA in that case.
		 */
		flags &= ~MAP_RESILIENT_MEDIA;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_SHARED)) {
			return EINVAL;
		}
	}
	if (flags & MAP_TPRO) {
		/*
		 * MAP_TPRO without VM_PROT_WRITE is not valid here because
		 * the TPRO mapping is handled at the PMAP layer with implicit RW
		 * protections.
		 *
		 * This would enable bypassing of file-based protections, i.e.
		 * a file open/mapped as read-only could be written to.
		 */
		if ((prot & VM_PROT_EXECUTE) ||
		    !(prot & VM_PROT_WRITE)) {
			return EPERM;
		}
	}

	/* Entitlement check against code signing monitor */
	if ((flags & MAP_JIT) && (vm_map_csm_allow_jit(user_map) != KERN_SUCCESS)) {
		printf("[%d] code signing monitor denies JIT mapping\n", proc_pid(p));
		return EPERM;
	}

	if (flags & MAP_ANON) {
		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, current_cached_proc_cred(p),
		    user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			int vm_flags = fd & (VM_FLAGS_ALIAS_MASK |
			    VM_FLAGS_SUPERPAGE_MASK |
			    VM_FLAGS_PURGABLE |
			    VM_FLAGS_4GB_CHUNK);

			if (vm_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}

			/*
			 * vm_map_kernel_flags_set_vmflags() will assume that
			 * the full set of VM flags are passed, which is
			 * problematic for FIXED/ANYWHERE.
			 *
			 * The block handling MAP_FIXED below will do the same
			 * thing again which is fine because it's idempotent.
			 */
			if (flags & MAP_FIXED) {
				vm_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
			} else {
				vm_flags |= VM_FLAGS_ANYWHERE;
			}
			vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags);
		}

#if CONFIG_MAP_RANGES
		/*
		 * if the client specified a tag, let the system policy apply.
		 *
		 * otherwise, force the heap range.
		 */
		if (vmk_flags.vm_tag) {
			vm_map_kernel_flags_update_range_id(&vmk_flags, user_map, user_size);
		} else {
			vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
		}
#endif /* CONFIG_MAP_RANGES */

		handle = NULL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT) {
			return EINVAL;
		}

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err) {
			return err;
		}
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
		case DTYPE_PSXSHM:
			/* POSIX shared memory objects take their own mapping path */
			error = pshm_mmap(p, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr),
			    user_size, prot, flags, fp,
			    vm_map_trunc_page(file_pos, user_mask),
			    file_pos & user_mask, retval);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp_get_data(fp);
		error = vnode_getwithref(vp);
		if (error != 0) {
			goto bad;
		}

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;      /* TODO: Remove this and restrict maxprot? */
			if (fp->fp_glob->fg_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0) {
						maxprot |= VM_PROT_WRITE;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else {
				/* private (CoW) mapping: writes never reach the file */
				maxprot |= VM_PROT_WRITE;
			}

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->fp_glob, prot, flags, file_pos, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
			/*
			 * Consult the file system to determine if this
			 * particular file object can be mapped.
			 *
			 * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
			 * then we don't check for writeability on the file
			 * object, because it will only ever see reads.
			 */
			error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
			    (prot & ~PROT_WRITE) : prot, ctx);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
		}

		/*
		 * No copy-on-read for mmap() mappings themselves.
		 */
		vmk_flags.vmkf_no_copy_on_read = 1;
#if CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX
		/* force file ranges on !macOS */
		vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#if XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT
		/*
		 * Put allocations on iOS with EXTENDED_USER_VA_SUPPORT
		 * in the large file range, if the process has the "extra jumbo" entitlement.
		 * Otherwise, place allocation into the heap range.
		 */
		vmk_flags.vmkf_range_id = UMEM_RANGE_ID_LARGE_FILE;
#endif /* XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT */
#endif /* CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX */
	}

	/* zero-length request (non-UNIX03) succeeds without creating a mapping */
	if (user_size == 0) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = 0;
		goto bad;
	}

	if (flags & MAP_FIXED) {
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		vmk_flags.vmf_fixed = true;
		vmk_flags.vmf_overwrite = true;
	}

	if (flags & MAP_NOCACHE) {
		vmk_flags.vmf_no_cache = true;
	}

	if (flags & MAP_JIT) {
		vmk_flags.vmkf_map_jit = TRUE;
	}

	if (flags & MAP_TPRO) {
		vmk_flags.vmf_tpro = true;
	}

#if CONFIG_ROSETTA
	if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
		/* only translated (Rosetta) processes may request this */
		if (!proc_is_translated(p)) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		vmk_flags.vmkf_translated_allow_execute = TRUE;
	}
#endif

	if (flags & MAP_RESILIENT_CODESIGN) {
		vmk_flags.vmf_resilient_codesign = true;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		vmk_flags.vmf_resilient_media = true;
	}

#if XNU_TARGET_OS_OSX
	/* macOS-specific MAP_32BIT flag handling */
	if (flags & MAP_32BIT) {
		vmk_flags.vmkf_32bit_map_va = TRUE;
	}
#endif

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		/* anonymous mapping */
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */
map_anon_retry:

		result = mach_vm_map_kernel(user_map,
		    vm_sanitize_wrap_addr_ref(&user_addr), user_size,
		    0, vmk_flags,
		    IPC_PORT_NULL, 0, FALSE,
		    prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

#if FBDP_DEBUG_OBJECT_NO_PAGER
//#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/202"
#define FBDP_FILE_NAME1 "icutz44l.dat"
#define FBDP_PATH_NAME2 "/private/var/mobile/Containers/Data/InternalDaemon/"
#define FBDP_FILE_NAME_START2 "com.apple.LaunchServices-"
#define FBDP_FILE_NAME_END2 "-v2.csstore"
		if (!strncmp(vp->v_name, FBDP_FILE_NAME1, strlen(FBDP_FILE_NAME1))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME1, strlen(FBDP_PATH_NAME1))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		} else if (!strncmp(vp->v_name, FBDP_FILE_NAME_START2, strlen(FBDP_FILE_NAME_START2)) &&
		    strlen(vp->v_name) > strlen(FBDP_FILE_NAME_START2) + strlen(FBDP_FILE_NAME_END2) &&
		    !strncmp(vp->v_name + strlen(vp->v_name) - strlen(FBDP_FILE_NAME_END2),
		    FBDP_FILE_NAME_END2,
		    strlen(FBDP_FILE_NAME_END2))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME2, strlen(FBDP_PATH_NAME2))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		}
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 *      with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		/* copy-on-write for private file mappings (not anon, not shared) */
		docow = FALSE;
		if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */

map_file_retry:
		if (flags & MAP_RESILIENT_CODESIGN) {
			int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
			if (prot & reject_prot) {
				/*
				 * Would like to use (prot | maxprot) here
				 * but the assignment of VM_PROT_EXECUTE
				 * to maxprot above would always fail the test.
				 *
				 * Skipping the check is ok, however, because we
				 * restrict maxprot to prot just below in this
				 * block.
				 */
				assert(!mapanon);
				vnode_put(vp);
				error = EPERM;
				goto bad;
			}
			/* strictly limit access to "prot" */
			maxprot &= prot;
		}

		result = vm_map_enter_mem_object_control(user_map,
		    vm_sanitize_wrap_addr_ref(&user_addr), user_size,
		    0, vmk_flags,
		    control, vm_map_trunc_page(file_pos, user_mask),
		    docow, prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	/* translate the Mach result into a BSD errno */
	switch (result) {
	case KERN_SUCCESS:
		/* re-add the sub-page file offset to the page-aligned address */
		*retval = user_addr + (file_pos & user_mask);
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error =  ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error =  EACCES;
		break;
	default:
		error =  EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref) {
		fp_drop(p, fd, fp, 0);
	}

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
	    (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
	return error;
}
988 
int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	/*
	 * msync() is a pthread cancellation point: mark the thread as
	 * cancelable, then forward to the non-cancelable implementation.
	 * The args structs are layout-compatible, hence the direct cast.
	 */
	__pthread_testcancel(1);
	return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}
995 
996 static __attribute__((always_inline, warn_unused_result))
997 kern_return_t
msync_sanitize(vm_map_t user_map,user_addr_ut addr_u,user_size_ut len_u,mach_vm_offset_t * addr,mach_vm_offset_t * size)998 msync_sanitize(
999 	vm_map_t                user_map,
1000 	user_addr_ut            addr_u,
1001 	user_size_ut            len_u,
1002 	mach_vm_offset_t       *addr,
1003 	mach_vm_offset_t       *size)
1004 {
1005 	mach_vm_offset_t        end;
1006 
1007 	/*
1008 	 * UNIX SPEC: user address is not page-aligned, return EINVAL
1009 	 *
1010 	 * len == 0
1011 	 *   FreeBSD and NetBSD support msync with a length of zero to
1012 	 *   sync all pages within the region containing the address.
1013 	 *   We cannot support this mode without maintaining a list all
1014 	 *   mmaps performed. (Our list of vm_map_entry is not suitable
1015 	 *   because they may be split or coalesced for other reasons.)
1016 	 *   We therefore reject len==0 with an error, instead of
1017 	 *   doing the wrong thing or silently doing nothing.
1018 	 *
1019 	 *   Platforms that do not mention len==0 in their man pages,
1020 	 *   and are thus presumed not to support that mode either:
1021 	 *     Linux, Solaris, POSIX
1022 	 */
1023 	return vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MSYNC,
1024 	           user_map,
1025 	           VM_SANITIZE_FLAGS_CHECK_ALIGNED_START |
1026 	           VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS,
1027 	           addr, &end, size);
1028 }
1029 
1030 int
msync_nocancel(__unused proc_t p,struct msync_nocancel_args * uap,__unused int32_t * retval)1031 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
1032 {
1033 	mach_vm_offset_t addr;
1034 	mach_vm_size_t size;
1035 	kern_return_t kr;
1036 	int flags;
1037 	vm_map_t user_map;
1038 	int rv;
1039 	vm_sync_t sync_flags = 0;
1040 
1041 	user_map = current_map();
1042 	flags    = uap->flags;
1043 
1044 	/*
1045 	 * Sanitize all input parameters that are addr/offset/size/prot/inheritance
1046 	 */
1047 	kr = msync_sanitize(user_map,
1048 	    uap->addr,
1049 	    uap->len,
1050 	    &addr,
1051 	    &size);
1052 
1053 #if XNU_TARGET_OS_OSX
1054 	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
1055 #endif /* XNU_TARGET_OS_OSX */
1056 
1057 	if (__improbable(kr != KERN_SUCCESS)) {
1058 		assert(vm_sanitize_get_kr(kr));
1059 		return EINVAL;
1060 	}
1061 
1062 	/* disallow contradictory flags */
1063 	if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
1064 		return EINVAL;
1065 	}
1066 
1067 	if (flags & MS_KILLPAGES) {
1068 		sync_flags |= VM_SYNC_KILLPAGES;
1069 	}
1070 	if (flags & MS_DEACTIVATE) {
1071 		sync_flags |= VM_SYNC_DEACTIVATE;
1072 	}
1073 	if (flags & MS_INVALIDATE) {
1074 		sync_flags |= VM_SYNC_INVALIDATE;
1075 	}
1076 
1077 	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
1078 		if (flags & MS_ASYNC) {
1079 			sync_flags |= VM_SYNC_ASYNCHRONOUS;
1080 		} else {
1081 			sync_flags |= VM_SYNC_SYNCHRONOUS;
1082 		}
1083 	}
1084 
1085 	sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */
1086 
1087 	rv = mach_vm_msync(user_map, addr, size, sync_flags);
1088 
1089 	switch (rv) {
1090 	case KERN_SUCCESS:
1091 		break;
1092 	case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
1093 		return ENOMEM;
1094 	case KERN_FAILURE:
1095 		return EIO;
1096 	default:
1097 		return EINVAL;
1098 	}
1099 	return 0;
1100 }
1101 
1102 static __attribute__((always_inline, warn_unused_result))
1103 kern_return_t
munmap_sanitize(vm_map_t user_map,vm_addr_struct_t addr_u,vm_size_struct_t len_u,mach_vm_offset_t * user_addr,mach_vm_offset_t * user_end,mach_vm_size_t * user_size)1104 munmap_sanitize(
1105 	vm_map_t                user_map,
1106 	vm_addr_struct_t        addr_u,
1107 	vm_size_struct_t        len_u,
1108 	mach_vm_offset_t       *user_addr,
1109 	mach_vm_offset_t       *user_end,
1110 	mach_vm_size_t         *user_size)
1111 {
1112 	return vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MUNMAP,
1113 	           user_map,
1114 	           VM_SANITIZE_FLAGS_CHECK_ALIGNED_START |
1115 	           VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS,
1116 	           user_addr, user_end, user_size);
1117 }
1118 
1119 int
munmap(__unused proc_t p,struct munmap_args * uap,__unused int32_t * retval)1120 munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
1121 {
1122 	mach_vm_offset_t        user_addr, user_end;
1123 	mach_vm_size_t          user_size;
1124 	kern_return_t           result;
1125 	vm_map_t                user_map;
1126 
1127 	user_map = current_map();
1128 
1129 	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
1130 	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
1131 
1132 	/*
1133 	 * Sanitize any input parameters that are addr/size/protections
1134 	 */
1135 	result = munmap_sanitize(user_map,
1136 	    uap->addr,
1137 	    uap->len,
1138 	    &user_addr,
1139 	    &user_end,
1140 	    &user_size);
1141 	if (__improbable(result != KERN_SUCCESS)) {
1142 		assert(vm_sanitize_get_kr(result) ==
1143 		    KERN_INVALID_ARGUMENT);
1144 		return EINVAL;
1145 	}
1146 	if (mach_vm_deallocate(user_map, user_addr, user_size)) {
1147 		return EINVAL;
1148 	}
1149 	return 0;
1150 }
1151 
1152 static __attribute__((always_inline, warn_unused_result))
1153 kern_return_t
mprotect_sanitize(vm_map_t user_map,mach_vm_offset_ut user_addr_u,mach_vm_size_ut user_size_u,vm_prot_ut prot_u,mach_vm_offset_t * user_addr,mach_vm_offset_t * user_end_aligned,mach_vm_size_t * user_size,vm_prot_t * prot)1154 mprotect_sanitize(
1155 	vm_map_t                 user_map,
1156 	mach_vm_offset_ut        user_addr_u,
1157 	mach_vm_size_ut          user_size_u,
1158 	vm_prot_ut               prot_u,
1159 	mach_vm_offset_t        *user_addr,
1160 	mach_vm_offset_t        *user_end_aligned,
1161 	mach_vm_size_t          *user_size,
1162 	vm_prot_t               *prot)
1163 {
1164 	kern_return_t            result;
1165 
1166 	/*
1167 	 * Validate addr and size. Use VM_SANITIZE_FLAGS_CHECK_ALIGNED_START to
1168 	 * check unaligned start due to UNIX SPEC: user address is not page-aligned,
1169 	 * return EINVAL
1170 	 */
1171 	vm_sanitize_flags_t     flags = VM_SANITIZE_FLAGS_CHECK_ALIGNED_START |
1172 	    VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH;
1173 
1174 
1175 	result = vm_sanitize_addr_size(user_addr_u, user_size_u,
1176 	    VM_SANITIZE_CALLER_MPROTECT, user_map, flags,
1177 	    user_addr, user_end_aligned, user_size);
1178 	if (__improbable(result != KERN_SUCCESS)) {
1179 		return result;
1180 	}
1181 
1182 	/* prot is sanitized by masking out invalid flags; it cannot fail. */
1183 	*prot = vm_sanitize_prot_bsd(prot_u, VM_SANITIZE_CALLER_MPROTECT);
1184 
1185 	return KERN_SUCCESS;
1186 }
1187 
/*
 * mprotect system call: change the protection of a range of the
 * calling process's address space.  Returns 0 on success, or
 * EINVAL / ENOMEM / EACCES (plus ENOTSUP / MACF errors) per the
 * switch statements below.
 */
int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	vm_prot_t                prot;
	mach_vm_offset_ut        user_addr_u;
	mach_vm_size_ut          user_size_u;
	vm_prot_ut               prot_u;
	mach_vm_offset_t         user_addr;
	mach_vm_offset_t         user_end_aligned;
	mach_vm_size_t           user_size;
	kern_return_t            result;
	vm_map_t                 user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr_u = uap->addr;
	user_size_u = uap->len;
	prot_u = vm_sanitize_wrap_prot((vm_prot_t)uap->prot);

	/*
	 * Sanitize any input parameters that are addr/size/prot/inheritance
	 */
	result = mprotect_sanitize(user_map,
	    user_addr_u,
	    user_size_u,
	    prot_u,
	    &user_addr,
	    &user_end_aligned,
	    &user_size,
	    &prot);
	if (__improbable(result != KERN_SUCCESS)) {
		/* map the sanitizer's kern_return_t to a BSD errno */
		result = vm_sanitize_get_kr(result);
		switch (result) {
		case KERN_SUCCESS:
			/* sanitizer says "nothing to do" (e.g. zero size) */
			return 0;
		case KERN_INVALID_ADDRESS:
			/* UNIX SPEC: for an invalid address range, return ENOMEM */
			return ENOMEM;
		case KERN_INVALID_ARGUMENT:
			return EINVAL;
		default:
			return EINVAL;
		}
	}

	/* user_size may be zero here */

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ) {
		prot |= VM_PROT_EXECUTE;
	}
#endif
#endif /* notyet */

#if 3936456
	/* radar 3936456: write or execute permission implies read */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* 3936456 */

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error) {
		return error;
	}
#endif

	if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/* The special protection value VM_PROT_TRUSTED requests that we treat
		 * this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing the
		 * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be
		 * compromised because the check would always succeed and thusly any
		 * process could sign dynamically. */
		result = vm_map_sign(
			user_map,
			user_addr,
			user_end_aligned);
		switch (result) {
		case KERN_SUCCESS:
			break;
		case KERN_INVALID_ADDRESS:
			/* UNIX SPEC: for an invalid address range, return ENOMEM */
			return ENOMEM;
		default:
			return EINVAL;
		}
#else
		/* dynamic code signing not configured in this kernel */
		return ENOTSUP;
#endif
	}
	/* VM_PROT_TRUSTED is a request bit only; strip it before the VM call */
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
	    false, prot);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return EINVAL;
}
1315 
1316 static __attribute__((always_inline, warn_unused_result))
1317 kern_return_t
minherit_sanitize(vm_map_t user_map,mach_vm_offset_ut addr_u,mach_vm_size_ut size_u,vm_inherit_ut inherit_u,mach_vm_offset_t * addr,mach_vm_size_t * size,vm_inherit_t * inherit)1318 minherit_sanitize(
1319 	vm_map_t                 user_map,
1320 	mach_vm_offset_ut        addr_u,
1321 	mach_vm_size_ut          size_u,
1322 	vm_inherit_ut            inherit_u,
1323 	mach_vm_offset_t        *addr,
1324 	mach_vm_size_t          *size,
1325 	vm_inherit_t            *inherit)
1326 {
1327 	kern_return_t            result;
1328 	mach_vm_offset_t         addr_end;
1329 
1330 	vm_sanitize_flags_t flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH;
1331 
1332 
1333 	result = vm_sanitize_addr_size(addr_u, size_u, VM_SANITIZE_CALLER_MINHERIT,
1334 	    user_map, flags, addr, &addr_end, size);
1335 	if (__improbable(result != KERN_SUCCESS)) {
1336 		return result;
1337 	}
1338 	result = vm_sanitize_inherit(inherit_u, VM_SANITIZE_CALLER_MINHERIT,
1339 	    inherit);
1340 	if (__improbable(result != KERN_SUCCESS)) {
1341 		return result;
1342 	}
1343 
1344 	return KERN_SUCCESS;
1345 }
1346 
1347 int
minherit(__unused proc_t p,struct minherit_args * uap,__unused int32_t * retval)1348 minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
1349 {
1350 	mach_vm_offset_ut addr_u;
1351 	mach_vm_size_ut size_u;
1352 	vm_inherit_ut inherit_u;
1353 	vm_map_t        user_map;
1354 	kern_return_t   result;
1355 	mach_vm_offset_t addr;
1356 	mach_vm_size_t size;
1357 	vm_inherit_t inherit;
1358 
1359 	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
1360 	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
1361 	AUDIT_ARG(value32, uap->inherit);
1362 
1363 	user_map = current_map();
1364 	addr_u = uap->addr;
1365 	size_u = uap->len;
1366 	inherit_u = vm_sanitize_wrap_inherit((vm_inherit_t)uap->inherit);
1367 
1368 	/*
1369 	 * Sanitize all input parameters that are addr/offset/size/prot/inheritance
1370 	 */
1371 	result = minherit_sanitize(user_map,
1372 	    addr_u,
1373 	    size_u,
1374 	    inherit_u,
1375 	    &addr,
1376 	    &size,
1377 	    &inherit);
1378 	if (__improbable(result != KERN_SUCCESS)) {
1379 		assert(vm_sanitize_get_kr(result) == KERN_INVALID_ARGUMENT);
1380 		return EINVAL;
1381 	}
1382 
1383 	result = mach_vm_inherit(user_map, addr, size, inherit);
1384 	switch (result) {
1385 	case KERN_SUCCESS:
1386 		return 0;
1387 	case KERN_PROTECTION_FAILURE:
1388 		return EACCES;
1389 	}
1390 	return EINVAL;
1391 }
1392 
1393 static __attribute__((always_inline, warn_unused_result))
1394 kern_return_t
madvise_sanitize(vm_map_t user_map,vm_addr_struct_t addr_u,vm_size_struct_t len_u,mach_vm_offset_t * start,mach_vm_offset_t * end,mach_vm_size_t * size)1395 madvise_sanitize(
1396 	vm_map_t                user_map,
1397 	vm_addr_struct_t        addr_u,
1398 	vm_size_struct_t        len_u,
1399 	mach_vm_offset_t       *start,
1400 	mach_vm_offset_t       *end,
1401 	mach_vm_size_t         *size)
1402 {
1403 	vm_sanitize_flags_t     flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH;
1404 
1405 
1406 	return vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MADVISE,
1407 	           user_map, flags, start, end, size);
1408 }
1409 
/*
 * madvise system call: pass paging/usage advice for a range of the
 * calling process's address space down to the Mach VM layer.
 */
int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start, end;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t   result;

	user_map = current_map();

	result = madvise_sanitize(user_map, uap->addr, uap->len, &start, &end, &size);
	if (__improbable(result != KERN_SUCCESS)) {
		assert(vm_sanitize_get_kr(result) == KERN_INVALID_ARGUMENT);
		return EINVAL;
	}
	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	/* translate BSD MADV_* advice to the Mach VM_BEHAVIOR_* value */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	case MADV_FREE:
		new_behavior = VM_BEHAVIOR_FREE;
		break;
	case MADV_ZERO_WIRED_PAGES:
		new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
		break;
	case MADV_FREE_REUSABLE:
		new_behavior = VM_BEHAVIOR_REUSABLE;
		break;
	case MADV_FREE_REUSE:
		new_behavior = VM_BEHAVIOR_REUSE;
		break;
	case MADV_CAN_REUSE:
		new_behavior = VM_BEHAVIOR_CAN_REUSE;
		break;
	case MADV_PAGEOUT:
		/* MADV_PAGEOUT is only honored on MACH_ASSERT (debug) kernels */
#if MACH_ASSERT
		new_behavior = VM_BEHAVIOR_PAGEOUT;
		break;
#else /* MACH_ASSERT */
		return ENOTSUP;
#endif /* MACH_ASSERT */
	case MADV_ZERO:
		new_behavior = VM_BEHAVIOR_ZERO;
		break;
	default:
		return EINVAL;
	}

#if __arm64__
	/*
	 * Refuse MADV_FREE / MADV_FREE_REUSABLE starting at address 0:
	 * log the caller and fail rather than act on the NULL page.
	 */
	if (start == 0 &&
	    size != 0 &&
	    (uap->behav == MADV_FREE ||
	    uap->behav == MADV_FREE_REUSABLE)) {
		printf("** %s: %d[%s] "
		    "failing madvise(0x%llx,0x%llx,%s)\n",
		    __func__, proc_getpid(p), p->p_comm, start, size,
		    ((uap->behav == MADV_FREE_REUSABLE)
		    ? "MADV_FREE_REUSABLE"
		    : "MADV_FREE"));
		return EINVAL;
	}
#endif /* __arm64__ */

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	/* map the VM layer's kern_return_t to a BSD errno */
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EPERM;
	case KERN_NO_ACCESS:
		return ENOTSUP;
	}

	return EINVAL;
}
1506 
1507 static __attribute__((always_inline, warn_unused_result))
1508 kern_return_t
mincore_sanitize(vm_map_t map,mach_vm_offset_ut addr_u,mach_vm_size_ut len_u,mach_vm_offset_t * addr,mach_vm_offset_t * end,mach_vm_size_t * size)1509 mincore_sanitize(
1510 	vm_map_t                 map,
1511 	mach_vm_offset_ut        addr_u,
1512 	mach_vm_size_ut          len_u,
1513 	mach_vm_offset_t        *addr,
1514 	mach_vm_offset_t        *end,
1515 	mach_vm_size_t          *size)
1516 {
1517 	vm_sanitize_flags_t flags = VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS;
1518 
1519 	return vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MINCORE,
1520 	           map, flags, addr, end, size);
1521 }
1522 
1523 int
mincore(__unused proc_t p,struct mincore_args * uap,__unused int32_t * retval)1524 mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
1525 {
1526 	mach_vm_offset_t addr = 0, end = 0, cur_end = 0;
1527 	mach_vm_size_t size;
1528 	vm_map_t map = VM_MAP_NULL;
1529 	user_addr_t vec = 0;
1530 	int error = 0;
1531 	int64_t lastvecindex = 0;
1532 	int mincoreinfo = 0;
1533 	int pqueryinfo = 0;
1534 	uint64_t pqueryinfo_vec_size = 0;
1535 	vm_page_info_basic_t info = NULL;
1536 	mach_msg_type_number_t count = 0;
1537 	char *kernel_vec = NULL;
1538 	uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
1539 	kern_return_t kr = KERN_SUCCESS;
1540 	int effective_page_shift, effective_page_size;
1541 
1542 	map = current_map();
1543 
1544 	/*
1545 	 * Make sure that the addresses presented are valid for user
1546 	 * mode.
1547 	 */
1548 	kr = mincore_sanitize(map,
1549 	    uap->addr,
1550 	    uap->len,
1551 	    &addr,
1552 	    &end,
1553 	    &size);
1554 	if (__improbable(kr != KERN_SUCCESS)) {
1555 		return vm_sanitize_get_kr(kr) ? EINVAL : 0;
1556 	}
1557 
1558 	/*
1559 	 * On systems with 4k kernel space and 16k user space, we will
1560 	 * use the kernel page size to report back the residency information.
1561 	 * This is for backwards compatibility since we already have
1562 	 * processes that depend on this behavior.
1563 	 */
1564 	if (vm_map_page_shift(map) < PAGE_SHIFT) {
1565 		effective_page_shift = vm_map_page_shift(map);
1566 		effective_page_size = vm_map_page_size(map);
1567 	} else {
1568 		effective_page_shift = PAGE_SHIFT;
1569 		effective_page_size = PAGE_SIZE;
1570 	}
1571 
1572 	/*
1573 	 * We are going to loop through the whole 'req_vec_size' pages
1574 	 * range in chunks of 'cur_vec_size'.
1575 	 */
1576 
1577 	req_vec_size_pages = size >> effective_page_shift;
1578 	cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
1579 	size_t kernel_vec_size = cur_vec_size_pages;
1580 
1581 	kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);
1582 
1583 	if (kernel_vec == NULL) {
1584 		return ENOMEM;
1585 	}
1586 
1587 	/*
1588 	 * Address of byte vector
1589 	 */
1590 	vec = uap->vec;
1591 
1592 	pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);
1593 
1594 	info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);
1595 
1596 	if (info == NULL) {
1597 		kfree_data(kernel_vec, kernel_vec_size);
1598 		return ENOMEM;
1599 	}
1600 
1601 	while (addr < end) {
1602 		mach_vm_offset_t first_addr = addr;
1603 
1604 		cur_end = addr + (cur_vec_size_pages * effective_page_size);
1605 
1606 		count =  VM_PAGE_INFO_BASIC_COUNT;
1607 		kr = vm_map_page_range_info_internal(map,
1608 		    addr,
1609 		    cur_end,
1610 		    effective_page_shift,
1611 		    VM_PAGE_INFO_BASIC,
1612 		    (vm_page_info_t) info,
1613 		    &count);
1614 
1615 		assert(kr == KERN_SUCCESS);
1616 
1617 		/*
1618 		 * Do this on a map entry basis so that if the pages are not
1619 		 * in the current processes address space, we can easily look
1620 		 * up the pages elsewhere.
1621 		 */
1622 		lastvecindex = -1;
1623 
1624 		for (; addr < cur_end; addr += effective_page_size) {
1625 			pqueryinfo = info[lastvecindex + 1].disposition;
1626 
1627 			mincoreinfo = 0;
1628 
1629 			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
1630 				mincoreinfo |= MINCORE_INCORE;
1631 			}
1632 			if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
1633 				mincoreinfo |= MINCORE_REFERENCED;
1634 			}
1635 			if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
1636 				mincoreinfo |= MINCORE_MODIFIED;
1637 			}
1638 			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
1639 				mincoreinfo |= MINCORE_PAGED_OUT;
1640 			}
1641 			if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
1642 				mincoreinfo |= MINCORE_COPIED;
1643 			}
1644 			if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
1645 				mincoreinfo |= MINCORE_ANONYMOUS;
1646 			}
1647 			/*
1648 			 * calculate index into user supplied byte vector
1649 			 */
1650 			vecindex = (addr - first_addr) >> effective_page_shift;
1651 			kernel_vec[vecindex] = (char)mincoreinfo;
1652 			lastvecindex = vecindex;
1653 		}
1654 
1655 
1656 		assert(vecindex == (cur_vec_size_pages - 1));
1657 
1658 		error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);
1659 
1660 		if (error) {
1661 			break;
1662 		}
1663 
1664 		/*
1665 		 * For the next chunk, we'll need:
1666 		 * - bump the location in the user buffer for our next disposition.
1667 		 * - new length
1668 		 * - starting address
1669 		 */
1670 		vec += cur_vec_size_pages * sizeof(char);
1671 		req_vec_size_pages = (end - addr) >> effective_page_shift;
1672 		cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
1673 
1674 		first_addr = addr;
1675 	}
1676 
1677 	kfree_data(info, pqueryinfo_vec_size);
1678 	kfree_data(kernel_vec, kernel_vec_size);
1679 
1680 	if (error) {
1681 		return EFAULT;
1682 	}
1683 
1684 	return 0;
1685 }
1686 
1687 int
mlock(__unused proc_t p,struct mlock_args * uap,__unused int32_t * retvalval)1688 mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
1689 {
1690 	kern_return_t result;
1691 
1692 	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
1693 	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
1694 
1695 	/* have to call vm_map_wire directly to pass "I don't know" protections */
1696 	result = vm_map_wire_kernel(current_map(), uap->addr,
1697 	    vm_sanitize_compute_ut_end(uap->addr, uap->len),
1698 	    vm_sanitize_wrap_prot(VM_PROT_NONE), VM_KERN_MEMORY_MLOCK, TRUE);
1699 
1700 	switch (result) {
1701 	case KERN_SUCCESS:
1702 		return 0;
1703 	case KERN_INVALID_ARGUMENT:
1704 		return EINVAL;
1705 	case KERN_RESOURCE_SHORTAGE:
1706 		return EAGAIN;
1707 	case KERN_PROTECTION_FAILURE:
1708 		return EPERM;
1709 	default:
1710 		return ENOMEM;
1711 	}
1712 }
1713 
1714 int
munlock(__unused proc_t p,struct munlock_args * uap,__unused int32_t * retval)1715 munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
1716 {
1717 	kern_return_t result;
1718 
1719 	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
1720 	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
1721 
1722 	/* JMM - need to remove all wirings by spec - this just removes one */
1723 	result = vm_map_unwire(current_map(), uap->addr,
1724 	    vm_sanitize_compute_ut_end(uap->addr, uap->len), TRUE);
1725 
1726 	switch (result) {
1727 	case KERN_SUCCESS:
1728 		return 0;
1729 	case KERN_INVALID_ARGUMENT:
1730 		return EINVAL;
1731 	default:
1732 		return ENOMEM;
1733 	}
1734 }
1735 
1736 
int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	/* mlockall() is not implemented on this platform. */
	return ENOSYS;
}
1742 
int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	/* munlockall() is not implemented on this platform. */
	return ENOSYS;
}
1748 
1749 #if CONFIG_CODE_DECRYPTION
1750 static __attribute__((always_inline, warn_unused_result))
1751 kern_return_t
mremap_encrypted_sanitize(vm_map_t user_map,vm_addr_struct_t addr_u,vm_size_struct_t len_u,mach_vm_offset_t * user_addr,mach_vm_offset_t * user_end,mach_vm_size_t * user_size)1752 mremap_encrypted_sanitize(
1753 	vm_map_t                user_map,
1754 	vm_addr_struct_t        addr_u,
1755 	vm_size_struct_t        len_u,
1756 	mach_vm_offset_t       *user_addr,
1757 	mach_vm_offset_t       *user_end,
1758 	mach_vm_size_t         *user_size)
1759 {
1760 	return vm_sanitize_addr_size(addr_u, len_u,
1761 	           VM_SANITIZE_CALLER_MREMAP_ENCRYPTED, user_map,
1762 	           VM_SANITIZE_FLAGS_CHECK_ALIGNED_START |
1763 	           VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
1764 	           user_addr, user_end, user_size);
1765 }
1766 
/*
 * mremap_encrypted system call: remap an encrypted, file-backed range
 * of the calling process through a text decrypter so it can be read
 * as cleartext.  The decrypter is selected by the cryptid carried in
 * the binary's load command.
 */
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t    user_addr, user_end;
	mach_vm_size_t      user_size;
	kern_return_t       result;
	vm_map_t    user_map;
	uint32_t    cryptid;
	cpu_type_t  cputype;
	cpu_subtype_t       cpusubtype;
	pager_crypt_info_t  crypt_info;
	const char * cryptname = 0;
	char *vpath;
	int len, ret;
	struct proc_regioninfo_internal pinfo;
	vnode_t vp;
	uintptr_t vnodeaddr;
	uint32_t vid;

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

	user_map   = current_map();
	cryptid    = uap->cryptid;
	cputype    = uap->cputype;
	cpusubtype = uap->cpusubtype;

	/*
	 * Sanitize any input parameters that are addr/size/protections
	 */
	result = mremap_encrypted_sanitize(user_map,
	    uap->addr,
	    uap->len,
	    &user_addr,
	    &user_end,
	    &user_size);
	if (__improbable(result != KERN_SUCCESS)) {
		assert(vm_sanitize_get_kr(result));
		return EINVAL;
	}

	/* select the decrypter name from the cryptid */
	switch (cryptid) {
	case CRYPTID_NO_ENCRYPTION:
		/* not encrypted, just an empty load command */
		return 0;
	case CRYPTID_APP_ENCRYPTION:
	case CRYPTID_MODEL_ENCRYPTION:
		cryptname = "com.apple.unfree";
		break;
	case 0x10:
		/* some random cryptid that you could manually put into
		 * your binary if you want NULL */
		cryptname = "com.apple.null";
		break;
	default:
		return EINVAL;
	}

	/* text_crypter_create is a registered hook; absent means no support */
	if (NULL == text_crypter_create) {
		return ENOTSUP;
	}

	/* look up the vnode backing the target address */
	ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
	if (ret == 0 || !vnodeaddr) {
		/* No really, this returns 0 if the memory address is not backed by a file */
		return EINVAL;
	}

	/*
	 * Take a reference on the vnode just long enough to resolve its
	 * path; the path (vpath) outlives the vnode reference.
	 */
	vp = (vnode_t)vnodeaddr;
	if ((vnode_getwithvid(vp, vid)) == 0) {
		vpath = zalloc(ZV_NAMEI);

		len = MAXPATHLEN;
		ret = vn_getpath(vp, vpath, &len);
		if (ret) {
			zfree(ZV_NAMEI, vpath);
			vnode_put(vp);
			return ret;
		}

		vnode_put(vp);
	} else {
		return EINVAL;
	}

#if 0
	kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
	    __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

	/* a zero-sized request is logged and treated as a no-op */
	if (user_size == 0) {
		printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
		zfree(ZV_NAMEI, vpath);
		return 0;
	}

	/* set up decrypter first */
	crypt_file_data_t crypt_data = {
		.filename = vpath,
		.cputype = cputype,
		.cpusubtype = cpusubtype,
		.origin = CRYPT_ORIGIN_LIBRARY_LOAD,
	};
	result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
	if (vm_map_debug_apple_protect) {
		printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
		    proc_getpid(p), p->p_comm,
		    user_map,
		    (uint64_t) user_addr,
		    (uint64_t) (user_addr + user_size),
		    __FUNCTION__, vpath, result);
	}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
	zfree(ZV_NAMEI, vpath);

	if (result) {
		printf("%s: unable to create decrypter %s, kr=%d\n",
		    __FUNCTION__, cryptname, result);
		if (result == kIOReturnNotPrivileged) {
			/* text encryption returned decryption failure */
			return EPERM;
		} else {
			return ENOMEM;
		}
	}

	/* now remap using the decrypter */
	vm_object_offset_t crypto_backing_offset;
	crypto_backing_offset = -1; /* i.e. use map entry's offset */
	result = vm_map_apple_protected(user_map,
	    user_addr,
	    user_addr + user_size,
	    crypto_backing_offset,
	    &crypt_info,
	    cryptid);
	if (result) {
		printf("%s: mapping failed with %d\n", __FUNCTION__, result);
	}

	if (result) {
		return EPERM;
	}
	return 0;
}
1912 #endif /* CONFIG_CODE_DECRYPTION */
1913