xref: /xnu-11215.1.10/bsd/kern/kern_mman.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>
#include <mach/vm_types_unsafe.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_kern_xnu.h>
#include <vm/vm_map_xnu.h>
#include <vm/vm_pager_xnu.h>
#include <vm/vm_sanitize_internal.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>

/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h. Basically, we attempt to draw the line at: "was this code
 * compiled with an SDK from fall of 2020 or later?"
 */
static bool
proc_2020_fall_os_sdk_or_later(void)
{
	const uint32_t proc_sdk_ver = proc_sdk(current_proc());

	switch (proc_platform(current_proc())) {
	case PLATFORM_MACOS:
		return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
	case PLATFORM_IOS:
	case PLATFORM_IOSSIMULATOR:
	case PLATFORM_MACCATALYST:
		return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
	case PLATFORM_BRIDGEOS:
		return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
	case PLATFORM_TVOS:
	case PLATFORM_TVOSSIMULATOR:
		return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
	case PLATFORM_WATCHOS:
	case PLATFORM_WATCHOSSIMULATOR:
		return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
	default:
		/*
		 * tough call, but let's give new platforms the benefit of the doubt
		 * to avoid a recurrence of rdar://89843927
		 */
		return true;
	}
}
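
/*
 * Illustrative sketch (not part of this file's build): the SDK version
 * constants compared against above use dyld's packed version format,
 * major in the upper 16 bits and one byte each for minor and patch
 * (0xMMMMmmpp). The DYLD_PACKED_VERSION helper below is hypothetical,
 * not a real dyld_priv.h macro; it only demonstrates the encoding.
 */
#if 0 /* example only, never compiled */
#define DYLD_PACKED_VERSION(major, minor, patch) \
	((uint32_t)(((major) << 16) | ((minor) << 8) | (patch)))

_Static_assert(DYLD_PACKED_VERSION(10, 16, 0) == 0x000a1000,
    "macOS 10.16, i.e. DYLD_MACOSX_VERSION_10_16");
_Static_assert(DYLD_PACKED_VERSION(14, 0, 0) == 0x000e0000,
    "iOS 14.0, i.e. DYLD_IOS_VERSION_14_0");
#endif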

static inline kern_return_t
mmap_sanitize(
	vm_map_t                user_map,
	vm_prot_ut              prot_u,
	vm_addr_struct_t        pos_u,
	vm_size_struct_t        len_u,
	vm_addr_struct_t        addr_u,
	int                     flags,
	vm_prot_t              *prot,
	vm_object_offset_t     *file_pos,
	vm_object_offset_t     *file_end,
	vm_map_size_t          *file_size,
	vm_map_offset_t        *user_addr,
	vm_map_offset_t        *user_end,
	vm_map_size_t          *user_size)
{
	kern_return_t           kr;
	vm_map_offset_t         user_mask = vm_map_page_mask(user_map);
	vm_sanitize_flags_t     vm_sanitize_flags;

	kr = vm_sanitize_prot_bsd(prot_u, VM_SANITIZE_CALLER_MMAP, prot);
	*prot &= VM_PROT_ALL;
	if (__improbable(kr != KERN_SUCCESS)) {
		return kr;
	}

	/*
	 * Check file_pos doesn't overflow with PAGE_MASK since VM objects use
	 * this page mask internally, and it can be wider than the user_map's.
	 */
	if (flags & MAP_UNIX03) {
		vm_sanitize_flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS;
	} else {
		vm_sanitize_flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH;
	}

	kr = vm_sanitize_addr_size(pos_u, len_u, VM_SANITIZE_CALLER_MMAP, PAGE_MASK,
	    vm_sanitize_flags | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES,
	    file_pos, file_end, file_size);
	if (__improbable(kr != KERN_SUCCESS)) {
		return kr;
	}

	/*
	 * Check that file_pos is page aligned for the user page size when
	 * UNIX03 compliance is requested.
	 * The user page size may be different from the kernel page size we
	 * use to check for overflows in the sanitizer call above.
	 */
	if ((flags & MAP_UNIX03) && (*file_pos & user_mask)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (flags & MAP_FIXED) {
		kr = vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MMAP,
		    user_map,
		    VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
		    user_addr, user_end, user_size);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}

		/*
		 * Further validation since we allowed a misaligned user_addr
		 * for fixed mappings.
		 *
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by (file_pos & user_mask).
		 */
		if (!VM_SANITIZE_UNSAFE_IS_EQUAL(addr_u, *user_addr + (*file_pos & user_mask))) {
			return KERN_INVALID_ARGUMENT;
		}
	} else {
		/*
		 * For "anywhere" mappings, the address is only a hint,
		 * mach_vm_map_kernel() will fail with KERN_NO_SPACE
		 * if user_addr + user_size overflows,
		 * and mmap will start scanning again.
		 *
		 * Unlike Mach VM APIs, the hint is taken as a strict
		 * "start" which is why we round the sanitized address up,
		 * rather than truncate.
		 */
		*user_addr = vm_sanitize_addr(user_map,
		    vm_sanitize_compute_unsafe_end(addr_u, user_mask));
		kr = vm_sanitize_size(pos_u, len_u, VM_SANITIZE_CALLER_MMAP,
		    user_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
		    user_size);
		if (__improbable(kr != KERN_SUCCESS)) {
			return kr;
		}
	}

	return KERN_SUCCESS;
}
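
/*
 * Illustrative sketch (not part of this file's build): the sanitizer above
 * enforces page alignment of the file offset under MAP_UNIX03, and for
 * MAP_FIXED requires the fixed address and the file offset to be congruent
 * modulo the page size. A portable userspace caller therefore truncates
 * the offset itself and keeps the slide; "map_at_offset" is a hypothetical
 * helper, not an existing API.
 */
#if 0 /* example only, never compiled */
#include <sys/mman.h>
#include <unistd.h>

static void *
map_at_offset(int fd, off_t offset, size_t len)
{
	size_t mask = (size_t)getpagesize() - 1;
	off_t  aligned = offset & ~(off_t)mask;      /* page-truncated offset */
	size_t slide = (size_t)(offset & (off_t)mask);

	char *base = mmap(NULL, len + slide, PROT_READ, MAP_PRIVATE,
	    fd, aligned);
	if (base == MAP_FAILED) {
		return NULL;
	}
	return base + slide; /* caller's data starts here */
}
#endif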

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc        *fp;
	struct vnode           *vp = NULLVP;
	int                     flags;
	int                     prot;
	int                     err = 0;
	vm_map_t                user_map;
	kern_return_t           result;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	boolean_t               docow;
	vm_prot_t               maxprot;
	void                   *handle;
	memory_object_t         pager = MEMORY_OBJECT_NULL;
	memory_object_control_t control;
	int                     mapanon = 0;
	int                     fpref = 0;
	int                     error = 0;
	int                     fd = uap->fd;
	int                     num_retries = 0;
	kern_return_t           kr;
	/* page-aligned "user_map" quantities */
	vm_map_offset_t         user_addr, user_end, user_mask;
	vm_map_size_t           user_size;
	/* unaligned "file" quantities */
	vm_object_offset_t      file_pos, file_end;
	vm_map_size_t           file_size;

	/*
	 * Note that for UNIX03 conformance, there is additional parameter
	 * checking for the mmap() system call in libsyscall prior to entering
	 * the kernel.  The sanity checks and argument validation done in this
	 * function are not the only places one can get returned errnos.
	 */

	user_map  = current_map();
	flags     = uap->flags;
	user_mask = vm_map_page_mask(user_map);

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
	AUDIT_ARG(fd, uap->fd);

	/*
	 * Sanitize any input parameters that are addr/size/protections
	 */
	kr = mmap_sanitize(user_map,
	    uap->prot,
	    uap->pos,
	    uap->len,
	    uap->addr,
	    flags,
	    &prot,
	    &file_pos,
	    &file_end,
	    &file_size,
	    &user_addr,
	    &user_end,
	    &user_size);
	if (__improbable(kr != KERN_SUCCESS)) {
		assert(vm_sanitize_get_kr(kr));
		return EINVAL;
	}

#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* radar 3777787 */

	/*
	 * Verify no unknown flags are passed in, and if any are,
	 * fail out early to make sure the logic below never has to deal
	 * with invalid flag values.  Only do so for processes compiled
	 * with a Fall 2020 or later SDK, which is where we drew this
	 * line and documented it as such.
	 */
	if (flags & ~(MAP_SHARED |
	    MAP_PRIVATE |
	    MAP_COPY |
	    MAP_FIXED |
	    MAP_RENAME |
	    MAP_NORESERVE |
	    MAP_RESERVED0080 |                                  // grandfathered in as accepted and ignored
	    MAP_NOEXTEND |
	    MAP_HASSEMAPHORE |
	    MAP_NOCACHE |
	    MAP_JIT |
	    MAP_TPRO |
	    MAP_FILE |
	    MAP_ANON |
	    MAP_RESILIENT_CODESIGN |
	    MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
	    MAP_32BIT |
#endif
	    MAP_TRANSLATED_ALLOW_EXECUTE |
	    MAP_UNIX03)) {
		if (proc_2020_fall_os_sdk_or_later()) {
			return EINVAL;
		}
	}

	if (flags & MAP_UNIX03) {
		/*
		 * Enforce UNIX03 compliance.
		 */
		if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
			/* need either MAP_PRIVATE or MAP_SHARED */
			return EINVAL;
		}
	}

	if (flags & MAP_JIT) {
		if ((flags & MAP_FIXED) ||
		    (flags & MAP_SHARED) ||
		    !(flags & MAP_ANON) ||
		    (flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}

	if ((flags & MAP_RESILIENT_CODESIGN) ||
	    (flags & MAP_RESILIENT_MEDIA)) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_JIT) ||
		    (flags & MAP_TPRO)) {
			return EINVAL;
		}
	}
	if (flags & MAP_RESILIENT_CODESIGN) {
		int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
		if (prot & reject_prot) {
			/*
			 * Quick sanity check.  maxprot is calculated below
			 * and we will test it again.
			 */
			return EPERM;
		}
	}
	if (flags & MAP_SHARED) {
		/*
		 * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
		 * there is no place to inject zero-filled pages without
		 * actually adding them to the file.
		 * Since we didn't reject that combination before, there might
		 * already be callers using it and getting a valid MAP_SHARED
		 * mapping but without the resilience.
		 * For backwards compatibility's sake, let's keep ignoring
		 * MAP_RESILIENT_MEDIA in that case.
		 */
		flags &= ~MAP_RESILIENT_MEDIA;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_SHARED)) {
			return EINVAL;
		}
	}
	if (flags & MAP_TPRO) {
		/*
		 * MAP_TPRO without VM_PROT_WRITE is not valid here because
		 * the TPRO mapping is handled at the PMAP layer with implicit RW
		 * protections.
		 *
		 * This would enable bypassing of file-based protections, i.e.
		 * a file open/mapped as read-only could be written to.
		 */
		if ((prot & VM_PROT_EXECUTE) ||
		    !(prot & VM_PROT_WRITE)) {
			return EPERM;
		}
	}

	/* Entitlement check against code signing monitor */
	if ((flags & MAP_JIT) && (vm_map_csm_allow_jit(user_map) != KERN_SUCCESS)) {
		printf("[%d] code signing monitor denies JIT mapping\n", proc_pid(p));
		return EPERM;
	}

	if (flags & MAP_ANON) {
		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, current_cached_proc_cred(p),
		    user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			int vm_flags = fd & (VM_FLAGS_ALIAS_MASK |
			    VM_FLAGS_SUPERPAGE_MASK |
			    VM_FLAGS_PURGABLE |
			    VM_FLAGS_4GB_CHUNK);

			if (vm_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}

			/*
			 * vm_map_kernel_flags_set_vmflags() will assume that
			 * the full set of VM flags are passed, which is
			 * problematic for FIXED/ANYWHERE.
			 *
			 * The block handling MAP_FIXED below will do the same
			 * thing again which is fine because it's idempotent.
			 */
			if (flags & MAP_FIXED) {
				vm_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
			} else {
				vm_flags |= VM_FLAGS_ANYWHERE;
			}
			vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags);
		}

#if CONFIG_MAP_RANGES
		/*
		 * if the client specified a tag, let the system policy apply.
		 *
		 * otherwise, force the heap range.
		 */
		if (vmk_flags.vm_tag) {
			vm_map_kernel_flags_update_range_id(&vmk_flags, user_map, user_size);
		} else {
			vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
		}
#endif /* CONFIG_MAP_RANGES */

		handle = NULL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT) {
			return EINVAL;
		}

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err) {
			return err;
		}
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
		case DTYPE_PSXSHM:
			error = pshm_mmap(p, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr),
			    user_size, prot, flags, fp,
			    vm_map_trunc_page(file_pos, user_mask),
			    file_pos & user_mask, retval);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp_get_data(fp);
		error = vnode_getwithref(vp);
		if (error != 0) {
			goto bad;
		}

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;      /* TODO: Remove this and restrict maxprot? */
			if (fp->fp_glob->fg_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0) {
						maxprot |= VM_PROT_WRITE;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->fp_glob, prot, flags, file_pos, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
			/*
			 * Consult the file system to determine if this
			 * particular file object can be mapped.
			 *
			 * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
			 * then we don't check for writability on the file
			 * object, because it will only ever see reads.
			 */
			error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
			    (prot & ~PROT_WRITE) : prot, ctx);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
		}

		/*
		 * No copy-on-read for mmap() mappings themselves.
		 */
		vmk_flags.vmkf_no_copy_on_read = 1;
#if CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX
		/* force file ranges on !macOS */
		vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#if XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT
		/*
		 * Put allocations on iOS with EXTENDED_USER_VA_SUPPORT
		 * in the large file range, if the process has the "extra jumbo" entitlement.
		 * Otherwise, place the allocation into the heap range.
		 */
		vmk_flags.vmkf_range_id = UMEM_RANGE_ID_LARGE_FILE;
#endif /* XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT */
#endif /* CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX */
	}

	if (user_size == 0) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = 0;
		goto bad;
	}

	if (flags & MAP_FIXED) {
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		vmk_flags.vmf_fixed = true;
		vmk_flags.vmf_overwrite = true;
	}

	if (flags & MAP_NOCACHE) {
		vmk_flags.vmf_no_cache = true;
	}

	if (flags & MAP_JIT) {
		vmk_flags.vmkf_map_jit = TRUE;
	}

	if (flags & MAP_TPRO) {
		vmk_flags.vmf_tpro = true;
	}

#if CONFIG_ROSETTA
	if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
		if (!proc_is_translated(p)) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		vmk_flags.vmkf_translated_allow_execute = TRUE;
	}
#endif

	if (flags & MAP_RESILIENT_CODESIGN) {
		vmk_flags.vmf_resilient_codesign = true;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		vmk_flags.vmf_resilient_media = true;
	}

#if XNU_TARGET_OS_OSX
	/* macOS-specific MAP_32BIT flag handling */
	if (flags & MAP_32BIT) {
		vmk_flags.vmkf_32bit_map_va = TRUE;
	}
#endif

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */
map_anon_retry:

		result = mach_vm_map_kernel(user_map,
		    vm_sanitize_wrap_addr_ref(&user_addr), user_size,
		    0, vmk_flags,
		    IPC_PORT_NULL, 0, FALSE,
		    prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/*
		 * If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

#if FBDP_DEBUG_OBJECT_NO_PAGER
//#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/202"
#define FBDP_FILE_NAME1 "icutz44l.dat"
#define FBDP_PATH_NAME2 "/private/var/mobile/Containers/Data/InternalDaemon/"
#define FBDP_FILE_NAME_START2 "com.apple.LaunchServices-"
#define FBDP_FILE_NAME_END2 "-v2.csstore"
		if (!strncmp(vp->v_name, FBDP_FILE_NAME1, strlen(FBDP_FILE_NAME1))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME1, strlen(FBDP_PATH_NAME1))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		} else if (!strncmp(vp->v_name, FBDP_FILE_NAME_START2, strlen(FBDP_FILE_NAME_START2)) &&
		    strlen(vp->v_name) > strlen(FBDP_FILE_NAME_START2) + strlen(FBDP_FILE_NAME_END2) &&
		    !strncmp(vp->v_name + strlen(vp->v_name) - strlen(FBDP_FILE_NAME_END2),
		    FBDP_FILE_NAME_END2,
		    strlen(FBDP_FILE_NAME_END2))) {
			char *path;
			int len;
			bool already_tracked;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME2, strlen(FBDP_PATH_NAME2))) {
				if (memory_object_mark_as_tracked(control,
				    true,
				    &already_tracked) == KERN_SUCCESS &&
				    !already_tracked) {
					printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
				}
			}
			zfree(ZV_NAMEI, path);
		}
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

		/*
		 * Set credentials:
		 * FIXME: if we're writing the file we need a way to
		 * ensure that someone doesn't replace our R/W creds
		 * with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */

map_file_retry:
		if (flags & MAP_RESILIENT_CODESIGN) {
			int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
			if (prot & reject_prot) {
				/*
				 * Would like to use (prot | maxprot) here
				 * but the assignment of VM_PROT_EXECUTE
				 * to maxprot above would always fail the test.
				 *
				 * Skipping the check is ok, however, because we
				 * restrict maxprot to prot just below in this
				 * block.
				 */
				assert(!mapanon);
				vnode_put(vp);
				error = EPERM;
				goto bad;
			}
			/* strictly limit access to "prot" */
			maxprot &= prot;
		}

		result = vm_map_enter_mem_object_control(user_map,
		    vm_sanitize_wrap_addr_ref(&user_addr), user_size,
		    0, vmk_flags,
		    control, vm_map_trunc_page(file_pos, user_mask),
		    docow, prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/*
		 * If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + (file_pos & user_mask);
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref) {
		fp_drop(p, fd, fp, 0);
	}

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
	    (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
	return error;
}
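
/*
 * Illustrative sketch (not part of this file's build): the MAP_ANON path
 * above accepts (some) Mach VM allocation flags smuggled through the "fd"
 * argument.  VM_MAKE_TAG() and the VM_MEMORY_* constants come from
 * <mach/vm_statistics.h>; "alloc_tagged" is a hypothetical helper showing
 * how a userspace caller tags an anonymous mapping so tools like vmmap
 * can attribute the memory.
 */
#if 0 /* example only, never compiled */
#include <mach/vm_statistics.h>
#include <sys/mman.h>

static void *
alloc_tagged(size_t size)
{
	/* fd = VM_MAKE_TAG(tag): accepted because MAP_ANON is set */
	return mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE,
	    VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
}
#endif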

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	user_map = current_map();
	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	if (addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining a list
		 * of all mmaps done.  We cannot use vm_map_entry, as entries
		 * could be split or coalesced by independent actions.  So
		 * instead of returning inaccurate results, just treat a zero
		 * size as an invalid size and return an error.
		 */
		return EINVAL; /* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
		return EINVAL;
	}

	if (flags & MS_KILLPAGES) {
		sync_flags |= VM_SYNC_KILLPAGES;
	}
	if (flags & MS_DEACTIVATE) {
		sync_flags |= VM_SYNC_DEACTIVATE;
	}
	if (flags & MS_INVALIDATE) {
		sync_flags |= VM_SYNC_INVALIDATE;
	}

	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC) {
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		} else {
			sync_flags |= VM_SYNC_SYNCHRONOUS;
		}
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */

	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
		return ENOMEM;
	case KERN_FAILURE:
		return EIO;
	default:
		return EINVAL;
	}
	return 0;
}
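
/*
 * Illustrative sketch (not part of this file's build): the contract
 * enforced above from a caller's point of view.  The address must be
 * page-aligned, MS_SYNC and MS_ASYNC are mutually exclusive, and a hole
 * in the range surfaces as ENOMEM because of VM_SYNC_CONTIGUOUS.
 * "flush_mapping" is a hypothetical helper.
 */
#if 0 /* example only, never compiled */
#include <errno.h>
#include <sys/mman.h>

static int
flush_mapping(void *addr, size_t len)
{
	/* MS_SYNC: block until the pages have been written back */
	if (msync(addr, len, MS_SYNC) == -1) {
		/* ENOMEM here can mean a hole in [addr, addr + len) */
		return errno;
	}
	return 0;
}
#endif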

static inline kern_return_t
munmap_sanitize(
	vm_map_t                user_map,
	vm_addr_struct_t        addr_u,
	vm_size_struct_t        len_u,
	mach_vm_offset_t       *user_addr,
	mach_vm_offset_t       *user_end,
	mach_vm_size_t         *user_size)
{
	return vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MUNMAP,
	           user_map,
	           VM_SANITIZE_FLAGS_CHECK_ALIGNED_START | VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS,
	           user_addr, user_end, user_size);
}

int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t        user_addr, user_end;
	mach_vm_size_t          user_size;
	kern_return_t           result;
	vm_map_t                user_map;

	user_map = current_map();

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

	/*
	 * Sanitize any input parameters that are addr/size/protections
	 */
	result = munmap_sanitize(user_map,
	    uap->addr,
	    uap->len,
	    &user_addr,
	    &user_end,
	    &user_size);
	if (__improbable(result != KERN_SUCCESS)) {
		assert(vm_sanitize_get_kr(result) ==
		    KERN_INVALID_ARGUMENT);
		return EINVAL;
	}
	if (mach_vm_deallocate(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	vm_prot_t prot;
	mach_vm_offset_t        user_addr;
	mach_vm_size_t  user_size;
	kern_return_t   result;
	vm_map_t        user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

	if (vm_map_range_overflows(user_map, user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ) {
		prot |= VM_PROT_EXECUTE;
	}
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* 3936456 */

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error) {
		return error;
	}
#endif

	if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/*
		 * The special protection value VM_PROT_TRUSTED requests that
		 * we treat this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing
		 * the mac_proc_check_mprotect() hook above.  Otherwise, code
		 * signing will be compromised, because the check would always
		 * succeed and thus any process could sign dynamically.
		 */
		result = vm_map_sign(
			user_map,
			vm_map_trunc_page(user_addr,
			vm_map_page_mask(user_map)),
			vm_map_round_page(user_addr + user_size,
			vm_map_page_mask(user_map)));
		switch (result) {
		case KERN_SUCCESS:
			break;
		case KERN_INVALID_ADDRESS:
			/* UNIX SPEC: for an invalid address range, return ENOMEM */
			return ENOMEM;
		default:
			return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
	    FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return EINVAL;
}
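
/*
 * Illustrative sketch (not part of this file's build): the implied-read
 * rule guarded by "#if 3936456" above means a request for write or execute
 * access also grants read, since the hardware cannot express write-only or
 * execute-only mappings.  "make_writable" is a hypothetical helper.
 */
#if 0 /* example only, never compiled */
#include <sys/mman.h>

static int
make_writable(void *page, size_t len)
{
	/* effectively PROT_READ | PROT_WRITE on this hardware */
	return mprotect(page, len, PROT_WRITE);
}
#endif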


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_inherit_t inherit;
	vm_map_t        user_map;
	kern_return_t   result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->inherit);

	user_map = current_map();
	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;
	if (vm_map_range_overflows(user_map, addr, size)) {
		return EINVAL;
	}
	result = mach_vm_inherit(user_map, addr, size,
	    inherit);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	}
	return EINVAL;
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t   result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	case MADV_FREE:
		new_behavior = VM_BEHAVIOR_FREE;
		break;
	case MADV_ZERO_WIRED_PAGES:
		new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
		break;
	case MADV_FREE_REUSABLE:
		new_behavior = VM_BEHAVIOR_REUSABLE;
		break;
	case MADV_FREE_REUSE:
		new_behavior = VM_BEHAVIOR_REUSE;
		break;
	case MADV_CAN_REUSE:
		new_behavior = VM_BEHAVIOR_CAN_REUSE;
		break;
	case MADV_PAGEOUT:
#if MACH_ASSERT
		new_behavior = VM_BEHAVIOR_PAGEOUT;
		break;
#else /* MACH_ASSERT */
		return ENOTSUP;
#endif /* MACH_ASSERT */
	case MADV_ZERO:
		new_behavior = VM_BEHAVIOR_ZERO;
		break;
	default:
		return EINVAL;
	}

	user_map = current_map();
	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
	if (vm_map_range_overflows(user_map, start, size)) {
		return EINVAL;
	}
#if __arm64__
	if (start == 0 &&
	    size != 0 &&
	    (uap->behav == MADV_FREE ||
	    uap->behav == MADV_FREE_REUSABLE)) {
		printf("** %s: %d[%s] "
		    "failing madvise(0x%llx,0x%llx,%s)\n",
		    __func__, proc_getpid(p), p->p_comm, start, size,
		    ((uap->behav == MADV_FREE_REUSABLE)
		    ? "MADV_FREE_REUSABLE"
		    : "MADV_FREE"));
		return EINVAL;
	}
#endif /* __arm64__ */

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	case KERN_PROTECTION_FAILURE:
		return EPERM;
	case KERN_NO_ACCESS:
		return ENOTSUP;
	}

	return EINVAL;
}
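
/*
 * Illustrative sketch (not part of this file's build): how an allocator
 * might pair the MADV_FREE_REUSABLE/MADV_FREE_REUSE advice translated to
 * VM_BEHAVIOR_REUSABLE/VM_BEHAVIOR_REUSE above.  REUSABLE marks the pages'
 * contents discardable while keeping the mapping; REUSE reasserts use
 * before the memory is handed out again.  "cache_release"/"cache_reuse"
 * are hypothetical helpers.
 */
#if 0 /* example only, never compiled */
#include <sys/mman.h>

static void
cache_release(void *buf, size_t len)
{
	/* contents may be discarded by the VM; buf stays mapped */
	(void)madvise(buf, len, MADV_FREE_REUSABLE);
}

static void
cache_reuse(void *buf, size_t len)
{
	/* tell the VM we are using the range again */
	(void)madvise(buf, len, MADV_FREE_REUSE);
}
#endif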

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
	vm_map_t map = VM_MAP_NULL;
	user_addr_t vec = 0;
	int error = 0;
	int64_t lastvecindex = 0;
	int mincoreinfo = 0;
	int pqueryinfo = 0;
	uint64_t pqueryinfo_vec_size = 0;
	vm_page_info_basic_t info = NULL;
	mach_msg_type_number_t count = 0;
	char *kernel_vec = NULL;
	uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
	kern_return_t kr = KERN_SUCCESS;
	int effective_page_shift, effective_page_size;

	map = current_map();

	/*
	 * On systems with a 4k kernel page size and 16k user pages, we will
	 * use the kernel page size to report back the residency information.
	 * This is for backwards compatibility since we already have
	 * processes that depend on this behavior.
	 */
	if (vm_map_page_shift(map) < PAGE_SHIFT) {
		effective_page_shift = vm_map_page_shift(map);
		effective_page_size = vm_map_page_size(map);
	} else {
		effective_page_shift = PAGE_SHIFT;
		effective_page_size = PAGE_SIZE;
	}

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = vm_map_trunc_page(uap->addr,
	    vm_map_page_mask(map));
	end = vm_map_round_page(uap->addr + uap->len,
	    vm_map_page_mask(map));

	if (end < addr) {
		return EINVAL;
	}

	if (end == addr) {
		return 0;
	}

	/*
	 * We are going to loop through the whole 'req_vec_size' pages
	 * range in chunks of 'cur_vec_size'.
	 */

	req_vec_size_pages = (end - addr) >> effective_page_shift;
	cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
	size_t kernel_vec_size = cur_vec_size_pages;

	kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

	if (kernel_vec == NULL) {
		return ENOMEM;
	}

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

	info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

	if (info == NULL) {
		kfree_data(kernel_vec, kernel_vec_size);
		return ENOMEM;
	}

	while (addr < end) {
		cur_end = addr + (cur_vec_size_pages * effective_page_size);

		count = VM_PAGE_INFO_BASIC_COUNT;
		kr = vm_map_page_range_info_internal(map,
		    addr,
		    cur_end,
		    effective_page_shift,
		    VM_PAGE_INFO_BASIC,
		    (vm_page_info_t) info,
		    &count);

		assert(kr == KERN_SUCCESS);

		/*
		 * Do this on a map entry basis so that if the pages are not
		 * in the current process's address space, we can easily look
		 * up the pages elsewhere.
		 */
		lastvecindex = -1;

		for (; addr < cur_end; addr += effective_page_size) {
			pqueryinfo = info[lastvecindex + 1].disposition;

			mincoreinfo = 0;

			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
				mincoreinfo |= MINCORE_INCORE;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
				mincoreinfo |= MINCORE_REFERENCED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
				mincoreinfo |= MINCORE_MODIFIED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
				mincoreinfo |= MINCORE_PAGED_OUT;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
				mincoreinfo |= MINCORE_COPIED;
			}
			if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
				mincoreinfo |= MINCORE_ANONYMOUS;
			}
			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = (addr - first_addr) >> effective_page_shift;
			kernel_vec[vecindex] = (char)mincoreinfo;
			lastvecindex = vecindex;
		}


		assert(vecindex == (cur_vec_size_pages - 1));

		error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

		if (error) {
			break;
		}

		/*
		 * For the next chunk, we need to:
		 * - bump the location in the user buffer for our next disposition;
		 * - compute the new length;
		 * - compute the new starting address.
		 */
		vec += cur_vec_size_pages * sizeof(char);
		req_vec_size_pages = (end - addr) >> effective_page_shift;
		cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

		first_addr = addr;
	}

	kfree_data(info, pqueryinfo_vec_size);
	kfree_data(kernel_vec, kernel_vec_size);

	if (error) {
		return EFAULT;
	}

	return 0;
}
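
/*
 * Illustrative sketch (not part of this file's build): the byte-vector
 * contract implemented above, one char per page with MINCORE_* bits
 * describing residency.  "count_resident_pages" is a hypothetical helper.
 */
#if 0 /* example only, never compiled */
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static size_t
count_resident_pages(void *addr, size_t len)
{
	size_t pagesz = (size_t)getpagesize();
	size_t npages = (len + pagesz - 1) / pagesz;
	char *vec = malloc(npages);     /* one char per page */
	size_t resident = 0;

	if (vec != NULL && mincore(addr, len, vec) == 0) {
		for (size_t i = 0; i < npages; i++) {
			if (vec[i] & MINCORE_INCORE) {
				resident++;
			}
		}
	}
	free(vec);
	return resident;
}
#endif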

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
	kern_return_t result;

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire_kernel(current_map(), uap->addr,
	    vm_sanitize_compute_unsafe_end(uap->addr, uap->len),
	    vm_sanitize_wrap_prot(VM_PROT_NONE), VM_KERN_MEMORY_MLOCK, TRUE);

	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ARGUMENT:
		return EINVAL;
	case KERN_RESOURCE_SHORTAGE:
		return EAGAIN;
	case KERN_PROTECTION_FAILURE:
		return EPERM;
	default:
		return ENOMEM;
	}
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
	kern_return_t result;

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

	/* JMM - need to remove all wirings by spec - this just removes one */
	result = vm_map_unwire(current_map(), uap->addr,
	    vm_sanitize_compute_unsafe_end(uap->addr, uap->len), TRUE);

	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ARGUMENT:
		return EINVAL;
	default:
		return ENOMEM;
	}
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
static inline kern_return_t
mremap_encrypted_sanitize(
	vm_map_t                user_map,
	vm_addr_struct_t        addr_u,
	vm_size_struct_t        len_u,
	mach_vm_offset_t       *user_addr,
	mach_vm_offset_t       *user_end,
	mach_vm_size_t         *user_size)
{
	return vm_sanitize_addr_size(addr_u, len_u,
	           VM_SANITIZE_CALLER_MREMAP_ENCRYPTED, user_map,
	           VM_SANITIZE_FLAGS_CHECK_ALIGNED_START | VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
	           user_addr, user_end, user_size);
}

int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t    user_addr, user_end;
	mach_vm_size_t      user_size;
	kern_return_t       result;
	vm_map_t    user_map;
	uint32_t    cryptid;
	cpu_type_t  cputype;
	cpu_subtype_t       cpusubtype;
	pager_crypt_info_t  crypt_info;
	const char * cryptname = 0;
	char *vpath;
	int len, ret;
	struct proc_regioninfo_internal pinfo;
	vnode_t vp;
	uintptr_t vnodeaddr;
	uint32_t vid;

	AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
	AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

	user_map   = current_map();
	cryptid    = uap->cryptid;
	cputype    = uap->cputype;
	cpusubtype = uap->cpusubtype;

	/*
	 * Sanitize any input parameters that are addr/size/protections
	 */
	result = mremap_encrypted_sanitize(user_map,
	    uap->addr,
	    uap->len,
	    &user_addr,
	    &user_end,
	    &user_size);
	if (__improbable(result != KERN_SUCCESS)) {
		assert(vm_sanitize_get_kr(result));
		return EINVAL;
	}

	switch (cryptid) {
	case CRYPTID_NO_ENCRYPTION:
		/* not encrypted, just an empty load command */
		return 0;
	case CRYPTID_APP_ENCRYPTION:
	case CRYPTID_MODEL_ENCRYPTION:
		cryptname = "com.apple.unfree";
		break;
	case 0x10:
		/*
		 * some random cryptid that you could manually put into
		 * your binary if you want NULL
		 */
		cryptname = "com.apple.null";
		break;
	default:
		return EINVAL;
	}

	if (NULL == text_crypter_create) {
		return ENOTSUP;
	}

	ret = fill_procregioninfo_onlymappedvnodes(proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
	if (ret == 0 || !vnodeaddr) {
		/* No really, this returns 0 if the memory address is not backed by a file */
		return EINVAL;
	}

	vp = (vnode_t)vnodeaddr;
	if ((vnode_getwithvid(vp, vid)) == 0) {
		vpath = zalloc(ZV_NAMEI);

		len = MAXPATHLEN;
		ret = vn_getpath(vp, vpath, &len);
		if (ret) {
			zfree(ZV_NAMEI, vpath);
			vnode_put(vp);
			return ret;
		}

		vnode_put(vp);
	} else {
		return EINVAL;
	}

#if 0
	kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
	    __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

	if (user_size == 0) {
		printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
		zfree(ZV_NAMEI, vpath);
		return 0;
	}

	/* set up decrypter first */
	crypt_file_data_t crypt_data = {
		.filename = vpath,
		.cputype = cputype,
		.cpusubtype = cpusubtype,
		.origin = CRYPT_ORIGIN_LIBRARY_LOAD,
	};
	result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
	if (vm_map_debug_apple_protect) {
		printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
		    proc_getpid(p), p->p_comm,
		    user_map,
		    (uint64_t) user_addr,
		    (uint64_t) (user_addr + user_size),
		    __FUNCTION__, vpath, result);
	}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
	zfree(ZV_NAMEI, vpath);

	if (result) {
		printf("%s: unable to create decrypter %s, kr=%d\n",
		    __FUNCTION__, cryptname, result);
		if (result == kIOReturnNotPrivileged) {
			/* text encryption returned decryption failure */
			return EPERM;
		}
		return ENOMEM;
	}

	/* now remap using the decrypter */
	vm_object_offset_t crypto_backing_offset;
	crypto_backing_offset = -1; /* i.e. use map entry's offset */
	result = vm_map_apple_protected(user_map,
	    user_addr,
	    user_addr + user_size,
	    crypto_backing_offset,
	    &crypt_info,
	    cryptid);
	if (result) {
		printf("%s: mapping failed with %d\n", __FUNCTION__, result);
		return EPERM;
	}
	return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */