/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *    The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>
#include <mach/vm_types_unsafe.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_kern_xnu.h>
#include <vm/vm_map_xnu.h>
#include <vm/vm_pager_xnu.h>
#include <vm/vm_sanitize_internal.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>

/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h. Basically, we attempt to draw the line of: "was this code
 * compiled with an SDK from fall of 2020 or later?"
 */
static bool
proc_2020_fall_os_sdk_or_later(void)
{
    const uint32_t proc_sdk_ver = proc_sdk(current_proc());

    switch (proc_platform(current_proc())) {
    case PLATFORM_MACOS:
        return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
    case PLATFORM_IOS:
    case PLATFORM_IOSSIMULATOR:
    case PLATFORM_MACCATALYST:
        return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
    case PLATFORM_BRIDGEOS:
        return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
    case PLATFORM_TVOS:
    case PLATFORM_TVOSSIMULATOR:
        return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
    case PLATFORM_WATCHOS:
    case PLATFORM_WATCHOSSIMULATOR:
        return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
    default:
        /*
         * tough call, but let's give new platforms the benefit of the doubt
         * to avoid a recurrence of rdar://89843927
         */
        return true;
    }
}

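/*
 * For reference, dyld packs versions as (major << 16) | (minor << 8) | patch,
 * so the constants above decode as, e.g.:
 *
 *     0x000a1000 == (10 << 16) | (16 << 8)   // macOS 10.16
 *     0x000e0000 == (14 << 16)               // iOS / tvOS 14.0
 *
 * (Decoding shown for illustration; the comparisons use the literals.)
 */
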
static inline kern_return_t
mmap_sanitize(
    vm_map_t user_map,
    vm_prot_ut prot_u,
    vm_addr_struct_t pos_u,
    vm_size_struct_t len_u,
    vm_addr_struct_t addr_u,
    int flags,
    vm_prot_t *prot,
    vm_object_offset_t *file_pos,
    vm_object_offset_t *file_end,
    vm_map_size_t *file_size,
    vm_map_offset_t *user_addr,
    vm_map_offset_t *user_end,
    vm_map_size_t *user_size)
{
    kern_return_t kr;
    vm_map_offset_t user_mask = vm_map_page_mask(user_map);
    vm_sanitize_flags_t vm_sanitize_flags;

    kr = vm_sanitize_prot_bsd(prot_u, VM_SANITIZE_CALLER_MMAP, prot);
    *prot &= VM_PROT_ALL;
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }

    /*
     * Check that file_pos doesn't overflow with PAGE_MASK, since VM objects
     * use this page mask internally and it can be wider than the user_map's.
     */
    if (flags & MAP_UNIX03) {
        vm_sanitize_flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS;
    } else {
        vm_sanitize_flags = VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH;
    }

    kr = vm_sanitize_addr_size(pos_u, len_u, VM_SANITIZE_CALLER_MMAP, PAGE_MASK,
        vm_sanitize_flags | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES,
        file_pos, file_end, file_size);
    if (__improbable(kr != KERN_SUCCESS)) {
        return kr;
    }

    /*
     * Check that file_pos is page aligned for the user page size when
     * UNIX03 compliance is requested.
     * The user page size may be different from the kernel page size we
     * used to check for overflows in the sanitize call above.
     */
    if ((flags & MAP_UNIX03) && (*file_pos & user_mask)) {
        return KERN_INVALID_ARGUMENT;
    }

    if (flags & MAP_FIXED) {
        kr = vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MMAP,
            user_map,
            VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
            user_addr, user_end, user_size);
        if (__improbable(kr != KERN_SUCCESS)) {
            return kr;
        }

        /*
         * Further validation since we allowed a misaligned user_addr
         * for fixed mappings.
         *
         * The specified address must have the same remainder
         * as the file offset, taken modulo the user page size, so it
         * should be aligned after adjustment by (file_pos & user_mask).
         */
        if (!VM_SANITIZE_UNSAFE_IS_EQUAL(addr_u, *user_addr + (*file_pos & user_mask))) {
            return KERN_INVALID_ARGUMENT;
        }
    } else {
        /*
         * For "anywhere" mappings, the address is only a hint:
         * mach_vm_map_kernel() will fail with KERN_NO_SPACE
         * if user_addr + user_size overflows,
         * and mmap will start scanning again.
         *
         * Unlike Mach VM APIs, the hint is taken as a strict
         * "start", which is why we round the sanitized address up
         * rather than truncate.
         */
        *user_addr = vm_sanitize_addr(user_map,
            vm_sanitize_compute_unsafe_end(addr_u, user_mask));
        kr = vm_sanitize_size(pos_u, len_u, VM_SANITIZE_CALLER_MMAP,
            user_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
            user_size);
        if (__improbable(kr != KERN_SUCCESS)) {
            return kr;
        }
    }

    return KERN_SUCCESS;
}

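/*
 * Example of the MAP_FIXED congruence rule enforced above (hypothetical
 * values, 16KB user pages, so user_mask == 0x3fff): a request with
 * addr == 0x100004123 and pos == 0x4123 is accepted, because both have the
 * same sub-page remainder (0x123); the same addr with pos == 0x4000 fails
 * the VM_SANITIZE_UNSAFE_IS_EQUAL() check and mmap() returns EINVAL.
 */
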
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
    /*
     * Map in special device (must be SHARED) or file
     */
    struct fileproc *fp;
    struct vnode *vp = NULLVP;
    int flags;
    int prot;
    int err = 0;
    vm_map_t user_map;
    kern_return_t result;
    vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    boolean_t docow;
    vm_prot_t maxprot;
    void *handle;
    memory_object_t pager = MEMORY_OBJECT_NULL;
    memory_object_control_t control;
    int mapanon = 0;
    int fpref = 0;
    int error = 0;
    int fd = uap->fd;
    int num_retries = 0;
    kern_return_t kr;
    /* page-aligned "user_map" quantities */
    vm_map_offset_t user_addr, user_end, user_mask;
    vm_map_size_t user_size;
    /* unaligned "file" quantities */
    vm_object_offset_t file_pos, file_end;
    vm_map_size_t file_size;

    /*
     * Note that for UNIX03 conformance, there is additional parameter
     * checking for the mmap() system call in libsyscall prior to entering
     * the kernel.  The sanity checks and argument validation done in this
     * function are not the only places one can get returned errnos.
     */

    user_map = current_map();
    flags = uap->flags;
    user_mask = vm_map_page_mask(user_map);

    AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
    AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));
    AUDIT_ARG(fd, uap->fd);

    /*
     * Sanitize any input parameters that are addr/size/protections
     */
    kr = mmap_sanitize(user_map,
        uap->prot,
        uap->pos,
        uap->len,
        uap->addr,
        flags,
        &prot,
        &file_pos,
        &file_end,
        &file_size,
        &user_addr,
        &user_end,
        &user_size);
    if (__improbable(kr != KERN_SUCCESS)) {
        assert(vm_sanitize_get_kr(kr));
        return EINVAL;
    }

#if 3777787
    /*
     * Since the hardware currently does not support writing without
     * read-before-write, or execution-without-read, if the request is
     * for write or execute access, we must imply read access as well;
     * otherwise programs expecting this to work will fail to operate.
     */
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
    }
#endif /* radar 3777787 */
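
    /*
     * e.g. a plain mmap(..., PROT_WRITE, ...) request has been widened to
     * VM_PROT_READ | VM_PROT_WRITE by this point, before any of the
     * flag/protection checks below run.
     */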

    /*
     * verify no unknown flags are passed in, and if any are,
     * fail out early to make sure the logic below never has to deal
     * with invalid flag values.  only do so for processes compiled
     * with a Fall 2020 or later SDK, which is where we drew this
     * line and documented it as such.
     */
    if (flags & ~(MAP_SHARED |
        MAP_PRIVATE |
        MAP_COPY |
        MAP_FIXED |
        MAP_RENAME |
        MAP_NORESERVE |
        MAP_RESERVED0080 |      // grandfathered in as accepted and ignored
        MAP_NOEXTEND |
        MAP_HASSEMAPHORE |
        MAP_NOCACHE |
        MAP_JIT |
        MAP_TPRO |
        MAP_FILE |
        MAP_ANON |
        MAP_RESILIENT_CODESIGN |
        MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
        MAP_32BIT |
#endif
        MAP_TRANSLATED_ALLOW_EXECUTE |
        MAP_UNIX03)) {
        if (proc_2020_fall_os_sdk_or_later()) {
            return EINVAL;
        }
    }

    if (flags & MAP_UNIX03) {
        /*
         * Enforce UNIX03 compliance.
         */
        if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
            /* need either MAP_PRIVATE or MAP_SHARED */
            return EINVAL;
        }
    }

    if (flags & MAP_JIT) {
        if ((flags & MAP_FIXED) ||
            (flags & MAP_SHARED) ||
            !(flags & MAP_ANON) ||
            (flags & MAP_RESILIENT_CODESIGN) ||
            (flags & MAP_RESILIENT_MEDIA) ||
            (flags & MAP_TPRO)) {
            return EINVAL;
        }
    }

    if ((flags & MAP_RESILIENT_CODESIGN) ||
        (flags & MAP_RESILIENT_MEDIA)) {
        if ((flags & MAP_ANON) ||
            (flags & MAP_JIT) ||
            (flags & MAP_TPRO)) {
            return EINVAL;
        }
    }
    if (flags & MAP_RESILIENT_CODESIGN) {
        int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
        if (prot & reject_prot) {
            /*
             * Quick sanity check.  maxprot is calculated below and
             * we will test it again.
             */
            return EPERM;
        }
    }
    if (flags & MAP_SHARED) {
        /*
         * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
         * there is no place to inject zero-filled pages without
         * actually adding them to the file.
         * Since we didn't reject that combination before, there might
         * already be callers using it and getting a valid MAP_SHARED
         * mapping but without the resilience.
         * For backwards compatibility's sake, let's keep ignoring
         * MAP_RESILIENT_MEDIA in that case.
         */
        flags &= ~MAP_RESILIENT_MEDIA;
    }
    if (flags & MAP_RESILIENT_MEDIA) {
        if ((flags & MAP_ANON) ||
            (flags & MAP_SHARED)) {
            return EINVAL;
        }
    }
    if (flags & MAP_TPRO) {
        /*
         * MAP_TPRO without VM_PROT_WRITE is not valid here because
         * the TPRO mapping is handled at the PMAP layer with implicit RW
         * protections.
         *
         * This would enable bypassing of file-based protections, i.e.
         * a file opened/mapped as read-only could be written to.
         */
        if ((prot & VM_PROT_EXECUTE) ||
            !(prot & VM_PROT_WRITE)) {
            return EPERM;
        }
    }

    /* Entitlement check against code signing monitor */
    if ((flags & MAP_JIT) && (vm_map_csm_allow_jit(user_map) != KERN_SUCCESS)) {
        printf("[%d] code signing monitor denies JIT mapping\n", proc_pid(p));
        return EPERM;
    }

    if (flags & MAP_ANON) {
        maxprot = VM_PROT_ALL;
#if CONFIG_MACF
        /*
         * Entitlement check.
         */
        error = mac_proc_check_map_anon(p, current_cached_proc_cred(p),
            user_addr, user_size, prot, flags, &maxprot);
        if (error) {
            return EINVAL;
        }
#endif /* MAC */

        /*
         * Mapping blank space is trivial.  Use positive fds as the alias
         * value for memory tracking.
         */
        if (fd != -1) {
            /*
             * Use "fd" to pass (some) Mach VM allocation flags
             * (see the VM_FLAGS_* definitions).
             */
            int vm_flags = fd & (VM_FLAGS_ALIAS_MASK |
                VM_FLAGS_SUPERPAGE_MASK |
                VM_FLAGS_PURGABLE |
                VM_FLAGS_4GB_CHUNK);

            if (vm_flags != fd) {
                /* reject if there are any extra flags */
                return EINVAL;
            }

            /*
             * vm_map_kernel_flags_set_vmflags() will assume that
             * the full set of VM flags are passed, which is
             * problematic for FIXED/ANYWHERE.
             *
             * The block handling MAP_FIXED below will do the same
             * thing again, which is fine because it's idempotent.
             */
            if (flags & MAP_FIXED) {
                vm_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
            } else {
                vm_flags |= VM_FLAGS_ANYWHERE;
            }
            vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags);
        }

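        /*
         * Userspace sketch of the convention above (illustrative call,
         * not part of this file): an anonymous mapping can carry a Mach
         * VM tag in the "fd" argument, e.g.
         *
         *     mmap(NULL, size, PROT_READ | PROT_WRITE,
         *         MAP_ANON | MAP_PRIVATE, VM_MAKE_TAG(VM_MEMORY_MALLOC), 0);
         *
         * VM_MAKE_TAG() shifts the tag into VM_FLAGS_ALIAS_MASK (the top
         * byte), so the argument is still a positive "fd" value while
         * encoding the alias.
         */
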
#if CONFIG_MAP_RANGES
        /*
         * if the client specified a tag, let the system policy apply.
         *
         * otherwise, force the heap range.
         */
        if (vmk_flags.vm_tag) {
            vm_map_kernel_flags_update_range_id(&vmk_flags, user_map, user_size);
        } else {
            vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
        }
#endif /* CONFIG_MAP_RANGES */

        handle = NULL;
        file_pos = 0;
        mapanon = 1;
    } else {
        struct vnode_attr va;
        vfs_context_t ctx = vfs_context_current();

        if (flags & MAP_JIT) {
            return EINVAL;
        }

        /*
         * Mapping file, get fp for validation.  Obtain vnode and make
         * sure it is of appropriate type.
         */
        err = fp_lookup(p, fd, &fp, 0);
        if (err) {
            return err;
        }
        fpref = 1;
        switch (FILEGLOB_DTYPE(fp->fp_glob)) {
        case DTYPE_PSXSHM:
            error = pshm_mmap(p, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr),
                user_size, prot, flags, fp,
                vm_map_trunc_page(file_pos, user_mask),
                file_pos & user_mask, retval);
            goto bad;
        case DTYPE_VNODE:
            break;
        default:
            error = EINVAL;
            goto bad;
        }
        vp = (struct vnode *)fp_get_data(fp);
        error = vnode_getwithref(vp);
        if (error != 0) {
            goto bad;
        }

        if (vp->v_type != VREG && vp->v_type != VCHR) {
            (void)vnode_put(vp);
            error = EINVAL;
            goto bad;
        }

        AUDIT_ARG(vnpath, vp, ARG_VNODE1);

        /*
         * POSIX: mmap needs to update access time for mapped files
         */
        if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
            VATTR_INIT(&va);
            nanotime(&va.va_access_time);
            VATTR_SET_ACTIVE(&va, va_access_time);
            vnode_setattr(vp, &va, ctx);
        }

        /*
         * XXX hack to handle use of /dev/zero to map anon memory (ala
         * SunOS).
         */
        if (vp->v_type == VCHR || vp->v_type == VSTR) {
            (void)vnode_put(vp);
            error = ENODEV;
            goto bad;
        } else {
            /*
             * Ensure that file and memory protections are
             * compatible.  Note that we only worry about
             * writability if mapping is shared; in this case,
             * current and max prot are dictated by the open file.
             * XXX use the vnode instead?  Problem is: what
             * credentials do we use for determination?  What if
             * proc does a setuid?
             */
            maxprot = VM_PROT_EXECUTE; /* TODO: Remove this and restrict maxprot? */
            if (fp->fp_glob->fg_flag & FREAD) {
                maxprot |= VM_PROT_READ;
            } else if (prot & PROT_READ) {
                (void)vnode_put(vp);
                error = EACCES;
                goto bad;
            }
            /*
             * If we are sharing potential changes (either via
             * MAP_SHARED or via the implicit sharing of character
             * device mappings), and we are trying to get write
             * permission although we opened it without asking
             * for it, bail out.
             */

            if ((flags & MAP_SHARED) != 0) {
                if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
                    /*
                     * Do not allow writable mappings of
                     * swap files (see vm_swapfile_pager.c).
                     */
                    !vnode_isswap(vp)) {
                    /*
                     * check for write access
                     *
                     * Note that we already made this check when granting FWRITE
                     * against the file, so it seems redundant here.
                     */
                    error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

                    /* if not granted for any reason, but we wanted it, bad */
                    if ((prot & PROT_WRITE) && (error != 0)) {
                        vnode_put(vp);
                        goto bad;
                    }

                    /* if writable, remember */
                    if (error == 0) {
                        maxprot |= VM_PROT_WRITE;
                    }
                } else if ((prot & PROT_WRITE) != 0) {
                    (void)vnode_put(vp);
                    error = EACCES;
                    goto bad;
                }
            } else {
                maxprot |= VM_PROT_WRITE;
            }

            handle = (void *)vp;
#if CONFIG_MACF
            error = mac_file_check_mmap(vfs_context_ucred(ctx),
                fp->fp_glob, prot, flags, file_pos, &maxprot);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
#endif /* MAC */
            /*
             * Consult the file system to determine if this
             * particular file object can be mapped.
             *
             * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
             * then we don't check for writeability on the file
             * object, because it will only ever see reads.
             */
            error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
                (prot & ~PROT_WRITE) : prot, ctx);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
        }

        /*
         * No copy-on-read for mmap() mappings themselves.
         */
        vmk_flags.vmkf_no_copy_on_read = 1;
#if CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX
        /* force file ranges on !macOS */
        vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#if XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT
        /*
         * Put allocations on iOS with EXTENDED_USER_VA_SUPPORT
         * in the large file range, if the process has the "extra jumbo"
         * entitlement.  Otherwise, place the allocation in the heap range.
         */
        vmk_flags.vmkf_range_id = UMEM_RANGE_ID_LARGE_FILE;
#endif /* XNU_TARGET_OS_IOS && EXTENDED_USER_VA_SUPPORT */
#endif /* CONFIG_MAP_RANGES && !XNU_PLATFORM_MacOSX */
    }

    if (user_size == 0) {
        if (!mapanon) {
            (void)vnode_put(vp);
        }
        error = 0;
        goto bad;
    }

    if (flags & MAP_FIXED) {
        /*
         * mmap(MAP_FIXED) will replace any existing mappings in the
         * specified range, if the new mapping is successful.
         * If we just deallocated the specified address range here,
         * another thread might jump in and allocate memory in that
         * range before we get a chance to establish the new mapping,
         * and we wouldn't have a chance to restore the old mappings.
         * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
         * has to deallocate the existing mappings and establish the
         * new ones atomically.
         */
        vmk_flags.vmf_fixed = true;
        vmk_flags.vmf_overwrite = true;
    }

    if (flags & MAP_NOCACHE) {
        vmk_flags.vmf_no_cache = true;
    }

    if (flags & MAP_JIT) {
        vmk_flags.vmkf_map_jit = TRUE;
    }

    if (flags & MAP_TPRO) {
        vmk_flags.vmf_tpro = true;
    }

#if CONFIG_ROSETTA
    if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
        if (!proc_is_translated(p)) {
            if (!mapanon) {
                (void)vnode_put(vp);
            }
            error = EINVAL;
            goto bad;
        }
        vmk_flags.vmkf_translated_allow_execute = TRUE;
    }
#endif

    if (flags & MAP_RESILIENT_CODESIGN) {
        vmk_flags.vmf_resilient_codesign = true;
    }
    if (flags & MAP_RESILIENT_MEDIA) {
        vmk_flags.vmf_resilient_media = true;
    }

#if XNU_TARGET_OS_OSX
    /* macOS-specific MAP_32BIT flag handling */
    if (flags & MAP_32BIT) {
        vmk_flags.vmkf_32bit_map_va = TRUE;
    }
#endif

    /*
     * Lookup/allocate object.
     */
    if (handle == NULL) {
        control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        }
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;
        }
#endif
#endif

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        }
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
        }
#endif /* radar 3777787 */
map_anon_retry:

        result = mach_vm_map_kernel(user_map,
            vm_sanitize_wrap_addr_ref(&user_addr), user_size,
            0, vmk_flags,
            IPC_PORT_NULL, 0, FALSE,
            prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        /*
         * If a non-binding address was specified for this anonymous
         * mapping, retry the mapping with a base at the bottom of the
         * address space (one page above zero) in the event the mapping
         * operation failed due to lack of space between the address
         * and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);
            goto map_anon_retry;
        }
    } else {
        if (vnode_isswap(vp)) {
            /*
             * Map swap files with a special pager
             * that returns obfuscated contents.
             */
            control = NULL;
            pager = swapfile_pager_setup(vp);
            if (pager != MEMORY_OBJECT_NULL) {
                control = swapfile_pager_control(pager);
            }
        } else {
            control = ubc_getobject(vp, UBC_FLAGS_NONE);
        }

        if (control == NULL) {
            (void)vnode_put(vp);
            error = ENOMEM;
            goto bad;
        }

#if FBDP_DEBUG_OBJECT_NO_PAGER
//#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_PATH_NAME1 "/private/var/db/timezone/tz/202"
#define FBDP_FILE_NAME1 "icutz44l.dat"
#define FBDP_PATH_NAME2 "/private/var/mobile/Containers/Data/InternalDaemon/"
#define FBDP_FILE_NAME_START2 "com.apple.LaunchServices-"
#define FBDP_FILE_NAME_END2 "-v2.csstore"
        if (!strncmp(vp->v_name, FBDP_FILE_NAME1, strlen(FBDP_FILE_NAME1))) {
            char *path;
            int len;
            bool already_tracked;
            len = MAXPATHLEN;
            path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
            vn_getpath(vp, path, &len);
            if (!strncmp(path, FBDP_PATH_NAME1, strlen(FBDP_PATH_NAME1))) {
                if (memory_object_mark_as_tracked(control,
                    true,
                    &already_tracked) == KERN_SUCCESS &&
                    !already_tracked) {
                    printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
                }
            }
            zfree(ZV_NAMEI, path);
        } else if (!strncmp(vp->v_name, FBDP_FILE_NAME_START2, strlen(FBDP_FILE_NAME_START2)) &&
            strlen(vp->v_name) > strlen(FBDP_FILE_NAME_START2) + strlen(FBDP_FILE_NAME_END2) &&
            !strncmp(vp->v_name + strlen(vp->v_name) - strlen(FBDP_FILE_NAME_END2),
            FBDP_FILE_NAME_END2,
            strlen(FBDP_FILE_NAME_END2))) {
            char *path;
            int len;
            bool already_tracked;
            len = MAXPATHLEN;
            path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
            vn_getpath(vp, path, &len);
            if (!strncmp(path, FBDP_PATH_NAME2, strlen(FBDP_PATH_NAME2))) {
                if (memory_object_mark_as_tracked(control,
                    true,
                    &already_tracked) == KERN_SUCCESS &&
                    !already_tracked) {
                    printf("FBDP %s:%d marked vp %p \"%s\" moc %p as tracked\n", __FUNCTION__, __LINE__, vp, path, control);
                }
            }
            zfree(ZV_NAMEI, path);
        }
#endif /* FBDP_DEBUG_OBJECT_NO_PAGER */

        /*
         * Set credentials:
         * FIXME: if we're writing the file we need a way to
         * ensure that someone doesn't replace our R/W creds
         * with ones that only work for read.
         */

        ubc_setthreadcred(vp, p, current_thread());
        docow = FALSE;
        if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
            docow = TRUE;
        }

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        }
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;
        }
#endif
#endif /* notyet */

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        }
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
        }
#endif /* radar 3777787 */

map_file_retry:
        if (flags & MAP_RESILIENT_CODESIGN) {
            int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
            if (prot & reject_prot) {
                /*
                 * We would like to use (prot | maxprot) here,
                 * but the assignment of VM_PROT_EXECUTE
                 * to maxprot above would always fail the test.
                 *
                 * Skipping the check is OK, however, because we
                 * restrict maxprot to prot just below in this
                 * block.
                 */
                assert(!mapanon);
                vnode_put(vp);
                error = EPERM;
                goto bad;
            }
            /* strictly limit access to "prot" */
            maxprot &= prot;
        }

        result = vm_map_enter_mem_object_control(user_map,
            vm_sanitize_wrap_addr_ref(&user_addr), user_size,
            0, vmk_flags,
            control, vm_map_trunc_page(file_pos, user_mask),
            docow, prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        /*
         * If a non-binding address was specified for this file-backed
         * mapping, retry the mapping with a base at the bottom of the
         * address space (one page above zero) in the event the mapping
         * operation failed due to lack of space between the address
         * and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);
            goto map_file_retry;
        }
    }

    if (!mapanon) {
        (void)vnode_put(vp);
    }

    switch (result) {
    case KERN_SUCCESS:
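        /*
         * Return the address of the requested file offset within the
         * new mapping: the sub-page remainder of file_pos is added
         * back to the page-aligned address chosen by the VM.
         */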
        *retval = user_addr + (file_pos & user_mask);
        error = 0;
        break;
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        error = ENOMEM;
        break;
    case KERN_PROTECTION_FAILURE:
        error = EACCES;
        break;
    default:
        error = EINVAL;
        break;
    }
bad:
    if (pager != MEMORY_OBJECT_NULL) {
        /*
         * Release the reference on the pager.
         * If the mapping was successful, it now holds
         * an extra reference.
         */
        memory_object_deallocate(pager);
    }
    if (fpref) {
        fp_drop(p, fd, fp, 0);
    }

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
        (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
    return error;
}

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    int flags;
    vm_map_t user_map;
    int rv;
    vm_sync_t sync_flags = 0;

    user_map = current_map();
    addr = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
    if (vm_map_range_overflows(user_map, addr, size)) {
        return EINVAL;
    }
    if (addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }
    if (size == 0) {
        /*
         * We cannot support this properly without maintaining a list
         * of all the mmaps done.  We cannot use vm_map_entry, as
         * entries could be split or coalesced by independent actions.
         * So instead of returning inaccurate results, we just return
         * an error for an invalid size.
         */
        return EINVAL; /* XXX breaks posix apps */
    }

    flags = uap->flags;
    /* disallow contradictory flags */
    if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
        return EINVAL;
    }

    if (flags & MS_KILLPAGES) {
        sync_flags |= VM_SYNC_KILLPAGES;
    }
    if (flags & MS_DEACTIVATE) {
        sync_flags |= VM_SYNC_DEACTIVATE;
    }
    if (flags & MS_INVALIDATE) {
        sync_flags |= VM_SYNC_INVALIDATE;
    }

    if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
        if (flags & MS_ASYNC) {
            sync_flags |= VM_SYNC_ASYNCHRONOUS;
        } else {
            sync_flags |= VM_SYNC_SYNCHRONOUS;
        }
    }

    sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */

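    /*
     * Example of the translation above: a userspace
     * msync(addr, len, MS_SYNC | MS_INVALIDATE) call reaches
     * mach_vm_msync() as
     * VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS.
     */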
    rv = mach_vm_msync(user_map, addr, size, sync_flags);

    switch (rv) {
    case KERN_SUCCESS:
        break;
    case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
        return ENOMEM;
    case KERN_FAILURE:
        return EIO;
    default:
        return EINVAL;
    }
    return 0;
}

static inline kern_return_t
munmap_sanitize(
    vm_map_t user_map,
    vm_addr_struct_t addr_u,
    vm_size_struct_t len_u,
    mach_vm_offset_t *user_addr,
    mach_vm_offset_t *user_end,
    mach_vm_size_t *user_size)
{
    return vm_sanitize_addr_size(addr_u, len_u, VM_SANITIZE_CALLER_MUNMAP,
        user_map,
        VM_SANITIZE_FLAGS_CHECK_ALIGNED_START | VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS,
        user_addr, user_end, user_size);
}

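/*
 * Note the contrast with mmap_sanitize(): munmap() requires a page-aligned
 * start address (VM_SANITIZE_FLAGS_CHECK_ALIGNED_START) and rejects
 * zero-length requests (VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS), so
 * munmap(addr, 0) fails with EINVAL rather than succeeding as a no-op.
 */
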
int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t user_addr, user_end;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;

    user_map = current_map();

    AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
    AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

    /*
     * Sanitize any input parameters that are addr/size/protections
     */
    result = munmap_sanitize(user_map,
        uap->addr,
        uap->len,
        &user_addr,
        &user_end,
        &user_size);
    if (__improbable(result != KERN_SUCCESS)) {
        assert(vm_sanitize_get_kr(result) ==
            KERN_INVALID_ARGUMENT);
        return EINVAL;
    }
    if (mach_vm_deallocate(user_map, user_addr, user_size)) {
        return EINVAL;
    }
    return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
    vm_prot_t prot;
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;
#if CONFIG_MACF
    int error;
#endif

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->prot);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;
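    /*
     * Only VM_PROT_ALL plus the two Apple-specific request bits
     * (VM_PROT_TRUSTED, VM_PROT_STRIP_READ) survive this mask; any other
     * bits in the userspace prot argument are silently dropped.
     */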
    prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

    if (vm_map_range_overflows(user_map, user_addr, user_size)) {
        return EINVAL;
    }
    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        return EINVAL;
    }

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
    if (prot & VM_PROT_READ) {
        prot |= VM_PROT_EXECUTE;
    }
#endif
#endif /* notyet */

#if 3936456
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
    }
#endif /* 3936456 */

#if CONFIG_MACF
    /*
     * The MAC check for mprotect is of limited use for 2 reasons:
     * Without mmap revocation, the caller could have asked for the max
     * protections initially instead of a reduced set, so a mprotect
     * check would offer no new security.
     * It is not possible to extract the vnode from the pager object(s)
     * of the target memory range.
     * However, the MAC check may be used to prevent a process from,
     * e.g., making the stack executable.
     */
    error = mac_proc_check_mprotect(p, user_addr,
        user_size, prot);
    if (error) {
        return error;
    }
#endif

    if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
        /* CODE SIGNING ENFORCEMENT - JIT support */
        /*
         * The special protection value VM_PROT_TRUSTED requests that we
         * treat this page as if it had a valid code signature.
         * If this is enabled, there MUST be a MAC policy implementing
         * the mac_proc_check_mprotect() hook above.  Otherwise, code
         * signing would be compromised because the check would always
         * succeed, and thus any process could sign dynamically.
         */
        result = vm_map_sign(
            user_map,
            vm_map_trunc_page(user_addr,
                vm_map_page_mask(user_map)),
            vm_map_round_page(user_addr + user_size,
                vm_map_page_mask(user_map)));
        switch (result) {
        case KERN_SUCCESS:
            break;
        case KERN_INVALID_ADDRESS:
            /* UNIX SPEC: for an invalid address range, return ENOMEM */
            return ENOMEM;
        default:
            return EINVAL;
        }
#else
        return ENOTSUP;
#endif
    }
    prot &= ~VM_PROT_TRUSTED;

    result = mach_vm_protect(user_map, user_addr, user_size,
        FALSE, prot);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_PROTECTION_FAILURE:
        return EACCES;
    case KERN_INVALID_ADDRESS:
        /* UNIX SPEC: for an invalid address range, return ENOMEM */
        return ENOMEM;
    }
    return EINVAL;
}


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    vm_inherit_t inherit;
    vm_map_t user_map;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->inherit);

    user_map = current_map();
    addr = (mach_vm_offset_t)uap->addr;
    size = (mach_vm_size_t)uap->len;
    inherit = uap->inherit;
    if (vm_map_range_overflows(user_map, addr, size)) {
        return EINVAL;
    }
    result = mach_vm_inherit(user_map, addr, size,
        inherit);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_PROTECTION_FAILURE:
        return EACCES;
    }
    return EINVAL;
}

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
    vm_map_t user_map;
    mach_vm_offset_t start;
    mach_vm_size_t size;
    vm_behavior_t new_behavior;
    kern_return_t result;

    /*
     * Since this routine is only advisory, we default to conservative
     * behavior.
     */
    switch (uap->behav) {
    case MADV_RANDOM:
        new_behavior = VM_BEHAVIOR_RANDOM;
        break;
    case MADV_SEQUENTIAL:
        new_behavior = VM_BEHAVIOR_SEQUENTIAL;
        break;
    case MADV_NORMAL:
        new_behavior = VM_BEHAVIOR_DEFAULT;
        break;
    case MADV_WILLNEED:
        new_behavior = VM_BEHAVIOR_WILLNEED;
        break;
    case MADV_DONTNEED:
        new_behavior = VM_BEHAVIOR_DONTNEED;
        break;
    case MADV_FREE:
        new_behavior = VM_BEHAVIOR_FREE;
        break;
    case MADV_ZERO_WIRED_PAGES:
        new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
        break;
    case MADV_FREE_REUSABLE:
        new_behavior = VM_BEHAVIOR_REUSABLE;
        break;
    case MADV_FREE_REUSE:
        new_behavior = VM_BEHAVIOR_REUSE;
        break;
    case MADV_CAN_REUSE:
        new_behavior = VM_BEHAVIOR_CAN_REUSE;
        break;
    case MADV_PAGEOUT:
#if MACH_ASSERT
        new_behavior = VM_BEHAVIOR_PAGEOUT;
        break;
#else /* MACH_ASSERT */
        return ENOTSUP;
#endif /* MACH_ASSERT */
    case MADV_ZERO:
        new_behavior = VM_BEHAVIOR_ZERO;
        break;
    default:
        return EINVAL;
    }

    user_map = current_map();
    start = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
    if (vm_map_range_overflows(user_map, start, size)) {
        return EINVAL;
    }
#if __arm64__
    if (start == 0 &&
        size != 0 &&
        (uap->behav == MADV_FREE ||
        uap->behav == MADV_FREE_REUSABLE)) {
        printf("** %s: %d[%s] "
            "failing madvise(0x%llx,0x%llx,%s)\n",
            __func__, proc_getpid(p), p->p_comm, start, size,
            ((uap->behav == MADV_FREE_REUSABLE)
            ? "MADV_FREE_REUSABLE"
            : "MADV_FREE"));
        return EINVAL;
    }
#endif /* __arm64__ */

    result = mach_vm_behavior_set(user_map, start, size, new_behavior);
    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_INVALID_ADDRESS:
        return EINVAL;
    case KERN_NO_SPACE:
        return ENOMEM;
    case KERN_PROTECTION_FAILURE:
        return EPERM;
    case KERN_NO_ACCESS:
        return ENOTSUP;
    }

    return EINVAL;
}

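/*
 * mincore() reports one status byte per page.  A minimal userspace sketch
 * (hypothetical values, 16KB pages):
 *
 *     char vec[4];
 *     if (mincore(addr, 4 * 16384, vec) == 0 &&
 *         (vec[0] & MINCORE_INCORE)) {
 *         // first page of the range is resident
 *     }
 *
 * The implementation below fills that vector in MAX_PAGE_RANGE_QUERY-sized
 * chunks, so a very large range doesn't need one huge kernel allocation.
 */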
int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
    vm_map_t map = VM_MAP_NULL;
    user_addr_t vec = 0;
    int error = 0;
    int64_t lastvecindex = 0;
    int mincoreinfo = 0;
    int pqueryinfo = 0;
    uint64_t pqueryinfo_vec_size = 0;
    vm_page_info_basic_t info = NULL;
    mach_msg_type_number_t count = 0;
    char *kernel_vec = NULL;
    uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
    kern_return_t kr = KERN_SUCCESS;
    int effective_page_shift, effective_page_size;

    map = current_map();

    /*
     * On systems with 4k kernel space and 16k user space, we will
     * use the kernel page size to report back the residency information.
     * This is for backwards compatibility since we already have
     * processes that depend on this behavior.
     */
    if (vm_map_page_shift(map) < PAGE_SHIFT) {
        effective_page_shift = vm_map_page_shift(map);
        effective_page_size = vm_map_page_size(map);
    } else {
        effective_page_shift = PAGE_SHIFT;
        effective_page_size = PAGE_SIZE;
    }

    /*
     * Make sure that the addresses presented are valid for user
     * mode.
     */
    first_addr = addr = vm_map_trunc_page(uap->addr,
        vm_map_page_mask(map));
    end = vm_map_round_page(uap->addr + uap->len,
        vm_map_page_mask(map));

    if (end < addr) {
        return EINVAL;
    }

    if (end == addr) {
        return 0;
    }

    /*
     * We are going to loop through the whole 'req_vec_size' pages
     * range in chunks of 'cur_vec_size'.
     */

    req_vec_size_pages = (end - addr) >> effective_page_shift;
    cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
    size_t kernel_vec_size = cur_vec_size_pages;

    kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

    if (kernel_vec == NULL) {
        return ENOMEM;
    }

    /*
     * Address of byte vector
     */
    vec = uap->vec;

    pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

    info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

    if (info == NULL) {
        kfree_data(kernel_vec, kernel_vec_size);
        return ENOMEM;
    }

    while (addr < end) {
        cur_end = addr + (cur_vec_size_pages * effective_page_size);

        count = VM_PAGE_INFO_BASIC_COUNT;
        kr = vm_map_page_range_info_internal(map,
            addr,
            cur_end,
            effective_page_shift,
            VM_PAGE_INFO_BASIC,
            (vm_page_info_t) info,
            &count);

        assert(kr == KERN_SUCCESS);

        /*
         * Do this on a map entry basis so that if the pages are not
         * in the current process's address space, we can easily look
         * up the pages elsewhere.
         */
        lastvecindex = -1;

        for (; addr < cur_end; addr += effective_page_size) {
            pqueryinfo = info[lastvecindex + 1].disposition;

            mincoreinfo = 0;

            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
                mincoreinfo |= MINCORE_INCORE;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
                mincoreinfo |= MINCORE_REFERENCED;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
                mincoreinfo |= MINCORE_MODIFIED;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
                mincoreinfo |= MINCORE_PAGED_OUT;
            }
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
                mincoreinfo |= MINCORE_COPIED;
            }
            if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
                mincoreinfo |= MINCORE_ANONYMOUS;
            }
            /*
             * calculate index into user supplied byte vector
             */
            vecindex = (addr - first_addr) >> effective_page_shift;
            kernel_vec[vecindex] = (char)mincoreinfo;
            lastvecindex = vecindex;
        }

        assert(vecindex == (cur_vec_size_pages - 1));

        error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

        if (error) {
            break;
        }

        /*
         * For the next chunk, we'll need:
         * - to bump the location in the user buffer for our next disposition
         * - the new length
         * - the new starting address
         */
        vec += cur_vec_size_pages * sizeof(char);
        req_vec_size_pages = (end - addr) >> effective_page_shift;
        cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

        first_addr = addr;
    }

    kfree_data(info, pqueryinfo_vec_size);
    kfree_data(kernel_vec, kernel_vec_size);

    if (error) {
        return EFAULT;
    }

    return 0;
}

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retval)
{
    kern_return_t result;

    AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
    AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

    /* have to call vm_map_wire directly to pass "I don't know" protections */
    result = vm_map_wire_kernel(current_map(), uap->addr,
        vm_sanitize_compute_unsafe_end(uap->addr, uap->len),
        vm_sanitize_wrap_prot(VM_PROT_NONE), VM_KERN_MEMORY_MLOCK, TRUE);

    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_INVALID_ARGUMENT:
        return EINVAL;
    case KERN_RESOURCE_SHORTAGE:
        return EAGAIN;
    case KERN_PROTECTION_FAILURE:
        return EPERM;
    default:
        return ENOMEM;
    }
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
    kern_return_t result;

    AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
    AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

    /* JMM - need to remove all wirings by spec - this just removes one */
    result = vm_map_unwire(current_map(), uap->addr,
        vm_sanitize_compute_unsafe_end(uap->addr, uap->len), TRUE);

    switch (result) {
    case KERN_SUCCESS:
        return 0;
    case KERN_INVALID_ARGUMENT:
        return EINVAL;
    default:
        return ENOMEM;
    }
}


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
    return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
    return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
static inline kern_return_t
mremap_encrypted_sanitize(
    vm_map_t user_map,
    vm_addr_struct_t addr_u,
    vm_size_struct_t len_u,
    mach_vm_offset_t *user_addr,
    mach_vm_offset_t *user_end,
    mach_vm_size_t *user_size)
{
    return vm_sanitize_addr_size(addr_u, len_u,
        VM_SANITIZE_CALLER_MREMAP_ENCRYPTED, user_map,
        VM_SANITIZE_FLAGS_CHECK_ALIGNED_START | VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
        user_addr, user_end, user_size);
}

int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
    mach_vm_offset_t user_addr, user_end;
    mach_vm_size_t user_size;
    kern_return_t result;
    vm_map_t user_map;
    uint32_t cryptid;
    cpu_type_t cputype;
    cpu_subtype_t cpusubtype;
    pager_crypt_info_t crypt_info;
    const char *cryptname = 0;
    char *vpath;
    int len, ret;
    struct proc_regioninfo_internal pinfo;
    vnode_t vp;
    uintptr_t vnodeaddr;
    uint32_t vid;

    AUDIT_ARG(addr, VM_SANITIZE_UNSAFE_UNWRAP(uap->addr));
    AUDIT_ARG(len, VM_SANITIZE_UNSAFE_UNWRAP(uap->len));

    user_map = current_map();
    cryptid = uap->cryptid;
    cputype = uap->cputype;
    cpusubtype = uap->cpusubtype;

    /*
     * Sanitize any input parameters that are addr/size/protections
     */
    result = mremap_encrypted_sanitize(user_map,
        uap->addr,
        uap->len,
        &user_addr,
        &user_end,
        &user_size);
    if (__improbable(result != KERN_SUCCESS)) {
        assert(vm_sanitize_get_kr(result));
        return EINVAL;
    }

    switch (cryptid) {
    case CRYPTID_NO_ENCRYPTION:
        /* not encrypted, just an empty load command */
        return 0;
    case CRYPTID_APP_ENCRYPTION:
    case CRYPTID_MODEL_ENCRYPTION:
        cryptname = "com.apple.unfree";
        break;
    case 0x10:
        /*
         * some random cryptid that you could manually put into
         * your binary if you want NULL
         */
        cryptname = "com.apple.null";
        break;
    default:
        return EINVAL;
    }

    if (NULL == text_crypter_create) {
        return ENOTSUP;
    }

    ret = fill_procregioninfo_onlymappedvnodes(proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
    if (ret == 0 || !vnodeaddr) {
        /* No really, this returns 0 if the memory address is not backed by a file */
        return EINVAL;
    }

    vp = (vnode_t)vnodeaddr;
    if ((vnode_getwithvid(vp, vid)) == 0) {
        vpath = zalloc(ZV_NAMEI);

        len = MAXPATHLEN;
        ret = vn_getpath(vp, vpath, &len);
        if (ret) {
            zfree(ZV_NAMEI, vpath);
            vnode_put(vp);
            return ret;
        }

        vnode_put(vp);
    } else {
        return EINVAL;
    }

#if 0
    kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
        __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

    if (user_size == 0) {
        printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
        zfree(ZV_NAMEI, vpath);
        return 0;
    }

    /* set up decrypter first */
    crypt_file_data_t crypt_data = {
        .filename = vpath,
        .cputype = cputype,
        .cpusubtype = cpusubtype,
        .origin = CRYPT_ORIGIN_LIBRARY_LOAD,
    };
    result = text_crypter_create(&crypt_info, cryptname, (void *)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
    if (vm_map_debug_apple_protect) {
        printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
            proc_getpid(p), p->p_comm,
            user_map,
            (uint64_t) user_addr,
            (uint64_t) (user_addr + user_size),
            __FUNCTION__, vpath, result);
    }
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
    zfree(ZV_NAMEI, vpath);

    if (result) {
        printf("%s: unable to create decrypter %s, kr=%d\n",
            __FUNCTION__, cryptname, result);
        if (result == kIOReturnNotPrivileged) {
            /* text encryption returned decryption failure */
            return EPERM;
        } else {
            return ENOMEM;
        }
    }

    /* now remap using the decrypter */
    vm_object_offset_t crypto_backing_offset;
    crypto_backing_offset = -1; /* i.e. use map entry's offset */
    result = vm_map_apple_protected(user_map,
        user_addr,
        user_addr + user_size,
        crypto_backing_offset,
        &crypt_info,
        cryptid);
    if (result) {
        printf("%s: mapping failed with %d\n", __FUNCTION__, result);
    }

    if (result) {
        return EPERM;
    }
    return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */