/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>

#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>
#include <IOKit/IOBSD.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <os/overflow.h>

/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h. Basically, we attempt to draw the line of: "was this code
 * compiled with an SDK from fall of 2020 or later?"
 */
static bool
proc_2020_fall_os_sdk_or_later(void)
{
	const uint32_t proc_sdk_ver = proc_sdk(current_proc());

	switch (proc_platform(current_proc())) {
	case PLATFORM_MACOS:
		return proc_sdk_ver >= 0x000a1000; // DYLD_MACOSX_VERSION_10_16
	case PLATFORM_IOS:
	case PLATFORM_IOSSIMULATOR:
	case PLATFORM_MACCATALYST:
		return proc_sdk_ver >= 0x000e0000; // DYLD_IOS_VERSION_14_0
	case PLATFORM_BRIDGEOS:
		return proc_sdk_ver >= 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
	case PLATFORM_TVOS:
	case PLATFORM_TVOSSIMULATOR:
		return proc_sdk_ver >= 0x000e0000; // DYLD_TVOS_VERSION_14_0
	case PLATFORM_WATCHOS:
	case PLATFORM_WATCHOSSIMULATOR:
		return proc_sdk_ver >= 0x00070000; // DYLD_WATCHOS_VERSION_7_0
	default:
		/*
		 * Tough call, but let's give new platforms the benefit of the doubt
		 * to avoid a recurrence of rdar://89843927.
		 */
		return true;
	}
}
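
/*
 * The SDK version constants above use dyld's packed encoding: the major
 * version in the upper 16 bits, the minor in the next 8, and the patch
 * level in the low 8, so 0x000a1000 is 10.16.0 and 0x000e0000 is 14.0.0.
 * A minimal sketch of that encoding follows, for illustration only; the
 * macro name is hypothetical and not part of this file.
 */
#if 0
#define DYLD_PACKED_VERSION(major, minor, patch) \
	((((uint32_t)(major)) << 16) | (((uint32_t)(minor)) << 8) | ((uint32_t)(patch)))

_Static_assert(DYLD_PACKED_VERSION(10, 16, 0) == 0x000a1000, "macOS 10.16");
_Static_assert(DYLD_PACKED_VERSION(14, 0, 0) == 0x000e0000, "iOS 14.0");
#endif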

#if MACH_ASSERT
vnode_t fbdp_vp = NULL;
#endif /* MACH_ASSERT */

/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective. Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 * Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	struct vnode *vp;
	int flags;
	int prot;
	int err = 0;
	vm_map_t user_map;
	kern_return_t result;
	vm_map_offset_t user_addr;
	vm_map_offset_t sum;
	vm_map_size_t user_size;
	vm_object_offset_t pageoff;
	vm_object_offset_t file_pos;
	int alloc_flags = 0;
	vm_tag_t tag = VM_KERN_MEMORY_NONE;
	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	boolean_t docow;
	vm_prot_t maxprot;
	void *handle;
	memory_object_t pager = MEMORY_OBJECT_NULL;
	memory_object_control_t control;
	int mapanon = 0;
	int fpref = 0;
	int error = 0;
	int fd = uap->fd;
	int num_retries = 0;

	/*
	 * Note that for UNIX03 conformance, there is additional parameter checking
	 * for the mmap() system call in libsyscall prior to entering the kernel.
	 * The sanity checks and argument validation done in this function are not
	 * the only places one can get returned errnos.
	 */

	user_map = current_map();
	user_addr = (vm_map_offset_t)uap->addr;
	user_size = (vm_map_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	if (vm_map_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}
	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif  /* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * Verify that no unknown flags are passed in, and if any are,
	 * fail out early to make sure the logic below never has to deal
	 * with invalid flag values. Only do so for processes compiled
	 * with a Fall 2020 or later SDK, which is where we drew this
	 * line and documented it as such.
	 */
	if (flags & ~(MAP_SHARED |
	    MAP_PRIVATE |
	    MAP_COPY |
	    MAP_FIXED |
	    MAP_RENAME |
	    MAP_NORESERVE |
	    MAP_RESERVED0080 |        // grandfathered in as accepted and ignored
	    MAP_NOEXTEND |
	    MAP_HASSEMAPHORE |
	    MAP_NOCACHE |
	    MAP_JIT |
	    MAP_FILE |
	    MAP_ANON |
	    MAP_RESILIENT_CODESIGN |
	    MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
	    MAP_32BIT |
#endif
	    MAP_TRANSLATED_ALLOW_EXECUTE |
	    MAP_UNIX03)) {
		if (proc_2020_fall_os_sdk_or_later()) {
			return EINVAL;
		}
	}


	/*
	 * The VM code does not have prototypes, and the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call. So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (os_add3_overflow(file_pos, user_size, vm_map_page_size(user_map) - 1, &sum)) {
		return EINVAL;
	}

	if (flags & MAP_UNIX03) {
		vm_map_offset_t offset_alignment_mask;

		/*
		 * Enforce UNIX03 compliance.
		 */

		if (vm_map_is_exotic(current_map())) {
			offset_alignment_mask = 0xFFF;
		} else {
			offset_alignment_mask = vm_map_page_mask(current_map());
		}
		if (file_pos & offset_alignment_mask) {
			/* file offset should be page-aligned */
			return EINVAL;
		}
		if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
			/* need either MAP_PRIVATE or MAP_SHARED */
			return EINVAL;
		}
		if (user_size == 0) {
			/* mapping length should not be 0 */
			return EINVAL;
		}
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & vm_map_page_mask(user_map));
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;           /* low end... */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map)); /* hi end */


	if (flags & MAP_JIT) {
		if ((flags & MAP_FIXED) ||
		    (flags & MAP_SHARED) ||
		    !(flags & MAP_ANON) ||
		    (flags & MAP_RESILIENT_CODESIGN) ||
		    (flags & MAP_RESILIENT_MEDIA)) {
			return EINVAL;
		}
	}

	if ((flags & MAP_RESILIENT_CODESIGN) ||
	    (flags & MAP_RESILIENT_MEDIA)) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_JIT)) {
			return EINVAL;
		}
	}
	if (flags & MAP_RESILIENT_CODESIGN) {
		int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
		if (prot & reject_prot) {
			/*
			 * Quick sanity check. maxprot is calculated below and
			 * we will test it again.
			 */
			return EPERM;
		}
	}
	if (flags & MAP_SHARED) {
		/*
		 * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
		 * there is no place to inject zero-filled pages without
		 * actually adding them to the file.
		 * Since we didn't reject that combination before, there might
		 * already be callers using it and getting a valid MAP_SHARED
		 * mapping but without the resilience.
		 * For backwards compatibility's sake, let's keep ignoring
		 * MAP_RESILIENT_MEDIA in that case.
		 */
		flags &= ~MAP_RESILIENT_MEDIA;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		if ((flags & MAP_ANON) ||
		    (flags & MAP_SHARED)) {
			return EINVAL;
		}
	}

	/*
	 * Check for illegal addresses. Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & vm_map_page_mask(user_map)) {
			return EINVAL;
		}
	}
#ifdef notyet
	/* We do not have APIs to get this info; need to wait till then. */
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
	    vm_map_page_mask(user_map))) {
		addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
		    vm_map_page_mask(user_map));
	}

#endif

	alloc_flags = 0;

#if CONFIG_MAP_RANGES
	/* default to placing mappings in the heap range. */
	vmk_flags.vmkf_range_id = UMEM_RANGE_ID_HEAP;
#endif /* CONFIG_MAP_RANGES */

	if (flags & MAP_ANON) {
		maxprot = VM_PROT_ALL;
#if CONFIG_MACF
		/*
		 * Entitlement check.
		 */
		error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
		if (error) {
			return EINVAL;
		}
#endif /* MAC */

		/*
		 * Mapping blank space is trivial. Use positive fds as the alias
		 * value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
			    VM_FLAGS_SUPERPAGE_MASK |
			    VM_FLAGS_PURGABLE |
			    VM_FLAGS_4GB_CHUNK);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
			VM_GET_FLAGS_ALIAS(alloc_flags, tag);
			alloc_flags &= ~VM_FLAGS_ALIAS_MASK;
			vmk_flags.vmkf_range_id = VM_MAP_RANGE_ID(user_map, tag);
		}

		handle = NULL;
		file_pos = 0;
		pageoff = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		if (flags & MAP_JIT) {
			return EINVAL;
		}

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err) {
			return err;
		}
		fpref = 1;
		switch (FILEGLOB_DTYPE(fp->fp_glob)) {
		case DTYPE_PSXSHM:
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		case DTYPE_VNODE:
			break;
		default:
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp_get_data(fp);
		error = vnode_getwithref(vp);
		if (error != 0) {
			goto bad;
		}

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible. Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead? Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE; /* TODO: Remove this and restrict maxprot? */
			if (fp->fp_glob->fg_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
				    /*
				     * Do not allow writable mappings of
				     * swap files (see vm_swapfile_pager.c).
				     */
				    !vnode_isswap(vp)) {
					/*
					 * check for write access
					 *
					 * Note that we already made this check when granting FWRITE
					 * against the file, so it seems redundant here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0) {
						maxprot |= VM_PROT_WRITE;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->fp_glob, prot, flags, file_pos + pageoff,
			    &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
			/*
			 * Consult the file system to determine if this
			 * particular file object can be mapped.
			 *
			 * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
			 * then we don't check for writeability on the file
			 * object, because it will only ever see reads.
			 */
			error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
			    (prot & ~PROT_WRITE) : prot, ctx);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
		}

		/*
		 * No copy-on-read for mmap() mappings themselves.
		 */
		vmk_flags.vmkf_no_copy_on_read = 1;
	}

	if (user_size == 0) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = 0;
		goto bad;
	}

	/*
	 * We bend a little - round the start and end addresses
	 * to the nearest page boundary.
	 */
	user_size = vm_map_round_page(user_size,
	    vm_map_page_mask(user_map));

	if (file_pos & vm_map_page_mask(user_map)) {
		if (!mapanon) {
			(void)vnode_put(vp);
		}
		error = EINVAL;
		goto bad;
	}

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = vm_map_round_page(user_addr,
		    vm_map_page_mask(user_map));
	} else {
		if (user_addr != vm_map_trunc_page(user_addr,
		    vm_map_page_mask(user_map))) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE) {
		alloc_flags |= VM_FLAGS_NO_CACHE;
	}

	if (flags & MAP_JIT) {
		vmk_flags.vmkf_map_jit = TRUE;
	}

#if CONFIG_ROSETTA
	if (flags & MAP_TRANSLATED_ALLOW_EXECUTE) {
		if (!proc_is_translated(p)) {
			if (!mapanon) {
				(void)vnode_put(vp);
			}
			error = EINVAL;
			goto bad;
		}
		vmk_flags.vmkf_translated_allow_execute = TRUE;
	}
#endif

	if (flags & MAP_RESILIENT_CODESIGN) {
		alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
	}
	if (flags & MAP_RESILIENT_MEDIA) {
		alloc_flags |= VM_FLAGS_RESILIENT_MEDIA;
	}

#if XNU_TARGET_OS_OSX
	/* macOS-specific MAP_32BIT flag handling */
	if (flags & MAP_32BIT) {
		vmk_flags.vmkf_32bit_map_va = TRUE;
	}
#endif

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		control = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */
map_anon_retry:

		result = vm_map_enter_mem_object(user_map,
		    &user_addr, user_size,
		    0, alloc_flags, vmk_flags,
		    tag,
		    IPC_PORT_NULL, 0, FALSE,
		    prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this anonymous
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_anon_retry;
		}
	} else {
		if (vnode_isswap(vp)) {
			/*
			 * Map swap files with a special pager
			 * that returns obfuscated contents.
			 */
			control = NULL;
			pager = swapfile_pager_setup(vp);
			if (pager != MEMORY_OBJECT_NULL) {
				control = swapfile_pager_control(pager);
			}
		} else {
			control = ubc_getobject(vp, UBC_FLAGS_NONE);
		}

		if (control == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

#if MACH_ASSERT
#define FBDP_PATH_NAME "/private/var/db/timezone/tz/2022a.1.1/icutz/"
#define FBDP_FILE_NAME "icutz44l.dat"
		if (fbdp_vp == NULL &&
		    !strncmp(vp->v_name, FBDP_FILE_NAME, strlen(FBDP_FILE_NAME))) {
			char *path;
			int len;
			len = MAXPATHLEN;
			path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_NOFAIL);
			vn_getpath(vp, path, &len);
			if (!strncmp(path, FBDP_PATH_NAME, strlen(FBDP_PATH_NAME))) {
				fbdp_vp = vp;
				memory_object_mark_for_fbdp(control);
				printf("FBDP %s:%d marked vp %p \"%s\" moc %p as 'fbdp'\n", __FUNCTION__, __LINE__, vp, path, control);
			}
			zfree(ZV_NAMEI, path);
		}
#endif /* MACH_ASSERT */

		/*
		 * Set credentials:
		 * FIXME: if we're writing the file we need a way to
		 * ensure that someone doesn't replace our R/W creds
		 * with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ) {
			prot |= VM_PROT_EXECUTE;
		}
		if (maxprot & VM_PROT_READ) {
			maxprot |= VM_PROT_EXECUTE;
		}
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			prot |= VM_PROT_READ;
		}
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
			maxprot |= VM_PROT_READ;
		}
#endif  /* radar 3777787 */

map_file_retry:
		if (flags & MAP_RESILIENT_CODESIGN) {
			int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
			if (prot & reject_prot) {
				/*
				 * Would like to use (prot | maxprot) here
				 * but the assignment of VM_PROT_EXECUTE
				 * to maxprot above would always fail the test.
				 *
				 * Skipping the check is ok, however, because we
				 * restrict maxprot to prot just below in this
				 * block.
				 */
				assert(!mapanon);
				vnode_put(vp);
				error = EPERM;
				goto bad;
			}
			/* strictly limit access to "prot" */
			maxprot &= prot;
		}

		vm_object_offset_t end_pos = 0;
		if (os_add_overflow(user_size, file_pos, &end_pos)) {
			vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		result = vm_map_enter_mem_object_control(user_map,
		    &user_addr, user_size,
		    0, alloc_flags, vmk_flags,
		    tag,
		    control, file_pos,
		    docow, prot, maxprot,
		    (flags & MAP_SHARED) ?
		    VM_INHERIT_SHARE :
		    VM_INHERIT_DEFAULT);

		/* If a non-binding address was specified for this file backed
		 * mapping, retry the mapping with a zero base
		 * in the event the mapping operation failed due to
		 * lack of space between the address and the map's maximum.
		 */
		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
			user_addr = vm_map_page_size(user_map);
			goto map_file_retry;
		}
	}

	if (!mapanon) {
		(void)vnode_put(vp);
	}

	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (pager != MEMORY_OBJECT_NULL) {
		/*
		 * Release the reference on the pager.
		 * If the mapping was successful, it now holds
		 * an extra reference.
		 */
		memory_object_deallocate(pager);
	}
	if (fpref) {
		fp_drop(p, fd, fp, 0);
	}

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
	    (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
#endif /* XNU_TARGET_OS_OSX */
	return error;
}
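
/*
 * Userspace sketch (under "#if 0", not compiled here) of the two mmap()
 * paths handled above: a file-backed mapping, and a MAP_ANON mapping where
 * the "fd" argument carries Mach VM allocation flags such as a
 * VM_MAKE_TAG() alias for memory tracking, plus the matching munmap()
 * calls. The helper name is hypothetical; only <sys/mman.h> and
 * <mach/vm_statistics.h> interfaces are assumed.
 */
#if 0
#include <sys/mman.h>
#include <mach/vm_statistics.h>
#include <fcntl.h>
#include <unistd.h>

static int
mmap_examples(const char *path, size_t len)     /* hypothetical helper */
{
	/* File-backed, read-only, shared mapping; offset must be page-aligned. */
	int fd = open(path, O_RDONLY);
	if (fd < 0) {
		return -1;
	}
	void *file_map = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_SHARED, fd, 0);
	close(fd);
	if (file_map == MAP_FAILED) {
		return -1;
	}

	/* Anonymous mapping; the fd slot passes a VM tag (alias) to the kernel. */
	void *anon_map = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE,
	    VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
	if (anon_map == MAP_FAILED) {
		(void)munmap(file_map, len);
		return -1;
	}

	/* munmap() requires a page-aligned address and a non-zero length. */
	(void)munmap(anon_map, len);
	(void)munmap(file_map, len);
	return 0;
}
#endif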

int
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
}

int
msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	int flags;
	vm_map_t user_map;
	int rv;
	vm_sync_t sync_flags = 0;

	user_map = current_map();
	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
#if XNU_TARGET_OS_OSX
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
#endif /* XNU_TARGET_OS_OSX */
	if (mach_vm_range_overflows(addr, size)) {
		return EINVAL;
	}
	if (addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}
	if (size == 0) {
		/*
		 * We cannot support this properly without maintaining
		 * a list of all mmaps done. We cannot use vm_map_entry,
		 * as entries could be split or coalesced by independent
		 * actions. So instead of returning inaccurate results,
		 * let's just return an error for an invalid size being
		 * specified.
		 */
		return EINVAL; /* XXX breaks posix apps */
	}

	flags = uap->flags;
	/* disallow contradictory flags */
	if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
		return EINVAL;
	}

	if (flags & MS_KILLPAGES) {
		sync_flags |= VM_SYNC_KILLPAGES;
	}
	if (flags & MS_DEACTIVATE) {
		sync_flags |= VM_SYNC_DEACTIVATE;
	}
	if (flags & MS_INVALIDATE) {
		sync_flags |= VM_SYNC_INVALIDATE;
	}

	if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
		if (flags & MS_ASYNC) {
			sync_flags |= VM_SYNC_ASYNCHRONOUS;
		} else {
			sync_flags |= VM_SYNC_SYNCHRONOUS;
		}
	}

	sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */

	rv = mach_vm_msync(user_map, addr, size, sync_flags);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
		return ENOMEM;
	case KERN_FAILURE:
		return EIO;
	default:
		return EINVAL;
	}
	return 0;
}
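
/*
 * Userspace sketch (under "#if 0", not compiled here) of msync() against
 * the checks above: the address must be page-aligned, the length non-zero,
 * and MS_SYNC and MS_ASYNC are mutually exclusive. Because of
 * VM_SYNC_CONTIGUOUS, a hole in the range surfaces as ENOMEM. The helper
 * name is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <errno.h>

static int
flush_mapping(void *page_aligned_addr, size_t len)
{
	if (msync(page_aligned_addr, len, MS_SYNC) == -1) {
		/* ENOMEM: hole in the range; EINVAL: bad flags, alignment, or size. */
		return errno;
	}
	return 0;
}
#endif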


int
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);

	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	if (mach_vm_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}

	if (user_size == 0) {
		/* UNIX SPEC: size is 0, return EINVAL */
		return EINVAL;
	}

	result = mach_vm_deallocate(user_map, user_addr, user_size);
	if (result != KERN_SUCCESS) {
		return EINVAL;
	}
	return 0;
}

int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	vm_prot_t prot;
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

	if (mach_vm_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ) {
		prot |= VM_PROT_EXECUTE;
	}
#endif
#endif /* notyet */

#if 3936456
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
		prot |= VM_PROT_READ;
	}
#endif /* 3936456 */

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    user_size, prot);
	if (error) {
		return error;
	}
#endif

	if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/* The special protection value VM_PROT_TRUSTED requests that we treat
		 * this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing the
		 * mac_proc_check_mprotect() hook above. Otherwise, code signing would
		 * be compromised, because the check would always succeed and thus any
		 * process could sign dynamically. */
		result = vm_map_sign(
			user_map,
			vm_map_trunc_page(user_addr,
			vm_map_page_mask(user_map)),
			vm_map_round_page(user_addr + user_size,
			vm_map_page_mask(user_map)));
		switch (result) {
		case KERN_SUCCESS:
			break;
		case KERN_INVALID_ADDRESS:
			/* UNIX SPEC: for an invalid address range, return ENOMEM */
			return ENOMEM;
		default:
			return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
	    FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return EINVAL;
}
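
/*
 * Userspace sketch (under "#if 0", not compiled here): a typical use of
 * mprotect() to drop write access after initialization. Per the radar
 * 3936456 handling above, requesting write or execute implies read, and an
 * invalid address range comes back as ENOMEM. The helper name is
 * hypothetical.
 */
#if 0
#include <sys/mman.h>

static int
make_read_only(void *page_aligned_addr, size_t len)
{
	/* Drop write access once the buffer has been initialized. */
	return mprotect(page_aligned_addr, len, PROT_READ);
}
#endif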


int
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_inherit_t inherit;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->inherit);

	addr = (mach_vm_offset_t)uap->addr;
	size = (mach_vm_size_t)uap->len;
	inherit = uap->inherit;
	if (mach_vm_range_overflows(addr, size)) {
		return EINVAL;
	}
	user_map = current_map();
	result = mach_vm_inherit(user_map, addr, size,
	    inherit);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_PROTECTION_FAILURE:
		return EACCES;
	}
	return EINVAL;
}
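
/*
 * Userspace sketch (under "#if 0", not compiled here): minherit() takes
 * the Mach VM_INHERIT_* values from <mach/vm_inherit.h>, which the code
 * above passes straight through to mach_vm_inherit(); for example, marking
 * a region to be shared with children across fork(). The helper name is
 * hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <mach/vm_inherit.h>

static int
share_with_children(void *addr, size_t len)
{
	return minherit(addr, len, VM_INHERIT_SHARE);
}
#endif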

int
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	mach_vm_offset_t start;
	mach_vm_size_t size;
	vm_behavior_t new_behavior;
	kern_return_t result;

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	switch (uap->behav) {
	case MADV_RANDOM:
		new_behavior = VM_BEHAVIOR_RANDOM;
		break;
	case MADV_SEQUENTIAL:
		new_behavior = VM_BEHAVIOR_SEQUENTIAL;
		break;
	case MADV_NORMAL:
		new_behavior = VM_BEHAVIOR_DEFAULT;
		break;
	case MADV_WILLNEED:
		new_behavior = VM_BEHAVIOR_WILLNEED;
		break;
	case MADV_DONTNEED:
		new_behavior = VM_BEHAVIOR_DONTNEED;
		break;
	case MADV_FREE:
		new_behavior = VM_BEHAVIOR_FREE;
		break;
	case MADV_ZERO_WIRED_PAGES:
		new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
		break;
	case MADV_FREE_REUSABLE:
		new_behavior = VM_BEHAVIOR_REUSABLE;
		break;
	case MADV_FREE_REUSE:
		new_behavior = VM_BEHAVIOR_REUSE;
		break;
	case MADV_CAN_REUSE:
		new_behavior = VM_BEHAVIOR_CAN_REUSE;
		break;
	case MADV_PAGEOUT:
#if MACH_ASSERT
		new_behavior = VM_BEHAVIOR_PAGEOUT;
		break;
#else /* MACH_ASSERT */
		return ENOTSUP;
#endif /* MACH_ASSERT */
	default:
		return EINVAL;
	}

	start = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t) uap->len;
	if (mach_vm_range_overflows(start, size)) {
		return EINVAL;
	}
#if __arm64__
	if (start == 0 &&
	    size != 0 &&
	    (uap->behav == MADV_FREE ||
	    uap->behav == MADV_FREE_REUSABLE)) {
		printf("** FOURK_COMPAT: %d[%s] "
		    "failing madvise(0x%llx,0x%llx,%s)\n",
		    proc_getpid(p), p->p_comm, start, size,
		    ((uap->behav == MADV_FREE_REUSABLE)
		    ? "MADV_FREE_REUSABLE"
		    : "MADV_FREE"));
		DTRACE_VM3(fourk_compat_madvise,
		    uint64_t, start,
		    uint64_t, size,
		    int, uap->behav);
		return EINVAL;
	}
#endif /* __arm64__ */

	user_map = current_map();

	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
	switch (result) {
	case KERN_SUCCESS:
		return 0;
	case KERN_INVALID_ADDRESS:
		return EINVAL;
	case KERN_NO_SPACE:
		return ENOMEM;
	}

	return EINVAL;
}
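
/*
 * Userspace sketch (under "#if 0", not compiled here) of the advisory
 * calls handled above: MADV_SEQUENTIAL is plain POSIX advice, while
 * MADV_FREE_REUSABLE maps to the Darwin-specific Mach "reusable" behavior.
 * The helper name is hypothetical.
 */
#if 0
#include <sys/mman.h>

static void
advise_cache(void *addr, size_t len)
{
	/* Hint a streaming access pattern. */
	(void)madvise(addr, len, MADV_SEQUENTIAL);

	/* Tell the VM these pages can be reclaimed without being paged out. */
	(void)madvise(addr, len, MADV_FREE_REUSABLE);
}
#endif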

int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
	vm_map_t map = VM_MAP_NULL;
	user_addr_t vec = 0;
	int error = 0;
	int64_t lastvecindex = 0;
	int mincoreinfo = 0;
	int pqueryinfo = 0;
	uint64_t pqueryinfo_vec_size = 0;
	vm_page_info_basic_t info = NULL;
	mach_msg_type_number_t count = 0;
	char *kernel_vec = NULL;
	uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
	kern_return_t kr = KERN_SUCCESS;
	int effective_page_shift, effective_page_size;

	map = current_map();

	/*
	 * On systems with 4k kernel space and 16k user space, we will
	 * use the kernel page size to report back the residency information.
	 * This is for backwards compatibility since we already have
	 * processes that depend on this behavior.
	 */
	if (vm_map_page_shift(map) < PAGE_SHIFT) {
		effective_page_shift = vm_map_page_shift(map);
		effective_page_size = vm_map_page_size(map);
	} else {
		effective_page_shift = PAGE_SHIFT;
		effective_page_size = PAGE_SIZE;
	}

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = vm_map_trunc_page(uap->addr,
	    vm_map_page_mask(map));
	end = vm_map_round_page(uap->addr + uap->len,
	    vm_map_page_mask(map));

	if (end < addr) {
		return EINVAL;
	}

	if (end == addr) {
		return 0;
	}

	/*
	 * We are going to loop through the whole 'req_vec_size' pages
	 * range in chunks of 'cur_vec_size'.
	 */

	req_vec_size_pages = (end - addr) >> effective_page_shift;
	cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
	size_t kernel_vec_size = cur_vec_size_pages;

	kernel_vec = (char *)kalloc_data(kernel_vec_size, Z_WAITOK | Z_ZERO);

	if (kernel_vec == NULL) {
		return ENOMEM;
	}

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);

	info = (struct vm_page_info_basic *)kalloc_data(pqueryinfo_vec_size, Z_WAITOK);

	if (info == NULL) {
		kfree_data(kernel_vec, kernel_vec_size);
		return ENOMEM;
	}

	while (addr < end) {
		cur_end = addr + (cur_vec_size_pages * effective_page_size);

		count = VM_PAGE_INFO_BASIC_COUNT;
		kr = vm_map_page_range_info_internal(map,
		    addr,
		    cur_end,
		    effective_page_shift,
		    VM_PAGE_INFO_BASIC,
		    (vm_page_info_t) info,
		    &count);

		assert(kr == KERN_SUCCESS);

		/*
		 * Do this on a map entry basis so that if the pages are not
		 * in the current process's address space, we can easily look
		 * up the pages elsewhere.
		 */
		lastvecindex = -1;

		for (; addr < cur_end; addr += effective_page_size) {
			pqueryinfo = info[lastvecindex + 1].disposition;

			mincoreinfo = 0;

			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
				mincoreinfo |= MINCORE_INCORE;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
				mincoreinfo |= MINCORE_REFERENCED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
				mincoreinfo |= MINCORE_MODIFIED;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
				mincoreinfo |= MINCORE_PAGED_OUT;
			}
			if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
				mincoreinfo |= MINCORE_COPIED;
			}
			if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
				mincoreinfo |= MINCORE_ANONYMOUS;
			}
			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = (addr - first_addr) >> effective_page_shift;
			kernel_vec[vecindex] = (char)mincoreinfo;
			lastvecindex = vecindex;
		}


		assert(vecindex == (cur_vec_size_pages - 1));

		error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

		if (error) {
			break;
		}

		/*
		 * For the next chunk, we need to:
		 * - bump the location in the user buffer for the next disposition
		 * - compute the new length
		 * - compute the new starting address
		 */
		vec += cur_vec_size_pages * sizeof(char);
		req_vec_size_pages = (end - addr) >> effective_page_shift;
		cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

		first_addr = addr;
	}

	kfree_data(info, pqueryinfo_vec_size);
	kfree_data(kernel_vec, kernel_vec_size);

	if (error) {
		return EFAULT;
	}

	return 0;
}
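
/*
 * Userspace sketch (under "#if 0", not compiled here): mincore() fills one
 * status byte per page, so the vector must cover ceil(len / pagesize)
 * entries; the MINCORE_* bits match the dispositions computed above. The
 * helper name is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>

static size_t
count_resident_pages(void *addr, size_t len)
{
	size_t page_size = (size_t)getpagesize();
	size_t npages = (len + page_size - 1) / page_size;
	char *vec = calloc(npages, 1);
	size_t resident = 0;

	if (vec != NULL && mincore(addr, len, vec) == 0) {
		for (size_t i = 0; i < npages; i++) {
			if (vec[i] & MINCORE_INCORE) {
				resident++;
			}
		}
	}
	free(vec);
	return resident;
}
#endif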

int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retval)
{
	vm_map_t user_map;
	vm_map_offset_t addr;
	vm_map_size_t size, pageoff;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	if (vm_map_range_overflows(addr, size)) {
		return EINVAL;
	}

	if (size == 0) {
		return 0;
	}

	user_map = current_map();
	pageoff = (addr & vm_map_page_mask(user_map));
	addr -= pageoff;
	size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);

	if (result == KERN_RESOURCE_SHORTAGE) {
		return EAGAIN;
	} else if (result == KERN_PROTECTION_FAILURE) {
		return EACCES;
	} else if (result != KERN_SUCCESS) {
		return ENOMEM;
	}

	return 0;       /* KERN_SUCCESS */
}

int
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr;
	mach_vm_size_t size;
	vm_map_t user_map;
	kern_return_t result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (mach_vm_offset_t) uap->addr;
	size = (mach_vm_size_t)uap->len;
	user_map = current_map();
	if (mach_vm_range_overflows(addr, size)) {
		return EINVAL;
	}
	/* JMM - need to remove all wirings by spec - this just removes one */
	result = mach_vm_wire_kernel(host_priv_self(), user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
	return result == KERN_SUCCESS ? 0 : ENOMEM;
}
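
/*
 * Userspace sketch (under "#if 0", not compiled here): wiring and unwiring
 * a buffer. Per the code above, mlock() rounds the range to page
 * boundaries itself and returns EAGAIN on a wiring-limit shortage, and
 * munlock() removes a single wiring. The helper name is hypothetical.
 */
#if 0
#include <sys/mman.h>
#include <errno.h>

static int
with_wired_buffer(void *buf, size_t len)
{
	if (mlock(buf, len) == -1) {
		return errno;   /* EAGAIN, EACCES, or ENOMEM per the mapping above */
	}
	/* ... touch the buffer while it is guaranteed resident ... */
	(void)munlock(buf, len);
	return 0;
}
#endif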


int
mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

int
munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
{
	return ENOSYS;
}

#if CONFIG_CODE_DECRYPTION
int
mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t user_addr;
	mach_vm_size_t user_size;
	kern_return_t result;
	vm_map_t user_map;
	uint32_t cryptid;
	cpu_type_t cputype;
	cpu_subtype_t cpusubtype;
	pager_crypt_info_t crypt_info;
	const char * cryptname = 0;
	char *vpath;
	int len, ret;
	struct proc_regioninfo_internal pinfo;
	vnode_t vp;
	uintptr_t vnodeaddr;
	uint32_t vid;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	cryptid = uap->cryptid;
	cputype = uap->cputype;
	cpusubtype = uap->cpusubtype;

	if (mach_vm_range_overflows(user_addr, user_size)) {
		return EINVAL;
	}
	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

	switch (cryptid) {
	case CRYPTID_NO_ENCRYPTION:
		/* not encrypted, just an empty load command */
		return 0;
	case CRYPTID_APP_ENCRYPTION:
	case CRYPTID_MODEL_ENCRYPTION:
		cryptname = "com.apple.unfree";
		break;
	case 0x10:
		/* some random cryptid that you could manually put into
		 * your binary if you want NULL */
		cryptname = "com.apple.null";
		break;
	default:
		return EINVAL;
	}

	if (NULL == text_crypter_create) {
		return ENOTSUP;
	}

	ret = fill_procregioninfo_onlymappedvnodes(proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
	if (ret == 0 || !vnodeaddr) {
		/* No really, this returns 0 if the memory address is not backed by a file */
		return EINVAL;
	}

	vp = (vnode_t)vnodeaddr;
	if ((vnode_getwithvid(vp, vid)) == 0) {
		vpath = zalloc(ZV_NAMEI);

		len = MAXPATHLEN;
		ret = vn_getpath(vp, vpath, &len);
		if (ret) {
			zfree(ZV_NAMEI, vpath);
			vnode_put(vp);
			return ret;
		}

		vnode_put(vp);
	} else {
		return EINVAL;
	}

#if 0
	kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
	    __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
#endif

	if (user_size == 0) {
		printf("%s:%d '%s': user_addr 0x%llx user_size 0x%llx cryptid 0x%x ignored\n", __FUNCTION__, __LINE__, vpath, user_addr, user_size, cryptid);
		zfree(ZV_NAMEI, vpath);
		return 0;
	}

	/* set up decrypter first */
	crypt_file_data_t crypt_data = {
		.filename = vpath,
		.cputype = cputype,
		.cpusubtype = cpusubtype
	};
	result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
	if (vm_map_debug_apple_protect) {
		printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
		    proc_getpid(p), p->p_comm,
		    user_map,
		    (uint64_t) user_addr,
		    (uint64_t) (user_addr + user_size),
		    __FUNCTION__, vpath, result);
	}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
	zfree(ZV_NAMEI, vpath);

	if (result) {
		printf("%s: unable to create decrypter %s, kr=%d\n",
		    __FUNCTION__, cryptname, result);
		if (result == kIOReturnNotPrivileged) {
			/* text encryption returned decryption failure */
			return EPERM;
		} else {
			return ENOMEM;
		}
	}

	/* now remap using the decrypter */
	vm_object_offset_t crypto_backing_offset;
	crypto_backing_offset = -1;     /* i.e. use map entry's offset */
	result = vm_map_apple_protected(user_map,
	    user_addr,
	    user_addr + user_size,
	    crypto_backing_offset,
	    &crypt_info,
	    cryptid);
	if (result) {
		printf("%s: mapping failed with %d\n", __FUNCTION__, result);
	}

	if (result) {
		return EPERM;
	}
	return 0;
}
#endif /* CONFIG_CODE_DECRYPTION */