1 /*
2 * Copyright (c) 2009-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <mach_assert.h>
29
30 #include <sys/errno.h>
31 #include <i386/param.h>
32 #include <i386/misc_protos.h>
33 #include <i386/cpu_data.h>
34 #include <i386/machine_cpu.h>
35 #include <i386/machine_routines.h>
36 #include <i386/cpuid.h>
37 #include <i386/vmx.h>
38 #include <vm/pmap.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_kern.h>
41 #include <vm/vm_fault.h>
42 #include <san/kasan.h>
43
44 #include <sys/kdebug.h>
45
46 #include <kern/copyout_shim.h>
47 #include <kern/zalloc_internal.h>
48
49 #undef copyin
50 #undef copyout
51
52 static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
53 static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);
54
55 /*
56 * Copy sizes bigger than this value will cause a kernel panic.
57 *
58 * Yes, this is an arbitrary fixed limit, but it's almost certainly
59 * a programming error to be copying more than this amount between
60 * user and wired kernel memory in a single invocation on this
61 * platform.
62 */
63 const int copysize_limit_panic = (64 * MB);
64
65 /*
66 * The copy engine has the following characteristics
67 * - copyio() handles copies to/from user or kernel space
68 * - copypv() deals with physical or virtual addresses
69 *
70 * Readers familiar with the 32-bit kernel will expect Joe's thesis at this
71 * point describing the full glory of the copy window implementation. In K64,
72 * however, there is no need for windowing. Thanks to the vast shared address
73 * space, the kernel has direct access to userspace and to physical memory.
74 *
75 * User virtual addresses are accessible provided the user's cr3 is loaded.
76 * Physical addresses are accessible via the direct map and the PHYSMAP_PTOV()
77 * translation.
78 *
79 * Copyin/out variants all boil done to just these 2 routines in locore.s which
80 * provide fault-recoverable copying:
81 */
82 extern int _bcopy(const void *, void *, vm_size_t);
83 extern int _bcopystr(const void *, void *, vm_size_t, vm_size_t *);
84 extern int _copyin_atomic32(const char *src, uint32_t *dst);
85 extern int _copyin_atomic64(const char *src, uint64_t *dst);
86 extern int _copyout_atomic32(const uint32_t *u32, char *src);
87 extern int _copyout_atomic64(const uint64_t *u64, char *src);
88
89 /*
90 * Types of copies:
91 */
92 #define COPYIN 0 /* from user virtual to kernel virtual */
93 #define COPYOUT 1 /* from kernel virtual to user virtual */
94 #define COPYINSTR 2 /* string variant of copyout */
95 #define COPYINPHYS 3 /* from user virtual to kernel physical */
96 #define COPYOUTPHYS 4 /* from kernel physical to user virtual */
97 #define COPYINATOMIC32 5 /* from user virtual to kernel virtual */
98 #define COPYINATOMIC64 6 /* from user virtual to kernel virtual */
99 #define COPYOUTATOMIC32 7 /* from user virtual to kernel virtual */
100 #define COPYOUTATOMIC64 8 /* from user virtual to kernel virtual */
101
#if ENABLE_SMAPLOG
/*
 * Debug-only circular log of SMAP (stac/clac) state transitions, used to
 * diagnose unexpected user-access-window states.  One entry is recorded
 * each time user_access_enable()/user_access_disable() toggles SMAP.
 */
typedef struct {
	uint64_t	timestamp;	/* mach_absolute_time() at the transition */
	thread_t	thread;		/* thread performing the transition */
	uintptr_t	cr4;		/* CR4 at the time (SMAP enable bit lives here) */
	uint8_t		cpuid;		/* CPU the transition happened on */
	uint8_t		smap_state;	/* TRUE if user access was being enabled (stac) */
	uint8_t		copyio_active;	/* was the thread already inside copyio()? */
} smaplog_entry_t;

#define SMAPLOG_BUFFER_SIZE (50)
static smaplog_entry_t	smaplog_cbuf[SMAPLOG_BUFFER_SIZE];
static uint32_t		smaplog_head = 0;

static void
smaplog_add_entry(boolean_t enabling)
{
	uint32_t index = 0;
	thread_t thread = current_thread();

	/*
	 * Lock-free slot reservation: atomically advance the head (mod buffer
	 * size) and claim the slot it pointed at.  Retry on CAS failure when
	 * another CPU races us for the same slot.
	 */
	do {
		index = smaplog_head;
	} while (!OSCompareAndSwap(index, (index + 1) % SMAPLOG_BUFFER_SIZE, &smaplog_head));

	assert(index < SMAPLOG_BUFFER_SIZE);
	assert(smaplog_head < SMAPLOG_BUFFER_SIZE);
	assert(thread);

	/* Fill the claimed slot; a racing reader may observe a torn entry. */
	smaplog_cbuf[index].timestamp = mach_absolute_time();
	smaplog_cbuf[index].thread = thread;
	smaplog_cbuf[index].cpuid = cpu_number();
	smaplog_cbuf[index].cr4 = get_cr4();
	smaplog_cbuf[index].smap_state = enabling;
	smaplog_cbuf[index].copyio_active = (thread->machine.specFlags & CopyIOActive) ? 1 : 0;
}
#endif /* ENABLE_SMAPLOG */
138
139 extern boolean_t pmap_smap_enabled;
140 static inline void
user_access_enable(void)141 user_access_enable(void)
142 {
143 if (pmap_smap_enabled) {
144 stac();
145 #if ENABLE_SMAPLOG
146 smaplog_add_entry(TRUE);
147 #endif
148 }
149 }
150 static inline void
user_access_disable(void)151 user_access_disable(void)
152 {
153 if (pmap_smap_enabled) {
154 clac();
155 #if ENABLE_SMAPLOG
156 smaplog_add_entry(FALSE);
157 #endif
158 }
159 }
160
161 #if COPYIO_TRACE_ENABLED
162 #define COPYIO_TRACE(x, a, b, c, d, e) KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e)
163 #else
164 #define COPYIO_TRACE(x, a, b, c, d, e) do { } while(0)
165 #endif
166
/*
 * copyio: the common engine behind all copyin/copyout variants.
 *
 * copy_type:      one of the COPY* constants above, selecting direction and
 *                 flavor (plain, physical, atomic, string).
 * user_addr:      user virtual address (source or destination per copy_type).
 * kernel_addr:    kernel virtual address, or a kernel *physical* address for
 *                 COPYINPHYS/COPYOUTPHYS (translated via PHYSMAP_PTOV below).
 * nbytes:         byte count; must not exceed copysize_limit_panic.
 * lencopied:      out-param for COPYINSTR only: bytes copied including NUL.
 * use_kernel_map: non-zero to skip the user-address range validation
 *                 (copy targets the kernel map).
 *
 * Returns 0 on success, or EFAULT / EINVAL / ENAMETOOLONG.
 */
static int
copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
    vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
{
	thread_t thread = current_thread();
	pmap_t pmap;
	vm_size_t bytes_copied;
	int error = 0;
	boolean_t istate = FALSE;
	boolean_t recursive_CopyIOActive;
#if COPYIO_TRACE_ENABLED
	int debug_type = 0xeff70010;
	debug_type += (copy_type << 2);
#endif

	/* Oversized copies are almost certainly a caller bug; refuse them. */
	if (__improbable(nbytes > copysize_limit_panic)) {
		error = EINVAL;
		goto out;
	}

	COPYIO_TRACE(debug_type | DBG_FUNC_START,
	    user_addr, kernel_addr, nbytes, use_kernel_map, 0);

	if (__improbable(nbytes == 0)) {
		goto out;
	}

	pmap = thread->map->pmap;
	/* Threads that can access page 0 need a cr3/PCID switch below. */
	boolean_t nopagezero = thread->map->pmap->pagezero_accessible;

	if ((copy_type != COPYINPHYS) && (copy_type != COPYOUTPHYS)) {
		/*
		 * For virtual-kernel-address copies, the kernel side must lie
		 * in the kernel/kext range, and must not overflow a zone
		 * element's bounds.
		 */
		if (__improbable((vm_offset_t)kernel_addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS)) {
			panic("Invalid copy parameter, copy type: %d, kernel address: %p", copy_type, kernel_addr);
		}
		zone_element_bounds_check((vm_offset_t)kernel_addr, nbytes);
	}

	/* Sanity and security check for addresses to/from a user */

	/*
	 * Reject user ranges that wrap (user_addr + nbytes overflows) or
	 * extend past the top of the user map.  Skipped for kernel-map copies.
	 */
	if (__improbable(((pmap != kernel_pmap) && (use_kernel_map == 0)) &&
	    ((nbytes && (user_addr + nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map))))) {
		error = EFAULT;
		goto out;
	}

	/* Atomic variants only make sense against a real user pmap. */
	if (copy_type >= COPYINATOMIC32 && copy_type <= COPYOUTATOMIC64) {
		if (__improbable(pmap == kernel_pmap)) {
			error = EFAULT;
			goto out;
		}
	}

#if KASAN
	/* Tell KASan which kernel bytes we are about to write or read. */
	switch (copy_type) {
	case COPYIN:
	case COPYINSTR:
	case COPYINATOMIC32:
	case COPYINATOMIC64:
		__asan_storeN((uptr)kernel_addr, nbytes);
		break;
	case COPYOUT:
	case COPYOUTATOMIC32:
	case COPYOUTATOMIC64:
		__asan_loadN((uptr)kernel_addr, nbytes);
		break;
	}
#endif

	/*
	 * If the no_shared_cr3 boot-arg is set (true), the kernel runs on
	 * its own pmap and cr3 rather than the user's -- so that wild accesses
	 * from kernel or kexts can be trapped. So, during copyin and copyout,
	 * we need to switch back to the user's map/cr3. The thread is flagged
	 * "CopyIOActive" at this time so that if the thread is pre-empted,
	 * we will later restore the correct cr3.
	 */
	recursive_CopyIOActive = thread->machine.specFlags & CopyIOActive;

	boolean_t pdswitch = no_shared_cr3 || nopagezero;

	if (__improbable(pdswitch)) {
		/*
		 * Switch to the user's cr3 (or activate its PCID) with
		 * interrupts off so the flag and cr3 stay consistent.
		 */
		istate = ml_set_interrupts_enabled(FALSE);
		if (nopagezero && pmap_pcid_ncpus) {
			pmap_pcid_activate(pmap, cpu_number(), TRUE, TRUE);
		} else if (get_cr3_base() != pmap->pm_cr3) {
			set_cr3_raw(pmap->pm_cr3);
		}
		thread->machine.specFlags |= CopyIOActive;
	} else {
		thread->machine.specFlags |= CopyIOActive;
	}

	/* Open the SMAP window for the duration of the copy. */
	user_access_enable();

#if DEVELOPMENT || DEBUG
	/*
	 * Ensure that we're running on the target thread's cr3.
	 */
	if ((pmap != kernel_pmap) && !use_kernel_map &&
	    (get_cr3_base() != pmap->pm_cr3)) {
		panic("copyio(%d,%p,%p,%ld,%p,%d) cr3 is %p expects %p",
		    copy_type, (void *)user_addr, kernel_addr, nbytes, lencopied, use_kernel_map,
		    (void *) get_cr3_raw(), (void *) pmap->pm_cr3);
	}
#endif

	if (__improbable(pdswitch)) {
		(void) ml_set_interrupts_enabled(istate);
	}

	COPYIO_TRACE(0xeff70044 | DBG_FUNC_NONE, user_addr,
	    kernel_addr, nbytes, 0, 0);

	/*
	 * Dispatch to the fault-recoverable primitives.  Any user-address
	 * fault during these returns an error rather than panicking.
	 */
	switch (copy_type) {
	case COPYIN:
		error = _bcopy((const void *) user_addr,
		    kernel_addr,
		    nbytes);
		break;

	case COPYOUT:
		error = _bcopy(kernel_addr,
		    (void *) user_addr,
		    nbytes);
		break;

	case COPYINPHYS:
		/* kernel_addr is physical here; map it through the physmap. */
		error = _bcopy((const void *) user_addr,
		    PHYSMAP_PTOV(kernel_addr),
		    nbytes);
		break;

	case COPYOUTPHYS:
		error = _bcopy((const void *) PHYSMAP_PTOV(kernel_addr),
		    (void *) user_addr,
		    nbytes);
		break;

	case COPYINATOMIC32:
		error = _copyin_atomic32((const void *) user_addr,
		    (void *) kernel_addr);
		break;

	case COPYINATOMIC64:
		error = _copyin_atomic64((const void *) user_addr,
		    (void *) kernel_addr);
		break;

	case COPYOUTATOMIC32:
		error = _copyout_atomic32((const void *) kernel_addr,
		    (void *) user_addr);
		break;

	case COPYOUTATOMIC64:
		error = _copyout_atomic64((const void *) kernel_addr,
		    (void *) user_addr);
		break;

	case COPYINSTR:
		error = _bcopystr((const void *) user_addr,
		    kernel_addr,
		    (int) nbytes,
		    &bytes_copied);

		/*
		 * lencopied should be updated on success
		 * or ENAMETOOLONG... but not EFAULT
		 */
		if (error != EFAULT) {
			*lencopied = bytes_copied;
		}

		if (error) {
#if KDEBUG
			nbytes = *lencopied;
#endif
			break;
		}
		if (*(kernel_addr + bytes_copied - 1) == 0) {
			/*
			 * we found a NULL terminator... we're done
			 */
#if KDEBUG
			nbytes = *lencopied;
#endif
			break;
		} else {
			/*
			 * no more room in the buffer and we haven't
			 * yet come across a NULL terminator
			 */
#if KDEBUG
			nbytes = *lencopied;
#endif
			error = ENAMETOOLONG;
			break;
		}
	}

	/* Close the SMAP window before restoring cr3/flags. */
	user_access_disable();

	if (__improbable(pdswitch)) {
		/*
		 * Undo the cr3/PCID switch -- but only if we were the
		 * outermost copyio (a recursive caller still needs the
		 * user cr3 and the CopyIOActive flag).
		 */
		istate = ml_set_interrupts_enabled(FALSE);
		if (!recursive_CopyIOActive && (get_cr3_raw() != kernel_pmap->pm_cr3)) {
			if (nopagezero && pmap_pcid_ncpus) {
				pmap_pcid_activate(pmap, cpu_number(), TRUE, FALSE);
			} else {
				set_cr3_raw(kernel_pmap->pm_cr3);
			}
		}

		if (!recursive_CopyIOActive) {
			thread->machine.specFlags &= ~CopyIOActive;
		}
		(void) ml_set_interrupts_enabled(istate);
	} else if (!recursive_CopyIOActive) {
		thread->machine.specFlags &= ~CopyIOActive;
	}

out:
	COPYIO_TRACE(debug_type | DBG_FUNC_END, user_addr, kernel_addr, nbytes, error, 0);

	return error;
}
391
392
393 static int
copyio_phys(addr64_t source,addr64_t sink,vm_size_t csize,int which)394 copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
395 {
396 char *paddr;
397 user_addr_t vaddr;
398 int ctype;
399
400 if (which & cppvPsnk) {
401 paddr = (char *)sink;
402 vaddr = (user_addr_t)source;
403 ctype = COPYINPHYS;
404 } else {
405 paddr = (char *)source;
406 vaddr = (user_addr_t)sink;
407 ctype = COPYOUTPHYS;
408 CALL_COPYOUT_SHIM_PHYS((void *)PHYSMAP_PTOV(source), sink, csize)
409 }
410 return copyio(ctype, vaddr, paddr, csize, NULL, which & cppvKmap);
411 }
412
/*
 * copyinmsg: copy a Mach message of nbytes from user space into the kernel.
 * Thin wrapper over copyio(); returns 0 or an errno (EFAULT/EINVAL).
 */
int
copyinmsg(const user_addr_t user_addr, void *kernel_addr, mach_msg_size_t nbytes)
{
	return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0);
}

/*
 * copyin: copy nbytes from a user virtual address into kernel memory.
 * Returns 0 or an errno (EFAULT/EINVAL).
 */
int
copyin(const user_addr_t user_addr, void *kernel_addr, vm_size_t nbytes)
{
	return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0);
}
424
425 /*
426 * copy{in,out}_atomic{32,64}
427 * Read or store an aligned value from userspace as a single memory transaction.
428 * These functions support userspace synchronization features
429 */
430 int
copyin_atomic32(const user_addr_t user_addr,uint32_t * kernel_addr)431 copyin_atomic32(const user_addr_t user_addr, uint32_t *kernel_addr)
432 {
433 /* Test alignment */
434 if (user_addr & 3) {
435 return EINVAL;
436 }
437 return copyio(COPYINATOMIC32, user_addr, (char *)(uintptr_t)kernel_addr, 4, NULL, 0);
438 }
439
440 int
copyin_atomic32_wait_if_equals(const user_addr_t user_addr,uint32_t value)441 copyin_atomic32_wait_if_equals(const user_addr_t user_addr, uint32_t value)
442 {
443 uint32_t u32;
444 int result = copyin_atomic32(user_addr, &u32);
445 if (__improbable(result)) {
446 return result;
447 }
448 if (u32 != value) {
449 return ESTALE;
450 }
451 cpu_pause();
452 return 0;
453 }
454
455 int
copyin_atomic64(const user_addr_t user_addr,uint64_t * kernel_addr)456 copyin_atomic64(const user_addr_t user_addr, uint64_t *kernel_addr)
457 {
458 /* Test alignment */
459 if (user_addr & 7) {
460 return EINVAL;
461 }
462 return copyio(COPYINATOMIC64, user_addr, (char *)(uintptr_t)kernel_addr, 8, NULL, 0);
463 }
464
465 int
copyout_atomic32(uint32_t value,user_addr_t user_addr)466 copyout_atomic32(uint32_t value, user_addr_t user_addr)
467 {
468 /* Test alignment */
469 if (user_addr & 3) {
470 return EINVAL;
471 }
472 return copyio(COPYOUTATOMIC32, user_addr, (char *)&value, 4, NULL, 0);
473 }
474
475 int
copyout_atomic64(uint64_t value,user_addr_t user_addr)476 copyout_atomic64(uint64_t value, user_addr_t user_addr)
477 {
478 /* Test alignment */
479 if (user_addr & 7) {
480 return EINVAL;
481 }
482 return copyio(COPYOUTATOMIC64, user_addr, (char *)&value, 8, NULL, 0);
483 }
484
/*
 * copyinstr: copy a NUL-terminated string of at most nbytes from user
 * space.  On success or ENAMETOOLONG, *lencopied holds the number of
 * bytes copied (including the NUL when present); on EFAULT it is left
 * at the 0 set here.
 */
int
copyinstr(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied)
{
	*lencopied = 0;

	return copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0);
}
492
/*
 * copyoutmsg: copy a Mach message of nbytes from the kernel out to user
 * space, notifying the copyout shim (if configured) first.
 * Returns 0 or an errno (EFAULT/EINVAL).
 */
int
copyoutmsg(const void *kernel_addr, user_addr_t user_addr, mach_msg_size_t nbytes)
{
	CALL_COPYOUT_SHIM_MSG(kernel_addr, user_addr, (vm_size_t)nbytes)
	return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0);
}

/*
 * copyout: copy nbytes from kernel memory out to a user virtual address,
 * notifying the copyout shim (if configured) first.
 * Returns 0 or an errno (EFAULT/EINVAL).
 */
int
copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
{
	CALL_COPYOUT_SHIM_NRML(kernel_addr, user_addr, nbytes)
	return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0);
}
506
507 #if (DEBUG || DEVELOPMENT)
508 int
verify_write(const void * source,void * dst,size_t size)509 verify_write(const void *source, void *dst, size_t size)
510 {
511 int rc;
512 disable_preemption();
513 rc = _bcopy((const char*)source, (char*)dst, size);
514 enable_preemption();
515 return rc;
516 }
517 #endif
518
519 kern_return_t
copypv(addr64_t src64,addr64_t snk64,unsigned int size,int which)520 copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
521 {
522 unsigned int lop, csize;
523 int bothphys = 0;
524
525 KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
526 (unsigned)snk64, size, which, 0);
527
528 if ((which & (cppvPsrc | cppvPsnk)) == 0) { /* Make sure that only one is virtual */
529 panic("copypv: no more than 1 parameter may be virtual"); /* Not allowed */
530 }
531 if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk)) {
532 bothphys = 1; /* both are physical */
533 }
534 while (size) {
535 if (bothphys) {
536 lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); /* Assume sink smallest */
537
538 if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)))) {
539 lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); /* No, source is smaller */
540 }
541 } else {
542 /*
543 * only need to compute the resid for the physical page
544 * address... we don't care about where we start/finish in
545 * the virtual since we just call the normal copyin/copyout
546 */
547 if (which & cppvPsrc) {
548 lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
549 } else {
550 lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
551 }
552 }
553 csize = size; /* Assume we can copy it all */
554 if (lop < size) {
555 csize = lop; /* Nope, we can't do it all */
556 }
557 #if 0
558 /*
559 * flush_dcache64 is currently a nop on the i386...
560 * it's used when copying to non-system memory such
561 * as video capture cards... on PPC there was a need
562 * to flush due to how we mapped this memory... not
563 * sure if it's needed on i386.
564 */
565 if (which & cppvFsrc) {
566 flush_dcache64(src64, csize, 1); /* If requested, flush source before move */
567 }
568 if (which & cppvFsnk) {
569 flush_dcache64(snk64, csize, 1); /* If requested, flush sink before move */
570 }
571 #endif
572 if (bothphys) {
573 bcopy_phys(src64, snk64, csize); /* Do a physical copy, virtually */
574 } else {
575 if (copyio_phys(src64, snk64, csize, which)) {
576 return KERN_FAILURE;
577 }
578 }
579 #if 0
580 if (which & cppvFsrc) {
581 flush_dcache64(src64, csize, 1); /* If requested, flush source after move */
582 }
583 if (which & cppvFsnk) {
584 flush_dcache64(snk64, csize, 1); /* If requested, flush sink after move */
585 }
586 #endif
587 size -= csize; /* Calculate what is left */
588 snk64 += csize; /* Bump sink to next physical address */
589 src64 += csize; /* Bump source to next physical address */
590 }
591 KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
592 (unsigned)snk64, size, which, 0);
593
594 return KERN_SUCCESS;
595 }
596