xref: /xnu-8019.80.24/bsd/kern/kern_core.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1991 NeXT Computer, Inc.  All rights reserved.
29  *
30  *	File:	bsd/kern/kern_core.c
31  *
32  *	This file contains machine independent code for performing core dumps.
33  *
34  */
35 #if CONFIG_COREDUMP
36 
37 #include <mach/vm_param.h>
38 #include <mach/thread_status.h>
39 #include <sys/content_protection.h>
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/resourcevar.h>
44 #include <sys/namei.h>
45 #include <sys/vnode_internal.h>
46 #include <sys/proc_internal.h>
47 #include <sys/kauth.h>
48 #include <sys/timeb.h>
49 #include <sys/times.h>
50 #include <sys/acct.h>
51 #include <sys/file_internal.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 
56 #include <mach-o/loader.h>
57 #include <mach/vm_region.h>
58 #include <mach/vm_statistics.h>
59 
60 #include <vm/vm_kern.h>
61 #include <vm/vm_protos.h> /* last */
62 #include <vm/vm_map.h>          /* current_map() */
63 #include <mach/mach_vm.h>       /* mach_vm_region_recurse() */
64 #include <mach/task.h>          /* task_suspend() */
65 #include <kern/task.h>          /* get_task_numacts() */
66 
67 #include <security/audit/audit.h>
68 
69 #if CONFIG_MACF
70 #include <security/mac_framework.h>
71 #endif /* CONFIG_MACF */
72 
/*
 * One (flavor, size) pair describing a chunk of thread register state
 * that gets serialized after each LC_THREAD command in the core file.
 */
typedef struct {
	int     flavor;                 /* the number for this flavor */
	mach_msg_type_number_t  count;  /* count of ints in this flavor */
} mythread_state_flavor_t;

/*
 * Per-architecture list of thread-state flavors written for every thread.
 * mynum_flavors mirrors the array length (kept manually in sync).
 */
#if defined (__i386__) || defined (__x86_64__)
mythread_state_flavor_t thread_flavor_array[] = {
	{x86_THREAD_STATE, x86_THREAD_STATE_COUNT},
	{x86_FLOAT_STATE, x86_FLOAT_STATE_COUNT},
	{x86_EXCEPTION_STATE, x86_EXCEPTION_STATE_COUNT},
};
int mynum_flavors = 3;
#elif defined (__arm__)
mythread_state_flavor_t thread_flavor_array[] = {
	{ARM_THREAD_STATE, ARM_THREAD_STATE_COUNT},
	{ARM_VFP_STATE, ARM_VFP_STATE_COUNT},
	{ARM_EXCEPTION_STATE, ARM_EXCEPTION_STATE_COUNT}
};
int mynum_flavors = 3;

#elif defined (__arm64__)
mythread_state_flavor_t thread_flavor_array[] = {
	{ARM_THREAD_STATE64, ARM_THREAD_STATE64_COUNT},
	/* ARM64_TODO: VFP */
	{ARM_EXCEPTION_STATE64, ARM_EXCEPTION_STATE64_COUNT}
};
int mynum_flavors = 2;
#else
#error architecture not supported
#endif
103 
104 
/*
 * Cursor state threaded through collectth_state() via
 * task_act_iterate_wth_args(): where the in-kernel header buffer lives,
 * the current write offset into it, and the flavor table to serialize.
 */
typedef struct {
	vm_offset_t header;             /* base of the kernel buffer holding the Mach-O header */
	size_t hoffset;                 /* current write offset into that buffer (in/out) */
	mythread_state_flavor_t *flavors; /* flavor list copied from thread_flavor_array */
	size_t tstate_size;             /* precomputed per-thread state size, used for cmdsize */
	size_t flavor_count;            /* number of entries in flavors[] */
} tir_t;
112 
/* Free space (in MB) on the filesystem backing vp; defined in the VFS layer. */
extern int freespace_mb(vnode_t vp);

/* XXX not in a Mach header anywhere */
kern_return_t thread_getstatus(thread_t act, int flavor,
    thread_state_t tstate, mach_msg_type_number_t *count);
void task_act_iterate_wth_args(task_t, void (*)(thread_t, void *), void *);

/*
 * Global core-dump policy knobs.  NOTE(review): presumably exposed as
 * sysctls (kern.coredump / kern.sugid_coredump) elsewhere — confirm.
 */
#ifdef SECURE_KERNEL
__XNU_PRIVATE_EXTERN int do_coredump = 0;       /* default: don't dump cores */
#else
__XNU_PRIVATE_EXTERN int do_coredump = 1;       /* default: dump cores */
#endif
__XNU_PRIVATE_EXTERN int sugid_coredump = 0; /* default: but not SGUID binaries */
126 
127 
128 /* cpu_type returns only the most generic indication of the current CPU. */
129 /* in a core we want to know the kind of process. */
130 
131 static cpu_type_t
process_cpu_type(proc_t core_proc)132 process_cpu_type(proc_t core_proc)
133 {
134 	cpu_type_t what_we_think;
135 #if defined (__i386__) || defined (__x86_64__)
136 	if (IS_64BIT_PROCESS(core_proc)) {
137 		what_we_think = CPU_TYPE_X86_64;
138 	} else {
139 		what_we_think = CPU_TYPE_I386;
140 	}
141 #elif defined (__arm__) || defined(__arm64__)
142 	if (IS_64BIT_PROCESS(core_proc)) {
143 		what_we_think = CPU_TYPE_ARM64;
144 	} else {
145 		what_we_think = CPU_TYPE_ARM;
146 	}
147 #endif
148 
149 	return what_we_think;
150 }
151 
152 static cpu_type_t
process_cpu_subtype(proc_t core_proc)153 process_cpu_subtype(proc_t core_proc)
154 {
155 	cpu_type_t what_we_think;
156 #if defined (__i386__) || defined (__x86_64__)
157 	if (IS_64BIT_PROCESS(core_proc)) {
158 		what_we_think = CPU_SUBTYPE_X86_64_ALL;
159 	} else {
160 		what_we_think = CPU_SUBTYPE_I386_ALL;
161 	}
162 #elif defined (__arm__) || defined(__arm64__)
163 	if (IS_64BIT_PROCESS(core_proc)) {
164 		what_we_think = CPU_SUBTYPE_ARM64_ALL;
165 	} else {
166 		what_we_think = CPU_SUBTYPE_ARM_ALL;
167 	}
168 #endif
169 	return what_we_think;
170 }
171 
/*
 * Per-thread callback for task_act_iterate_wth_args(): append one
 * LC_THREAD command for th_act to the in-kernel header buffer described
 * by tirp (a tir_t *), advancing tirp->hoffset past what was written.
 *
 * Layout written: struct thread_command, then for each flavor a
 * mythread_state_flavor_t header followed by the raw register state.
 */
static void
collectth_state(thread_t th_act, void *tirp)
{
	vm_offset_t     header;
	size_t  hoffset, i;
	mythread_state_flavor_t *flavors;
	struct thread_command   *tc;
	tir_t *t = (tir_t *)tirp;

	/*
	 *	Fill in thread command structure.
	 */
	header = t->header;
	hoffset = t->hoffset;
	flavors = t->flavors;

	tc = (struct thread_command *) (header + hoffset);
	tc->cmd = LC_THREAD;
	/* cmdsize uses the precomputed maximum state size for all flavors. */
	tc->cmdsize = (uint32_t)(sizeof(struct thread_command)
	    + t->tstate_size);
	hoffset += sizeof(struct thread_command);
	/*
	 * Follow with a struct thread_state_flavor and
	 * the appropriate thread state struct for each
	 * thread state flavor.
	 */
	for (i = 0; i < t->flavor_count; i++) {
		*(mythread_state_flavor_t *)(header + hoffset) =
		    flavors[i];
		hoffset += sizeof(mythread_state_flavor_t);
		/*
		 * NOTE(review): count is in/out — thread_getstatus() may
		 * rewrite flavors[i].count, and the (shared) flavors array
		 * is reused for subsequent threads.  The advance below uses
		 * the returned count, which is presumably always the full
		 * _COUNT so it stays consistent with tstate_size — confirm.
		 */
		thread_getstatus(th_act, flavors[i].flavor,
		    (thread_state_t)(header + hoffset),
		    &flavors[i].count);
		hoffset += flavors[i].count * sizeof(int);
	}

	/* Publish the new write offset for the next thread's command. */
	t->hoffset = hoffset;
}
210 
211 /*
212  * coredump
213  *
214  * Description:	Create a core image on the file "core" for the process
215  *		indicated
216  *
217  * Parameters:	core_proc			Process to dump core [*]
218  *				reserve_mb			If non-zero, leave filesystem with
219  *									at least this much free space.
220  *				coredump_flags	Extra options (ignore rlimit, run fsync)
221  *
222  * Returns:	0				Success
223  *		!0				Failure errno
224  *
225  * IMPORTANT:	This function can only be called on the current process, due
226  *		to assumptions below; see variable declaration section for
227  *		details.
228  */
/* Upper bound on thread-state flavors copied onto the stack below. */
#define MAX_TSTATE_FLAVORS      10
int
coredump(proc_t core_proc, uint32_t reserve_mb, int coredump_flags)
{
/* Begin assumptions that limit us to only the current process */
	vfs_context_t ctx = vfs_context_current();
	vm_map_t        map = current_map();
	task_t          task = current_task();
/* End assumptions */
	kauth_cred_t cred = vfs_context_ucred(ctx);
	int error = 0;
	struct vnode_attr va;
	size_t          thread_count, segment_count;
	size_t          command_size, header_size, tstate_size;
	size_t          hoffset;        /* write offset into the in-kernel header buffer */
	off_t           foffset;        /* write offset into the core file */
	mach_vm_offset_t vmoffset;      /* cursor through the task's address space */
	vm_offset_t     header;
	mach_vm_size_t  vmsize;
	vm_prot_t       prot;
	vm_prot_t       maxprot;
	int             error1 = 0;
	char            stack_name[MAXCOMLEN + 6];
	char            *alloced_name = NULL;
	char            *name = NULL;
	mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
	vm_size_t       mapsize;
	size_t          i;
	uint32_t nesting_depth = 0;
	kern_return_t   kret;
	struct vm_region_submap_info_64 vbr;
	mach_msg_type_number_t vbrcount = 0;
	tir_t tir1;
	struct vnode * vp;
	struct mach_header      *mh = NULL;     /* protected by is_64 */
	struct mach_header_64   *mh64 = NULL;   /* protected by is_64 */
	int             is_64 = 0;
	size_t          mach_header_sz = sizeof(struct mach_header);
	size_t          segment_command_sz = sizeof(struct segment_command);

	/* See the IMPORTANT note above: ctx/map/task come from the current thread. */
	if (current_proc() != core_proc) {
		panic("coredump() called against proc that is not current_proc: %p", core_proc);
	}

	/*
	 * Policy gate: core dumps globally disabled, or a set-id process
	 * (saved ids differ from real ids) while sugid_coredump is off.
	 */
	if (do_coredump == 0 ||         /* Not dumping at all */
	    ((sugid_coredump == 0) &&   /* Not dumping SUID/SGID binaries */
	    ((kauth_cred_getsvuid(cred) != kauth_cred_getruid(cred)) ||
	    (kauth_cred_getsvgid(cred) != kauth_cred_getrgid(cred))))) {
		error = EFAULT;
		goto out2;
	}

#if CONFIG_MACF
	/* Give MAC policies a chance to veto the dump. */
	error = mac_proc_check_dump_core(core_proc);
	if (error != 0) {
		goto out2;
	}
#endif

	/* Pick 32- vs 64-bit Mach-O structure sizes based on the process. */
	if (IS_64BIT_PROCESS(core_proc)) {
		is_64 = 1;
		mach_header_sz = sizeof(struct mach_header_64);
		segment_command_sz = sizeof(struct segment_command_64);
	}

	mapsize = get_vmmap_size(map);

	/* Honor RLIMIT_CORE unless the caller asked us to ignore it. */
	if (((coredump_flags & COREDUMP_IGNORE_ULIMIT) == 0) &&
	    (mapsize >= proc_limitgetcur(core_proc, RLIMIT_CORE))) {
		error = EFAULT;
		goto out2;
	}

	/* Freeze the task so thread/VM state stays (mostly) stable while dumping. */
	(void) task_suspend_internal(task);

	alloced_name = zalloc_flags(ZV_NAMEI, Z_NOWAIT | Z_ZERO);

	/* create name according to sysctl'able format string */
	/* if name creation fails, fall back to historical behaviour... */
	if (alloced_name == NULL ||
	    proc_core_name(core_proc->p_comm, kauth_cred_getuid(cred),
	    proc_getpid(core_proc), alloced_name, MAXPATHLEN)) {
		snprintf(stack_name, sizeof(stack_name),
		    "/cores/core.%d", proc_getpid(core_proc));
		name = stack_name;
	} else {
		name = alloced_name;
	}

	/* O_NOFOLLOW at both open and lookup level: never dump through a symlink. */
	if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, ctx))) {
		goto out2;
	}

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_nlink);
	/* Don't dump to non-regular files or files with links. */
	if (vp->v_type != VREG ||
	    vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) {
		error = EFAULT;
		goto out;
	}

	VATTR_INIT(&va);        /* better to do it here than waste more stack in vnode_setsize */
	VATTR_SET(&va, va_data_size, 0);        /* truncate any pre-existing core file */
	if (core_proc == initproc) {
		/* launchd's core must stay readable without user unlock (class D). */
		VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D);
	}
	vnode_setattr(vp, &va, ctx);
	core_proc->p_acflag |= ACORE;   /* accounting flag: process dumped core */

	/* Refuse to fill the disk: keep at least reserve_mb free after the dump. */
	if ((reserve_mb > 0) &&
	    ((freespace_mb(vp) - (mapsize >> 20)) < reserve_mb)) {
		error = ENOSPC;
		goto out;
	}

	/*
	 *	If the task is modified while dumping the file
	 *	(e.g., changes in threads or VM, the resulting
	 *	file will not necessarily be correct.
	 */

	thread_count = get_task_numacts(task);
	segment_count = get_vmmap_entries(map); /* XXX */
	tir1.flavor_count = sizeof(thread_flavor_array) / sizeof(mythread_state_flavor_t);
	bcopy(thread_flavor_array, flavors, sizeof(thread_flavor_array));
	/* Per-thread serialized state size: flavor headers plus register words. */
	tstate_size = 0;
	for (i = 0; i < tir1.flavor_count; i++) {
		tstate_size += sizeof(mythread_state_flavor_t) +
		    (flavors[i].count * sizeof(int));
	}

	/*
	 * Compute the total load-command size with explicit overflow
	 * checks — segment_count and thread_count are attacker-influenced.
	 */
	{
		size_t lhs;
		size_t rhs;

		/* lhs = segment_count * segment_command_sz */
		if (os_mul_overflow(segment_count, segment_command_sz, &lhs)) {
			error = ENOMEM;
			goto out;
		}

		/* rhs = (tstate_size + sizeof(struct thread_command)) * thread_count */
		if (os_add_and_mul_overflow(tstate_size, sizeof(struct thread_command), thread_count, &rhs)) {
			error = ENOMEM;
			goto out;
		}

		/* command_size = lhs + rhs */
		if (os_add_overflow(lhs, rhs, &command_size)) {
			error = ENOMEM;
			goto out;
		}
	}

	if (os_add_overflow(command_size, mach_header_sz, &header_size)) {
		error = ENOMEM;
		goto out;
	}

	/* Zero-filled so untouched fields (e.g. segname) need no clearing. */
	if (kmem_alloc_flags(kernel_map, &header, (vm_size_t)header_size, VM_KERN_MEMORY_DIAG, KMA_ZERO) != KERN_SUCCESS) {
		error = ENOMEM;
		goto out;
	}

	/*
	 *	Set up Mach-O header.
	 */
	if (is_64) {
		mh64 = (struct mach_header_64 *)header;
		mh64->magic = MH_MAGIC_64;
		mh64->cputype = process_cpu_type(core_proc);
		mh64->cpusubtype = process_cpu_subtype(core_proc);
		mh64->filetype = MH_CORE;
		mh64->ncmds = (uint32_t)(segment_count + thread_count);
		mh64->sizeofcmds = (uint32_t)command_size;
	} else {
		mh = (struct mach_header *)header;
		mh->magic = MH_MAGIC;
		mh->cputype = process_cpu_type(core_proc);
		mh->cpusubtype = process_cpu_subtype(core_proc);
		mh->filetype = MH_CORE;
		mh->ncmds = (uint32_t)(segment_count + thread_count);
		mh->sizeofcmds = (uint32_t)command_size;
	}

	hoffset = mach_header_sz;       /* offset into header */
	foffset = round_page(header_size);      /* offset into file */
	vmoffset = MACH_VM_MIN_ADDRESS;         /* offset into VM */

	/*
	 * We use to check for an error, here, now we try and get
	 * as much as we can
	 */
	while (segment_count > 0) {
		struct segment_command          *sc;
		struct segment_command_64       *sc64;

		/*
		 *	Get region information for next region.
		 */

		while (1) {
			vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
			if ((kret = mach_vm_region_recurse(map,
			    &vmoffset, &vmsize, &nesting_depth,
			    (vm_region_recurse_info_t)&vbr,
			    &vbrcount)) != KERN_SUCCESS) {
				break;
			}
			/*
			 * If we get a valid mapping back, but we're dumping
			 * a 32 bit process,  and it's over the allowable
			 * address space of a 32 bit process, it's the same
			 * as if mach_vm_region_recurse() failed.
			 */
			if (!(is_64) &&
			    (vmoffset + vmsize > VM_MAX_ADDRESS)) {
				kret = KERN_INVALID_ADDRESS;
				break;
			}
			/* Descend into submaps until we reach a leaf mapping. */
			if (vbr.is_submap) {
				nesting_depth++;
				continue;
			} else {
				break;
			}
		}
		if (kret != KERN_SUCCESS) {
			break;
		}

		prot = vbr.protection;
		maxprot = vbr.max_protection;

		if ((prot | maxprot) == VM_PROT_NONE) {
			/*
			 * Elide unreadable (likely reserved) segments
			 */
			vmoffset += vmsize;
			continue;
		}

		/*
		 * Try as hard as possible to get read access to the data.
		 */
		if ((prot & VM_PROT_READ) == 0) {
			mach_vm_protect(map, vmoffset, vmsize, FALSE,
			    prot | VM_PROT_READ);
		}

		/*
		 * But only try and perform the write if we can read it.
		 * IOKit-tagged memory is skipped (device memory may fault).
		 */
		int64_t fsize = ((maxprot & VM_PROT_READ) == VM_PROT_READ
		    && vbr.user_tag != VM_MEMORY_IOKIT
		    && coredumpok(map, vmoffset)) ? vmsize : 0;

		if (fsize) {
			int64_t resid = 0;
			const enum uio_seg sflg = IS_64BIT_PROCESS(core_proc) ?
			    UIO_USERSPACE64 : UIO_USERSPACE32;

			/* Copy the segment straight from user VA to the file. */
			error = vn_rdwr_64(UIO_WRITE, vp, vmoffset, fsize,
			    foffset, sflg, IO_NOCACHE | IO_NODELOCKED | IO_UNIT,
			    cred, &resid, core_proc);

			if (error) {
				/*
				 * Mark segment as empty
				 */
				fsize = 0;
			} else if (resid) {
				/*
				 * Partial write. Extend the file size so
				 * that the segment command contains a valid
				 * range of offsets, possibly creating a hole.
				 */
				VATTR_INIT(&va);
				VATTR_SET(&va, va_data_size, foffset + fsize);
				vnode_setattr(vp, &va, ctx);
			}
		}

		/*
		 *	Fill in segment command structure.
		 */

		if (is_64) {
			sc64 = (struct segment_command_64 *)(header + hoffset);
			sc64->cmd = LC_SEGMENT_64;
			sc64->cmdsize = sizeof(struct segment_command_64);
			/* segment name is zeroed by kmem_alloc */
			sc64->segname[0] = 0;
			sc64->vmaddr = vmoffset;
			sc64->vmsize = vmsize;
			sc64->fileoff = foffset;
			sc64->filesize = fsize;
			sc64->maxprot = maxprot;
			sc64->initprot = prot;
			sc64->nsects = 0;
			sc64->flags = 0;
		} else {
			sc = (struct segment_command *) (header + hoffset);
			sc->cmd = LC_SEGMENT;
			sc->cmdsize = sizeof(struct segment_command);
			/* segment name is zeroed by kmem_alloc */
			sc->segname[0] = 0;
			sc->vmaddr = CAST_DOWN_EXPLICIT(uint32_t, vmoffset);
			sc->vmsize = CAST_DOWN_EXPLICIT(uint32_t, vmsize);
			sc->fileoff = CAST_DOWN_EXPLICIT(uint32_t, foffset); /* will never truncate */
			sc->filesize = CAST_DOWN_EXPLICIT(uint32_t, fsize); /* will never truncate */
			sc->maxprot = maxprot;
			sc->initprot = prot;
			sc->nsects = 0;
			sc->flags = 0;
		}

		hoffset += segment_command_sz;
		foffset += fsize;
		vmoffset += vmsize;
		segment_count--;
	}

	/*
	 * If there are remaining segments which have not been written
	 * out because break in the loop above, then they were not counted
	 * because they exceed the real address space of the executable
	 * type: remove them from the header's count.  This is OK, since
	 * we are allowed to have a sparse area following the segments.
	 */
	if (is_64) {
		mh64->ncmds -= segment_count;
		mh64->sizeofcmds -= segment_count * segment_command_sz;
	} else {
		mh->ncmds -= segment_count;
		mh->sizeofcmds -= segment_count * segment_command_sz;
	}

	/* Append one LC_THREAD command per thread after the segment commands. */
	tir1.header = header;
	tir1.hoffset = hoffset;
	tir1.flavors = flavors;
	tir1.tstate_size = tstate_size;
	task_act_iterate_wth_args(task, collectth_state, &tir1);

	/*
	 *	Write out the Mach header at the beginning of the
	 *	file.  OK to use a 32 bit write for this.
	 */
	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)header, (int)MIN(header_size, INT_MAX), (off_t)0,
	    UIO_SYSSPACE, IO_NOCACHE | IO_NODELOCKED | IO_UNIT, cred, (int *) 0, core_proc);
	kmem_free(kernel_map, header, header_size);

	/* Optionally force the data all the way to stable storage. */
	if ((coredump_flags & COREDUMP_FULLFSYNC) && error == 0) {
		error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
	}
out:
	error1 = vnode_close(vp, FWRITE, ctx);
out2:
#if CONFIG_AUDIT
	audit_proc_coredump(core_proc, name, error);
#endif
	if (alloced_name != NULL) {
		zfree(ZV_NAMEI, alloced_name);
	}
	/* Report the close failure only if nothing earlier already failed. */
	if (error == 0) {
		error = error1;
	}

	return error;
}
600 
601 #else /* CONFIG_COREDUMP */
602 
603 /* When core dumps aren't needed, no need to compile this file at all */
604 
605 #error assertion failed: this section is not compiled
606 
607 #endif /* CONFIG_COREDUMP */
608