xref: /xnu-12377.41.6/bsd/kern/kern_core.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2000-2025 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1991 NeXT Computer, Inc.  All rights reserved.
29  *
30  *	File:	bsd/kern/kern_core.c
31  *
32  *	This file contains machine independent code for performing core dumps.
33  *
34  */
35 #if CONFIG_COREDUMP || CONFIG_UCOREDUMP
36 
37 #include <mach/vm_param.h>
38 #include <mach/thread_status.h>
39 #include <sys/content_protection.h>
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/resourcevar.h>
44 #include <sys/namei.h>
45 #include <sys/vnode_internal.h>
46 #include <sys/proc_internal.h>
47 #include <sys/kauth.h>
48 #include <sys/timeb.h>
49 #include <sys/times.h>
50 #include <sys/acct.h>
51 #include <sys/file_internal.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 
56 #include <mach-o/loader.h>
57 #include <mach/vm_region.h>
58 #include <mach/vm_statistics.h>
59 
60 #include <IOKit/IOBSD.h>
61 
62 #include <vm/vm_kern_xnu.h>
63 #include <vm/vm_protos.h> /* last */
64 #include <vm/vm_map_xnu.h>          /* current_map() */
65 #include <vm/pmap.h>            /* pmap_user_va_bits() */
66 #include <mach/mach_vm.h>       /* mach_vm_region_recurse() */
67 #include <mach/task.h>          /* task_suspend() */
68 #include <kern/task.h>          /* get_task_numacts() */
69 
70 #include <security/audit/audit.h>
71 
72 #if CONFIG_MACF
73 #include <security/mac_framework.h>
74 #endif /* CONFIG_MACF */
75 
76 #if HAS_MTE
77 #include <arm64/mte_xnu.h>
78 #endif /* HAS_MTE */
79 
80 #include <kdp/core_notes.h>
81 
82 extern int freespace_mb(vnode_t vp);
83 
84 /* XXX not in a Mach header anywhere */
85 kern_return_t thread_getstatus(thread_t act, int flavor,
86     thread_state_t tstate, mach_msg_type_number_t *count);
87 void task_act_iterate_wth_args(task_t, void (*)(thread_t, void *), void *);
88 
89 #ifdef SECURE_KERNEL
90 __XNU_PRIVATE_EXTERN int do_coredump = 0;       /* default: don't dump cores */
91 #else
92 __XNU_PRIVATE_EXTERN int do_coredump = 1;       /* default: dump cores */
93 #endif /* SECURE_KERNEL */
94 __XNU_PRIVATE_EXTERN int sugid_coredump = 0; /* default: but not SGUID binaries */
95 
96 #if CONFIG_UCOREDUMP
97 __XNU_PRIVATE_EXTERN int do_ucoredump = 0;   /* default: kernel does dumps */
98 #endif
99 
100 /*
101  * is_coredump_eligible
102  *
103  * Determine if a core should even be dumped at all (by any mechanism)
104  *
105  * Does NOT include disk permission or space constraints
106  *
107  * core_proc		Process to dump core [*] must be current proc!
108  *
109  * Return:	0	Success
110  *		!0	Failure errno
111  */
112 int
is_coredump_eligible(proc_t core_proc)113 is_coredump_eligible(proc_t core_proc)
114 {
115 	if (current_proc() != core_proc && (
116 		    core_proc->p_exit_reason &&
117 		    core_proc->p_exit_reason->osr_namespace == OS_REASON_JETSAM)) {
118 		return EPERM;
119 	}
120 	if (current_proc() != core_proc) {
121 		panic("coredump for proc that is not current: %p)", core_proc);
122 	}
123 
124 	vfs_context_t ctx = vfs_context_current();
125 	kauth_cred_t cred = vfs_context_ucred(ctx);
126 
127 	if (do_coredump == 0 ||         /* Not dumping at all */
128 	    ((sugid_coredump == 0) &&   /* Not dumping SUID/SGID binaries */
129 	    ((kauth_cred_getsvuid(cred) != kauth_cred_getruid(cred)) ||
130 	    (kauth_cred_getsvgid(cred) != kauth_cred_getrgid(cred))))) {
131 		return EPERM;
132 	}
133 
134 #if CONFIG_MACF
135 	const int error = mac_proc_check_dump_core(core_proc);
136 	if (error != 0) {
137 		return error;
138 	}
139 #endif
140 	return 0;
141 }
142 
143 #else /* CONFIG_COREDUMP || CONFIG_UCOREDUMP */
144 
145 /* When core dumps aren't needed, no need to compile this file at all */
146 
147 #error assertion failed: this section is not compiled
148 
149 #endif /* CONFIG_COREDUMP || CONFIG_UCOREDUMP */
150 
151 #if CONFIG_COREDUMP
152 
153 #define COREDUMP_CUSTOM_LOCATION_ENTITLEMENT "com.apple.private.custom-coredump-location"
154 
155 typedef struct {
156 	int     flavor;                 /* the number for this flavor */
157 	mach_msg_type_number_t  count;        /* count of ints in this flavor */
158 } mythread_state_flavor_t;
159 
160 #if defined (__i386__) || defined (__x86_64__)
161 mythread_state_flavor_t thread_flavor_array[] = {
162 	{x86_THREAD_STATE, x86_THREAD_STATE_COUNT},
163 	{x86_FLOAT_STATE, x86_FLOAT_STATE_COUNT},
164 	{x86_EXCEPTION_STATE, x86_EXCEPTION_STATE_COUNT},
165 };
166 int mynum_flavors = 3;
167 #elif defined (__arm64__)
168 mythread_state_flavor_t thread_flavor_array[] = {
169 	{ARM_THREAD_STATE64, ARM_THREAD_STATE64_COUNT},
170 	/* ARM64_TODO: VFP */
171 	{ARM_EXCEPTION_STATE64, ARM_EXCEPTION_STATE64_COUNT}
172 };
173 int mynum_flavors = 2;
174 #else
175 #error architecture not supported
176 #endif
177 
178 
179 typedef struct {
180 	vm_offset_t header;
181 	size_t hoffset;
182 	mythread_state_flavor_t *flavors;
183 	size_t tstate_size;
184 	size_t flavor_count;
185 } tir_t;
186 
187 /* cpu_type returns only the most generic indication of the current CPU. */
188 /* in a core we want to know the kind of process. */
189 
190 cpu_type_t
process_cpu_type(proc_t core_proc)191 process_cpu_type(proc_t core_proc)
192 {
193 	cpu_type_t what_we_think;
194 #if defined (__i386__) || defined (__x86_64__)
195 	if (IS_64BIT_PROCESS(core_proc)) {
196 		what_we_think = CPU_TYPE_X86_64;
197 	} else {
198 		what_we_think = CPU_TYPE_I386;
199 	}
200 #elif defined(__arm64__)
201 	if (IS_64BIT_PROCESS(core_proc)) {
202 		what_we_think = CPU_TYPE_ARM64;
203 	} else {
204 		what_we_think = CPU_TYPE_ARM;
205 	}
206 #endif
207 
208 	return what_we_think;
209 }
210 
211 cpu_type_t
process_cpu_subtype(proc_t core_proc)212 process_cpu_subtype(proc_t core_proc)
213 {
214 	cpu_type_t what_we_think;
215 #if defined (__i386__) || defined (__x86_64__)
216 	if (IS_64BIT_PROCESS(core_proc)) {
217 		what_we_think = CPU_SUBTYPE_X86_64_ALL;
218 	} else {
219 		what_we_think = CPU_SUBTYPE_I386_ALL;
220 	}
221 #elif defined(__arm64__)
222 	if (IS_64BIT_PROCESS(core_proc)) {
223 		what_we_think = CPU_SUBTYPE_ARM64_ALL;
224 	} else {
225 		what_we_think = CPU_SUBTYPE_ARM_ALL;
226 	}
227 #endif
228 	return what_we_think;
229 }
230 
231 static void
collectth_state(thread_t th_act,void * tirp)232 collectth_state(thread_t th_act, void *tirp)
233 {
234 	vm_offset_t     header;
235 	size_t  hoffset, i;
236 	mythread_state_flavor_t *flavors;
237 	struct thread_command   *tc;
238 	tir_t *t = (tir_t *)tirp;
239 
240 	/*
241 	 *	Fill in thread command structure.
242 	 */
243 	header = t->header;
244 	hoffset = t->hoffset;
245 	flavors = t->flavors;
246 
247 	tc = (struct thread_command *) (header + hoffset);
248 	tc->cmd = LC_THREAD;
249 	tc->cmdsize = (uint32_t)(sizeof(struct thread_command)
250 	    + t->tstate_size);
251 	hoffset += sizeof(struct thread_command);
252 	/*
253 	 * Follow with a struct thread_state_flavor and
254 	 * the appropriate thread state struct for each
255 	 * thread state flavor.
256 	 */
257 	for (i = 0; i < t->flavor_count; i++) {
258 		*(mythread_state_flavor_t *)(header + hoffset) =
259 		    flavors[i];
260 		hoffset += sizeof(mythread_state_flavor_t);
261 		thread_getstatus(th_act, flavors[i].flavor,
262 		    (thread_state_t)(header + hoffset),
263 		    &flavors[i].count);
264 		hoffset += flavors[i].count * sizeof(int);
265 	}
266 
267 	t->hoffset = hoffset;
268 }
269 
/*
 * COREDUMPLOG: printf-style logging prefixed with the dumped process' name
 * and pid.  Expands to nothing outside DEVELOPMENT/DEBUG builds.  The macro
 * refers to a variable named core_proc, which must be in scope at the call
 * site.
 */
#if DEVELOPMENT || DEBUG
#define COREDUMPLOG(fmt, ...) \
	printf("coredump (%s, pid %d): " fmt "\n", core_proc->p_comm, proc_getpid(core_proc), ## __VA_ARGS__)
#else
#define COREDUMPLOG(fmt, ...)
#endif
275 
276 /*
277  * LC_NOTE support for userspace coredumps.
278  */
279 
280 typedef int (write_note_cb_t)(struct vnode *vp, off_t foffset);
281 
282 static int
note_addrable_bits(struct vnode * vp,off_t foffset)283 note_addrable_bits(struct vnode *vp, off_t foffset)
284 {
285 	task_t t = current_task();
286 	vfs_context_t ctx = vfs_context_current();
287 	kauth_cred_t cred = vfs_context_ucred(ctx);
288 
289 	addrable_bits_note_t note = {
290 		.version = ADDRABLE_BITS_VER,
291 		.addressing_bits = pmap_user_va_bits(get_task_pmap(t)),
292 		.unused = 0
293 	};
294 
295 	return vn_rdwr_64(UIO_WRITE, vp, (vm_offset_t)&note, sizeof(note), foffset, UIO_SYSSPACE,
296 	           IO_NODELOCKED | IO_UNIT, cred, 0, current_proc());
297 }
298 
299 /*
300  * note handling
301  */
302 
303 struct core_note {
304 	size_t          cn_size;
305 	const char      *cn_owner;
306 	write_note_cb_t *cn_write_cb;
307 } const core_notes[] = {
308 	{
309 		.cn_size = sizeof(addrable_bits_note_t),
310 		.cn_owner = ADDRABLE_BITS_DATA_OWNER,
311 		.cn_write_cb = note_addrable_bits,
312 	}
313 };
314 
315 const size_t notes_count = sizeof(core_notes) / sizeof(struct core_note);
316 
317 /*
318  * LC_NOTE commands are allocated as a part of Mach-O header and are written to
319  * disk at the end of coredump. LC_NOTE's payload has to be written in callbacks here.
320  */
321 static int
dump_notes(proc_t __unused core_proc,vm_offset_t header,size_t hoffset,struct vnode * vp,off_t foffset)322 dump_notes(proc_t __unused core_proc, vm_offset_t header, size_t hoffset, struct vnode *vp, off_t foffset)
323 {
324 	for (size_t i = 0; i < notes_count; i++) {
325 		int error = 0;
326 
327 		if (core_notes[i].cn_write_cb == NULL) {
328 			continue;
329 		}
330 
331 		/* Generate LC_NOTE command. */
332 		struct note_command *nc = (struct note_command *)(header + hoffset);
333 
334 		nc->cmd = LC_NOTE;
335 		nc->cmdsize = sizeof(struct note_command);
336 		nc->offset = foffset;
337 		nc->size = core_notes[i].cn_size;
338 		strlcpy(nc->data_owner, core_notes[i].cn_owner, sizeof(nc->data_owner));
339 
340 		hoffset += sizeof(struct note_command);
341 
342 		/* Add note's payload. */
343 		error = core_notes[i].cn_write_cb(vp, foffset);
344 		if (error != KERN_SUCCESS) {
345 			COREDUMPLOG("failed to write LC_NOTE %s: error %d", core_notes[i].cn_owner, error);
346 			return error;
347 		}
348 
349 		foffset += core_notes[i].cn_size;
350 	}
351 
352 	return 0;
353 }
354 
355 /*
356  * coredump
357  *
358  * Description:	Create a core image on the file "core" for the process
359  *		indicated
360  *
361  * Parameters:	core_proc			Process to dump core [*]
362  *		reserve_mb			If non-zero, leave filesystem with
363  *						at least this much free space.
364  *		coredump_flags	Extra options (ignore rlimit, run fsync)
365  *
366  * Returns:	0				Success
367  *		!0				Failure errno
368  *
369  * IMPORTANT:	This function can only be called on the current process, due
370  *		to assumptions below; see variable declaration section for
371  *		details.
372  */
373 #define MAX_TSTATE_FLAVORS      10
374 int
coredump(proc_t core_proc,uint32_t reserve_mb,int coredump_flags)375 coredump(proc_t core_proc, uint32_t reserve_mb, int coredump_flags)
376 {
377 /* Begin assumptions that limit us to only the current process */
378 	vfs_context_t ctx = vfs_context_current();
379 	vm_map_t        map = current_map();
380 	task_t          task = current_task();
381 /* End assumptions */
382 	kauth_cred_t cred = vfs_context_ucred(ctx);
383 	int error = 0;
384 	struct vnode_attr *vap = NULL;
385 	size_t          thread_count, segment_count;
386 	size_t          command_size, header_size, tstate_size;
387 	size_t          hoffset;
388 	off_t           foffset;
389 	mach_vm_offset_t vmoffset;
390 	vm_offset_t     header;
391 	mach_vm_size_t  vmsize;
392 	vm_prot_t       prot;
393 	vm_prot_t       maxprot;
394 	int             error1 = 0;
395 	char            stack_name[MAXCOMLEN + 6];
396 	char            *alloced_name = NULL;
397 	char            *name = NULL;
398 	mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
399 	vm_size_t       mapsize;
400 	size_t          i;
401 	uint32_t nesting_depth = 0;
402 	kern_return_t   kret;
403 	struct vm_region_submap_info_64 vbr;
404 	mach_msg_type_number_t vbrcount = 0;
405 	tir_t tir1;
406 	struct vnode * vp;
407 	struct mach_header      *mh = NULL;        /* protected by is_64 */
408 	struct mach_header_64   *mh64 = NULL;        /* protected by is_64 */
409 	int             is_64 = 0;
410 	size_t          mach_header_sz = sizeof(struct mach_header);
411 	size_t          segment_command_sz = sizeof(struct segment_command);
412 	size_t          notes_size = 0;
413 	const char     *format = NULL;
414 	char           *custom_location_entitlement = NULL;
415 	size_t          custom_location_entitlement_len = 0;
416 	char           *alloced_format = NULL;
417 	size_t          alloced_format_len = 0;
418 	bool            include_iokit_memory = task_is_driver(task);
419 	bool            coredump_attempted = false;
420 
421 	if ((error = is_coredump_eligible(core_proc)) != 0) {
422 		goto out2;
423 	}
424 
425 	if (IS_64BIT_PROCESS(core_proc)) {
426 		is_64 = 1;
427 		mach_header_sz = sizeof(struct mach_header_64);
428 		segment_command_sz = sizeof(struct segment_command_64);
429 	}
430 
431 	mapsize = get_vmmap_size(map);
432 
433 	custom_location_entitlement = IOCurrentTaskGetEntitlement(COREDUMP_CUSTOM_LOCATION_ENTITLEMENT);
434 	if (custom_location_entitlement != NULL) {
435 		custom_location_entitlement_len = strlen(custom_location_entitlement);
436 		const char * dirname;
437 		if (proc_is_driver(core_proc)) {
438 			dirname = defaultdrivercorefiledir;
439 		} else {
440 			dirname = defaultcorefiledir;
441 		}
442 		size_t dirname_len = strlen(dirname);
443 		size_t printed_len;
444 
445 		/* new format is dirname + "/" + string from entitlement */
446 		alloced_format_len = dirname_len + 1 + custom_location_entitlement_len;
447 		alloced_format = kalloc_data(alloced_format_len + 1, Z_ZERO | Z_WAITOK | Z_NOFAIL);
448 		printed_len = snprintf(alloced_format, alloced_format_len + 1, "%s/%s", dirname, custom_location_entitlement);
449 		assert(printed_len == alloced_format_len);
450 
451 		format = alloced_format;
452 	} else {
453 		if (proc_is_driver(core_proc)) {
454 			format = drivercorefilename;
455 		} else {
456 			format = corefilename;
457 		}
458 	}
459 
460 	if (((coredump_flags & COREDUMP_IGNORE_ULIMIT) == 0) &&
461 	    (mapsize >= proc_limitgetcur(core_proc, RLIMIT_CORE))) {
462 		error = EFAULT;
463 		goto out2;
464 	}
465 
466 	/* log coredump failures from here */
467 	coredump_attempted = true;
468 
469 	(void) task_suspend_internal(task);
470 
471 #if HAS_MTE
472 	/*
473 	 * At this point we have suspended all proc threads, so we are
474 	 * safe disabling tag checking for an MTE enabled process.
475 	 * This will be necessary later when we loop through the process
476 	 * memory segment and copy them in, as we would inevitably generate
477 	 * a tag check fault.
478 	 */
479 
480 	/*
481 	 * Do not disable tag checking and take the "faulty" path if high watermaks
482 	 * cores are enabled. We want a better fix here, but for the time being
483 	 * that's a debugging feature that can run under -disable_mte.
484 	 */
485 	extern int hwm_user_cores;
486 
487 	if (task_has_sec(task) && hwm_user_cores == 0) {
488 		mte_disable_user_checking(task);
489 	}
490 #endif /* HAS_MTE */
491 
492 	alloced_name = zalloc_flags(ZV_NAMEI, Z_NOWAIT | Z_ZERO);
493 
494 	/* create name according to sysctl'able format string */
495 	/* if name creation fails, fall back to historical behaviour... */
496 	if (alloced_name == NULL ||
497 	    proc_core_name(format, core_proc->p_comm, kauth_cred_getuid(cred),
498 	    proc_getpid(core_proc), alloced_name, MAXPATHLEN)) {
499 		snprintf(stack_name, sizeof(stack_name),
500 		    "/cores/core.%d", proc_getpid(core_proc));
501 		name = stack_name;
502 	} else {
503 		name = alloced_name;
504 	}
505 
506 	COREDUMPLOG("writing core to %s", name);
507 	if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, ctx))) {
508 		COREDUMPLOG("failed to open core dump file %s: error %d", name, error);
509 		goto out2;
510 	}
511 
512 	vap = kalloc_type(struct vnode_attr, Z_WAITOK | Z_ZERO);
513 	VATTR_INIT(vap);
514 	VATTR_WANTED(vap, va_nlink);
515 	/* Don't dump to non-regular files or files with links. */
516 	if (vp->v_type != VREG ||
517 	    vnode_getattr(vp, vap, ctx) || vap->va_nlink != 1) {
518 		COREDUMPLOG("failed to write core to non-regular file");
519 		error = EFAULT;
520 		goto out;
521 	}
522 
523 	VATTR_INIT(vap);         /* better to do it here than waste more stack in vnode_setsize */
524 	VATTR_SET(vap, va_data_size, 0);
525 	if (core_proc == initproc) {
526 		VATTR_SET(vap, va_dataprotect_class, PROTECTION_CLASS_D);
527 	}
528 	vnode_setattr(vp, vap, ctx);
529 	core_proc->p_acflag |= ACORE;
530 
531 	COREDUMPLOG("map size: %lu", mapsize);
532 	if ((reserve_mb > 0) &&
533 	    ((freespace_mb(vp) - (mapsize >> 20)) < reserve_mb)) {
534 		COREDUMPLOG("insufficient free space (free=%d MB, needed=%lu MB, reserve=%d MB)", freespace_mb(vp), (mapsize >> 20), reserve_mb);
535 		error = ENOSPC;
536 		goto out;
537 	}
538 
539 	/*
540 	 *	If the task is modified while dumping the file
541 	 *	(e.g., changes in threads or VM, the resulting
542 	 *	file will not necessarily be correct.
543 	 */
544 
545 	thread_count = get_task_numacts(task);
546 	segment_count = get_vmmap_entries(map);         /* XXX */
547 	tir1.flavor_count = sizeof(thread_flavor_array) / sizeof(mythread_state_flavor_t);
548 	bcopy(thread_flavor_array, flavors, sizeof(thread_flavor_array));
549 	tstate_size = 0;
550 	for (i = 0; i < tir1.flavor_count; i++) {
551 		tstate_size += sizeof(mythread_state_flavor_t) +
552 		    (flavors[i].count * sizeof(int));
553 	}
554 
555 	{
556 		size_t lhs;
557 		size_t rhs;
558 
559 		/* lhs = segment_count * segment_command_sz */
560 		if (os_mul_overflow(segment_count, segment_command_sz, &lhs)) {
561 			COREDUMPLOG("error: segment size overflow: segment_count=%lu, segment_command_sz=%lu", segment_count, segment_command_sz);
562 			error = ENOMEM;
563 			goto out;
564 		}
565 
566 		/* rhs = (tstate_size + sizeof(struct thread_command)) * thread_count */
567 		if (os_add_and_mul_overflow(tstate_size, sizeof(struct thread_command), thread_count, &rhs)) {
568 			COREDUMPLOG("error: thread state size overflow: tstate_size=%lu, thread_count=%lu", tstate_size, thread_count);
569 			error = ENOMEM;
570 			goto out;
571 		}
572 
573 		/* command_size = lhs + rhs */
574 		if (os_add_overflow(lhs, rhs, &command_size)) {
575 			COREDUMPLOG("error: command size overflow: lhs=%lu, rhs=%lu", lhs, rhs);
576 			error = ENOMEM;
577 			goto out;
578 		}
579 
580 		/* Add notes payload. */
581 		if (os_mul_overflow(notes_count, sizeof(struct note_command), &notes_size)) {
582 			COREDUMPLOG("error: note command size overflow: note=%lu", i);
583 			error = ENOMEM;
584 			goto out;
585 		}
586 
587 		if (os_add_overflow(command_size, notes_size, &command_size)) {
588 			COREDUMPLOG("error: notes overflow: notes_size=%lu", notes_size);
589 			error = ENOMEM;
590 			goto out;
591 		}
592 	}
593 
594 	if (os_add_overflow(command_size, mach_header_sz, &header_size)) {
595 		COREDUMPLOG("error: header size overflow: command_size=%lu, mach_header_sz=%lu", command_size, mach_header_sz);
596 		error = ENOMEM;
597 		goto out;
598 	}
599 
600 	if (kmem_alloc(kernel_map, &header, (vm_size_t)header_size,
601 	    KMA_DATA | KMA_ZERO, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
602 		COREDUMPLOG("error: failed to allocate memory for header (size=%lu)", header_size);
603 		error = ENOMEM;
604 		goto out;
605 	}
606 
607 	/*
608 	 *	Set up Mach-O header.
609 	 */
610 	if (is_64) {
611 		mh64 = (struct mach_header_64 *)header;
612 		mh64->magic = MH_MAGIC_64;
613 		mh64->cputype = process_cpu_type(core_proc);
614 		mh64->cpusubtype = process_cpu_subtype(core_proc);
615 		mh64->filetype = MH_CORE;
616 		mh64->ncmds = (uint32_t)(segment_count + notes_count + thread_count);
617 		mh64->sizeofcmds = (uint32_t)command_size;
618 	} else {
619 		mh = (struct mach_header *)header;
620 		mh->magic = MH_MAGIC;
621 		mh->cputype = process_cpu_type(core_proc);
622 		mh->cpusubtype = process_cpu_subtype(core_proc);
623 		mh->filetype = MH_CORE;
624 		mh->ncmds = (uint32_t)(segment_count + notes_count + thread_count);
625 		mh->sizeofcmds = (uint32_t)command_size;
626 	}
627 
628 	hoffset = mach_header_sz;         /* offset into header */
629 	foffset = round_page(header_size);         /* offset into file */
630 	vmoffset = MACH_VM_MIN_ADDRESS;         /* offset into VM */
631 	COREDUMPLOG("mach header size: %zu", header_size);
632 
633 	/*
634 	 * We use to check for an error, here, now we try and get
635 	 * as much as we can
636 	 */
637 	COREDUMPLOG("dumping %zu segments", segment_count);
638 	while (segment_count > 0) {
639 		struct segment_command          *sc;
640 		struct segment_command_64       *sc64;
641 
642 		/*
643 		 *	Get region information for next region.
644 		 */
645 
646 		while (1) {
647 			vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
648 			if ((kret = mach_vm_region_recurse(map,
649 			    &vmoffset, &vmsize, &nesting_depth,
650 			    (vm_region_recurse_info_t)&vbr,
651 			    &vbrcount)) != KERN_SUCCESS) {
652 				break;
653 			}
654 			/*
655 			 * If we get a valid mapping back, but we're dumping
656 			 * a 32 bit process,  and it's over the allowable
657 			 * address space of a 32 bit process, it's the same
658 			 * as if mach_vm_region_recurse() failed.
659 			 */
660 			if (!(is_64) &&
661 			    (vmoffset + vmsize > VM_MAX_ADDRESS)) {
662 				kret = KERN_INVALID_ADDRESS;
663 				COREDUMPLOG("exceeded allowable region for 32-bit process");
664 				break;
665 			}
666 			if (vbr.is_submap) {
667 				nesting_depth++;
668 				continue;
669 			} else {
670 				break;
671 			}
672 		}
673 		if (kret != KERN_SUCCESS) {
674 			COREDUMPLOG("ending segment dump, kret=%d", kret);
675 			break;
676 		}
677 
678 		prot = vbr.protection;
679 		maxprot = vbr.max_protection;
680 
681 		if ((prot | maxprot) == VM_PROT_NONE) {
682 			/*
683 			 * Elide unreadable (likely reserved) segments
684 			 */
685 			COREDUMPLOG("eliding unreadable segment %llx->%llx", vmoffset, vmoffset + vmsize);
686 			vmoffset += vmsize;
687 			continue;
688 		}
689 
690 		/*
691 		 * Try as hard as possible to get read access to the data.
692 		 */
693 		if ((prot & VM_PROT_READ) == 0) {
694 			mach_vm_protect(map, vmoffset, vmsize, FALSE,
695 			    prot | VM_PROT_READ);
696 		}
697 
698 		/*
699 		 * But only try and perform the write if we can read it.
700 		 */
701 		int64_t fsize = ((maxprot & VM_PROT_READ) == VM_PROT_READ
702 		    && (include_iokit_memory || vbr.user_tag != VM_MEMORY_IOKIT)
703 		    && coredumpok(map, vmoffset)) ? vmsize : 0;
704 
705 		if (fsize) {
706 			int64_t resid = 0;
707 			const enum uio_seg sflg = IS_64BIT_PROCESS(core_proc) ?
708 			    UIO_USERSPACE64 : UIO_USERSPACE32;
709 
710 			error = vn_rdwr_64(UIO_WRITE, vp, vmoffset, fsize,
711 			    foffset, sflg, IO_NODELOCKED | IO_UNIT,
712 			    cred, &resid, core_proc);
713 
714 			if (error) {
715 				/*
716 				 * Mark segment as empty
717 				 */
718 				fsize = 0;
719 				COREDUMPLOG("failed to write segment %llx->%llx: error %d", vmoffset, vmoffset + vmsize, error);
720 			} else if (resid) {
721 				/*
722 				 * Partial write. Extend the file size so
723 				 * that the segment command contains a valid
724 				 * range of offsets, possibly creating a hole.
725 				 */
726 				VATTR_INIT(vap);
727 				VATTR_SET(vap, va_data_size, foffset + fsize);
728 				vnode_setattr(vp, vap, ctx);
729 				COREDUMPLOG("partially wrote segment %llx->%llx, resid %lld", vmoffset, vmoffset + vmsize, resid);
730 			}
731 		} else {
732 			COREDUMPLOG("skipping unreadable segment %llx->%llx", vmoffset, vmoffset + vmsize);
733 		}
734 
735 		/*
736 		 *	Fill in segment command structure.
737 		 */
738 
739 		if (is_64) {
740 			sc64 = (struct segment_command_64 *)(header + hoffset);
741 			sc64->cmd = LC_SEGMENT_64;
742 			sc64->cmdsize = sizeof(struct segment_command_64);
743 			/* segment name is zeroed by kmem_alloc */
744 			sc64->segname[0] = 0;
745 			sc64->vmaddr = vmoffset;
746 			sc64->vmsize = vmsize;
747 			sc64->fileoff = foffset;
748 			sc64->filesize = fsize;
749 			sc64->maxprot = maxprot;
750 			sc64->initprot = prot;
751 			sc64->nsects = 0;
752 			sc64->flags = 0;
753 		} else {
754 			sc = (struct segment_command *) (header + hoffset);
755 			sc->cmd = LC_SEGMENT;
756 			sc->cmdsize = sizeof(struct segment_command);
757 			/* segment name is zeroed by kmem_alloc */
758 			sc->segname[0] = 0;
759 			sc->vmaddr = CAST_DOWN_EXPLICIT(uint32_t, vmoffset);
760 			sc->vmsize = CAST_DOWN_EXPLICIT(uint32_t, vmsize);
761 			sc->fileoff = CAST_DOWN_EXPLICIT(uint32_t, foffset);         /* will never truncate */
762 			sc->filesize = CAST_DOWN_EXPLICIT(uint32_t, fsize);         /* will never truncate */
763 			sc->maxprot = maxprot;
764 			sc->initprot = prot;
765 			sc->nsects = 0;
766 			sc->flags = 0;
767 		}
768 
769 		hoffset += segment_command_sz;
770 		foffset += fsize;
771 		vmoffset += vmsize;
772 		segment_count--;
773 	}
774 	COREDUMPLOG("max file offset: %lld", foffset);
775 
776 	/*
777 	 * If there are remaining segments which have not been written
778 	 * out because break in the loop above, then they were not counted
779 	 * because they exceed the real address space of the executable
780 	 * type: remove them from the header's count.  This is OK, since
781 	 * we are allowed to have a sparse area following the segments.
782 	 */
783 	if (is_64) {
784 		mh64->ncmds -= segment_count;
785 		mh64->sizeofcmds -= segment_count * segment_command_sz;
786 	} else {
787 		mh->ncmds -= segment_count;
788 		mh->sizeofcmds -= segment_count * segment_command_sz;
789 	}
790 
791 	/* Add LC_NOTES */
792 	COREDUMPLOG("dumping %zu notes", notes_count);
793 	if (dump_notes(core_proc, header, hoffset, vp, foffset) != 0) {
794 		error = EFAULT;
795 		goto out;
796 	}
797 
798 	tir1.header = header;
799 	tir1.hoffset = hoffset + notes_size;
800 	tir1.flavors = flavors;
801 	tir1.tstate_size = tstate_size;
802 	COREDUMPLOG("dumping %zu threads", thread_count);
803 	task_act_iterate_wth_args(task, collectth_state, &tir1);
804 
805 	/*
806 	 *	Write out the Mach header at the beginning of the
807 	 *	file.  OK to use a 32 bit write for this.
808 	 */
809 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)header, (int)MIN(header_size, INT_MAX), (off_t)0,
810 	    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, cred, (int *) 0, core_proc);
811 	if (error != KERN_SUCCESS) {
812 		COREDUMPLOG("failed to write mach header: error %d", error);
813 	}
814 	kmem_free(kernel_map, header, header_size);
815 
816 	if ((coredump_flags & COREDUMP_FULLFSYNC) && error == 0) {
817 		error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
818 		if (error != KERN_SUCCESS) {
819 			COREDUMPLOG("failed to FULLFSYNC core: error %d", error);
820 		}
821 	}
822 out:
823 	if (vap) {
824 		kfree_type(struct vnode_attr, vap);
825 	}
826 	error1 = vnode_close(vp, FWRITE, ctx);
827 	if (error1 != KERN_SUCCESS) {
828 		COREDUMPLOG("failed to close core file: error %d", error1);
829 	}
830 out2:
831 #if CONFIG_AUDIT
832 	audit_proc_coredump(core_proc, name, error);
833 #endif
834 	if (alloced_name != NULL) {
835 		zfree(ZV_NAMEI, alloced_name);
836 	}
837 	if (alloced_format != NULL) {
838 		kfree_data(alloced_format, alloced_format_len + 1);
839 	}
840 	if (custom_location_entitlement != NULL) {
841 		kfree_data(custom_location_entitlement, custom_location_entitlement_len + 1);
842 	}
843 	if (error == 0) {
844 		error = error1;
845 	}
846 
847 	if (coredump_attempted) {
848 		if (error != 0) {
849 			COREDUMPLOG("core dump failed: error %d\n", error);
850 		} else {
851 			COREDUMPLOG("core dump succeeded");
852 		}
853 	}
854 
855 	return error;
856 }
857 
858 #endif /* CONFIG_COREDUMP */
859