xref: /xnu-10063.141.1/osfmk/ipc/ipc_kmsg.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
58  * support for mandatory and extensible security protections.  This notice
59  * is included in support of clause 2.2 (b) of the Apple Public License,
60  * Version 2.0.
61  * Copyright (c) 2005 SPARTA, Inc.
62  */
63 /*
64  */
65 /*
66  *	File:	ipc/ipc_kmsg.c
67  *	Author:	Rich Draves
68  *	Date:	1989
69  *
70  *	Operations on kernel messages.
71  */
72 
73 
74 #include <mach/mach_types.h>
75 #include <mach/boolean.h>
76 #include <mach/kern_return.h>
77 #include <mach/message.h>
78 #include <mach/port.h>
79 #include <mach/vm_map.h>
80 #include <mach/mach_vm.h>
81 #include <mach/vm_statistics.h>
82 
83 #include <kern/kern_types.h>
84 #include <kern/assert.h>
85 #include <kern/debug.h>
86 #include <kern/ipc_kobject.h>
87 #include <kern/kalloc.h>
88 #include <kern/zalloc.h>
89 #include <kern/processor.h>
90 #include <kern/thread.h>
91 #include <kern/thread_group.h>
92 #include <kern/sched_prim.h>
93 #include <kern/misc_protos.h>
94 #include <kern/cpu_data.h>
95 #include <kern/policy_internal.h>
96 #include <kern/mach_filter.h>
97 
98 #include <pthread/priority_private.h>
99 
100 #include <machine/limits.h>
101 
102 #include <vm/vm_map.h>
103 #include <vm/vm_object.h>
104 #include <vm/vm_kern.h>
105 
106 #include <ipc/port.h>
107 #include <ipc/ipc_types.h>
108 #include <ipc/ipc_entry.h>
109 #include <ipc/ipc_kmsg.h>
110 #include <ipc/ipc_notify.h>
111 #include <ipc/ipc_object.h>
112 #include <ipc/ipc_space.h>
113 #include <ipc/ipc_policy.h>
114 #include <ipc/ipc_port.h>
115 #include <ipc/ipc_right.h>
116 #include <ipc/ipc_hash.h>
117 #include <ipc/ipc_importance.h>
118 #include <ipc/ipc_service_port.h>
119 
120 #if MACH_FLIPC
121 #include <kern/mach_node.h>
122 #include <ipc/flipc.h>
123 #endif
124 
125 #include <os/overflow.h>
126 
127 #include <security/mac_mach_internal.h>
128 
129 #include <device/device_server.h>
130 
131 #include <string.h>
132 
133 #include <sys/kdebug.h>
134 #include <libkern/OSAtomic.h>
135 
136 #include <ptrauth.h>
137 #if __has_feature(ptrauth_calls)
138 #include <libkern/ptrauth_utils.h>
139 #endif
140 
141 
142 /*
143  * In kernel, complex mach msg have a simpler representation than userspace:
144  *
145  * <header>
146  * <desc-count>
147  * <descriptors> * desc-count
148  * <body>
149  *
150  * And the descriptors are of type `mach_msg_kdescriptor_t`,
151  * that is large enough to accommodate for any possible representation.
152  *
153  * The `type` field of any descriptor is always at the same offset,
154  * and the smallest possible descriptor is of size USER_DESC_SIZE_MIN.
155  *
156  * Note:
 * - KERNEL_DESC_SIZE is 16 on all kernels
158  * - USER_DESC_SIZE_MIN is 12 on all kernels
159  */
160 
161 #define KERNEL_DESC_SIZE        sizeof(mach_msg_kdescriptor_t)
162 #define USER_DESC_SIZE_MIN      sizeof(mach_msg_type_descriptor_t)
163 #define USER_DESC_SIZE_MAX      KERNEL_DESC_SIZE
164 #define USER_DESC_MAX_DELTA     (KERNEL_DESC_SIZE - USER_DESC_SIZE_MIN)
165 #define USER_HEADER_SIZE_DELTA  (sizeof(mach_msg_header_t) - sizeof(mach_msg_user_header_t))
166 
167 
168 #define mach_validate_desc_type(t, size) \
169 	static_assert(sizeof(t) == (size))
170 
171 mach_validate_desc_type(mach_msg_descriptor_t, KERNEL_DESC_SIZE);
172 mach_validate_desc_type(mach_msg_kdescriptor_t, KERNEL_DESC_SIZE);
173 mach_validate_desc_type(mach_msg_port_descriptor_t, KERNEL_DESC_SIZE);
174 mach_validate_desc_type(mach_msg_ool_descriptor_t, KERNEL_DESC_SIZE);
175 mach_validate_desc_type(mach_msg_ool_ports_descriptor_t, KERNEL_DESC_SIZE);
176 mach_validate_desc_type(mach_msg_guarded_port_descriptor_t, KERNEL_DESC_SIZE);
177 
178 extern vm_map_t         ipc_kernel_copy_map;
179 extern const vm_size_t  msg_ool_size_small;
180 
181 /* zone for cached ipc_kmsg_t structures */
182 ZONE_DEFINE_ID(ZONE_ID_IPC_KMSG, "ipc kmsgs", struct ipc_kmsg,
183     ZC_CACHING | ZC_ZFREE_CLEARMEM);
184 #define ikm_require(kmsg) \
185 	zone_id_require(ZONE_ID_IPC_KMSG, sizeof(struct ipc_kmsg), kmsg)
186 #define ikm_require_aligned(kmsg) \
187 	zone_id_require_aligned(ZONE_ID_IPC_KMSG, kmsg)
188 
189 KALLOC_TYPE_VAR_DEFINE(KT_IPC_KMSG_KDATA_OOL,
190     mach_msg_base_t, mach_msg_kdescriptor_t, KT_DEFAULT);
191 
192 static TUNABLE(bool, enforce_strict_reply, "ipc_strict_reply", false);
193 
194 
195 #pragma mark ipc_kmsg layout and accessors
196 
197 /* Whether header, body, content and trailer occupy contiguous memory space */
198 static inline bool
ikm_is_linear(ipc_kmsg_t kmsg)199 ikm_is_linear(ipc_kmsg_t kmsg)
200 {
201 	return kmsg->ikm_type == IKM_TYPE_ALL_INLINED ||
202 	       kmsg->ikm_type == IKM_TYPE_KDATA_OOL;
203 }
204 
205 /* Size of kmsg header (plus body and descriptors for complex messages) */
206 __attribute__((always_inline, overloadable))
207 static mach_msg_size_t
ikm_kdata_size(mach_msg_size_t dsc_count,bool complex)208 ikm_kdata_size(
209 	mach_msg_size_t dsc_count,
210 	bool            complex)
211 {
212 	if (complex) {
213 		return sizeof(mach_msg_kbase_t) + dsc_count * KERNEL_DESC_SIZE;
214 	} else {
215 		return sizeof(mach_msg_header_t);
216 	}
217 }
218 
219 __attribute__((always_inline, overloadable))
220 static mach_msg_size_t
ikm_kdata_size(mach_msg_header_t * hdr)221 ikm_kdata_size(
222 	mach_msg_header_t *hdr)
223 {
224 	if (hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
225 		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);
226 
227 		return ikm_kdata_size(kbase->msgb_dsc_count, true);
228 	}
229 	return ikm_kdata_size(0, false);
230 }
231 
232 /*
233  * Returns start address of user data for kmsg.
234  *
235  * Caller is responsible for checking the size of udata buffer before attempting
236  * to write to the address returned.
237  *
238  * Condition:
239  *   1. kmsg descriptors must have been validated and expanded, or is a message
240  *      originated from kernel.
241  *   2. ikm_header() content may or may not be populated
242  */
243 void *
ikm_udata(ipc_kmsg_t kmsg,mach_msg_size_t dsc_count,bool complex)244 ikm_udata(
245 	ipc_kmsg_t      kmsg,
246 	mach_msg_size_t dsc_count,
247 	bool            complex)
248 {
249 	if (ikm_is_linear(kmsg)) {
250 		mach_msg_header_t *hdr = ikm_header(kmsg);
251 
252 		return (char *)hdr + ikm_kdata_size(dsc_count, complex);
253 	}
254 	return kmsg->ikm_udata;
255 }
256 
257 /*
258  * Returns start address of user data for kmsg, given a populated kmsg.
259  *
260  * Caller is responsible for checking the size of udata buffer before attempting
261  * to write to the address returned.
262  *
263  * Condition:
264  *   kmsg must have a populated header.
265  */
266 void *
ikm_udata_from_header(ipc_kmsg_t kmsg)267 ikm_udata_from_header(ipc_kmsg_t kmsg)
268 {
269 	if (ikm_is_linear(kmsg)) {
270 		mach_msg_header_t *hdr = ikm_header(kmsg);
271 
272 		return (char *)hdr + ikm_kdata_size(hdr);
273 	}
274 	return kmsg->ikm_udata;
275 }
276 
277 #if (DEVELOPMENT || DEBUG)
278 /* Returns end of kdata buffer (may contain extra space) */
279 vm_offset_t
ikm_kdata_end(ipc_kmsg_t kmsg)280 ikm_kdata_end(ipc_kmsg_t kmsg)
281 {
282 	switch (kmsg->ikm_type) {
283 	case IKM_TYPE_ALL_INLINED:
284 		return (vm_offset_t)kmsg->ikm_big_data + IKM_BIG_MSG_SIZE;
285 	case IKM_TYPE_UDATA_OOL:
286 		return (vm_offset_t)kmsg->ikm_small_data + IKM_SMALL_MSG_SIZE;
287 	default:
288 		return (vm_offset_t)kmsg->ikm_kdata + kmsg->ikm_kdata_size;
289 	}
290 }
291 #endif
292 
293 /*
294  * Returns message header address.
295  */
296 inline mach_msg_header_t *
ikm_header(ipc_kmsg_t kmsg)297 ikm_header(
298 	ipc_kmsg_t         kmsg)
299 {
300 	switch (kmsg->ikm_type) {
301 	case IKM_TYPE_ALL_INLINED:
302 		return (mach_msg_header_t *)kmsg->ikm_big_data;
303 	case IKM_TYPE_UDATA_OOL:
304 		return (mach_msg_header_t *)kmsg->ikm_small_data;
305 	default:
306 		return (mach_msg_header_t *)kmsg->ikm_kdata;
307 	}
308 }
309 
310 static inline mach_msg_aux_header_t *
ikm_aux_header(ipc_kmsg_t kmsg)311 ikm_aux_header(
312 	ipc_kmsg_t         kmsg)
313 {
314 	if (!kmsg->ikm_aux_size) {
315 		return NULL;
316 	}
317 
318 	assert(kmsg->ikm_aux_size >= sizeof(mach_msg_aux_header_t));
319 
320 	if (kmsg->ikm_type == IKM_TYPE_ALL_INLINED) {
321 		return (mach_msg_aux_header_t *)((vm_offset_t)(kmsg + 1) -
322 		       kmsg->ikm_aux_size);
323 	} else {
324 		assert(kmsg->ikm_type != IKM_TYPE_KDATA_OOL);
325 		return (mach_msg_aux_header_t *)((vm_offset_t)kmsg->ikm_udata +
326 		       kmsg->ikm_udata_size - kmsg->ikm_aux_size);
327 	}
328 }
329 
/*!
 * @brief
 * Returns the size of a user descriptor for a given type
 *
 * @param type           the user-visible descriptor type being measured
 * @param is_task_64bit  whether the user task has a 64-bit address space
 * @returns              USER_DESC_SIZE_MIN (12) or USER_DESC_SIZE_MAX (16)
 */
static inline mach_msg_size_t
ikm_user_desc_size(mach_msg_descriptor_type_t type, bool is_task_64bit)
{
	/*
	 * User descriptors come in two sizes:
	 * - USER_DESC_SIZE_MIN (12)
	 * - USER_DESC_SIZE_MAX (16)
	 *
	 * Ideally this function would be implemented as a "switch",
	 * unfortunately this produces terrible codegen, so we instead write
	 * the optimal code by hand with tons of static asserts.
	 *
	 * As of now there are only two cases:
	 * - port descriptors are always 12 bytes
	 * - other descriptors are 12 bytes on 32bits, and 16 on 64bits.
	 *
	 * If one of the static asserts break because you are adding a new
	 * descriptor type, make sure to update this function properly.
	 */
	/* trips when a new descriptor type is added without updating this code */
	static_assert(MACH_MSG_DESCRIPTOR_MAX == MACH_MSG_GUARDED_PORT_DESCRIPTOR);

	if (type == MACH_MSG_PORT_DESCRIPTOR) {
		/* port descriptors have the same 12-byte layout in both ABIs */
		mach_validate_desc_type(mach_msg_user_port_descriptor_t, USER_DESC_SIZE_MIN);
		return USER_DESC_SIZE_MIN;
	}
	if (is_task_64bit) {
		mach_validate_desc_type(mach_msg_ool_descriptor64_t, USER_DESC_SIZE_MAX);
		mach_validate_desc_type(mach_msg_ool_ports_descriptor64_t, USER_DESC_SIZE_MAX);
		mach_validate_desc_type(mach_msg_guarded_port_descriptor64_t, USER_DESC_SIZE_MAX);
		return USER_DESC_SIZE_MAX;
	} else {
		mach_validate_desc_type(mach_msg_ool_descriptor32_t, USER_DESC_SIZE_MIN);
		mach_validate_desc_type(mach_msg_ool_ports_descriptor32_t, USER_DESC_SIZE_MIN);
		mach_validate_desc_type(mach_msg_guarded_port_descriptor32_t, USER_DESC_SIZE_MIN);
		return USER_DESC_SIZE_MIN;
	}
}
371 
372 __abortlike
373 static void
__ipc_kmsg_descriptor_invalid_type_panic(const mach_msg_kdescriptor_t * kdesc)374 __ipc_kmsg_descriptor_invalid_type_panic(
375 	const mach_msg_kdescriptor_t *kdesc)
376 {
377 	panic("Invalid descriptor type (%p: %d)",
378 	    kdesc, mach_msg_kdescriptor_type(kdesc));
379 }
380 
381 mach_msg_trailer_size_t
ipc_kmsg_trailer_size(mach_msg_option64_t option,vm_map_t map __unused)382 ipc_kmsg_trailer_size(mach_msg_option64_t option, vm_map_t map __unused)
383 {
384 	return REQUESTED_TRAILER_SIZE(map->max_offset > VM_MAX_ADDRESS, option);
385 }
386 
387 
388 /*
389  * Get the trailer address of kmsg.
390  */
391 mach_msg_max_trailer_t *
ipc_kmsg_get_trailer(ipc_kmsg_t kmsg)392 ipc_kmsg_get_trailer(
393 	ipc_kmsg_t              kmsg)
394 {
395 	mach_msg_header_t *hdr = ikm_header(kmsg);
396 	mach_msg_size_t    trailer_pos = hdr->msgh_size;
397 	vm_offset_t        base;
398 
399 	if (ikm_is_linear(kmsg)) {
400 		base = (vm_offset_t)hdr;
401 	} else {
402 		base = (vm_offset_t)kmsg->ikm_udata;
403 		trailer_pos -= ikm_kdata_size(hdr);
404 	}
405 
406 	return (mach_msg_max_trailer_t *)(base + trailer_pos);
407 }
408 
409 void
ipc_kmsg_set_voucher_port(ipc_kmsg_t kmsg,ipc_port_t voucher_port,mach_msg_type_name_t type)410 ipc_kmsg_set_voucher_port(
411 	ipc_kmsg_t           kmsg,
412 	ipc_port_t           voucher_port,
413 	mach_msg_type_name_t type)
414 {
415 	if (IP_VALID(voucher_port)) {
416 		assert(ip_kotype(voucher_port) == IKOT_VOUCHER);
417 	}
418 	kmsg->ikm_voucher_port = voucher_port;
419 	kmsg->ikm_voucher_type = type;
420 }
421 
422 ipc_port_t
ipc_kmsg_get_voucher_port(ipc_kmsg_t kmsg)423 ipc_kmsg_get_voucher_port(ipc_kmsg_t kmsg)
424 {
425 	return kmsg->ikm_voucher_port;
426 }
427 
428 void
ipc_kmsg_clear_voucher_port(ipc_kmsg_t kmsg)429 ipc_kmsg_clear_voucher_port(ipc_kmsg_t kmsg)
430 {
431 	kmsg->ikm_voucher_port = IP_NULL;
432 	kmsg->ikm_voucher_type = MACH_MSGH_BITS_ZERO;
433 }
434 
435 /*
436  * Caller has a reference to the kmsg and the mqueue lock held.
437  *
438  * As such, we can safely return a pointer to the thread group in the kmsg and
439  * not an additional reference. It is up to the caller to decide to take an
440  * additional reference on the thread group while still holding the mqueue lock,
441  * if needed.
442  */
443 #if CONFIG_PREADOPT_TG
444 struct thread_group *
ipc_kmsg_get_thread_group(ipc_kmsg_t kmsg)445 ipc_kmsg_get_thread_group(ipc_kmsg_t kmsg)
446 {
447 	struct thread_group *tg = NULL;
448 	kern_return_t __assert_only kr;
449 
450 	ipc_voucher_t voucher = convert_port_to_voucher(ipc_kmsg_get_voucher_port(kmsg));
451 	kr = bank_get_preadopt_thread_group(voucher, &tg);
452 	ipc_voucher_release(voucher);
453 
454 	return tg;
455 }
456 #endif
457 
458 #pragma mark ipc_kmsg signing
459 
460 __abortlike
461 static void
__ikm_signature_check_panic(ipc_kmsg_t kmsg,uint32_t sig)462 __ikm_signature_check_panic(ipc_kmsg_t kmsg, uint32_t sig)
463 {
464 	mach_msg_header_t *hdr = ikm_header(kmsg);
465 
466 	panic("IPC kmsg header signature mismatch: "
467 	    "kmsg=%p, hdr=%p, id=%d, sig=0x%08x (expected 0x%08x)",
468 	    kmsg, hdr, hdr->msgh_id, sig, kmsg->ikm_signature);
469 }
470 
/*
 * Computes the integrity signature over a kmsg's header (+ descriptor
 * count for complex messages) and its maximum-size trailer.
 *
 * On ptrauth-capable hardware the signature is derived from two chained
 * ptrauth blob signatures diversified by the kmsg address; elsewhere the
 * signature is always 0 (signing is effectively disabled).
 *
 * If `dsc_count` is non-NULL, the descriptor count that was folded into
 * the signature is also returned through it.
 */
static uint32_t
__ipc_kmsg_sign(
	ipc_kmsg_t              kmsg,
	mach_msg_max_trailer_t *trailer,
	mach_msg_size_t        *dsc_count)
{
	uint32_t           signature = 0;
	mach_msg_header_t *hdr  = ikm_header(kmsg);
	mach_msg_base_t    base;

	if (hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);

		/*
		 * the "atomic" load will also be volatile which prevents the
		 * compiler from re-fetching that value after optimization.
		 */
		base.header = kbase->msgb_header;
		base.body.msgh_descriptor_count =
		    os_atomic_load(&kbase->msgb_dsc_count, relaxed);
	} else {
		/* simple message: sign the header with a zero descriptor count */
		base.header = *hdr;
		base.body.msgh_descriptor_count = 0;
	}

	/* compute sig of a copy of the header with all varying bits masked off */
	base.header.msgh_bits &= MACH_MSGH_BITS_USER;
	base.header.msgh_bits &= ~MACH_MSGH_BITS_VOUCHER_MASK;

#if __has_feature(ptrauth_calls)
	{
		/* diversify by kmsg address plus a field-specific discriminator */
		uintptr_t data = (uintptr_t)kmsg;

		data |= OS_PTRAUTH_DISCRIMINATOR("kmsg.ikm_signature") << 48;

		/* chain: sign the masked base, then the trailer, keep top 32 bits */
		data  = ptrauth_utils_sign_blob_generic(&base, sizeof(base), data, 0);
		data  = ptrauth_utils_sign_blob_generic(trailer,
		    MAX_TRAILER_SIZE, data, PTRAUTH_ADDR_DIVERSIFY);
		signature = (uint32_t)(data >> 32);
	}
#else
	(void)kmsg;
	(void)trailer;
#endif

	if (dsc_count) {
		*dsc_count = base.body.msgh_descriptor_count;
	}
	return signature;
}
521 
522 static void
ipc_kmsg_sign(ipc_kmsg_t kmsg,mach_msg_max_trailer_t * trailer)523 ipc_kmsg_sign(ipc_kmsg_t kmsg, mach_msg_max_trailer_t *trailer)
524 {
525 	kmsg->ikm_signature = __ipc_kmsg_sign(kmsg, trailer, NULL);
526 }
527 
528 /*
529  *	Routine:	ipc_kmsg_init_trailer_and_sign
530  *	Purpose:
531  *		Initiailizes a trailer in a message safely,
532  *		and sign its header and trailer.
533  */
534 static void
ipc_kmsg_init_trailer_and_sign(ipc_kmsg_t kmsg,task_t sender)535 ipc_kmsg_init_trailer_and_sign(
536 	ipc_kmsg_t          kmsg,
537 	task_t              sender)
538 {
539 	static const mach_msg_max_trailer_t KERNEL_TRAILER_TEMPLATE = {
540 		.msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0,
541 		.msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE,
542 		.msgh_sender = KERNEL_SECURITY_TOKEN_VALUE,
543 		.msgh_audit = KERNEL_AUDIT_TOKEN_VALUE
544 	};
545 
546 	mach_msg_max_trailer_t *trailer = ipc_kmsg_get_trailer(kmsg);
547 
548 	if (sender == TASK_NULL) {
549 		memcpy(trailer, &KERNEL_TRAILER_TEMPLATE, sizeof(*trailer));
550 	} else {
551 		bzero(trailer, sizeof(*trailer));
552 		trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0;
553 		trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE;
554 		trailer->msgh_sender = *task_get_sec_token(sender);
555 		trailer->msgh_audit = *task_get_audit_token(sender);
556 	}
557 
558 	ipc_kmsg_sign(kmsg, trailer);
559 }
560 
561 /*
562  * Purpose:
563  *       Validate kmsg signature.
564  */
565 mach_msg_size_t
ipc_kmsg_validate_signature(ipc_kmsg_t kmsg)566 ipc_kmsg_validate_signature(
567 	ipc_kmsg_t kmsg)
568 {
569 	uint32_t         sig;
570 	mach_msg_size_t  dsc_count;
571 
572 	ikm_require_aligned(kmsg);
573 	sig = __ipc_kmsg_sign(kmsg, ipc_kmsg_get_trailer(kmsg), &dsc_count);
574 	if (sig != kmsg->ikm_signature) {
575 		__ikm_signature_check_panic(kmsg, sig);
576 	}
577 
578 	return dsc_count;
579 }
580 
581 void
ipc_kmsg_sign_descriptors(mach_msg_kdescriptor_t * kdesc,mach_msg_size_t dsc_count)582 ipc_kmsg_sign_descriptors(
583 	mach_msg_kdescriptor_t *kdesc,
584 	mach_msg_size_t         dsc_count)
585 {
586 #if __has_feature(ptrauth_calls)
587 	for (mach_msg_size_t i = 0; i < dsc_count; i++, kdesc++) {
588 		switch (mach_msg_kdescriptor_type(kdesc)) {
589 		case MACH_MSG_PORT_DESCRIPTOR:
590 			kdesc->kdesc_port.name =
591 			    kdesc->kdesc_port.kext_name;
592 			break;
593 		case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
594 		case MACH_MSG_OOL_DESCRIPTOR:
595 			kdesc->kdesc_memory.address =
596 			    kdesc->kdesc_memory.kext_address;
597 			break;
598 		case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
599 			mach_msg_ool_ports_descriptor_t *dsc = &kdesc->kdesc_port_array;
600 			ipc_port_t          *ports = dsc->kext_address;
601 			mach_port_array_t    array = dsc->kext_address;
602 
603 			for (mach_msg_size_t j = 0; j < dsc->count; j++) {
604 				array[i].port = ports[i];
605 			}
606 			dsc->address = array;
607 			break;
608 		}
609 		case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
610 			kdesc->kdesc_guarded_port.name =
611 			    kdesc->kdesc_guarded_port.kext_name;
612 			break;
613 		default:
614 			__ipc_kmsg_descriptor_invalid_type_panic(kdesc);
615 		}
616 	}
617 #else
618 #pragma unused(kdesc, dsc_count)
619 #endif /* __has_feature(ptrauth_calls) */
620 }
621 
622 static void
ipc_kmsg_relocate_descriptors(mach_msg_kdescriptor_t * dst_dsc,const mach_msg_kdescriptor_t * src_dsc,mach_msg_size_t dsc_count)623 ipc_kmsg_relocate_descriptors(
624 	mach_msg_kdescriptor_t *dst_dsc,
625 	const mach_msg_kdescriptor_t *src_dsc,
626 	mach_msg_size_t         dsc_count)
627 {
628 #if __has_feature(ptrauth_calls)
629 	for (mach_msg_size_t i = 0; i < dsc_count; i++, dst_dsc++, src_dsc++) {
630 		switch (mach_msg_kdescriptor_type(src_dsc)) {
631 		case MACH_MSG_PORT_DESCRIPTOR:
632 			dst_dsc->kdesc_port.name =
633 			    src_dsc->kdesc_port.name;
634 			break;
635 		case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
636 		case MACH_MSG_OOL_DESCRIPTOR:
637 			dst_dsc->kdesc_memory.address =
638 			    src_dsc->kdesc_memory.address;
639 			break;
640 		case MACH_MSG_OOL_PORTS_DESCRIPTOR:
641 			dst_dsc->kdesc_port_array.address =
642 			    src_dsc->kdesc_port_array.address;
643 			break;
644 		case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
645 			dst_dsc->kdesc_guarded_port.name =
646 			    src_dsc->kdesc_guarded_port.name;
647 			break;
648 		default:
649 			__ipc_kmsg_descriptor_invalid_type_panic(src_dsc);
650 		}
651 	}
652 #else
653 #pragma unused(dst_dsc, src_dsc, dsc_count)
654 #endif /* __has_feature(ptrauth_calls) */
655 }
656 
657 static void
ipc_kmsg_strip_descriptors(mach_msg_kdescriptor_t * dst_dsc,const mach_msg_kdescriptor_t * src_dsc,mach_msg_size_t dsc_count)658 ipc_kmsg_strip_descriptors(
659 	mach_msg_kdescriptor_t *dst_dsc,
660 	const mach_msg_kdescriptor_t *src_dsc,
661 	mach_msg_size_t         dsc_count)
662 {
663 #if __has_feature(ptrauth_calls)
664 	for (mach_msg_size_t i = 0; i < dsc_count; i++, dst_dsc++, src_dsc++) {
665 		switch (mach_msg_kdescriptor_type(src_dsc)) {
666 		case MACH_MSG_PORT_DESCRIPTOR:
667 			dst_dsc->kdesc_port.kext_name =
668 			    src_dsc->kdesc_port.name;
669 			break;
670 		case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
671 		case MACH_MSG_OOL_DESCRIPTOR:
672 			dst_dsc->kdesc_memory.kext_address =
673 			    src_dsc->kdesc_memory.address;
674 			break;
675 		case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
676 			mach_msg_ool_ports_descriptor_t *dsc = &dst_dsc->kdesc_port_array;
677 			ipc_port_t          *ports = dsc->address;
678 			mach_port_array_t    array = dsc->address;
679 
680 			for (mach_msg_size_t j = 0; j < dsc->count; j++) {
681 				ports[i] = array[i].port;
682 			}
683 			dsc->kext_address = array;
684 			break;
685 		}
686 		case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
687 			dst_dsc->kdesc_guarded_port.kext_name =
688 			    src_dsc->kdesc_guarded_port.name;
689 			break;
690 		default:
691 			__ipc_kmsg_descriptor_invalid_type_panic(src_dsc);
692 		}
693 	}
694 #else
695 #pragma unused(dst_dsc, src_dsc, dsc_count)
696 #endif /* __has_feature(ptrauth_calls) */
697 }
698 
699 
700 #pragma mark ipc_kmsg alloc/clean/free
701 
702 static inline void *
ikm_alloc_kdata_ool(size_t size,zalloc_flags_t flags)703 ikm_alloc_kdata_ool(size_t size, zalloc_flags_t flags)
704 {
705 	return kalloc_type_var_impl(KT_IPC_KMSG_KDATA_OOL,
706 	           size, flags, NULL);
707 }
708 
709 static inline void
ikm_free_kdata_ool(void * ptr,size_t size)710 ikm_free_kdata_ool(void *ptr, size_t size)
711 {
712 	kfree_type_var_impl(KT_IPC_KMSG_KDATA_OOL, ptr, size);
713 }
714 
/*
 *	Routine:	ipc_kmsg_alloc
 *	Purpose:
 *		Allocate a kernel message structure.  If the
 *		message is scalar and all the data resides inline, that is best.
 *      Otherwise, allocate out of line buffers to fit the message and
 *      the optional auxiliary data.
 *
 *	Conditions:
 *		Nothing locked.
 *
 *      kmsg_size doesn't take the trailer or descriptor
 *		inflation into account, but already accounts for the mach
 *		message header expansion.
 */
ipc_kmsg_t
ipc_kmsg_alloc(
	mach_msg_size_t         kmsg_size,
	mach_msg_size_t         aux_size,
	mach_msg_size_t         desc_count,
	ipc_kmsg_alloc_flags_t  flags)
{
	mach_msg_size_t max_kmsg_size, max_delta, max_kdata_size,
	    max_udata_size, max_kmsg_and_aux_size;
	ipc_kmsg_t kmsg;

	void *msg_kdata = NULL, *msg_udata = NULL;
	zalloc_flags_t alloc_flags = Z_WAITOK;
	ipc_kmsg_type_t kmsg_type;

	/*
	 * In kernel descriptors, are of the same size (KERNEL_DESC_SIZE),
	 * but in userspace, depending on 64-bitness, descriptors might be
	 * smaller.
	 *
	 * When handling a userspace message however, we know how many
	 * descriptors have been declared, and we pad for the maximum expansion.
	 *
	 * During descriptor expansion, message header stays at the same place
	 * while everything after it gets shifted to higher address.
	 */
	if (flags & IPC_KMSG_ALLOC_KERNEL) {
		/* kernel messages use kernel-sized descriptors: no expansion */
		assert(aux_size == 0);
		max_delta = 0;
	} else if (os_mul_and_add_overflow(desc_count, USER_DESC_MAX_DELTA,
	    USER_HEADER_SIZE_DELTA, &max_delta)) {
		return IKM_NULL;
	}

	/* all size arithmetic is overflow-checked; bail out on any overflow */
	if (os_add3_overflow(kmsg_size, MAX_TRAILER_SIZE, max_delta, &max_kmsg_size)) {
		return IKM_NULL;
	}
	if (os_add_overflow(max_kmsg_size, aux_size, &max_kmsg_and_aux_size)) {
		return IKM_NULL;
	}

	/* First, determine the layout of the kmsg to allocate */
	if (max_kmsg_and_aux_size <= IKM_BIG_MSG_SIZE) {
		/* everything (msg + trailer + aux) fits inside the kmsg itself */
		kmsg_type = IKM_TYPE_ALL_INLINED;
		max_udata_size = 0;
		max_kdata_size = 0;
	} else if (flags & IPC_KMSG_ALLOC_ALL_INLINE) {
		panic("size too large for the fast kmsg zone (%d)", kmsg_size);
	} else if (flags & IPC_KMSG_ALLOC_LINEAR) {
		/*
		 * Caller sets MACH64_SEND_KOBJECT_CALL or MACH64_SEND_ANY, or that
		 * the call originates from kernel, or it's a mach_msg() call.
		 * In any case, message does not carry aux data.
		 * We have validated mach_msg2() call options in mach_msg2_trap().
		 */
		if (aux_size != 0) {
			panic("non-zero aux size for kmsg type IKM_TYPE_KDATA_OOL.");
		}
		kmsg_type = IKM_TYPE_KDATA_OOL;
		max_udata_size = 0;
		max_kdata_size = max_kmsg_size;
	} else {
		mach_msg_size_t min_kdata_size;

		/*
		 * If message can be splitted from the middle, IOW does not need to
		 * occupy contiguous memory space, sequester (header + descriptors)
		 * from (content + trailer + aux) for memory security.
		 */
		assert(max_kmsg_and_aux_size > IKM_BIG_MSG_SIZE);

		/*
		 * max_kdata_size: Maximum combined size of header plus (optional) descriptors.
		 * This is _base_ size + descriptor count * kernel descriptor size.
		 */
		if (os_mul_and_add_overflow(desc_count, KERNEL_DESC_SIZE,
		    sizeof(mach_msg_base_t), &max_kdata_size)) {
			return IKM_NULL;
		}

		/*
		 * min_kdata_size: Minimum combined size of header plus (optional) descriptors.
		 * This is _header_ size + descriptor count * minimal descriptor size.
		 */
		mach_msg_size_t min_size = (flags & IPC_KMSG_ALLOC_KERNEL) ?
		    KERNEL_DESC_SIZE : USER_DESC_SIZE_MIN;
		if (os_mul_and_add_overflow(desc_count, min_size,
		    sizeof(mach_msg_header_t), &min_kdata_size)) {
			return IKM_NULL;
		}

		/*
		 * max_udata_size: Maximum combined size of message content, trailer and aux.
		 * This is total kmsg and aux size (already accounts for max trailer size) minus
		 * _minimum_ (header + descs) size.
		 */
		if (os_sub_overflow(max_kmsg_and_aux_size, min_kdata_size, &max_udata_size)) {
			return IKM_NULL;
		}

		/* kdata goes in the inline small buffer when it fits, else OOL */
		if (max_kdata_size <= IKM_SMALL_MSG_SIZE) {
			kmsg_type = IKM_TYPE_UDATA_OOL;
		} else {
			kmsg_type = IKM_TYPE_ALL_OOL;
		}
	}

	if (flags & IPC_KMSG_ALLOC_ZERO) {
		alloc_flags |= Z_ZERO;
	}
	if (flags & IPC_KMSG_ALLOC_NOFAIL) {
		alloc_flags |= Z_NOFAIL;
	}

	/* Then, allocate memory for both udata and kdata if needed, as well as kmsg */
	if (max_udata_size > 0) {
		msg_udata = kalloc_data(max_udata_size, alloc_flags);
		if (__improbable(msg_udata == NULL)) {
			return IKM_NULL;
		}
	}

	if (kmsg_type == IKM_TYPE_ALL_OOL || kmsg_type == IKM_TYPE_KDATA_OOL) {
		if (kmsg_type == IKM_TYPE_ALL_OOL) {
			msg_kdata = kalloc_type(mach_msg_base_t, mach_msg_kdescriptor_t,
			    desc_count, alloc_flags | Z_SPRAYQTN);
		} else {
			msg_kdata = ikm_alloc_kdata_ool(max_kdata_size, alloc_flags);
		}

		if (__improbable(msg_kdata == NULL)) {
			/* unwind the udata allocation before failing */
			kfree_data(msg_udata, max_udata_size);
			return IKM_NULL;
		}
	}

	static_assert(IPC_KMSG_MAX_AUX_DATA_SPACE <= UINT16_MAX,
	    "casting aux_size won't truncate");

	kmsg = zalloc_id(ZONE_ID_IPC_KMSG, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	kmsg->ikm_type = kmsg_type;
	kmsg->ikm_aux_size = (uint16_t)aux_size;

	if (flags & IPC_KMSG_ALLOC_USE_KEEP_ALIVE) {
		/* keep-alive is only supported for fully inlined messages */
		assert(kmsg_type == IKM_TYPE_ALL_INLINED);
		kmsg->ikm_keep_alive = IKM_KEEP_ALIVE_OWNED;
	}

	/* Finally, set up pointers properly */
	if (kmsg_type == IKM_TYPE_ALL_INLINED) {
		assert(msg_udata == NULL && msg_kdata == NULL);
	} else {
		if (kmsg_type == IKM_TYPE_UDATA_OOL) {
			/* kdata fits inline; only udata was allocated out of line */
			kmsg->ikm_kdata = kmsg->ikm_small_data;
		} else {
			kmsg->ikm_kdata = msg_kdata;
		}
		kmsg->ikm_udata = msg_udata;
		kmsg->ikm_kdata_size = max_kdata_size;
		kmsg->ikm_udata_size = max_udata_size;
	}

	return kmsg;
}
894 
895 /* re-export for IOKit's c++ */
896 extern ipc_kmsg_t ipc_kmsg_alloc_uext_reply(mach_msg_size_t);
897 
898 ipc_kmsg_t
ipc_kmsg_alloc_uext_reply(mach_msg_size_t size)899 ipc_kmsg_alloc_uext_reply(
900 	mach_msg_size_t         size)
901 {
902 	return ipc_kmsg_alloc(size, 0, 0, IPC_KMSG_ALLOC_KERNEL | IPC_KMSG_ALLOC_LINEAR |
903 	           IPC_KMSG_ALLOC_ZERO | IPC_KMSG_ALLOC_NOFAIL);
904 }
905 
/*
 *	Routine:	ipc_kmsg_keep_alive_try_reusing()
 *	Purpose:
 *		Attempt to mark a preallocated message in-use.
 *		Returns true on success, false on failure.
 */
bool
ipc_kmsg_keep_alive_try_reusing(ipc_kmsg_t kmsg)
{
	uintptr_t v;

	/* atomically set IN_USE, fetching the previous state */
	v = os_atomic_or_orig(&kmsg->ikm_keep_alive,
	    IKM_KEEP_ALIVE_IN_USE, relaxed);

	/* if the message isn't owned, it can't use keep-alive */
	ipc_release_assert(v & IKM_KEEP_ALIVE_OWNED);

	/* success only if this caller is the one who set IN_USE */
	return (v & IKM_KEEP_ALIVE_IN_USE) == 0;
}
925 
/*
 *	Routine:	ipc_kmsg_keep_alive_done_using
 *	Purpose:
 *		Marks an ipc kmsg as no longer in flight.
 *		Returns true if the message is also no longer owned.
 */
static bool
ipc_kmsg_keep_alive_done_using(ipc_kmsg_t kmsg)
{
	uintptr_t v = os_atomic_load(&kmsg->ikm_keep_alive, relaxed);

	if (v == IKM_KEEP_ALIVE_NONE) {
		/* fastpath for most messages not using the facility */
		return true;
	}

	/* release: publish this use's effects before dropping IN_USE */
	v = os_atomic_andnot_orig(&kmsg->ikm_keep_alive,
	    IKM_KEEP_ALIVE_IN_USE, release);

	/* if the message wasn't in-use, something is wrong */
	ipc_release_assert(v & IKM_KEEP_ALIVE_IN_USE);

	if (v & IKM_KEEP_ALIVE_OWNED) {
		/* still owned: owner will free it later */
		return false;
	}
	/*
	 * acquire fence presumably pairs with the owner's release in
	 * ipc_kmsg_keep_alive_abandon() before the caller frees the kmsg.
	 */
	os_atomic_thread_fence(acquire);
	return true;
}
954 
/*
 *	Routine:	ipc_kmsg_keep_alive_abandon()
 *	Purpose:
 *		Abandons a message that was marked as OWNED
 *		as part of allocating it with IPC_KMSG_ALLOC_USE_KEEP_ALIVE.
 *		If the message is not currently in use, it is freed here.
 */
void
ipc_kmsg_keep_alive_abandon(
	ipc_kmsg_t              kmsg)
{
	uintptr_t v;

	/* drop ownership; release orders our prior accesses to the kmsg */
	v = os_atomic_andnot_orig(&kmsg->ikm_keep_alive,
	    IKM_KEEP_ALIVE_OWNED, release);

	/* if the message wasn't owned, something is wrong */
	ipc_release_assert(v & IKM_KEEP_ALIVE_OWNED);

	if ((v & IKM_KEEP_ALIVE_IN_USE) == 0) {
		/*
		 * Nobody is using the message: it is ours to free.
		 * The acquire fence pairs with the release in
		 * ipc_kmsg_keep_alive_done_using().
		 */
		os_atomic_thread_fence(acquire);
		ipc_kmsg_free(kmsg);
	}
}
978 
979 /*
980  *	Routine:	ipc_kmsg_free_allocations
981  *	Purpose:
982  *		Free external allocations of a kmsg.
983  *	Conditions:
984  *		Nothing locked.
985  */
986 static void
ipc_kmsg_free_allocations(ipc_kmsg_t kmsg)987 ipc_kmsg_free_allocations(
988 	ipc_kmsg_t              kmsg)
989 {
990 	mach_msg_size_t dsc_count = 0;
991 
992 	switch (kmsg->ikm_type) {
993 	case IKM_TYPE_ALL_INLINED:
994 		break;
995 	case IKM_TYPE_UDATA_OOL:
996 		kfree_data(kmsg->ikm_udata, kmsg->ikm_udata_size);
997 		/* kdata is inlined, udata freed */
998 		break;
999 	case IKM_TYPE_KDATA_OOL:
1000 		ikm_free_kdata_ool(kmsg->ikm_kdata, kmsg->ikm_kdata_size);
1001 		/* kdata freed, no udata */
1002 		break;
1003 	case IKM_TYPE_ALL_OOL:
1004 		dsc_count = (kmsg->ikm_kdata_size - sizeof(mach_msg_base_t)) /
1005 		    KERNEL_DESC_SIZE;
1006 		kfree_type(mach_msg_base_t, mach_msg_kdescriptor_t, dsc_count,
1007 		    kmsg->ikm_kdata);
1008 		/* kdata freed */
1009 		kfree_data(kmsg->ikm_udata, kmsg->ikm_udata_size);
1010 		/* udata freed */
1011 		break;
1012 	default:
1013 		panic("strange kmsg type");
1014 	}
1015 	kmsg->ikm_type = IKM_TYPE_ALL_INLINED;
1016 
1017 	/* leave nothing dangling or causing out of bounds */
1018 	kmsg->ikm_udata = NULL;
1019 	kmsg->ikm_kdata = NULL;
1020 	kmsg->ikm_udata_size = 0;
1021 	kmsg->ikm_kdata_size = 0;
1022 	kmsg->ikm_aux_size = 0;
1023 }
1024 
/*
 *	Routine:	ipc_kmsg_free
 *	Purpose:
 *		Free a kernel message (and udata) buffer.
 *	Conditions:
 *		Nothing locked.  The voucher port must already have been
 *		destroyed/cleared by the caller.
 */
void
ipc_kmsg_free(
	ipc_kmsg_t      kmsg)
{
	/* callers are responsible for releasing the voucher right first */
	assert(!IP_VALID(ipc_kmsg_get_voucher_port(kmsg)));

	KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_FREE) | DBG_FUNC_NONE,
	    VM_KERNEL_ADDRPERM((uintptr_t)kmsg),
	    0, 0, 0, 0);

	/*
	 * Check to see if an mk_timer asked for this message to stay
	 * alive.  (Keep-alive is only set up on fully inlined kmsgs,
	 * see IPC_KMSG_ALLOC_USE_KEEP_ALIVE in ipc_kmsg_alloc.)
	 */
	if (kmsg->ikm_type == IKM_TYPE_ALL_INLINED &&
	    !ipc_kmsg_keep_alive_done_using(kmsg)) {
		return;
	}

	ipc_kmsg_free_allocations(kmsg);
	zfree_id(ZONE_ID_IPC_KMSG, kmsg);
	/* kmsg struct freed */
}
1055 
/*
 *	Routine:	ipc_kmsg_clean_header
 *	Purpose:
 *		Cleans the header of a kmsg: releases the destination,
 *		reply and voucher rights it carries, if any.
 *	Conditions:
 *		Nothing locked.
 */
static void
ipc_kmsg_clean_header(
	ipc_kmsg_t              kmsg)
{
	ipc_object_t object;
	mach_msg_header_t *hdr = ikm_header(kmsg);
	mach_msg_bits_t mbits = hdr->msgh_bits;

	/* deal with importance chain while we still have dest and voucher references */
	ipc_importance_clean(kmsg);

	/* destination right: uses the destination-specific destroy path */
	object = ip_to_object(hdr->msgh_remote_port);
	if (IO_VALID(object)) {
		ipc_object_destroy_dest(object, MACH_MSGH_BITS_REMOTE(mbits));
	}

	/* reply right */
	object = ip_to_object(hdr->msgh_local_port);
	if (IO_VALID(object)) {
		ipc_object_destroy(object, MACH_MSGH_BITS_LOCAL(mbits));
	}

	/* voucher right: always carried as a (moved) send right */
	object = ip_to_object(ipc_kmsg_get_voucher_port(kmsg));
	if (IO_VALID(object)) {
		assert(MACH_MSGH_BITS_VOUCHER(mbits) == MACH_MSG_TYPE_MOVE_SEND);
		ipc_object_destroy(object, MACH_MSG_TYPE_PORT_SEND);
		ipc_kmsg_clear_voucher_port(kmsg);
	}
}
1091 
1092 /*
1093  *	Routine:	ipc_kmsg_clean_descriptors
1094  *	Purpose:
1095  *		Cleans the body of a kernel message.
1096  *		Releases all rights, references, and memory.
1097  *
1098  *	Conditions:
1099  *		No locks held.
1100  */
1101 void
ipc_kmsg_clean_descriptors(mach_msg_kdescriptor_t * kdesc __counted_by (number),mach_msg_type_number_t number)1102 ipc_kmsg_clean_descriptors(
1103 	mach_msg_kdescriptor_t *kdesc __counted_by(number),
1104 	mach_msg_type_number_t  number)
1105 {
1106 	for (mach_msg_type_number_t i = 0; i < number; i++, kdesc++) {
1107 		switch (mach_msg_kdescriptor_type(kdesc)) {
1108 		case MACH_MSG_PORT_DESCRIPTOR: {
1109 			mach_msg_port_descriptor_t *dsc = &kdesc->kdesc_port;
1110 
1111 			/*
1112 			 * Destroy port rights carried in the message
1113 			 */
1114 			if (IP_VALID(dsc->name)) {
1115 				ipc_object_destroy(ip_to_object(dsc->name),
1116 				    dsc->disposition);
1117 				dsc->name = IP_NULL;
1118 			}
1119 			break;
1120 		}
1121 		case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
1122 		case MACH_MSG_OOL_DESCRIPTOR: {
1123 			mach_msg_ool_descriptor_t *dsc = &kdesc->kdesc_memory;
1124 			vm_map_copy_t copy = dsc->address;
1125 
1126 			/*
1127 			 * Destroy memory carried in the message
1128 			 */
1129 			if (copy) {
1130 				vm_map_copy_discard(copy);
1131 				dsc->address = NULL;
1132 			} else {
1133 				assert(dsc->size == 0);
1134 			}
1135 			break;
1136 		}
1137 		case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
1138 			mach_msg_ool_ports_descriptor_t *dsc = &kdesc->kdesc_port_array;
1139 			mach_port_array_t array = dsc->address;
1140 
1141 			for (mach_msg_size_t j = 0; j < dsc->count; j++) {
1142 				ipc_port_t port = array[j].port;
1143 
1144 				if (IP_VALID(port)) {
1145 					ipc_object_destroy(ip_to_object(port),
1146 					    dsc->disposition);
1147 				}
1148 			}
1149 			if (array) {
1150 				mach_port_array_free(array, dsc->count);
1151 				dsc->address = NULL;
1152 			} else {
1153 				assert(dsc->count == 0);
1154 			}
1155 			break;
1156 		}
1157 		case MACH_MSG_GUARDED_PORT_DESCRIPTOR: {
1158 			mach_msg_guarded_port_descriptor_t *dsc = &kdesc->kdesc_guarded_port;
1159 
1160 			/*
1161 			 * Destroy port rights carried in the message
1162 			 */
1163 			if (IP_VALID(dsc->name)) {
1164 				ipc_object_destroy(ip_to_object(dsc->name),
1165 				    dsc->disposition);
1166 				dsc->name = IP_NULL;
1167 			}
1168 			break;
1169 		}
1170 		default:
1171 			__ipc_kmsg_descriptor_invalid_type_panic(kdesc);
1172 		}
1173 	}
1174 }
1175 
1176 /*
1177  *	Routine:	ipc_kmsg_clean
1178  *	Purpose:
1179  *		Cleans a kernel message.  Releases all rights,
1180  *		references, and memory held by the message.
1181  *	Conditions:
1182  *		No locks held.
1183  */
1184 
1185 static void
ipc_kmsg_clean(ipc_kmsg_t kmsg,mach_msg_size_t dsc_count)1186 ipc_kmsg_clean(ipc_kmsg_t kmsg, mach_msg_size_t dsc_count)
1187 {
1188 	ipc_kmsg_clean_header(kmsg);
1189 
1190 	if (dsc_count) {
1191 		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(ikm_header(kmsg));
1192 
1193 		ipc_kmsg_clean_descriptors(kbase->msgb_dsc_array, dsc_count);
1194 	}
1195 }
1196 
1197 
1198 #pragma mark ipc_kmsg enqueue/destroy, qos, priority, voucher, ...
1199 
1200 /* we can't include the BSD <sys/persona.h> header here... */
1201 #ifndef PERSONA_ID_NONE
1202 #define PERSONA_ID_NONE ((uint32_t)-1)
1203 #endif
1204 
1205 /*
1206  *	Routine:	ipc_kmsg_enqueue_qos
1207  *	Purpose:
1208  *		Enqueue a kmsg, propagating qos
1209  *		overrides towards the head of the queue.
1210  *
1211  *	Returns:
1212  *		whether the head of the queue had
1213  *		it's override-qos adjusted because
1214  *		of this insertion.
1215  */
1216 
1217 bool
ipc_kmsg_enqueue_qos(ipc_kmsg_queue_t queue,ipc_kmsg_t kmsg)1218 ipc_kmsg_enqueue_qos(
1219 	ipc_kmsg_queue_t        queue,
1220 	ipc_kmsg_t              kmsg)
1221 {
1222 	mach_msg_qos_t qos_ovr = kmsg->ikm_qos_override;
1223 	ipc_kmsg_t     prev;
1224 
1225 	if (ipc_kmsg_enqueue(queue, kmsg)) {
1226 		return true;
1227 	}
1228 
1229 	/* apply QoS overrides towards the head */
1230 	prev = ipc_kmsg_queue_element(kmsg->ikm_link.prev);
1231 	while (prev != kmsg) {
1232 		if (qos_ovr <= prev->ikm_qos_override) {
1233 			return false;
1234 		}
1235 		prev->ikm_qos_override = qos_ovr;
1236 		prev = ipc_kmsg_queue_element(prev->ikm_link.prev);
1237 	}
1238 
1239 	return true;
1240 }
1241 
1242 /*
1243  *	Routine:	ipc_kmsg_override_qos
1244  *	Purpose:
1245  *		Update the override for a given kmsg already
1246  *		enqueued, propagating qos override adjustments
1247  *		towards	the head of the queue.
1248  *
1249  *	Returns:
1250  *		whether the head of the queue had
1251  *		it's override-qos adjusted because
1252  *		of this insertion.
1253  */
1254 
1255 bool
ipc_kmsg_override_qos(ipc_kmsg_queue_t queue,ipc_kmsg_t kmsg,mach_msg_qos_t qos_ovr)1256 ipc_kmsg_override_qos(
1257 	ipc_kmsg_queue_t    queue,
1258 	ipc_kmsg_t          kmsg,
1259 	mach_msg_qos_t      qos_ovr)
1260 {
1261 	ipc_kmsg_t first = ipc_kmsg_queue_first(queue);
1262 	ipc_kmsg_t cur = kmsg;
1263 
1264 	/* apply QoS overrides towards the head */
1265 	while (qos_ovr > cur->ikm_qos_override) {
1266 		cur->ikm_qos_override = qos_ovr;
1267 		if (cur == first) {
1268 			return true;
1269 		}
1270 		cur = ipc_kmsg_queue_element(cur->ikm_link.prev);
1271 	}
1272 
1273 	return false;
1274 }
1275 
1276 /*
1277  *	Routine:	ipc_kmsg_destroy
1278  *	Purpose:
1279  *		Destroys a kernel message.  Releases all rights,
1280  *		references, and memory held by the message.
1281  *		Frees the message.
1282  *	Conditions:
1283  *		No locks held.
1284  */
1285 
1286 void
ipc_kmsg_destroy(ipc_kmsg_t kmsg,ipc_kmsg_destroy_flags_t flags)1287 ipc_kmsg_destroy(
1288 	ipc_kmsg_t                     kmsg,
1289 	ipc_kmsg_destroy_flags_t       flags)
1290 {
1291 	/* sign the msg if it has not been signed */
1292 	boolean_t sign_msg = (flags & IPC_KMSG_DESTROY_NOT_SIGNED);
1293 	mach_msg_header_t *hdr = ikm_header(kmsg);
1294 
1295 	if (flags & IPC_KMSG_DESTROY_SKIP_REMOTE) {
1296 		hdr->msgh_remote_port = MACH_PORT_NULL;
1297 		/* re-sign the msg since content changed */
1298 		sign_msg = true;
1299 	}
1300 
1301 	if (flags & IPC_KMSG_DESTROY_SKIP_LOCAL) {
1302 		hdr->msgh_local_port = MACH_PORT_NULL;
1303 		/* re-sign the msg since content changed */
1304 		sign_msg = true;
1305 	}
1306 
1307 	if (sign_msg) {
1308 		ipc_kmsg_sign(kmsg, ipc_kmsg_get_trailer(kmsg));
1309 	}
1310 
1311 	/*
1312 	 *	Destroying a message can cause more messages to be destroyed.
1313 	 *	Curtail recursion by putting messages on the deferred
1314 	 *	destruction queue.  If this was the first message on the
1315 	 *	queue, this instance must process the full queue.
1316 	 */
1317 	if (ipc_kmsg_delayed_destroy(kmsg)) {
1318 		ipc_kmsg_reap_delayed();
1319 	}
1320 }
1321 
1322 /*
1323  *	Routine:	ipc_kmsg_delayed_destroy
1324  *	Purpose:
1325  *		Enqueues a kernel message for deferred destruction.
1326  *	Returns:
1327  *		Boolean indicator that the caller is responsible to reap
1328  *		deferred messages.
1329  */
1330 
1331 bool
ipc_kmsg_delayed_destroy(ipc_kmsg_t kmsg)1332 ipc_kmsg_delayed_destroy(
1333 	ipc_kmsg_t kmsg)
1334 {
1335 	return ipc_kmsg_enqueue(&current_thread()->ith_messages, kmsg);
1336 }
1337 
1338 /*
1339  *	Routine:	ipc_kmsg_delayed_destroy_queue
1340  *	Purpose:
1341  *		Enqueues a queue of kernel messages for deferred destruction.
1342  *	Returns:
1343  *		Boolean indicator that the caller is responsible to reap
1344  *		deferred messages.
1345  */
1346 
1347 bool
ipc_kmsg_delayed_destroy_queue(ipc_kmsg_queue_t queue)1348 ipc_kmsg_delayed_destroy_queue(
1349 	ipc_kmsg_queue_t        queue)
1350 {
1351 	return circle_queue_concat_tail(&current_thread()->ith_messages, queue);
1352 }
1353 
/*
 *	Routine:	ipc_kmsg_reap_delayed
 *	Purpose:
 *		Destroys messages from the per-thread
 *		deferred reaping queue.
 *	Conditions:
 *		No locks held. kmsgs on queue must be signed.
 */

void
ipc_kmsg_reap_delayed(void)
{
	ipc_kmsg_queue_t queue = &(current_thread()->ith_messages);
	ipc_kmsg_t kmsg;

	/*
	 * must leave kmsg in queue while cleaning it to assure
	 * no nested calls recurse into here.
	 */
	while ((kmsg = ipc_kmsg_queue_first(queue)) != IKM_NULL) {
		/*
		 * Kmsgs queued for delayed destruction either come from
		 * ipc_kmsg_destroy() or ipc_kmsg_delayed_destroy_queue(),
		 * where we handover all kmsgs enqueued on port to destruction
		 * queue in O(1). In either case, all kmsgs must have been
		 * signed.
		 *
		 * For each unreceived msg, validate its signature before freeing.
		 */
		ipc_kmsg_clean(kmsg, ipc_kmsg_validate_signature(kmsg));
		/* only dequeue after cleaning: see the recursion note above */
		ipc_kmsg_rmqueue(queue, kmsg);
		ipc_kmsg_free(kmsg);
	}
}
1388 
1389 static pthread_priority_compact_t
ipc_get_current_thread_priority(void)1390 ipc_get_current_thread_priority(void)
1391 {
1392 	thread_t thread = current_thread();
1393 	thread_qos_t qos;
1394 	int relpri;
1395 
1396 	qos = thread_get_requested_qos(thread, &relpri);
1397 	if (!qos) {
1398 		qos = thread_user_promotion_qos_for_pri(thread->base_pri);
1399 		relpri = 0;
1400 	}
1401 	return _pthread_priority_make_from_thread_qos(qos, relpri, 0);
1402 }
1403 
/*
 *	Routine:	ipc_kmsg_set_qos
 *	Purpose:
 *		Compute and stash the pthread priority (ikm_ppriority) and
 *		QoS override (ikm_qos_override) on a kmsg being sent, and
 *		link a special reply port to the destination for sync IPC.
 *	Conditions:
 *		Nothing locked.
 *	Returns:
 *		KERN_SUCCESS (always).
 */
static kern_return_t
ipc_kmsg_set_qos(
	ipc_kmsg_t kmsg,
	mach_msg_option64_t options,
	mach_msg_priority_t priority)
{
	kern_return_t kr;
	mach_msg_header_t *hdr = ikm_header(kmsg);
	ipc_port_t special_reply_port = hdr->msgh_local_port;
	ipc_port_t dest_port = hdr->msgh_remote_port;

	if ((options & MACH_SEND_OVERRIDE) &&
	    !mach_msg_priority_is_pthread_priority(priority)) {
		/* priority encodes an explicit qos/relpri/override triple */
		mach_msg_qos_t qos = mach_msg_priority_qos(priority);
		int relpri = mach_msg_priority_relpri(priority);
		mach_msg_qos_t ovr = mach_msg_priority_overide_qos(priority);

		kmsg->ikm_ppriority = _pthread_priority_make_from_thread_qos(qos, relpri, 0);
		kmsg->ikm_qos_override = MAX(qos, ovr);
	} else {
		/* else derive the priority from the voucher, if possible */
#if CONFIG_VOUCHER_DEPRECATED
		kr = ipc_get_pthpriority_from_kmsg_voucher(kmsg, &kmsg->ikm_ppriority);
#else
		kr = KERN_FAILURE;
#endif /* CONFIG_VOUCHER_DEPRECATED */
		if (kr != KERN_SUCCESS) {
			if (options & MACH_SEND_PROPAGATE_QOS) {
				/* fall back to the sending thread's priority */
				kmsg->ikm_ppriority = ipc_get_current_thread_priority();
			} else {
				kmsg->ikm_ppriority = MACH_MSG_PRIORITY_UNSPECIFIED;
			}
		}

		if (options & MACH_SEND_OVERRIDE) {
			/* here, priority carries a pthread priority override */
			mach_msg_qos_t qos = _pthread_priority_thread_qos(kmsg->ikm_ppriority);
			mach_msg_qos_t ovr = _pthread_priority_thread_qos(priority);
			kmsg->ikm_qos_override = MAX(qos, ovr);
		} else {
			kmsg->ikm_qos_override = _pthread_priority_thread_qos(kmsg->ikm_ppriority);
		}
	}

	kr = KERN_SUCCESS;

	/* sync IPC to a non-kobject port with a special reply port */
	if (IP_VALID(special_reply_port) &&
	    special_reply_port->ip_specialreply &&
	    !ip_is_kobject(dest_port) &&
	    MACH_MSGH_BITS_LOCAL(hdr->msgh_bits) == MACH_MSG_TYPE_PORT_SEND_ONCE) {
		boolean_t sync_bootstrap_checkin = !!(options & MACH_SEND_SYNC_BOOTSTRAP_CHECKIN);
		/*
		 * Link the destination port to special reply port and make sure that
		 * dest port has a send turnstile, else allocate one.
		 */
		ipc_port_link_special_reply_port(special_reply_port, dest_port, sync_bootstrap_checkin);
	}
	return kr;
}
1461 
1462 static kern_return_t
ipc_kmsg_set_qos_kernel(ipc_kmsg_t kmsg)1463 ipc_kmsg_set_qos_kernel(
1464 	ipc_kmsg_t kmsg)
1465 {
1466 	ipc_port_t dest_port = ikm_header(kmsg)->msgh_remote_port;
1467 	kmsg->ikm_qos_override = dest_port->ip_kernel_qos_override;
1468 	kmsg->ikm_ppriority = _pthread_priority_make_from_thread_qos(kmsg->ikm_qos_override, 0, 0);
1469 	return KERN_SUCCESS;
1470 }
1471 
1472 /*
1473  *	Routine:	ipc_kmsg_link_reply_context_locked
1474  *	Purpose:
1475  *		Link any required context from the sending voucher
1476  *		to the reply port. The ipc_kmsg_copyin_from_user function will
1477  *		enforce that the sender calls mach_msg in this context.
1478  *	Conditions:
1479  *		reply port is locked
1480  */
1481 static void
ipc_kmsg_link_reply_context_locked(ipc_port_t reply_port,ipc_port_t voucher_port)1482 ipc_kmsg_link_reply_context_locked(
1483 	ipc_port_t reply_port,
1484 	ipc_port_t voucher_port)
1485 {
1486 	kern_return_t __assert_only kr;
1487 	uint32_t persona_id = 0;
1488 	ipc_voucher_t voucher;
1489 
1490 	ip_mq_lock_held(reply_port);
1491 
1492 	if (!ip_active(reply_port)) {
1493 		return;
1494 	}
1495 
1496 	voucher = convert_port_to_voucher(voucher_port);
1497 
1498 	kr = bank_get_bank_ledger_thread_group_and_persona(voucher, NULL, NULL, &persona_id);
1499 	assert(kr == KERN_SUCCESS);
1500 	ipc_voucher_release(voucher);
1501 
1502 	if (persona_id == 0 || persona_id == PERSONA_ID_NONE) {
1503 		/* there was no persona context to record */
1504 		return;
1505 	}
1506 
1507 	/*
1508 	 * Set the persona_id as the context on the reply port.
1509 	 * This will force the thread that replies to have adopted a voucher
1510 	 * with a matching persona.
1511 	 */
1512 	reply_port->ip_reply_context = persona_id;
1513 
1514 	return;
1515 }
1516 
/*
 *	Routine:	ipc_kmsg_validate_reply_port_locked
 *	Purpose:
 *		Validate that a port is suitable for use as the reply
 *		port of a strict-reply send.
 *	Conditions:
 *		reply_port is locked.
 *	Returns:
 *		KERN_SUCCESS when the port qualifies,
 *		KERN_INVALID_CAPABILITY otherwise.
 */
static kern_return_t
ipc_kmsg_validate_reply_port_locked(
	ipc_port_t              reply_port,
	mach_msg_option64_t     options)
{
	ip_mq_lock_held(reply_port);

	if (!ip_active(reply_port)) {
		/*
		 * Ideally, we would enforce that the reply receive right is
		 * active, but asynchronous XPC cancellation destroys the
		 * receive right, so we just have to return success here.
		 */
		return KERN_SUCCESS;
	}

	if (options & MACH_SEND_MSG) {
		/*
		 * If the reply port is active, then it should not be
		 * in-transit, and the receive right should be in the caller's
		 * IPC space.
		 */
		if (!ip_in_space(reply_port, current_task()->itk_space)) {
			return KERN_INVALID_CAPABILITY;
		}

		/*
		 * A port used as a reply port in an RPC should have exactly 1
		 * extant send-once right which we either just made or are
		 * moving as part of the IPC.
		 */
		if (reply_port->ip_sorights != 1) {
			return KERN_INVALID_CAPABILITY;
		}
		/*
		 * XPC uses an extra send-right to keep the name of the reply
		 * right around through cancellation.  That makes it harder to
		 * enforce a particular semantic here, so for now, we say that
		 * you can have a maximum of 1 send right (in addition to your
		 * send once right). In the future, it would be great to lock
		 * this down even further.
		 */
		if (reply_port->ip_srights > 1) {
			return KERN_INVALID_CAPABILITY;
		}

		/*
		 * The sender can also specify that the receive right should
		 * be immovable. Note that this check only applies to
		 * send-only operations. Combined send/receive or rcv-only
		 * operations can specify an immovable receive right by
		 * opt-ing into guarded descriptors (MACH_RCV_GUARDED_DESC)
		 * and using the MACH_MSG_STRICT_REPLY options flag.
		 */
		if (MACH_SEND_REPLY_IS_IMMOVABLE(options)) {
			if (!reply_port->ip_immovable_receive) {
				return KERN_INVALID_CAPABILITY;
			}
		}
	}

	/*
	 * don't enforce this yet: need a better way of indicating the
	 * receiver wants this...
	 */
#if 0
	if (MACH_RCV_WITH_IMMOVABLE_REPLY(options)) {
		if (!reply_port->ip_immovable_receive) {
			return KERN_INVALID_CAPABILITY;
		}
	}
#endif /* 0  */

	return KERN_SUCCESS;
}
1592 
/*
 *	Routine:	ipc_kmsg_validate_reply_context_locked
 *	Purpose:
 *		Validate that the current thread is running in the context
 *		required by the destination port.
 *	Conditions:
 *		dest_port is locked
 *	Returns:
 *		MACH_MSG_SUCCESS on success.
 *		On error, an EXC_GUARD exception is also raised.
 *		This function *always* resets the port reply context.
 */
static mach_msg_return_t
ipc_kmsg_validate_reply_context_locked(
	mach_msg_option64_t option,
	ipc_port_t dest_port,
	ipc_voucher_t voucher,
	mach_port_name_t voucher_name)
{
	uint32_t dest_ctx = dest_port->ip_reply_context;
	/* consume the context regardless of the outcome below */
	dest_port->ip_reply_context = 0;

	if (!ip_active(dest_port)) {
		return MACH_MSG_SUCCESS;
	}

	if (voucher == IPC_VOUCHER_NULL || !MACH_PORT_VALID(voucher_name)) {
		/* a persona-scoped reply requires a voucher to compare against */
		if ((option & MACH_SEND_KERNEL) == 0) {
			mach_port_guard_exception(voucher_name, 0,
			    (MPG_FLAGS_STRICT_REPLY_INVALID_VOUCHER | dest_ctx),
			    kGUARD_EXC_STRICT_REPLY);
		}
		return MACH_SEND_INVALID_CONTEXT;
	}

	kern_return_t __assert_only kr;
	uint32_t persona_id = 0;
	kr = bank_get_bank_ledger_thread_group_and_persona(voucher, NULL, NULL, &persona_id);
	assert(kr == KERN_SUCCESS);

	if (dest_ctx != persona_id) {
		/* guard payload packs the offending persona_id with the expected context */
		if ((option & MACH_SEND_KERNEL) == 0) {
			mach_port_guard_exception(voucher_name, 0,
			    (MPG_FLAGS_STRICT_REPLY_MISMATCHED_PERSONA | ((((uint64_t)persona_id << 32) & MPG_FLAGS_STRICT_REPLY_MASK) | dest_ctx)),
			    kGUARD_EXC_STRICT_REPLY);
		}
		return MACH_SEND_INVALID_CONTEXT;
	}

	return MACH_MSG_SUCCESS;
}
1644 
1645 
/*
 * True when any of the dest/reply/voucher rights being copied in was a
 * provisional reply port.  NOTE(review): expands in scopes that declare
 * dest_type/dest_soright, reply_type/reply_soright and
 * voucher_type/voucher_soright (see ipc_kmsg_copyin_header below).
 */
#define moved_provisional_reply_ports() \
	(moved_provisional_reply_port(dest_type, dest_soright) \
	|| moved_provisional_reply_port(reply_type, reply_soright) \
	|| moved_provisional_reply_port(voucher_type, voucher_soright)) \
1650 
1651 
1652 #pragma mark ipc_kmsg copyin and inflate (from user)
1653 /*!
1654  * @defgroup IPC kmsg copyin and inflate functions
1655  * @{
1656  *
1657  * IPC kmsg inflate
1658  * ~~~~~~~~~~~~~~~~
1659  *
1660  * This is the operation that turns the user representation of a message,
1661  * into a message in kernel representation, without any rights.
1662  *
1663  * This is driven by @c ipc_kmsg_get_and_inflate_from_user() which will:
1664  * - convert the message header into kernel layout (mach_msg_header_t),
1665  * - convert the descriptors into kernel layout,
1666  * - copy the body bytes.
1667  *
1668  *
1669  * IPC (right) copyin
1670  * ~~~~~~~~~~~~~~~~~~
1671  *
1672  * This is the operation that turns the userspace port names and VM addresses
 * into actual IPC ports and vm_map_copy_t objects.
1674  *
1675  * This is done on an IPC kmsg in "kernel representation" and just replace
1676  * userspace scalar values with kernel pointers in place.
1677  *
1678  * @c ipc_kmsg_copyin_from_user() is the function that drives the entire
1679  * inflate and copyin logic, applying various filtering at each stage.
1680  */
1681 
1682 
1683 /*
1684  * Macros to help inflate descriptors in place.
1685  *
1686  * the `addr` parameters must be of type `char *` so that the compiler
1687  * must assume these addresses alias (and they do).
1688  */
1689 #define ikm_udsc_type(addr)         __IGNORE_WCASTALIGN(((const mach_msg_type_descriptor_t *)(addr))->type)
1690 #define ikm_udsc_get(dst, addr)     __IGNORE_WCASTALIGN(*(dst) = *(const typeof(*(dst)) *)(addr))
1691 #define ikm_kdsc_zero(addr, type)   ((type *)memset(addr, 0, sizeof(type)))
1692 
1693 /*
1694  *	Routine:	ipc_kmsg_copyin_header
1695  *	Purpose:
1696  *		"Copy-in" port rights in the header of a message.
1697  *		Operates atomically; if it doesn't succeed the
1698  *		message header and the space are left untouched.
1699  *		If it does succeed the remote/local port fields
1700  *		contain object pointers instead of port names,
1701  *		and the bits field is updated.  The destination port
1702  *		will be a valid port pointer.
1703  *
1704  *	Conditions:
1705  *		Nothing locked. May add MACH64_SEND_ALWAYS option.
1706  *	Returns:
1707  *		MACH_MSG_SUCCESS	Successful copyin.
1708  *		MACH_SEND_INVALID_HEADER
1709  *			Illegal value in the message header bits.
1710  *		MACH_SEND_INVALID_DEST	The space is dead.
1711  *		MACH_SEND_INVALID_DEST	Can't copyin destination port.
1712  *			(Either KERN_INVALID_NAME or KERN_INVALID_RIGHT.)
1713  *		MACH_SEND_INVALID_REPLY	Can't copyin reply port.
1714  *			(Either KERN_INVALID_NAME or KERN_INVALID_RIGHT.)
1715  */
1716 
1717 static mach_msg_return_t
ipc_kmsg_copyin_header(ipc_kmsg_t kmsg,ipc_space_t space,mach_msg_priority_t priority,mach_msg_option64_t * option64p)1718 ipc_kmsg_copyin_header(
1719 	ipc_kmsg_t              kmsg,
1720 	ipc_space_t             space,
1721 	mach_msg_priority_t     priority,
1722 	mach_msg_option64_t     *option64p)
1723 {
1724 	mach_msg_header_t *msg = ikm_header(kmsg);
1725 	mach_msg_bits_t mbits = msg->msgh_bits & MACH_MSGH_BITS_USER;
1726 	mach_port_name_t dest_name = CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port);
1727 	mach_port_name_t reply_name = CAST_MACH_PORT_TO_NAME(msg->msgh_local_port);
1728 	mach_port_name_t voucher_name = MACH_PORT_NULL;
1729 	kern_return_t kr;
1730 
1731 	mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
1732 	mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
1733 	mach_msg_type_name_t voucher_type = MACH_MSGH_BITS_VOUCHER(mbits);
1734 	ipc_object_t dest_port = IO_NULL;
1735 	ipc_object_t reply_port = IO_NULL;
1736 	ipc_port_t dest_soright = IP_NULL;
1737 	ipc_port_t dport = IP_NULL;
1738 	ipc_port_t reply_soright = IP_NULL;
1739 	ipc_port_t voucher_soright = IP_NULL;
1740 	ipc_port_t release_port = IP_NULL;
1741 	ipc_port_t voucher_port = IP_NULL;
1742 	ipc_port_t voucher_release_port = IP_NULL;
1743 	ipc_entry_t dest_entry = IE_NULL;
1744 	ipc_entry_t reply_entry = IE_NULL;
1745 	ipc_entry_t voucher_entry = IE_NULL;
1746 	ipc_object_copyin_flags_t dest_flags = IPC_OBJECT_COPYIN_FLAGS_ALLOW_REPLY_MAKE_SEND_ONCE | IPC_OBJECT_COPYIN_FLAGS_ALLOW_REPLY_MOVE_SEND_ONCE;
1747 	ipc_object_copyin_flags_t reply_flags = IPC_OBJECT_COPYIN_FLAGS_ALLOW_REPLY_MAKE_SEND_ONCE;
1748 	int reply_port_semantics_violation = 0;
1749 
1750 	int assertcnt = 0;
1751 	mach_msg_option64_t options = *option64p;
1752 #if IMPORTANCE_INHERITANCE
1753 	boolean_t needboost = FALSE;
1754 #endif /* IMPORTANCE_INHERITANCE */
1755 
1756 	if ((mbits != msg->msgh_bits) ||
1757 	    (!MACH_MSG_TYPE_PORT_ANY_SEND(dest_type)) ||
1758 	    ((reply_type == 0) ?
1759 	    (reply_name != MACH_PORT_NULL) :
1760 	    !MACH_MSG_TYPE_PORT_ANY_SEND(reply_type))) {
1761 		return MACH_SEND_INVALID_HEADER;
1762 	}
1763 
1764 	if (!MACH_PORT_VALID(dest_name)) {
1765 		return MACH_SEND_INVALID_DEST;
1766 	}
1767 
1768 	is_write_lock(space);
1769 	if (!is_active(space)) {
1770 		is_write_unlock(space);
1771 		return MACH_SEND_INVALID_DEST;
1772 	}
1773 	/* space locked and active */
1774 
1775 	/*
1776 	 *	If there is a voucher specified, make sure the disposition is
1777 	 *	valid and the entry actually refers to a voucher port.  Don't
1778 	 *	actually copy in until we validate destination and reply.
1779 	 */
1780 	if (voucher_type != MACH_MSGH_BITS_ZERO) {
1781 		voucher_name = msg->msgh_voucher_port;
1782 
1783 		if (voucher_name == MACH_PORT_DEAD ||
1784 		    (voucher_type != MACH_MSG_TYPE_MOVE_SEND &&
1785 		    voucher_type != MACH_MSG_TYPE_COPY_SEND)) {
1786 			is_write_unlock(space);
1787 			if ((options & MACH64_SEND_KERNEL) == 0) {
1788 				mach_port_guard_exception(voucher_name, 0, 0, kGUARD_EXC_SEND_INVALID_VOUCHER);
1789 			}
1790 			return MACH_SEND_INVALID_VOUCHER;
1791 		}
1792 
1793 		if (voucher_name != MACH_PORT_NULL) {
1794 			voucher_entry = ipc_entry_lookup(space, voucher_name);
1795 			if (voucher_entry == IE_NULL ||
1796 			    (voucher_entry->ie_bits & MACH_PORT_TYPE_SEND) == 0 ||
1797 			    io_kotype(voucher_entry->ie_object) != IKOT_VOUCHER) {
1798 				is_write_unlock(space);
1799 				if ((options & MACH64_SEND_KERNEL) == 0) {
1800 					mach_port_guard_exception(voucher_name, 0, 0, kGUARD_EXC_SEND_INVALID_VOUCHER);
1801 				}
1802 				return MACH_SEND_INVALID_VOUCHER;
1803 			}
1804 		} else {
1805 			voucher_type = MACH_MSG_TYPE_MOVE_SEND;
1806 		}
1807 	}
1808 
1809 	if (enforce_strict_reply && MACH_SEND_WITH_STRICT_REPLY(options) &&
1810 	    (!MACH_PORT_VALID(reply_name) ||
1811 	    ((reply_type != MACH_MSG_TYPE_MAKE_SEND_ONCE) && (reply_type != MACH_MSG_TYPE_MOVE_SEND_ONCE))
1812 	    )) {
1813 		/*
1814 		 * The caller cannot enforce a reply context with an invalid
1815 		 * reply port name, or a non-send_once reply disposition.
1816 		 */
1817 		is_write_unlock(space);
1818 		if ((options & MACH_SEND_KERNEL) == 0) {
1819 			mach_port_guard_exception(reply_name, 0,
1820 			    (MPG_FLAGS_STRICT_REPLY_INVALID_REPLY_DISP | reply_type),
1821 			    kGUARD_EXC_STRICT_REPLY);
1822 		}
1823 		return MACH_SEND_INVALID_REPLY;
1824 	}
1825 
1826 	/*
1827 	 *	Handle combinations of validating destination and reply; along
1828 	 *	with copying in destination, reply, and voucher in an atomic way.
1829 	 */
1830 
1831 	if (dest_name == voucher_name) {
1832 		/*
1833 		 *	If the destination name is the same as the voucher name,
1834 		 *	the voucher_entry must already be known.  Either that or
1835 		 *	the destination name is MACH_PORT_NULL (i.e. invalid).
1836 		 */
1837 		dest_entry = voucher_entry;
1838 		if (dest_entry == IE_NULL) {
1839 			goto invalid_dest;
1840 		}
1841 
1842 		/*
1843 		 *	Make sure a future copyin of the reply port will succeed.
1844 		 *	Once we start copying in the dest/voucher pair, we can't
1845 		 *	back out.
1846 		 */
1847 		if (MACH_PORT_VALID(reply_name)) {
1848 			assert(reply_type != 0); /* because reply_name not null */
1849 
1850 			/* It is just WRONG if dest, voucher, and reply are all the same. */
1851 			if (voucher_name == reply_name) {
1852 				goto invalid_reply;
1853 			}
1854 			reply_entry = ipc_entry_lookup(space, reply_name);
1855 			if (reply_entry == IE_NULL) {
1856 				goto invalid_reply;
1857 			}
1858 			assert(dest_entry != reply_entry); /* names are not equal */
1859 			if (!ipc_right_copyin_check_reply(space, reply_name, reply_entry, reply_type, dest_entry, &reply_port_semantics_violation)) {
1860 				goto invalid_reply;
1861 			}
1862 		}
1863 
1864 		/*
1865 		 *	Do the joint copyin of the dest disposition and
1866 		 *	voucher disposition from the one entry/port.  We
1867 		 *	already validated that the voucher copyin would
1868 		 *	succeed (above).  So, any failure in combining
1869 		 *	the copyins can be blamed on the destination.
1870 		 */
1871 		kr = ipc_right_copyin_two(space, dest_name, dest_entry,
1872 		    dest_type, voucher_type, IPC_OBJECT_COPYIN_FLAGS_NONE, IPC_OBJECT_COPYIN_FLAGS_NONE,
1873 		    &dest_port, &dest_soright, &release_port);
1874 		if (kr != KERN_SUCCESS) {
1875 			assert(kr != KERN_INVALID_CAPABILITY);
1876 			goto invalid_dest;
1877 		}
1878 		voucher_port = ip_object_to_port(dest_port);
1879 
1880 		/*
1881 		 * could not have been one of these dispositions,
1882 		 * validated the port was a true kernel voucher port above,
1883 		 * AND was successfully able to copyin both dest and voucher.
1884 		 */
1885 		assert(dest_type != MACH_MSG_TYPE_MAKE_SEND);
1886 		assert(dest_type != MACH_MSG_TYPE_MAKE_SEND_ONCE);
1887 		assert(dest_type != MACH_MSG_TYPE_MOVE_SEND_ONCE);
1888 
1889 		/*
1890 		 *	Perform the delayed reply right copyin (guaranteed success).
1891 		 */
1892 		if (reply_entry != IE_NULL) {
1893 			kr = ipc_right_copyin(space, reply_name, reply_entry,
1894 			    reply_type, IPC_OBJECT_COPYIN_FLAGS_DEADOK | reply_flags,
1895 			    &reply_port, &reply_soright,
1896 			    &release_port, &assertcnt, 0, NULL);
1897 			assert(assertcnt == 0);
1898 			assert(kr == KERN_SUCCESS);
1899 		}
1900 	} else {
1901 		if (dest_name == reply_name) {
1902 			/*
1903 			 *	Destination and reply ports are the same!
1904 			 *	This is very similar to the case where the
1905 			 *	destination and voucher ports were the same
1906 			 *	(except the reply port disposition is not
1907 			 *	previously validated).
1908 			 */
1909 			dest_entry = ipc_entry_lookup(space, dest_name);
1910 			if (dest_entry == IE_NULL) {
1911 				goto invalid_dest;
1912 			}
1913 
1914 			reply_entry = dest_entry;
1915 			assert(reply_type != 0); /* because name not null */
1916 
1917 			/*
1918 			 *	Pre-validate that the reply right can be copied in by itself.
1919 			 *  Fail if reply port is marked as immovable send.
1920 			 */
1921 			if (!ipc_right_copyin_check_reply(space, reply_name, reply_entry, reply_type, dest_entry, &reply_port_semantics_violation)) {
1922 				goto invalid_reply;
1923 			}
1924 
1925 			/*
1926 			 *	Do the joint copyin of the dest disposition and
1927 			 *	reply disposition from the one entry/port.
1928 			 */
1929 			kr = ipc_right_copyin_two(space, dest_name, dest_entry, dest_type, reply_type,
1930 			    dest_flags, reply_flags, &dest_port, &dest_soright, &release_port);
1931 			if (kr == KERN_INVALID_CAPABILITY) {
1932 				goto invalid_reply;
1933 			} else if (kr != KERN_SUCCESS) {
1934 				goto invalid_dest;
1935 			}
1936 			reply_port = dest_port;
1937 		} else {
1938 			/*
1939 			 *	Handle destination and reply independently, as
1940 			 *	they are independent entries (even if the entries
1941 			 *	refer to the same port).
1942 			 *
1943 			 *	This can be the tough case to make atomic.
1944 			 *
1945 			 *	The difficult problem is serializing with port death.
1946 			 *	The bad case is when dest_port dies after its copyin,
1947 			 *	reply_port dies before its copyin, and dest_port dies before
1948 			 *	reply_port.  Then the copyins operated as if dest_port was
1949 			 *	alive and reply_port was dead, which shouldn't have happened
1950 			 *	because they died in the other order.
1951 			 *
1952 			 *	Note that it is easy for a user task to tell if
1953 			 *	a copyin happened before or after a port died.
1954 			 *	If a port dies before copyin, a dead-name notification
1955 			 *	is generated and the dead name's urefs are incremented,
1956 			 *	and if the copyin happens first, a port-deleted
1957 			 *	notification is generated.
1958 			 *
1959 			 *	Even so, avoiding that potentially detectable race is too
1960 			 *	expensive - and no known code cares about it.  So, we just
1961 			 *	do the expedient thing and copy them in one after the other.
1962 			 */
1963 
1964 			dest_entry = ipc_entry_lookup(space, dest_name);
1965 			if (dest_entry == IE_NULL) {
1966 				goto invalid_dest;
1967 			}
1968 			assert(dest_entry != voucher_entry);
1969 
1970 			/*
1971 			 *	Make sure reply port entry is valid before dest copyin.
1972 			 */
1973 			if (MACH_PORT_VALID(reply_name)) {
1974 				if (reply_name == voucher_name) {
1975 					goto invalid_reply;
1976 				}
1977 				reply_entry = ipc_entry_lookup(space, reply_name);
1978 				if (reply_entry == IE_NULL) {
1979 					goto invalid_reply;
1980 				}
1981 				assert(dest_entry != reply_entry); /* names are not equal */
1982 				assert(reply_type != 0); /* because reply_name not null */
1983 
1984 				if (!ipc_right_copyin_check_reply(space, reply_name, reply_entry, reply_type, dest_entry, &reply_port_semantics_violation)) {
1985 					goto invalid_reply;
1986 				}
1987 			}
1988 
1989 			/*
1990 			 *	copyin the destination.
1991 			 */
1992 			kr = ipc_right_copyin(space, dest_name, dest_entry, dest_type,
1993 			    (IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND | IPC_OBJECT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE | dest_flags),
1994 			    &dest_port, &dest_soright,
1995 			    &release_port, &assertcnt, 0, NULL);
1996 			assert(assertcnt == 0);
1997 			if (kr != KERN_SUCCESS) {
1998 				goto invalid_dest;
1999 			}
2000 			assert(IO_VALID(dest_port));
2001 			assert(!IP_VALID(release_port));
2002 
2003 			/*
2004 			 *	Copyin the pre-validated reply right.
2005 			 *	It's OK if the reply right has gone dead in the meantime.
2006 			 */
2007 			if (MACH_PORT_VALID(reply_name)) {
2008 				kr = ipc_right_copyin(space, reply_name, reply_entry,
2009 				    reply_type, IPC_OBJECT_COPYIN_FLAGS_DEADOK | reply_flags,
2010 				    &reply_port, &reply_soright,
2011 				    &release_port, &assertcnt, 0, NULL);
2012 				assert(assertcnt == 0);
2013 				assert(kr == KERN_SUCCESS);
2014 			} else {
2015 				/* convert invalid name to equivalent ipc_object type */
2016 				reply_port = ip_to_object(CAST_MACH_NAME_TO_PORT(reply_name));
2017 			}
2018 		}
2019 
2020 		/*
2021 		 * Finally can copyin the voucher right now that dest and reply
2022 		 * are fully copied in (guaranteed success).
2023 		 */
2024 		if (IE_NULL != voucher_entry) {
2025 			kr = ipc_right_copyin(space, voucher_name, voucher_entry,
2026 			    voucher_type, IPC_OBJECT_COPYIN_FLAGS_NONE,
2027 			    (ipc_object_t *)&voucher_port,
2028 			    &voucher_soright,
2029 			    &voucher_release_port,
2030 			    &assertcnt, 0, NULL);
2031 			assert(assertcnt == 0);
2032 			assert(KERN_SUCCESS == kr);
2033 			assert(IP_VALID(voucher_port));
2034 			require_ip_active(voucher_port);
2035 		}
2036 	}
2037 
2038 	dest_type = ipc_object_copyin_type(dest_type);
2039 	reply_type = ipc_object_copyin_type(reply_type);
2040 
2041 	dport = ip_object_to_port(dest_port);
2042 	/*
2043 	 *	If the dest port died, or is a kobject AND its receive right belongs to kernel,
2044 	 *  allow copyin of immovable send rights in the message body (port descriptor) to
2045 	 *  succeed since those send rights are simply "moved" or "copied" into kernel.
2046 	 *
2047 	 *  See: ipc_object_copyin().
2048 	 */
2049 
2050 	ip_mq_lock(dport);
2051 
2052 #if CONFIG_SERVICE_PORT_INFO
2053 	/*
2054 	 * Service name is later used in CA telemetry in case of reply port security semantics violations.
2055 	 */
2056 	mach_service_port_info_t sp_info = NULL;
2057 	struct mach_service_port_info sp_info_filled = {};
2058 	if (ip_active(dport) && (dport->ip_service_port) && (dport->ip_splabel)) {
2059 		ipc_service_port_label_get_info((ipc_service_port_label_t)dport->ip_splabel, &sp_info_filled);
2060 		sp_info = &sp_info_filled;
2061 	}
2062 #endif /* CONFIG_SERVICE_PORT_INFO */
2063 
2064 	if (!ip_active(dport) || (ip_is_kobject(dport) &&
2065 	    ip_in_space(dport, ipc_space_kernel))) {
2066 		assert(ip_kotype(dport) != IKOT_TIMER);
2067 		kmsg->ikm_flags |= IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND;
2068 	}
2069 
2070 	/*
2071 	 * JMM - Without rdar://problem/6275821, this is the last place we can
2072 	 * re-arm the send-possible notifications.  It may trigger unexpectedly
2073 	 * early (send may NOT have failed), but better than missing.  We assure
2074 	 * we won't miss by forcing MACH_SEND_ALWAYS if we got past arming.
2075 	 */
2076 	if (((options & MACH_SEND_NOTIFY) != 0) &&
2077 	    dest_type != MACH_MSG_TYPE_PORT_SEND_ONCE &&
2078 	    dest_entry != IE_NULL && dest_entry->ie_request != IE_REQ_NONE) {
2079 		/* dport still locked from above */
2080 		if (ip_active(dport) && !ip_in_space(dport, ipc_space_kernel)) {
2081 			/* dport could be in-transit, or in an ipc space */
2082 			if (ip_full(dport)) {
2083 #if IMPORTANCE_INHERITANCE
2084 				needboost = ipc_port_request_sparm(dport, dest_name,
2085 				    dest_entry->ie_request,
2086 				    options,
2087 				    priority);
2088 				if (needboost == FALSE) {
2089 					ip_mq_unlock(dport);
2090 				}
2091 #else
2092 				ipc_port_request_sparm(dport, dest_name,
2093 				    dest_entry->ie_request,
2094 				    options,
2095 				    priority);
2096 				ip_mq_unlock(dport);
2097 #endif /* IMPORTANCE_INHERITANCE */
2098 			} else {
2099 				*option64p |= MACH64_SEND_ALWAYS;
2100 				options = *option64p;
2101 				ip_mq_unlock(dport);
2102 			}
2103 		} else {
2104 			ip_mq_unlock(dport);
2105 		}
2106 	} else {
2107 		ip_mq_unlock(dport);
2108 	}
2109 	/* dport is unlocked, unless needboost == TRUE */
2110 
2111 	is_write_unlock(space);
2112 
2113 #if IMPORTANCE_INHERITANCE
2114 	/*
2115 	 * If our request is the first boosting send-possible
2116 	 * notification this cycle, push the boost down the
2117 	 * destination port.
2118 	 */
2119 	if (needboost == TRUE) {
2120 		/* dport still locked from above */
2121 		if (ipc_port_importance_delta(dport, IPID_OPTION_SENDPOSSIBLE, 1) == FALSE) {
2122 			ip_mq_unlock(dport);
2123 		}
2124 	}
2125 #endif /* IMPORTANCE_INHERITANCE */
2126 
2127 	/* dport is unlocked */
2128 
2129 	if (dest_soright != IP_NULL) {
2130 		ipc_notify_port_deleted(dest_soright, dest_name);
2131 	}
2132 	if (reply_soright != IP_NULL) {
2133 		ipc_notify_port_deleted(reply_soright, reply_name);
2134 	}
2135 	if (voucher_soright != IP_NULL) {
2136 		ipc_notify_port_deleted(voucher_soright, voucher_name);
2137 	}
2138 
2139 	/*
2140 	 * No room to store voucher port in in-kernel msg header,
2141 	 * so we store it back in the kmsg itself. Store original voucher
2142 	 * type there as well, but set the bits to the post-copyin type.
2143 	 */
2144 	if (IP_VALID(voucher_port)) {
2145 		ipc_kmsg_set_voucher_port(kmsg, voucher_port, voucher_type);
2146 		voucher_type = MACH_MSG_TYPE_MOVE_SEND;
2147 	}
2148 
2149 	msg->msgh_bits = MACH_MSGH_BITS_SET(dest_type, reply_type, voucher_type, mbits);
2150 	msg->msgh_remote_port = ip_object_to_port(dest_port);
2151 	msg->msgh_local_port = ip_object_to_port(reply_port);
2152 
2153 	/*
2154 	 * capture the qos value(s) for the kmsg qos,
2155 	 * and apply any override before we enqueue the kmsg.
2156 	 */
2157 	ipc_kmsg_set_qos(kmsg, options, priority);
2158 
2159 	/* then sign the header and trailer as soon as possible */
2160 	ipc_kmsg_init_trailer_and_sign(kmsg, current_task());
2161 
2162 	if (release_port != IP_NULL) {
2163 		ip_release(release_port);
2164 	}
2165 
2166 	if (voucher_release_port != IP_NULL) {
2167 		ip_release(voucher_release_port);
2168 	}
2169 
2170 	if (enforce_strict_reply && MACH_SEND_WITH_STRICT_REPLY(options) &&
2171 	    IP_VALID(msg->msgh_local_port)) {
2172 		/*
2173 		 * We've already validated that the reply disposition is a
2174 		 * [make/move] send-once. Ideally, we should enforce that the
2175 		 * reply port is also not dead, but XPC asynchronous
2176 		 * cancellation can make the reply port dead before we
2177 		 * actually make it to the mach_msg send.
2178 		 *
2179 		 * Here, we ensure that if we have a non-dead reply port, then
2180 		 * the reply port's receive right should not be in-transit,
2181 		 * and should live in the caller's IPC space.
2182 		 */
2183 		ipc_port_t rport = msg->msgh_local_port;
2184 		ip_mq_lock(rport);
2185 		kr = ipc_kmsg_validate_reply_port_locked(rport, options);
2186 		ip_mq_unlock(rport);
2187 		if (kr != KERN_SUCCESS) {
2188 			/*
2189 			 * no descriptors have been copied in yet, but the
2190 			 * full header has been copied in: clean it up
2191 			 */
2192 			ipc_kmsg_clean_header(kmsg);
2193 			if ((options & MACH_SEND_KERNEL) == 0) {
2194 				mach_port_guard_exception(reply_name, 0,
2195 				    (MPG_FLAGS_STRICT_REPLY_INVALID_REPLY_PORT | kr),
2196 				    kGUARD_EXC_STRICT_REPLY);
2197 			}
2198 			return MACH_SEND_INVALID_REPLY;
2199 		}
2200 	}
2201 
2202 	if (moved_provisional_reply_ports()) {
2203 		send_prp_telemetry(msg->msgh_id);
2204 	}
2205 
2206 	if (reply_port_semantics_violation) {
2207 		/* Currently rate limiting it to sucess paths only. */
2208 		task_t task = current_task_early();
2209 		if (task && reply_port_semantics_violation == REPLY_PORT_SEMANTICS_VIOLATOR) {
2210 			task_lock(task);
2211 			if (!task_has_reply_port_telemetry(task)) {
2212 				/* Crash report rate limited to once per task per host. */
2213 				mach_port_guard_exception(reply_name, 0, 0, kGUARD_EXC_REQUIRE_REPLY_PORT_SEMANTICS);
2214 				task_set_reply_port_telemetry(task);
2215 			}
2216 			task_unlock(task);
2217 		}
2218 #if CONFIG_SERVICE_PORT_INFO
2219 		stash_reply_port_semantics_violations_telemetry(sp_info, reply_port_semantics_violation, msg->msgh_id);
2220 #else
2221 		stash_reply_port_semantics_violations_telemetry(NULL, reply_port_semantics_violation, msg->msgh_id);
2222 #endif
2223 	}
2224 	return MACH_MSG_SUCCESS;
2225 
2226 invalid_reply:
2227 	is_write_unlock(space);
2228 
2229 	if (release_port != IP_NULL) {
2230 		ip_release(release_port);
2231 	}
2232 
2233 	assert(voucher_port == IP_NULL);
2234 	assert(voucher_soright == IP_NULL);
2235 
2236 	if ((options & MACH_SEND_KERNEL) == 0) {
2237 		mach_port_guard_exception(reply_name, 0, 0, kGUARD_EXC_SEND_INVALID_REPLY);
2238 	}
2239 	return MACH_SEND_INVALID_REPLY;
2240 
2241 invalid_dest:
2242 	is_write_unlock(space);
2243 
2244 	if (release_port != IP_NULL) {
2245 		ip_release(release_port);
2246 	}
2247 
2248 	if (reply_soright != IP_NULL) {
2249 		ipc_notify_port_deleted(reply_soright, reply_name);
2250 	}
2251 
2252 	assert(voucher_port == IP_NULL);
2253 	assert(voucher_soright == IP_NULL);
2254 
2255 	return MACH_SEND_INVALID_DEST;
2256 }
2257 
2258 
2259 static mach_msg_return_t
ipc_kmsg_inflate_port_descriptor(char * kdesc_addr,const char * udesc_addr,mach_msg_send_uctx_t * send_uctx)2260 ipc_kmsg_inflate_port_descriptor(
2261 	char                   *kdesc_addr,
2262 	const char             *udesc_addr,
2263 	mach_msg_send_uctx_t   *send_uctx)
2264 {
2265 	mach_msg_user_port_descriptor_t udesc;
2266 	mach_msg_port_descriptor_t *kdesc;
2267 
2268 	ikm_udsc_get(&udesc, udesc_addr);
2269 	if (os_add_overflow(send_uctx->send_dsc_port_count, 1,
2270 	    &send_uctx->send_dsc_port_count)) {
2271 		return MACH_SEND_TOO_LARGE;
2272 	}
2273 
2274 	kdesc = ikm_kdsc_zero(kdesc_addr, mach_msg_port_descriptor_t);
2275 	kdesc->u_name      = CAST_MACH_NAME_TO_PORT(udesc.name);
2276 	kdesc->disposition = udesc.disposition;
2277 	kdesc->type        = udesc.type;
2278 	return MACH_MSG_SUCCESS;
2279 }
2280 
/*
 *	Copy in the port right named by an (already inflated) port
 *	descriptor, converting the user's port name into a port pointer
 *	with the appropriate right moved/copied into the message.
 *
 *	On failure, returns MACH_SEND_INVALID_RIGHT and, for user (non
 *	kernel-internal) sends where the name was valid but the right was
 *	wrong, raises a mach port guard exception.
 */
static mach_msg_return_t
ipc_kmsg_copyin_port_descriptor(
	mach_msg_port_descriptor_t *dsc,
	ipc_space_t             space,
	ipc_port_t              dest_port,
	ipc_kmsg_t              kmsg,
	mach_msg_option64_t     options)
{
	mach_msg_type_name_t user_disp = dsc->disposition;
	mach_port_name_t     name = CAST_MACH_PORT_TO_NAME(dsc->u_name);
	mach_msg_type_name_t result_disp;
	ipc_object_t         object;
	kern_return_t        kr;

	/* the in-message disposition after copyin (e.g. MOVE_SEND from COPY_SEND) */
	result_disp = ipc_object_copyin_type(user_disp);
	if (MACH_PORT_VALID(name)) {
		kr = ipc_object_copyin(space, name, user_disp, &object,
		    0, NULL, kmsg->ikm_flags);
		if (kr != KERN_SUCCESS) {
			if (((options & MACH_SEND_KERNEL) == 0) && (kr == KERN_INVALID_RIGHT)) {
				mach_port_guard_exception(name, 0, 0, kGUARD_EXC_SEND_INVALID_RIGHT);
			}
			return MACH_SEND_INVALID_RIGHT;
		}

		/*
		 * Moving a receive right inside a message can create a cycle
		 * of in-transit ports; flag it so the destination side can
		 * detect and destroy such messages.
		 */
		if (result_disp == MACH_MSG_TYPE_PORT_RECEIVE &&
		    ipc_port_check_circularity(ip_object_to_port(object),
		    dest_port)) {
			ikm_header(kmsg)->msgh_bits |= MACH_MSGH_BITS_CIRCULAR;
		}
		dsc->name = ip_object_to_port(object);
	} else {
		/* MACH_PORT_NULL / MACH_PORT_DEAD pass through unchanged */
		dsc->name = CAST_MACH_NAME_TO_PORT(name);
	}

	dsc->disposition = result_disp;
	return MACH_MSG_SUCCESS;
}
2319 
2320 
2321 static mach_msg_return_t
ipc_kmsg_inflate_ool_descriptor(char * kdesc_addr,const char * udesc_addr,mach_msg_send_uctx_t * send_uctx,bool isU64)2322 ipc_kmsg_inflate_ool_descriptor(
2323 	char                   *kdesc_addr,
2324 	const char             *udesc_addr,
2325 	mach_msg_send_uctx_t   *send_uctx,
2326 	bool                    isU64)
2327 {
2328 	mach_msg_ool_descriptor64_t udesc;
2329 	mach_msg_ool_descriptor_t *kdesc;
2330 
2331 	if (isU64) {
2332 		ikm_udsc_get(&udesc, udesc_addr);
2333 	} else {
2334 		mach_msg_ool_descriptor32_t udesc32;
2335 
2336 		ikm_udsc_get(&udesc32, udesc_addr);
2337 		udesc = (mach_msg_ool_descriptor64_t){
2338 			.address     = udesc32.address,
2339 			.size        = udesc32.size,
2340 			.deallocate  = udesc32.deallocate,
2341 			.copy        = udesc32.copy,
2342 			.type        = udesc32.type,
2343 		};
2344 	}
2345 
2346 	switch (udesc.copy) {
2347 	case MACH_MSG_PHYSICAL_COPY:
2348 	case MACH_MSG_VIRTUAL_COPY:
2349 		break;
2350 	default:
2351 		return MACH_SEND_INVALID_TYPE;
2352 	}
2353 
2354 	if (udesc.size > msg_ool_size_small &&
2355 	    udesc.copy == MACH_MSG_PHYSICAL_COPY &&
2356 	    !udesc.deallocate) {
2357 		vm_size_t size;
2358 
2359 		if (round_page_overflow(udesc.size, &size) ||
2360 		    os_add_overflow(send_uctx->send_dsc_vm_size, size,
2361 		    &send_uctx->send_dsc_vm_size)) {
2362 			return MACH_MSG_VM_KERNEL;
2363 		}
2364 	}
2365 
2366 	kdesc = ikm_kdsc_zero(kdesc_addr, mach_msg_ool_descriptor_t);
2367 	kdesc->u_address  = udesc.address;
2368 	kdesc->size       = udesc.size;
2369 	kdesc->deallocate = udesc.deallocate;
2370 	kdesc->copy       = udesc.copy;
2371 	kdesc->type       = udesc.type;
2372 	return MACH_MSG_SUCCESS;
2373 }
2374 
/*
 *	Copy in the out-of-line memory named by an (already inflated) OOL
 *	descriptor, replacing the user address with a vm_map_copy_t.
 *
 *	For large physical copies, *paddr is the next free address in a
 *	pre-allocated region of the kernel ipc copy map and *space_needed
 *	tracks how much of that reservation remains; both are advanced/
 *	decremented here as the region is consumed.
 */
static mach_msg_return_t
ipc_kmsg_copyin_ool_descriptor(
	mach_msg_ool_descriptor_t *dsc,
	mach_vm_address_t      *paddr,
	vm_size_t              *space_needed,
	vm_map_t                map)
{
	mach_vm_size_t length = dsc->size;
	vm_map_copy_t  copy = VM_MAP_COPY_NULL;

	if (length == 0) {
		/* nothing to do: dsc->address becomes VM_MAP_COPY_NULL */
	} else if (length > msg_ool_size_small &&
	    (dsc->copy == MACH_MSG_PHYSICAL_COPY) && !dsc->deallocate) {
		mach_vm_size_t    length_aligned = round_page(length);
		mach_vm_address_t addr = *paddr;

		/*
		 * If the request is a physical copy and the source
		 * is not being deallocated, then allocate space
		 * in the kernel's pageable ipc copy map and copy
		 * the data in.  The semantics guarantee that the
		 * data will have been physically copied before
		 * the send operation terminates.  Thus if the data
		 * is not being deallocated, we must be prepared
		 * to page if the region is sufficiently large.
		 */
		if (mach_copyin(dsc->u_address, (char *)addr, length)) {
			return MACH_SEND_INVALID_MEMORY;
		}

		/*
		 * The kernel ipc copy map is marked no_zero_fill.
		 * If the transfer is not a page multiple, we need
		 * to zero fill the balance.
		 */
		if (!page_aligned(length)) {
			bzero((char *)addr + length, length_aligned - length);
		}

		/* src_destroy=true: moves the pages out of the copy map */
		if (vm_map_copyin(ipc_kernel_copy_map, addr, length,
		    true, &copy) != KERN_SUCCESS) {
			return MACH_MSG_VM_KERNEL;
		}

		*paddr        += length_aligned;
		*space_needed -= length_aligned;
	} else {
		/*
		 * Make a vm_map_copy_t of the data.  If the
		 * data is small, this will do an optimized physical
		 * copy.  Otherwise, it will do a virtual copy.
		 *
		 * NOTE: A virtual copy is OK if the original is being
		 * deallocated, even if a physical copy was requested.
		 */
		switch (vm_map_copyin(map, dsc->u_address, length,
		    dsc->deallocate, &copy)) {
		case KERN_SUCCESS:
			break;
		case KERN_RESOURCE_SHORTAGE:
			return MACH_MSG_VM_KERNEL;
		default:
			return MACH_SEND_INVALID_MEMORY;
		}
	}

	dsc->address = copy;
	return MACH_MSG_SUCCESS;
}
2445 
2446 
2447 static mach_msg_return_t
ipc_kmsg_inflate_ool_ports_descriptor(char * kdesc_addr,const char * udesc_addr,mach_msg_send_uctx_t * send_uctx,bool isU64)2448 ipc_kmsg_inflate_ool_ports_descriptor(
2449 	char                   *kdesc_addr,
2450 	const char             *udesc_addr,
2451 	mach_msg_send_uctx_t   *send_uctx,
2452 	bool                    isU64)
2453 {
2454 	mach_msg_ool_ports_descriptor64_t udesc;
2455 	mach_msg_ool_ports_descriptor_t *kdesc;
2456 
2457 	if (isU64) {
2458 		ikm_udsc_get(&udesc, udesc_addr);
2459 	} else {
2460 		mach_msg_ool_ports_descriptor32_t udesc32;
2461 
2462 		ikm_udsc_get(&udesc32, udesc_addr);
2463 		udesc = (mach_msg_ool_ports_descriptor64_t){
2464 			.address     = udesc32.address,
2465 			.deallocate  = udesc32.deallocate,
2466 			.copy        = udesc32.copy,
2467 			.disposition = udesc32.disposition,
2468 			.type        = udesc32.type,
2469 			.count       = udesc32.count,
2470 		};
2471 	}
2472 
2473 	if (os_add_overflow(send_uctx->send_dsc_port_count, udesc.count,
2474 	    &send_uctx->send_dsc_port_count)) {
2475 		return MACH_SEND_TOO_LARGE;
2476 	}
2477 
2478 	kdesc = ikm_kdsc_zero(kdesc_addr, mach_msg_ool_ports_descriptor_t);
2479 	kdesc->u_address   = udesc.address;
2480 	kdesc->deallocate  = udesc.deallocate;
2481 	kdesc->copy        = udesc.copy;
2482 	kdesc->disposition = udesc.disposition;
2483 	kdesc->type        = udesc.type;
2484 	kdesc->count       = udesc.count;
2485 	return MACH_MSG_SUCCESS;
2486 }
2487 
/*
 *	Copy in the array of port rights named by an (already inflated)
 *	OOL ports descriptor, converting the user's array of port names
 *	into a kernel-allocated array of port pointers.
 *
 *	On any individual copyin failure, every right already copied in
 *	for this descriptor is destroyed and the array freed, so the
 *	descriptor contributes nothing to the message on error.
 */
static mach_msg_return_t
ipc_kmsg_copyin_ool_ports_descriptor(
	mach_msg_ool_ports_descriptor_t *dsc,
	vm_map_t                map,
	ipc_space_t             space,
	ipc_port_t              dest_port,
	ipc_kmsg_t              kmsg,
	mach_msg_option64_t     options)
{
	mach_msg_type_name_t user_disp = dsc->disposition;
	mach_msg_size_t      count = dsc->count;
	mach_msg_type_name_t result_disp;
	mach_port_array_t    array = NULL;
	mach_port_name_t    *names;
	mach_vm_size_t       names_size;

	result_disp = ipc_object_copyin_type(user_disp);
	names_size  = count * sizeof(mach_port_name_t);

	if (count) {
		array = mach_port_array_alloc(count, Z_WAITOK | Z_SPRAYQTN);

		/* use the end of the array to store names we will copy in */
		names = (mach_port_name_t *)(array + count) - count;

		if (mach_copyin(dsc->u_address, names, names_size)) {
			mach_port_array_free(array, count);
			return MACH_SEND_INVALID_MEMORY;
		}
	}

	/* caller asked for the user's name array to be deallocated; best effort */
	if (dsc->deallocate) {
		(void)mach_vm_deallocate(map, dsc->u_address, names_size);
	}

	/*
	 * Copy in each name in place: names[] aliases the tail of array[],
	 * and names[i] is always read before array[i].port is written.
	 */
	for (mach_msg_size_t i = 0; i < count; i++) {
		mach_port_name_t name = names[i];
		ipc_object_t     object;
		kern_return_t    kr;

		if (!MACH_PORT_VALID(name)) {
			array[i].port = CAST_MACH_NAME_TO_PORT(name);
			continue;
		}

		kr = ipc_object_copyin(space, name, user_disp, &object,
		    0, NULL, kmsg->ikm_flags);

		if (kr != KERN_SUCCESS) {
			/* roll back the rights copied in so far */
			for (mach_msg_size_t j = 0; j < i; j++) {
				object = ip_to_object(array[j].port);
				if (IPC_OBJECT_VALID(object)) {
					ipc_object_destroy(object, result_disp);
				}
			}
			mach_port_array_free(array, count);

			if (((options & MACH_SEND_KERNEL) == 0) && (kr == KERN_INVALID_RIGHT)) {
				mach_port_guard_exception(name, 0, 0, kGUARD_EXC_SEND_INVALID_RIGHT);
			}
			return MACH_SEND_INVALID_RIGHT;
		}

		/* see ipc_kmsg_copyin_port_descriptor(): in-transit cycle detection */
		if (result_disp == MACH_MSG_TYPE_PORT_RECEIVE &&
		    ipc_port_check_circularity(ip_object_to_port(object),
		    dest_port)) {
			ikm_header(kmsg)->msgh_bits |= MACH_MSGH_BITS_CIRCULAR;
		}

		array[i].port = ip_object_to_port(object);
	}

	dsc->disposition = result_disp;
	dsc->address     = array;
	return MACH_MSG_SUCCESS;
}
2564 
2565 
2566 static mach_msg_return_t
ipc_kmsg_inflate_guarded_port_descriptor(char * kdesc_addr,const char * udesc_addr,mach_msg_send_uctx_t * send_uctx,bool isU64)2567 ipc_kmsg_inflate_guarded_port_descriptor(
2568 	char                   *kdesc_addr,
2569 	const char             *udesc_addr,
2570 	mach_msg_send_uctx_t   *send_uctx,
2571 	bool                    isU64)
2572 {
2573 	mach_msg_guarded_port_descriptor64_t udesc;
2574 	mach_msg_guarded_port_descriptor_t *kdesc;
2575 
2576 	if (isU64) {
2577 		ikm_udsc_get(&udesc, udesc_addr);
2578 	} else {
2579 		mach_msg_guarded_port_descriptor32_t udesc32;
2580 
2581 		ikm_udsc_get(&udesc32, udesc_addr);
2582 		udesc = (mach_msg_guarded_port_descriptor64_t){
2583 			.context     = udesc32.context,
2584 			.flags       = udesc32.flags,
2585 			.disposition = udesc32.disposition,
2586 			.type        = udesc32.type,
2587 			.name        = udesc32.name,
2588 		};
2589 	}
2590 
2591 	if (os_add_overflow(send_uctx->send_dsc_port_count, 1,
2592 	    &send_uctx->send_dsc_port_count)) {
2593 		return MACH_SEND_TOO_LARGE;
2594 	}
2595 
2596 	/* Only MACH_MSG_TYPE_MOVE_RECEIVE is supported for now */
2597 	if (udesc.disposition != MACH_MSG_TYPE_MOVE_RECEIVE) {
2598 		return MACH_SEND_INVALID_TYPE;
2599 	}
2600 
2601 	if (!udesc.flags ||
2602 	    ((udesc.flags & ~MACH_MSG_GUARD_FLAGS_MASK) != 0) ||
2603 	    ((udesc.flags & MACH_MSG_GUARD_FLAGS_UNGUARDED_ON_SEND) && (udesc.context != 0))) {
2604 		return MACH_SEND_INVALID_TYPE;
2605 	}
2606 
2607 	kdesc = ikm_kdsc_zero(kdesc_addr, mach_msg_guarded_port_descriptor_t);
2608 	kdesc->u_context   = udesc.context;
2609 	kdesc->flags       = udesc.flags;
2610 	kdesc->disposition = udesc.disposition;
2611 	kdesc->type        = udesc.type;
2612 	kdesc->u_name      = udesc.name;
2613 	return MACH_MSG_SUCCESS;
2614 }
2615 
/*
 *	Copy in the (receive) right named by an (already inflated)
 *	guarded port descriptor, unguarding the port with the supplied
 *	context as part of the copyin when requested by the guard flags.
 *
 *	On failure, returns MACH_SEND_INVALID_RIGHT and, for user (non
 *	kernel-internal) sends where the right was wrong, raises a mach
 *	port guard exception.
 */
static mach_msg_return_t
ipc_kmsg_copyin_guarded_port_descriptor(
	mach_msg_guarded_port_descriptor_t *dsc,
	ipc_space_t             space,
	ipc_port_t              dest_port,
	ipc_kmsg_t              kmsg,
	mach_msg_option64_t     options)
{
	mach_msg_type_name_t   user_disp = dsc->disposition;
	mach_msg_guard_flags_t guard_flags = dsc->flags;
	mach_port_name_t       name = dsc->u_name;
	mach_msg_type_name_t   result_disp;
	ipc_object_t           object;
	kern_return_t          kr;

	result_disp = ipc_object_copyin_type(user_disp);
	if (MACH_PORT_VALID(name)) {
		/* guard_flags may be updated by the copyin to reflect the outcome */
		kr = ipc_object_copyin(space, name, user_disp, &object,
		    dsc->u_context, &guard_flags, kmsg->ikm_flags);
		if (kr != KERN_SUCCESS) {
			if (((options & MACH_SEND_KERNEL) == 0) && (kr == KERN_INVALID_RIGHT)) {
				mach_port_guard_exception(name, 0, 0, kGUARD_EXC_SEND_INVALID_RIGHT);
			}
			return MACH_SEND_INVALID_RIGHT;
		}

		/* see ipc_kmsg_copyin_port_descriptor(): in-transit cycle detection */
		if (result_disp == MACH_MSG_TYPE_PORT_RECEIVE &&
		    ipc_port_check_circularity(ip_object_to_port(object),
		    dest_port)) {
			ikm_header(kmsg)->msgh_bits |= MACH_MSGH_BITS_CIRCULAR;
		}
		dsc->name = ip_object_to_port(object);
	} else {
		/* MACH_PORT_NULL / MACH_PORT_DEAD pass through unchanged */
		dsc->name = CAST_MACH_NAME_TO_PORT(name);
	}

	dsc->flags       = guard_flags;
	dsc->disposition = result_disp;
	dsc->u_name      = 0;
	return MACH_MSG_SUCCESS;
}
2657 
2658 
2659 static mach_msg_return_t
ipc_kmsg_inflate_descriptor(char * kdesc,const char * udesc,mach_msg_send_uctx_t * send_uctx,bool isU64)2660 ipc_kmsg_inflate_descriptor(
2661 	char                   *kdesc,
2662 	const char             *udesc,
2663 	mach_msg_send_uctx_t   *send_uctx,
2664 	bool                    isU64)
2665 {
2666 	switch (ikm_udsc_type(udesc)) {
2667 	case MACH_MSG_PORT_DESCRIPTOR:
2668 		return ipc_kmsg_inflate_port_descriptor(kdesc, udesc, send_uctx);
2669 	case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
2670 	case MACH_MSG_OOL_DESCRIPTOR:
2671 		return ipc_kmsg_inflate_ool_descriptor(kdesc, udesc, send_uctx, isU64);
2672 	case MACH_MSG_OOL_PORTS_DESCRIPTOR:
2673 		return ipc_kmsg_inflate_ool_ports_descriptor(kdesc, udesc, send_uctx, isU64);
2674 	case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
2675 		return ipc_kmsg_inflate_guarded_port_descriptor(kdesc, udesc, send_uctx, isU64);
2676 	default:
2677 		/* verified by ipc_kmsg_measure_descriptors_from_user() */
2678 		__builtin_unreachable();
2679 	}
2680 }
2681 
/*
 *	Inflate all user descriptors in place: the buffer at `descs`
 *	initially holds desc_usize bytes of packed user descriptors and
 *	is large enough for desc_count kernel descriptors (desc_ksize).
 *
 *	Because kernel descriptors are at least as large as user ones,
 *	inflation must be careful not to overwrite user data it has not
 *	read yet; two strategies are used depending on the count.
 */
static mach_msg_return_t
ipc_kmsg_inflate_descriptors(
	char             *const descs,
	mach_msg_send_uctx_t   *send_uctx,
	bool                    isU64)
{
	const mach_msg_size_t   desc_count = send_uctx->send_dsc_count;
	const mach_msg_size_t   desc_ksize = desc_count * KERNEL_DESC_SIZE;
	const mach_msg_size_t   desc_usize = send_uctx->send_dsc_usize;
	char                   *kdesc      = descs;
	char                   *udesc      = descs;
	mach_msg_return_t       mr         = MACH_MSG_SUCCESS;

	if (__probable(desc_count <= 64)) {
		/*
		 * If there are less than 64 descriptors, then we can use
		 * the udesc_mask to know by how much to shift data,
		 * and inflate right to left.
		 */
		kdesc += desc_ksize;
		udesc += desc_usize;

		/* walk the mask MSB-first, i.e. the last descriptor first */
		for (uint64_t bit = 1ull << (desc_count - 1); bit; bit >>= 1) {
			kdesc -= KERNEL_DESC_SIZE;
			/* mask bit set: this user descriptor is the large size */
			if (send_uctx->send_dsc_mask & bit) {
				udesc -= USER_DESC_SIZE_MAX;
			} else {
				udesc -= USER_DESC_SIZE_MIN;
			}
			mr = ipc_kmsg_inflate_descriptor(kdesc, udesc,
			    send_uctx, isU64);
			if (mr != MACH_MSG_SUCCESS) {
				return mr;
			}
		}
	} else {
		/*
		 * Else, move all descriptors at the end of the buffer,
		 * and inflate them left to right.
		 */

		udesc += desc_ksize - desc_usize;
		memmove(udesc, kdesc, desc_usize);

		for (mach_msg_size_t i = 0; i < desc_count; i++) {
			mach_msg_size_t dsize;

			/* user size must be read before kdesc overwrites udesc */
			dsize = ikm_user_desc_size(ikm_udsc_type(udesc), isU64);
			mr = ipc_kmsg_inflate_descriptor(kdesc, udesc,
			    send_uctx, isU64);
			if (mr != MACH_MSG_SUCCESS) {
				return mr;
			}
			udesc += dsize;
			kdesc += KERNEL_DESC_SIZE;
		}
	}

	return MACH_MSG_SUCCESS;
}
2742 
2743 static inline bool
ipc_kmsg_user_desc_type_is_valid(mach_msg_descriptor_type_t type,mach_msg_option64_t options)2744 ipc_kmsg_user_desc_type_is_valid(
2745 	mach_msg_descriptor_type_t type,
2746 	mach_msg_option64_t        options)
2747 {
2748 	switch (type) {
2749 	case MACH_MSG_PORT_DESCRIPTOR:
2750 	case MACH_MSG_OOL_DESCRIPTOR:
2751 	case MACH_MSG_OOL_PORTS_DESCRIPTOR:
2752 		return true;
2753 	case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
2754 	case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
2755 		/*
2756 		 * only allow port and memory descriptors for kobjects and
2757 		 * driverkit.
2758 		 */
2759 		return !(options & (MACH64_SEND_KOBJECT_CALL | MACH64_SEND_DK_CALL));
2760 	default:
2761 		return false;
2762 	}
2763 }
2764 
2765 /*!
2766  * @brief
2767  * Quickly validate and measure the layout of user descriptors.
2768  *
2769  * @description
2770  * This function fills:
2771  * - the send_dsc_usize field with the size of user descriptors,
 * - the send_dsc_mask field recording, for each of the first 64
 *   descriptors, whether its size is 12 (bit is 0) or 16 (bit is 1).
2774  *
2775  * @param addr          the address of where user descriptors start.
2776  * @param size          the size of the data to parse (descriptors might
2777  *                      be less, but can't be more).
2778  * @param send_uctx     the context used for this MACH_SEND_MSG operation.
2779  * @param options       the options for this MACH_SEND_MSG operation.
2780  * @param isU64         whether the current user task is 64 bit.
2781  * @returns
2782  * - MACH_MSG_SUCCESS   if parsing was successful.
2783  * - MACH_SEND_MSG_TOO_SMALL
2784  *                      if there wasn't enough data to parse
2785  *                      send_dsc_count descriptors
2786  * - MACH_SEND_INVALID_TYPE
2787  *                      if descriptors types parsed aren't valid
2788  *                      or allowed by policy.
2789  */
2790 __result_use_check
2791 static mach_msg_return_t
ipc_kmsg_measure_descriptors_from_user(vm_address_t addr,mach_msg_size_t size,mach_msg_send_uctx_t * send_uctx,mach_msg_option64_t options,bool isU64)2792 ipc_kmsg_measure_descriptors_from_user(
2793 	vm_address_t            addr,
2794 	mach_msg_size_t         size,
2795 	mach_msg_send_uctx_t   *send_uctx,
2796 	mach_msg_option64_t     options,
2797 	bool                    isU64)
2798 {
2799 	mach_msg_size_t dcnt = send_uctx->send_dsc_count;
2800 	mach_msg_size_t dpos = 0;
2801 	uint64_t        mask = 0;
2802 	uint64_t        bit  = 1;
2803 
2804 	for (mach_msg_size_t i = 0; i < dcnt; i++, bit <<= 1) {
2805 		mach_msg_descriptor_type_t dtype;
2806 		mach_msg_size_t dsize;
2807 
2808 		if (dpos + USER_DESC_SIZE_MIN > size) {
2809 			return MACH_SEND_MSG_TOO_SMALL;
2810 		}
2811 		dtype = ikm_udsc_type(addr + dpos);
2812 		if (!ipc_kmsg_user_desc_type_is_valid(dtype, options)) {
2813 			return MACH_SEND_INVALID_TYPE;
2814 		}
2815 		dsize = ikm_user_desc_size(dtype, isU64);
2816 		if (dsize == USER_DESC_SIZE_MAX) {
2817 			mask |= bit;
2818 		}
2819 		dpos += dsize;
2820 		if (dpos > size) {
2821 			return MACH_SEND_MSG_TOO_SMALL;
2822 		}
2823 	}
2824 
2825 	send_uctx->send_dsc_usize = dpos;
2826 	send_uctx->send_dsc_mask  = mask;
2827 	return MACH_MSG_SUCCESS;
2828 }
2829 
2830 /*
2831  *	Routine:	ipc_kmsg_copyin_body
2832  *	Purpose:
2833  *		"Copy-in" port rights and out-of-line memory
2834  *		in the message body.
2835  *
2836  *		In all failure cases, the message is left holding
2837  *		no rights or memory.  However, the message buffer
2838  *		is not deallocated.  If successful, the message
2839  *		contains a valid destination port.
2840  *	Conditions:
2841  *		Nothing locked.
2842  *	Returns:
2843  *		MACH_MSG_SUCCESS	Successful copyin.
2844  *		MACH_SEND_INVALID_MEMORY	Can't grab out-of-line memory.
2845  *		MACH_SEND_INVALID_RIGHT	Can't copyin port right in body.
2846  *		MACH_SEND_INVALID_TYPE	Bad type specification.
2847  *		MACH_SEND_MSG_TOO_SMALL	Body is too small for types/data.
2848  *		MACH_SEND_INVALID_RT_OOL_SIZE OOL Buffer too large for RT
2849  *		MACH_MSG_INVALID_RT_DESCRIPTOR Dealloc and RT are incompatible
2850  *		MACH_SEND_NO_GRANT_DEST	Dest port doesn't accept ports in body
2851  */
2852 
static mach_msg_return_t
ipc_kmsg_copyin_body(
	ipc_kmsg_t              kmsg,
	mach_msg_send_uctx_t   *send_uctx,
	ipc_space_t             space,
	vm_map_t                map,
	mach_msg_option64_t     options)
{
	mach_msg_type_number_t  dsc_count = send_uctx->send_dsc_count;
	vm_size_t               psize = send_uctx->send_dsc_vm_size;
	mach_vm_address_t       paddr = 0;
	mach_msg_header_t      *hdr   = ikm_header(kmsg);
	mach_msg_kbase_t       *kbase = mach_msg_header_to_kbase(hdr);
	ipc_port_t              dest_port = hdr->msgh_remote_port;

	/* only complex messages carry descriptors in their body */
	assert(hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX);

	/*
	 * Allocate space in the pageable kernel ipc copy map for all the
	 * ool data that is to be physically copied.  Map is marked wait for
	 * space.
	 */
	if (psize) {
		kern_return_t kr;

		kr  = mach_vm_allocate_kernel(ipc_kernel_copy_map, &paddr, psize,
		    VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_IPC);
		if (kr != KERN_SUCCESS) {
			/* header rights were copied in earlier: release them */
			ipc_kmsg_clean_header(kmsg);
			return MACH_MSG_VM_KERNEL;
		}
	}

	/*
	 * Receive right of a libxpc connection port is moved as a part of kmsg's body
	 * 1. from a client to a service during connection establishment.
	 * 2. back to the client on service's death or port deallocation.
	 *
	 * Any other attempt to move this receive right is not allowed.
	 */
	kmsg->ikm_flags |= IPC_OBJECT_COPYIN_FLAGS_ALLOW_CONN_IMMOVABLE_RECEIVE;

	for (mach_msg_size_t copied_in_dscs = 0; copied_in_dscs < dsc_count; copied_in_dscs++) {
		mach_msg_kdescriptor_t *kdesc = &kbase->msgb_dsc_array[copied_in_dscs];
		mach_msg_return_t mr;

		switch (mach_msg_kdescriptor_type(kdesc)) {
		case MACH_MSG_PORT_DESCRIPTOR:
			mr = ipc_kmsg_copyin_port_descriptor(&kdesc->kdesc_port,
			    space, dest_port, kmsg, options);
			break;
		case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
		case MACH_MSG_OOL_DESCRIPTOR:
			/* paddr/psize advance as the copy-map space is consumed */
			mr = ipc_kmsg_copyin_ool_descriptor(&kdesc->kdesc_memory,
			    &paddr, &psize, map);
			break;
		case MACH_MSG_OOL_PORTS_DESCRIPTOR:
			mr = ipc_kmsg_copyin_ool_ports_descriptor(&kdesc->kdesc_port_array,
			    map, space, dest_port, kmsg, options);
			break;
		case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
			mr = ipc_kmsg_copyin_guarded_port_descriptor(&kdesc->kdesc_guarded_port,
			    space, dest_port, kmsg, options);
			break;
		default:
			/* descriptor types were validated before copyin */
			__builtin_unreachable();
		}

		if (MACH_MSG_SUCCESS != mr) {
			/* clean from start of message descriptors to copied_in_dscs */
			ipc_kmsg_clean_header(kmsg);
			ipc_kmsg_clean_descriptors(kbase->msgb_dsc_array,
			    copied_in_dscs);
			if (psize) {
				/* give back the unconsumed part of the copy map */
				kmem_free(ipc_kernel_copy_map, paddr, psize);
			}
			return mr;
		}
	}

	/* every byte reserved in the copy map must have been consumed */
	assert(psize == 0);
	return MACH_MSG_SUCCESS;
}
2936 
2937 /*
2938  *	Routine:	ipc_kmsg_get_and_inflate_from_user()
2939  *	Purpose:
2940  *		Copies in user message (and aux) to the allocated
2941  *		kernel message buffer, and expands header and descriptor
2942  *		into "kernel" format.
2943  *
2944  *	Conditions:
2945  *		msg up to sizeof(mach_msg_user_header_t) has been previously
2946  *		copied in, and number of descriptors has been made known.
2947  *
2948  *		if send_aux_size is not 0, mach_msg_validate_data_vectors()
2949  *		guarantees that aux_size must be larger than
2950  *		mach_msg_aux_header_t.
2951  */
2952 static mach_msg_return_t
ipc_kmsg_get_and_inflate_from_user(ipc_kmsg_t kmsg,mach_msg_send_uctx_t * send_uctx,mach_msg_header_t * khdr,vm_map_t map,mach_msg_option64_t options)2953 ipc_kmsg_get_and_inflate_from_user(
2954 	ipc_kmsg_t              kmsg,
2955 	mach_msg_send_uctx_t   *send_uctx,
2956 	mach_msg_header_t      *khdr,
2957 	vm_map_t                map,
2958 	mach_msg_option64_t     options)
2959 {
2960 	bool                    isU64 = (map->max_offset > VM_MAX_ADDRESS);
2961 	mach_msg_user_header_t *uhdr  = &send_uctx->send_header;
2962 	char                   *kdesc = (char *)khdr; /* where descriptors start */
2963 	char                   *kbody = NULL;         /* where the body starts   */
2964 	mach_msg_size_t         upos  = 0;            /* copyin cursor so far    */
2965 	mach_msg_size_t         usize = send_uctx->send_msg_size;
2966 	mach_msg_return_t       mr    = MACH_MSG_SUCCESS;
2967 
2968 	/*
2969 	 * Step 1: inflate the header in kernel representation
2970 	 *
2971 	 * Notable steps:
2972 	 * - the msgh_bits are normalized
2973 	 * - the msgh_size is incorrect until we measure descriptors
2974 	 */
2975 	*khdr = (mach_msg_header_t){
2976 		.msgh_bits         = uhdr->msgh_bits & MACH_MSGH_BITS_USER,
2977 		.msgh_size         = usize + USER_HEADER_SIZE_DELTA,
2978 		.msgh_remote_port  = CAST_MACH_NAME_TO_PORT(uhdr->msgh_remote_port),
2979 		.msgh_local_port   = CAST_MACH_NAME_TO_PORT(uhdr->msgh_local_port),
2980 		.msgh_voucher_port = uhdr->msgh_voucher_port,
2981 		.msgh_id           = uhdr->msgh_id,
2982 	};
2983 
2984 	if (uhdr->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
2985 		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(khdr);
2986 
2987 		kbase->msgb_dsc_count = send_uctx->send_dsc_count;
2988 		kdesc = (char *)(kbase + 1);
2989 		upos  = sizeof(mach_msg_user_base_t);
2990 	} else {
2991 		kdesc = (char *)(khdr + 1);
2992 		upos  = sizeof(mach_msg_user_header_t);
2993 	}
2994 	if (ikm_is_linear(kmsg)) {
2995 		kbody = (char *)kdesc +
2996 		    send_uctx->send_dsc_count * KERNEL_DESC_SIZE;
2997 	} else {
2998 		kbody = kmsg->ikm_udata;
2999 	}
3000 
3001 	/*
3002 	 * Step 2: inflate descriptors in kernel representation
3003 	 *
3004 	 * Notable steps:
3005 	 * - for linear messages we will copy the entire body too at once.
3006 	 * - the msgh_size will be updated for the inflated size of descriptors.
3007 	 */
3008 	if (send_uctx->send_dsc_count) {
3009 		mach_msg_size_t desc_count = send_uctx->send_dsc_count;
3010 		mach_msg_size_t desc_ksize = desc_count * KERNEL_DESC_SIZE;
3011 		mach_msg_size_t copyin_size;
3012 
3013 		/*
3014 		 * If kmsg is linear, copy in all data in the buffer.
3015 		 * Otherwise, first copyin until the end of descriptors
3016 		 * or the message, whichever comes first.
3017 		 */
3018 		if (ikm_is_linear(kmsg)) {
3019 			copyin_size = usize - upos;
3020 		} else {
3021 			copyin_size = MIN(desc_ksize, usize - upos);
3022 		}
3023 		assert((vm_offset_t)kdesc + copyin_size <= ikm_kdata_end(kmsg));
3024 
3025 		if (copyinmsg(send_uctx->send_msg_addr + upos, kdesc, copyin_size)) {
3026 			return MACH_SEND_INVALID_DATA;
3027 		}
3028 		upos += copyin_size;
3029 
3030 		/*
3031 		 * pre-validate and measure the descriptors user claims
3032 		 * to have by checking their size and type.
3033 		 */
3034 		mr = ipc_kmsg_measure_descriptors_from_user((vm_address_t)kdesc,
3035 		    copyin_size, send_uctx, options, isU64);
3036 		if (mr != MACH_MSG_SUCCESS) {
3037 			return mr;
3038 		}
3039 		khdr->msgh_size += desc_ksize - send_uctx->send_dsc_usize;
3040 
3041 		/*
3042 		 * If the descriptors user size is smaller than their
3043 		 * kernel size, we copied in some piece of body that we need to
3044 		 * relocate, and we need to inflate descriptors.
3045 		 */
3046 		if (send_uctx->send_dsc_usize != desc_ksize) {
3047 			memmove(kbody, kdesc + send_uctx->send_dsc_usize,
3048 			    copyin_size - send_uctx->send_dsc_usize);
3049 			kbody += copyin_size - send_uctx->send_dsc_usize;
3050 		}
3051 
3052 		mr = ipc_kmsg_inflate_descriptors(kdesc, send_uctx,
3053 		    map->max_offset > VM_MAX_ADDRESS);
3054 		if (mr != MACH_MSG_SUCCESS) {
3055 			return mr;
3056 		}
3057 	}
3058 
3059 	/*
3060 	 * Step 3: copy pure user data remaining.
3061 	 */
3062 	if (upos < usize &&
3063 	    copyinmsg(send_uctx->send_msg_addr + upos, kbody, usize - upos)) {
3064 		return MACH_SEND_INVALID_DATA;
3065 	}
3066 	kbody += usize - upos;
3067 
3068 	/*
3069 	 * Step 4: copy auxiliary data if any
3070 	 */
3071 	if (send_uctx->send_aux_size) {
3072 		mach_msg_aux_header_t *aux_hdr  = ikm_aux_header(kmsg);
3073 		mach_msg_size_t        aux_size = send_uctx->send_aux_size;
3074 
3075 		assert((vm_offset_t)kbody <= (vm_offset_t)aux_hdr);
3076 		assert(aux_size >= sizeof(aux_hdr[0]));
3077 
3078 		/* initialize aux data header */
3079 		aux_hdr->msgdh_size = send_uctx->send_aux_size;
3080 		aux_hdr->msgdh_reserved = 0;
3081 
3082 		/* copyin aux data after the header */
3083 		if (aux_size > sizeof(aux_hdr[0]) &&
3084 		    copyinmsg(send_uctx->send_aux_addr + sizeof(*aux_hdr),
3085 		    aux_hdr + 1, aux_size - sizeof(*aux_hdr))) {
3086 			return MACH_SEND_INVALID_DATA;
3087 		}
3088 	}
3089 
3090 	return MACH_MSG_SUCCESS;
3091 }
3092 
3093 /*
3094  *	Routine:	ipc_kmsg_copyin_from_user
3095  *	Purpose:
3096  *		"Copy-in" port rights and out-of-line memory
3097  *		in the message.
3098  *
3099  *		In all failure cases, the message is left holding
3100  *		no rights or memory.  However, the message buffer
3101  *		is not deallocated.  If successful, the message
3102  *		contains a valid destination port.
3103  *	Conditions:
3104  *		Nothing locked.
3105  *	Returns:
3106  *		MACH_MSG_SUCCESS	Successful copyin.
3107  *		MACH_SEND_INVALID_HEADER Illegal value in the message header bits.
3108  *		MACH_SEND_INVALID_DEST	Can't copyin destination port.
3109  *		MACH_SEND_INVALID_REPLY	Can't copyin reply port.
3110  *		MACH_SEND_INVALID_MEMORY	Can't grab out-of-line memory.
3111  *		MACH_SEND_INVALID_RIGHT	Can't copyin port right in body.
3112  *		MACH_SEND_INVALID_TYPE	Bad type specification.
3113  *		MACH_SEND_MSG_TOO_SMALL	Body is too small for types/data.
3114  */
3115 
3116 mach_msg_return_t
ipc_kmsg_copyin_from_user(ipc_kmsg_t kmsg,mach_msg_send_uctx_t * send_uctx,ipc_space_t space,vm_map_t map,mach_msg_priority_t priority,mach_msg_option64_t * option64p)3117 ipc_kmsg_copyin_from_user(
3118 	ipc_kmsg_t              kmsg,
3119 	mach_msg_send_uctx_t   *send_uctx,
3120 	ipc_space_t             space,
3121 	vm_map_t                map,
3122 	mach_msg_priority_t     priority,
3123 	mach_msg_option64_t    *option64p)
3124 {
3125 	mach_msg_option64_t options = *option64p;
3126 	mach_msg_header_t  *hdr = ikm_header(kmsg);
3127 	mach_msg_return_t   mr;
3128 
3129 	mr = ipc_validate_kmsg_header_schema_from_user(&send_uctx->send_header,
3130 	    send_uctx->send_dsc_count, options);
3131 	if (mr != MACH_MSG_SUCCESS) {
3132 		return mr;
3133 	}
3134 
3135 	mr = ipc_kmsg_get_and_inflate_from_user(kmsg, send_uctx,
3136 	    hdr, map, options);
3137 	if (mr != MACH_MSG_SUCCESS) {
3138 		return mr;
3139 	}
3140 
3141 	mr = ipc_validate_kmsg_schema_from_user(hdr, send_uctx, options);
3142 	if (mr != MACH_MSG_SUCCESS) {
3143 		return mr;
3144 	}
3145 
3146 	/* copyin_header may add MACH64_SEND_ALWAYS option */
3147 	mr = ipc_kmsg_copyin_header(kmsg, space, priority, option64p);
3148 	if (mr != MACH_MSG_SUCCESS) {
3149 		return mr;
3150 	}
3151 	options = *option64p;
3152 
3153 	mr = ipc_validate_kmsg_header_from_user(hdr, send_uctx, options);
3154 	if (mr != MACH_MSG_SUCCESS) {
3155 		/* no descriptors have been copied in yet */
3156 		ipc_kmsg_clean_header(kmsg);
3157 		return mr;
3158 	}
3159 
3160 	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_MSG_SEND) | DBG_FUNC_NONE,
3161 	    VM_KERNEL_ADDRPERM((uintptr_t)kmsg),
3162 	    (uintptr_t)hdr->msgh_bits,
3163 	    (uintptr_t)hdr->msgh_id,
3164 	    VM_KERNEL_ADDRPERM((uintptr_t)unsafe_convert_port_to_voucher(ipc_kmsg_get_voucher_port(kmsg))),
3165 	    0);
3166 
3167 	DEBUG_KPRINT_SYSCALL_IPC("ipc_kmsg_copyin_from_user header:\n%.8x\n%.8x\n%p\n%p\n%p\n%.8x\n",
3168 	    hdr->msgh_size,
3169 	    hdr->msgh_bits,
3170 	    hdr->msgh_remote_port,
3171 	    hdr->msgh_local_port,
3172 	    ipc_kmsg_get_voucher_port(kmsg),
3173 	    hdr->msgh_id);
3174 
3175 	if (hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
3176 		mr = ipc_kmsg_copyin_body(kmsg, send_uctx, space, map, options);
3177 	}
3178 
3179 	return mr;
3180 }
3181 
3182 /** @} */
3183 #pragma mark ipc_kmsg copyout and deflate (to user)
3184 /*!
3185  * @defgroup IPC kmsg copyout and deflate functions
3186  * @{
3187  *
3188  * IPC (right) copyout
3189  * ~~~~~~~~~~~~~~~~~~~
3190  *
3191  * This is the operation that turns kernel objects like IPC ports or
3192  * vm_map_copy_t and turns them into port names or userspace VM addresses.
3193  *
3194  * This is done on an IPC kmsg in "kernel representation" and just replace
3195  * kernel pointers with scalar values only meaningful to userspace in place.
3196  *
3197  * There are several copyout machineries that will drive this operation:
3198  * - @c ipc_kmsg_copyout() for the regular case,
 * - @c ipc_kmsg_copyout_pseudo() for pseudo-receive,
3200  * - @c ipc_kmsg_copyout_dest_to_user() for receive error cases
3201  *   where the actual message is destroyed and a minimal message
3202  *   is received instead.
3203  *
3204  * Copied out messages do not hold any "right" in the "kdata" part of the
3205  * message anymore.
3206  *
3207  *
3208  * IPC kmsg deflate
3209  * ~~~~~~~~~~~~~~~~
3210  *
3211  * This is the operation that turns a message in kernel representation,
3212  * but with rights copied out, into user representation.
3213  *
3214  * This is driven by @c ipc_kmsg_deflate() which will:
3215  * - convert the message header into user layout (mach_msg_user_header_t),
3216  * - convert the descriptors into user layout,
3217  * - generate receive time parts of the trailer and convert it to user layout.
3218  *
3219  * This operation mangles the payload of the kmsg, making most of the kmsg
3220  * functions have undefined behavior. The only valid things to do with
3221  * a deflated message is to copy the bytes back to userspace and destroy
3222  * the message with @c ipc_kmsg_free().
3223  *
3224  *
 * Note that deflation will maintain the position of the pure data bodies,
 * trailers and auxiliary data payloads. The deflation causes the header
 * and descriptors to contract by moving the start of the message rather
 * than by shortening it.
3229  *
 * As a result, it means that deflation works right-to-left (end toward start),
3231  * starting with the trailer, then descriptors and header last.
3232  * (@see @c ipc_kmsg_deflate() and @c ipc_kmsg_deflate_descriptors()).
3233  *
3234  *
3235  * IPC kmsg "put"
3236  * ~~~~~~~~~~~~~~
3237  *
 * This denotes the operation that copies the payload of an IPC kmsg into the
3239  * provided buffer, ending with the IPC kmsg being freed.
3240  *
3241  * There are two possible variants of this operation:
3242  *
3243  * - @c ipc_kmsg_put_to_kernel() which uses a kernel provided buffer,
3244  *   and performs no transformation. It is used for kernel upcall replies
3245  *   (see kernel_mach_msg_rpc()).
3246  *
3247  * - @c ipc_kmsg_put_to_user() which uses a user provided buffer.
3248  *   The message will undergo copyout and deflation before the put to user
3249  *   actually happens. This is used by the user mach_msg() receive paths.
3250  */
3251 
3252 /*!
3253  * @typedef ikm_deflate_context_t
3254  *
3255  * @brief
3256  * Data structure holding the various parameters during a deflate operation.
3257  *
3258  * @field dctx_uhdr             the pointer to the start of the user header
3259  * @field dctx_udata            the pointer to the pure data parts or NULL
3260  * @field dctx_trailer          the pointer to the trailer,
3261  *                              or NULL if doing a pseudo-receive.
3262  * @field dctx_aux_hdr          the pointer to the auxiliary data or NULL.
3263  *
3264  * @field dctx_uhdr_size        the number of bytes to copyout from dctx_uhdr.
3265  * @field dctx_udata_size       the number of bytes to copyout from dctx_udata,
3266  *                              or 0 if dctx_udata is NULL.
3267  * @field dctx_trailer_size     the size of the trailer,
3268  *                              or 0 if dctx_trailer is NULL.
3269  * @field dctx_aux_size         the size of the auxiliary data payload,
3270  *                              or 0 if dctx_aux_hdr is NULL.
3271  * @field dctx_isU64            whether the user process receiving the message
3272  *                              is 32 or 64bits.
3273  */
3274 typedef struct {
3275 	char                   *dctx_uhdr;
3276 	char                   *dctx_udata;
3277 	mach_msg_max_trailer_t *dctx_trailer;
3278 	mach_msg_aux_header_t  *dctx_aux_hdr;
3279 	mach_msg_size_t         dctx_uhdr_size;
3280 	mach_msg_size_t         dctx_udata_size;
3281 	mach_msg_size_t         dctx_trailer_size;
3282 	mach_msg_size_t         dctx_aux_size;
3283 	bool                    dctx_isU64;
3284 } ikm_deflate_context_t;
3285 
/*
 * Store *(value) so that its last byte lands exactly at udesc_end:
 * deflation writes descriptors end-aligned (see the deflate notes above
 * in this file about working from the end of the message toward its start).
 */
#define ipc_kmsg_deflate_put(udesc_end, value) \
	memcpy((udesc_end) - sizeof(*(value)), (value), sizeof(*(value)))
3288 
3289 /*
3290  *	Routine:	ipc_kmsg_copyout_header
3291  *	Purpose:
3292  *		"Copy-out" port rights in the header of a message.
3293  *		Operates atomically; if it doesn't succeed the
3294  *		message header and the space are left untouched.
3295  *		If it does succeed the remote/local port fields
3296  *		contain port names instead of object pointers,
3297  *		and the bits field is updated.
3298  *	Conditions:
3299  *		Nothing locked.
3300  *	Returns:
3301  *		MACH_MSG_SUCCESS	Copied out port rights.
3302  *		MACH_RCV_INVALID_NOTIFY
3303  *			Notify is non-null and doesn't name a receive right.
3304  *			(Either KERN_INVALID_NAME or KERN_INVALID_RIGHT.)
3305  *		MACH_RCV_HEADER_ERROR|MACH_MSG_IPC_SPACE
3306  *			The space is dead.
3307  *		MACH_RCV_HEADER_ERROR|MACH_MSG_IPC_SPACE
3308  *			No room in space for another name.
3309  *		MACH_RCV_HEADER_ERROR|MACH_MSG_IPC_KERNEL
3310  *			Couldn't allocate memory for the reply port.
3311  *		MACH_RCV_HEADER_ERROR|MACH_MSG_IPC_KERNEL
3312  *			Couldn't allocate memory for the dead-name request.
3313  */
3314 static mach_msg_return_t
ipc_kmsg_copyout_header(ipc_kmsg_t kmsg,mach_msg_header_t * msg,ipc_space_t space,mach_msg_option64_t option)3315 ipc_kmsg_copyout_header(
3316 	ipc_kmsg_t              kmsg,
3317 	mach_msg_header_t      *msg,
3318 	ipc_space_t             space,
3319 	mach_msg_option64_t     option)
3320 {
3321 	mach_msg_bits_t mbits = msg->msgh_bits;
3322 	ipc_port_t dest = msg->msgh_remote_port;
3323 
3324 	mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
3325 	mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
3326 	mach_msg_type_name_t voucher_type = MACH_MSGH_BITS_VOUCHER(mbits);
3327 	ipc_port_t reply = msg->msgh_local_port;
3328 	ipc_port_t release_reply_port = IP_NULL;
3329 	mach_port_name_t dest_name, reply_name;
3330 
3331 	ipc_port_t voucher = ipc_kmsg_get_voucher_port(kmsg);
3332 	uintptr_t voucher_addr = 0;
3333 	ipc_port_t release_voucher_port = IP_NULL;
3334 	mach_port_name_t voucher_name;
3335 
3336 	uint32_t entries_held = 0;
3337 	boolean_t need_write_lock = FALSE;
3338 	ipc_object_copyout_flags_t reply_copyout_options = IPC_OBJECT_COPYOUT_FLAGS_NONE;
3339 	kern_return_t kr;
3340 
3341 	assert(IP_VALID(dest));
3342 
3343 	/*
3344 	 * While we still hold a reference on the received-from port,
	 * process all send-possible notifications we received along with
3346 	 * the message.
3347 	 */
3348 	ipc_port_spnotify(dest);
3349 
3350 	/*
3351 	 * Reserve any potentially needed entries in the target space.
3352 	 * We'll free any unused before unlocking the space.
3353 	 */
3354 	if (IP_VALID(reply)) {
3355 		entries_held++;
3356 		need_write_lock = TRUE;
3357 	}
3358 	if (IP_VALID(voucher)) {
3359 		assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND);
3360 
3361 		if ((option & MACH_RCV_VOUCHER) != 0) {
3362 			entries_held++;
3363 		}
3364 		need_write_lock = TRUE;
3365 		voucher_addr = unsafe_convert_port_to_voucher(voucher);
3366 	}
3367 
3368 	if (need_write_lock) {
3369 handle_reply_again:
3370 		is_write_lock(space);
3371 
3372 		while (entries_held) {
3373 			if (!is_active(space)) {
3374 				is_write_unlock(space);
3375 				return MACH_RCV_HEADER_ERROR |
3376 				       MACH_MSG_IPC_SPACE;
3377 			}
3378 
3379 			kr = ipc_entries_hold(space, entries_held);
3380 			if (KERN_SUCCESS == kr) {
3381 				break;
3382 			}
3383 
3384 			kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
3385 			if (KERN_SUCCESS != kr) {
3386 				return MACH_RCV_HEADER_ERROR |
3387 				       MACH_MSG_IPC_SPACE;
3388 			}
3389 			/* space was unlocked and relocked - retry */
3390 		}
3391 
3392 		/* Handle reply port. */
3393 		if (IP_VALID(reply)) {
3394 			ipc_port_t reply_subst = IP_NULL;
3395 			ipc_entry_t entry;
3396 
3397 			ip_mq_lock_check_aligned(reply);
3398 
3399 			/* Is the reply port still active and allowed to be copied out? */
3400 			if (!ip_active(reply) ||
3401 			    !ip_label_check(space, reply, reply_type,
3402 			    &reply_copyout_options, &reply_subst)) {
3403 				/* clear the context value */
3404 				reply->ip_reply_context = 0;
3405 				ip_mq_unlock(reply);
3406 
3407 				assert(reply_subst == IP_NULL);
3408 				release_reply_port = reply;
3409 				reply = IP_DEAD;
3410 				reply_name = MACH_PORT_DEAD;
3411 				goto done_with_reply;
3412 			}
3413 
3414 			/* is the kolabel requesting a substitution */
3415 			if (reply_subst != IP_NULL) {
3416 				/*
3417 				 * port is unlocked, its right consumed
3418 				 * space is unlocked
3419 				 */
3420 				assert(reply_type == MACH_MSG_TYPE_PORT_SEND);
3421 				msg->msgh_local_port = reply = reply_subst;
3422 				goto handle_reply_again;
3423 			}
3424 
3425 
3426 			/* Is there already an entry we can use? */
3427 			if ((reply_type != MACH_MSG_TYPE_PORT_SEND_ONCE) &&
3428 			    ipc_right_reverse(space, ip_to_object(reply), &reply_name, &entry)) {
3429 				assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE);
3430 			} else {
3431 				/* claim a held entry for the reply port */
3432 				assert(entries_held > 0);
3433 				entries_held--;
3434 				ipc_entry_claim(space, ip_to_object(reply),
3435 				    &reply_name, &entry);
3436 			}
3437 
3438 			/* space and reply port are locked and active */
3439 			ip_reference(reply);         /* hold onto the reply port */
3440 
3441 			/*
3442 			 * If the receiver would like to enforce strict reply
3443 			 * semantics, and the message looks like it expects a reply,
3444 			 * and contains a voucher, then link the context in the
3445 			 * voucher with the reply port so that the next message sent
3446 			 * to the reply port must come from a thread that has a
3447 			 * matching context (voucher).
3448 			 */
3449 			if (enforce_strict_reply && MACH_RCV_WITH_STRICT_REPLY(option) && IP_VALID(voucher)) {
3450 				if (ipc_kmsg_validate_reply_port_locked(reply, option) != KERN_SUCCESS) {
3451 					/* if the receiver isn't happy with the reply port: fail the receive. */
3452 					assert(!ip_is_pinned(reply));
3453 					ipc_entry_dealloc(space, ip_to_object(reply),
3454 					    reply_name, entry);
3455 					ip_mq_unlock(reply);
3456 					is_write_unlock(space);
3457 					ip_release(reply);
3458 					return MACH_RCV_INVALID_REPLY;
3459 				}
3460 				ipc_kmsg_link_reply_context_locked(reply, voucher);
3461 			} else {
3462 				/*
3463 				 * if the receive did not choose to participate
3464 				 * in the strict reply/RPC, then don't enforce
3465 				 * anything (as this could lead to booby-trapped
3466 				 * messages that kill the server).
3467 				 */
3468 				reply->ip_reply_context = 0;
3469 			}
3470 
3471 			kr = ipc_right_copyout(space, reply_name, entry,
3472 			    reply_type, IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL,
3473 			    ip_to_object(reply));
3474 			assert(kr == KERN_SUCCESS);
3475 			/* reply port is unlocked */
3476 		} else {
3477 			reply_name = CAST_MACH_PORT_TO_NAME(reply);
3478 		}
3479 
3480 done_with_reply:
3481 
3482 		/* Handle voucher port. */
3483 		if (voucher_type != MACH_MSGH_BITS_ZERO) {
3484 			assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND);
3485 
3486 			if (!IP_VALID(voucher)) {
3487 				if ((option & MACH_RCV_VOUCHER) == 0) {
3488 					voucher_type = MACH_MSGH_BITS_ZERO;
3489 				}
3490 				voucher_name = MACH_PORT_NULL;
3491 				goto done_with_voucher;
3492 			}
3493 
3494 #if CONFIG_PREADOPT_TG
3495 			struct knote *kn = current_thread()->ith_knote;
3496 			if (kn == ITH_KNOTE_NULL || kn == ITH_KNOTE_PSEUDO) {
3497 				/*
3498 				 * We are not in this path of voucher copyout because of
3499 				 * kevent - we cannot expect a voucher preadopt happening on
3500 				 * this thread for this message later on
3501 				 */
3502 				KDBG_DEBUG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NA),
3503 				    thread_tid(current_thread()), 0, 0, 0);
3504 			}
3505 #endif
3506 
3507 			/* clear voucher from its hiding place back in the kmsg */
3508 			ipc_kmsg_clear_voucher_port(kmsg);
3509 
3510 			if ((option & MACH_RCV_VOUCHER) != 0) {
3511 				ipc_entry_t entry;
3512 
3513 				ip_mq_lock_check_aligned(voucher);
3514 
3515 				if (ipc_right_reverse(space, ip_to_object(voucher),
3516 				    &voucher_name, &entry)) {
3517 					assert(entry->ie_bits & MACH_PORT_TYPE_SEND);
3518 				} else {
3519 					assert(entries_held > 0);
3520 					entries_held--;
3521 					ipc_entry_claim(space, ip_to_object(voucher), &voucher_name, &entry);
3522 				}
3523 				/* space is locked and active */
3524 
3525 				assert(ip_kotype(voucher) == IKOT_VOUCHER);
3526 				kr = ipc_right_copyout(space, voucher_name, entry,
3527 				    MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
3528 				    NULL, NULL, ip_to_object(voucher));
3529 				/* voucher port is unlocked */
3530 			} else {
3531 				voucher_type = MACH_MSGH_BITS_ZERO;
3532 				release_voucher_port = voucher;
3533 				voucher_name = MACH_PORT_NULL;
3534 			}
3535 		} else {
3536 			voucher_name = msg->msgh_voucher_port;
3537 		}
3538 
3539 done_with_voucher:
3540 
3541 		ip_mq_lock(dest);
3542 		is_write_unlock(space);
3543 	} else {
3544 		/*
3545 		 *	No reply or voucher port!  This is an easy case.
3546 		 *
3547 		 *	We only need to check that the space is still
3548 		 *	active once we locked the destination:
3549 		 *
3550 		 *	- if the space holds a receive right for `dest`,
3551 		 *	  then holding the port lock means we can't fail
3552 		 *	  to notice if the space went dead because
3553 		 *	  the is_write_unlock() will pair with
3554 		 *	  os_atomic_barrier_before_lock_acquire() + ip_mq_lock().
3555 		 *
3556 		 *	- if this space doesn't hold a receive right
3557 		 *	  for `dest`, then `dest->ip_receiver` points
3558 		 *	  elsewhere, and ipc_object_copyout_dest() will
3559 		 *	  handle this situation, and failing to notice
		 *	  that the space was dead is acceptable.
3561 		 */
3562 
3563 		os_atomic_barrier_before_lock_acquire();
3564 		ip_mq_lock(dest);
3565 		if (!is_active(space)) {
3566 			ip_mq_unlock(dest);
3567 			return MACH_RCV_HEADER_ERROR | MACH_MSG_IPC_SPACE;
3568 		}
3569 
3570 		reply_name = CAST_MACH_PORT_TO_NAME(reply);
3571 
3572 		if (voucher_type != MACH_MSGH_BITS_ZERO) {
3573 			assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND);
3574 			if ((option & MACH_RCV_VOUCHER) == 0) {
3575 				voucher_type = MACH_MSGH_BITS_ZERO;
3576 			}
3577 			voucher_name = MACH_PORT_NULL;
3578 		} else {
3579 			voucher_name = msg->msgh_voucher_port;
3580 		}
3581 	}
3582 
3583 	/*
3584 	 *	At this point, the space is unlocked and the destination
3585 	 *	port is locked.
3586 	 *	reply_name is taken care of; we still need dest_name.
3587 	 *	We still hold a ref for reply (if it is valid).
3588 	 *
3589 	 *	If the space holds receive rights for the destination,
3590 	 *	we return its name for the right.  Otherwise the task
3591 	 *	managed to destroy or give away the receive right between
3592 	 *	receiving the message and this copyout.  If the destination
3593 	 *	is dead, return MACH_PORT_DEAD, and if the receive right
3594 	 *	exists somewhere else (another space, in transit)
3595 	 *	return MACH_PORT_NULL.
3596 	 *
3597 	 *	Making this copyout operation atomic with the previous
3598 	 *	copyout of the reply port is a bit tricky.  If there was
3599 	 *	no real reply port (it wasn't IP_VALID) then this isn't
3600 	 *	an issue.  If the reply port was dead at copyout time,
3601 	 *	then we are OK, because if dest is dead we serialize
3602 	 *	after the death of both ports and if dest is alive
3603 	 *	we serialize after reply died but before dest's (later) death.
3604 	 *	So assume reply was alive when we copied it out.  If dest
3605 	 *	is alive, then we are OK because we serialize before
3606 	 *	the ports' deaths.  So assume dest is dead when we look at it.
3607 	 *	If reply dies/died after dest, then we are OK because
3608 	 *	we serialize after dest died but before reply dies.
3609 	 *	So the hard case is when reply is alive at copyout,
3610 	 *	dest is dead at copyout, and reply died before dest died.
3611 	 *	In this case pretend that dest is still alive, so
3612 	 *	we serialize while both ports are alive.
3613 	 *
3614 	 *	Because the space lock is held across the copyout of reply
3615 	 *	and locking dest, the receive right for dest can't move
3616 	 *	in or out of the space while the copyouts happen, so
3617 	 *	that isn't an atomicity problem.  In the last hard case
3618 	 *	above, this implies that when dest is dead that the
3619 	 *	space couldn't have had receive rights for dest at
3620 	 *	the time reply was copied-out, so when we pretend
3621 	 *	that dest is still alive, we can return MACH_PORT_NULL.
3622 	 *
3623 	 *	If dest == reply, then we have to make it look like
3624 	 *	either both copyouts happened before the port died,
3625 	 *	or both happened after the port died.  This special
3626 	 *	case works naturally if the timestamp comparison
3627 	 *	is done correctly.
3628 	 */
3629 
3630 	if (ip_active(dest)) {
3631 		ipc_object_copyout_dest(space, ip_to_object(dest),
3632 		    dest_type, &dest_name);
3633 		/* dest is unlocked */
3634 	} else {
3635 		ipc_port_timestamp_t timestamp;
3636 
3637 		timestamp = ip_get_death_time(dest);
3638 		ip_mq_unlock(dest);
3639 		ip_release(dest);
3640 
3641 		if (IP_VALID(reply)) {
3642 			ip_mq_lock(reply);
3643 			if (ip_active(reply) ||
3644 			    IP_TIMESTAMP_ORDER(timestamp,
3645 			    ip_get_death_time(reply))) {
3646 				dest_name = MACH_PORT_DEAD;
3647 			} else {
3648 				dest_name = MACH_PORT_NULL;
3649 			}
3650 			ip_mq_unlock(reply);
3651 		} else {
3652 			dest_name = MACH_PORT_DEAD;
3653 		}
3654 	}
3655 
3656 	if (IP_VALID(reply)) {
3657 		ip_release(reply);
3658 	}
3659 
3660 	if (IP_VALID(release_reply_port)) {
3661 		if (reply_type == MACH_MSG_TYPE_PORT_SEND_ONCE) {
3662 			ipc_port_release_sonce(release_reply_port);
3663 		} else {
3664 			ipc_port_release_send(release_reply_port);
3665 		}
3666 	}
3667 
3668 	if ((option & MACH_RCV_VOUCHER) != 0) {
3669 		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_MSG_RECV) | DBG_FUNC_NONE,
3670 		    VM_KERNEL_ADDRPERM((uintptr_t)kmsg),
3671 		    (uintptr_t)msg->msgh_bits,
3672 		    (uintptr_t)msg->msgh_id,
3673 		    VM_KERNEL_ADDRPERM(voucher_addr), 0);
3674 	} else {
3675 		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_MSG_RECV_VOUCHER_REFUSED) | DBG_FUNC_NONE,
3676 		    VM_KERNEL_ADDRPERM((uintptr_t)kmsg),
3677 		    (uintptr_t)msg->msgh_bits,
3678 		    (uintptr_t)msg->msgh_id,
3679 		    VM_KERNEL_ADDRPERM(voucher_addr), 0);
3680 	}
3681 
3682 	if (IP_VALID(release_voucher_port)) {
3683 		ipc_port_release_send(release_voucher_port);
3684 	}
3685 
3686 	msg->msgh_bits = MACH_MSGH_BITS_SET(reply_type, dest_type,
3687 	    voucher_type, mbits);
3688 	msg->msgh_local_port = CAST_MACH_NAME_TO_PORT(dest_name);
3689 	msg->msgh_remote_port = CAST_MACH_NAME_TO_PORT(reply_name);
3690 	msg->msgh_voucher_port = voucher_name;
3691 
3692 	return MACH_MSG_SUCCESS;
3693 }
3694 
3695 /*
3696  *	Routine:	ipc_kmsg_copyout_object
3697  *	Purpose:
3698  *		Copy-out a port right.  Always returns a name,
3699  *		even for unsuccessful return codes.  Always
3700  *		consumes the supplied object.
3701  *	Conditions:
3702  *		Nothing locked.
3703  *	Returns:
3704  *		MACH_MSG_SUCCESS	The space acquired the right
3705  *			(name is valid) or the object is dead (MACH_PORT_DEAD).
3706  *		MACH_MSG_IPC_SPACE	No room in space for the right,
3707  *			or the space is dead.  (Name is MACH_PORT_NULL.)
3708  *		MACH_MSG_IPC_KERNEL	Kernel resource shortage.
3709  *			(Name is MACH_PORT_NULL.)
3710  */
3711 static mach_msg_return_t
ipc_kmsg_copyout_object(ipc_space_t space,ipc_object_t object,mach_msg_type_name_t msgt_name,mach_port_context_t * context,mach_msg_guard_flags_t * guard_flags,mach_port_name_t * namep)3712 ipc_kmsg_copyout_object(
3713 	ipc_space_t             space,
3714 	ipc_object_t            object,
3715 	mach_msg_type_name_t    msgt_name,
3716 	mach_port_context_t     *context,
3717 	mach_msg_guard_flags_t  *guard_flags,
3718 	mach_port_name_t        *namep)
3719 {
3720 	kern_return_t kr;
3721 
3722 	if (!IO_VALID(object)) {
3723 		*namep = CAST_MACH_PORT_TO_NAME(object);
3724 		return MACH_MSG_SUCCESS;
3725 	}
3726 
3727 	kr = ipc_object_copyout(space, object, msgt_name, IPC_OBJECT_COPYOUT_FLAGS_NONE,
3728 	    context, guard_flags, namep);
3729 	if (kr != KERN_SUCCESS) {
3730 		if (kr == KERN_INVALID_CAPABILITY) {
3731 			*namep = MACH_PORT_DEAD;
3732 		} else {
3733 			*namep = MACH_PORT_NULL;
3734 
3735 			if (kr == KERN_RESOURCE_SHORTAGE) {
3736 				return MACH_MSG_IPC_KERNEL;
3737 			} else {
3738 				return MACH_MSG_IPC_SPACE;
3739 			}
3740 		}
3741 	}
3742 
3743 	return MACH_MSG_SUCCESS;
3744 }
3745 
3746 /*
3747  *	Routine:	ipc_kmsg_copyout_reply_object
3748  *	Purpose:
3749  *      Kernel swallows the send-once right associated with reply port.
3750  *      Always returns a name, even for unsuccessful return codes.
3751  *      Returns
3752  *          MACH_MSG_SUCCESS Returns name of receive right for reply port.
3753  *              Name is valid if the space acquired the right and msgt_name would be changed from MOVE_SO to MAKE_SO.
3754  *              Name is MACH_PORT_DEAD if the object is dead.
3755  *              Name is MACH_PORT_NULL if its entry could not be found in task's ipc space.
3756  *          MACH_MSG_IPC_SPACE
3757  *              The space is dead.  (Name is MACH_PORT_NULL.)
3758  *	Conditions:
3759  *      Nothing locked.
3760  */
static mach_msg_return_t
ipc_kmsg_copyout_reply_object(
	ipc_space_t             space,
	ipc_object_t            object,
	mach_msg_type_name_t    *msgt_name,
	mach_port_name_t        *namep)
{
	ipc_port_t port;
	ipc_entry_t entry;
	kern_return_t kr;

	/* null/dead objects copy out as their encoded pseudo-name */
	if (!IO_VALID(object)) {
		*namep = CAST_MACH_PORT_TO_NAME(object);
		return MACH_MSG_SUCCESS;
	}

	port = ip_object_to_port(object);

	/* only a reply port carrying a send-once right takes this path */
	assert(ip_is_reply_port(port));
	assert(*msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE);

	/* lock order: space write lock first, then the port lock below */
	is_write_lock(space);

	if (!is_active(space)) {
		/* space died: still consume the send-once right */
		ipc_port_release_sonce(port);
		is_write_unlock(space);
		*namep = MACH_PORT_NULL;
		return MACH_MSG_IPC_SPACE;
	}

	ip_mq_lock(port);

	if (!ip_active(port)) {
		*namep = MACH_PORT_DEAD;
		kr = MACH_MSG_SUCCESS;
		goto out;
	}

	/* space is locked and active. object is locked and active. */
	if (!ipc_right_reverse(space, object, namep, &entry)) {
		/* the space holds no entry for this port */
		*namep = MACH_PORT_NULL;
		kr = MACH_MSG_SUCCESS;
		goto out;
	}

	/* ipc_right_reverse found the receive right; its name is in *namep */
	assert(entry->ie_bits & MACH_PORT_TYPE_RECEIVE);

	/*
	 * Rewrite the disposition from MOVE to MAKE send-once: the kernel
	 * swallows the send-once right instead of returning it.
	 */
	*msgt_name = MACH_MSG_TYPE_MAKE_SEND_ONCE;
	ipc_port_release_sonce_and_unlock(port);
	/* object is unlocked. */

	is_write_unlock(space);

	return MACH_MSG_SUCCESS;

out:

	/* space and object are locked. */
	ipc_port_release_sonce_and_unlock(port);

	is_write_unlock(space);

	return kr;
}
3825 
3826 
3827 static mach_msg_return_t
ipc_kmsg_copyout_port_descriptor(mach_msg_port_descriptor_t * dsc,ipc_space_t space)3828 ipc_kmsg_copyout_port_descriptor(
3829 	mach_msg_port_descriptor_t *dsc,
3830 	ipc_space_t             space)
3831 {
3832 	mach_port_name_t  name;
3833 	mach_msg_return_t mr;
3834 
3835 	/* Copyout port right carried in the message */
3836 	mr = ipc_kmsg_copyout_object(space,
3837 	    ip_to_object(dsc->name), dsc->disposition, NULL, NULL, &name);
3838 	dsc->u_name = CAST_MACH_NAME_TO_PORT(name);
3839 	return mr;
3840 }
3841 
3842 static char *
ipc_kmsg_deflate_port_descriptor(char * udesc_end,const mach_msg_port_descriptor_t * kdesc)3843 ipc_kmsg_deflate_port_descriptor(
3844 	char                   *udesc_end,
3845 	const mach_msg_port_descriptor_t *kdesc)
3846 {
3847 	mach_msg_user_port_descriptor_t udesc = {
3848 		.name        = CAST_MACH_PORT_TO_NAME(kdesc->u_name),
3849 		.disposition = kdesc->disposition,
3850 		.type        = kdesc->type,
3851 	};
3852 
3853 	return ipc_kmsg_deflate_put(udesc_end, &udesc);
3854 }
3855 #if 0 /* done to avoid merge conflicts, will be cleaned up with RDAR_91262248 */
3856 }
3857 
3858 extern char *proc_best_name(struct proc *proc);
3859 static mach_msg_descriptor_t *
3860 
3861 #endif
/*
 *	Routine:	ipc_kmsg_copyout_ool_descriptor
 *	Purpose:
 *		Copy-out the out-of-line memory region carried by one
 *		OOL descriptor into the receiver's map, and rewrite the
 *		descriptor with the landing address and size.
 *	Conditions:
 *		Nothing locked.
 *	Returns:
 *		MACH_MSG_SUCCESS, MACH_MSG_VM_KERNEL or MACH_MSG_VM_SPACE.
 */
static mach_msg_return_t
ipc_kmsg_copyout_ool_descriptor(
	mach_msg_ool_descriptor_t  *dsc,
	vm_map_t                    map)
{
	vm_map_copy_t               copy = dsc->address;
	vm_map_size_t               size = dsc->size;
	vm_map_address_t            rcv_addr;
	boolean_t                   misaligned = FALSE;
	mach_msg_return_t           mr  = MACH_MSG_SUCCESS;

	if (copy != VM_MAP_COPY_NULL) {
		kern_return_t kr;

		rcv_addr = 0;
		/* a descriptor/copy-object size mismatch is a kernel bug */
		if (vm_map_copy_validate_size(map, copy, &size) == FALSE) {
			panic("Inconsistent OOL/copyout size on %p: expected %d, got %lld @%p",
			    dsc, dsc->size, (unsigned long long)copy->size, copy);
		}

		/*
		 * An entry-list copy whose offset or size is not page
		 * aligned (in the receiver's page size) cannot be mapped
		 * in directly and needs the slow, copying path below.
		 */
		if ((copy->type == VM_MAP_COPY_ENTRY_LIST) &&
		    (trunc_page(copy->offset) != copy->offset ||
		    round_page(dsc->size) != dsc->size)) {
			misaligned = TRUE;
		}

		if (misaligned) {
			mach_vm_offset_t rounded_addr;
			vm_map_size_t   rounded_size;
			vm_map_offset_t effective_page_mask, effective_page_size;

			effective_page_mask = VM_MAP_PAGE_MASK(map);
			effective_page_size = effective_page_mask + 1;

			/* cover the copy with whole receiver-sized pages */
			rounded_size = vm_map_round_page(copy->offset + size, effective_page_mask) - vm_map_trunc_page(copy->offset, effective_page_mask);

			kr = mach_vm_allocate_kernel(map, &rounded_addr,
			    rounded_size, VM_FLAGS_ANYWHERE, VM_MEMORY_MACH_MSG);

			if (kr == KERN_SUCCESS) {
				/*
				 * vm_map_copy_overwrite does a full copy
				 * if size is too small to optimize.
				 * So we tried skipping the offset adjustment
				 * if we fail the 'size' test.
				 *
				 * if (size >= VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size)
				 *
				 * This resulted in leaked memory especially on the
				 * older watches (16k user - 4k kernel) because we
				 * would do a physical copy into the start of this
				 * rounded range but could leak part of it
				 * on deallocation if the 'size' being deallocated
				 * does not cover the full range. So instead we do
				 * the misalignment adjustment always so that on
				 * deallocation we will remove the full range.
				 */
				if ((rounded_addr & effective_page_mask) !=
				    (copy->offset & effective_page_mask)) {
					/*
					 * Need similar mis-alignment of source and destination...
					 */
					rounded_addr += (copy->offset & effective_page_mask);

					assert((rounded_addr & effective_page_mask) == (copy->offset & effective_page_mask));
				}
				rcv_addr = rounded_addr;

				kr = vm_map_copy_overwrite(map, rcv_addr, copy, size, FALSE);
			}
		} else {
			/* fast path: donate the copy object's pages to the map */
			kr = vm_map_copyout_size(map, &rcv_addr, copy, size);
		}
		if (kr != KERN_SUCCESS) {
			if (kr == KERN_RESOURCE_SHORTAGE) {
				mr = MACH_MSG_VM_KERNEL;
			} else {
				mr = MACH_MSG_VM_SPACE;
			}
			/* on failure, the copy object is consumed here */
			vm_map_copy_discard(copy);
			rcv_addr = 0;
			size = 0;
		}
	} else {
		rcv_addr = 0;
		size = 0;
	}

	/* report the landing address (0 on failure/empty) to userspace */
	dsc->u_address = rcv_addr;
	dsc->size      = size;
	return mr;
}
3954 
3955 static char *
ipc_kmsg_deflate_memory_descriptor(char * udesc_end,const mach_msg_ool_descriptor_t * kdesc,bool isU64)3956 ipc_kmsg_deflate_memory_descriptor(
3957 	char                   *udesc_end,
3958 	const mach_msg_ool_descriptor_t *kdesc,
3959 	bool                    isU64)
3960 {
3961 	bool deallocate = (kdesc->copy == MACH_MSG_VIRTUAL_COPY);
3962 
3963 	if (isU64) {
3964 		mach_msg_ool_descriptor64_t udesc = {
3965 			.address     = kdesc->u_address,
3966 			.size        = kdesc->size,
3967 			.deallocate  = deallocate,
3968 			.copy        = kdesc->copy,
3969 			.type        = kdesc->type,
3970 		};
3971 
3972 		return ipc_kmsg_deflate_put(udesc_end, &udesc);
3973 	} else {
3974 		mach_msg_ool_descriptor32_t udesc = {
3975 			.address     = (uint32_t)kdesc->u_address,
3976 			.size        = kdesc->size,
3977 			.deallocate  = deallocate,
3978 			.copy        = kdesc->copy,
3979 			.type        = kdesc->type,
3980 		};
3981 
3982 		return ipc_kmsg_deflate_put(udesc_end, &udesc);
3983 	}
3984 }
3985 
3986 
3987 static mach_msg_return_t
ipc_kmsg_copyout_ool_ports_descriptor(mach_msg_kdescriptor_t * kdesc,vm_map_t map,ipc_space_t space)3988 ipc_kmsg_copyout_ool_ports_descriptor(
3989 	mach_msg_kdescriptor_t *kdesc,
3990 	vm_map_t                map,
3991 	ipc_space_t             space)
3992 {
3993 	mach_msg_ool_ports_descriptor_t *dsc = &kdesc->kdesc_port_array;
3994 	mach_msg_type_name_t    disp  = dsc->disposition;
3995 	mach_msg_type_number_t  count = dsc->count;
3996 	mach_port_array_t       array = dsc->address;
3997 	mach_port_name_t       *names = dsc->address;
3998 
3999 	vm_size_t               names_length = count * sizeof(mach_port_name_t);
4000 	mach_vm_offset_t        rcv_addr = 0;
4001 	mach_msg_return_t       mr = MACH_MSG_SUCCESS;
4002 
4003 	if (count != 0 && array != NULL) {
4004 		kern_return_t kr;
4005 		vm_tag_t tag;
4006 
4007 		/*
4008 		 * Dynamically allocate the region
4009 		 */
4010 		if (vm_kernel_map_is_kernel(map)) {
4011 			tag = VM_KERN_MEMORY_IPC;
4012 		} else {
4013 			tag = VM_MEMORY_MACH_MSG;
4014 		}
4015 
4016 		kr = mach_vm_allocate_kernel(map, &rcv_addr, names_length,
4017 		    VM_FLAGS_ANYWHERE, tag);
4018 
4019 		/*
4020 		 * Handle the port rights and copy out the names
4021 		 * for those rights out to user-space.
4022 		 */
4023 		if (kr == MACH_MSG_SUCCESS) {
4024 			for (mach_msg_size_t i = 0; i < count; i++) {
4025 				ipc_object_t object = ip_to_object(array[i].port);
4026 
4027 				mr |= ipc_kmsg_copyout_object(space, object,
4028 				    disp, NULL, NULL, &names[i]);
4029 			}
4030 			if (copyoutmap(map, names, rcv_addr, names_length)) {
4031 				mr |= MACH_MSG_VM_SPACE;
4032 			}
4033 			mach_port_array_free(array, count);
4034 		} else {
4035 			ipc_kmsg_clean_descriptors(kdesc, 1);
4036 			if (kr == KERN_RESOURCE_SHORTAGE) {
4037 				mr = MACH_MSG_VM_KERNEL;
4038 			} else {
4039 				mr = MACH_MSG_VM_SPACE;
4040 			}
4041 			rcv_addr = 0;
4042 		}
4043 	}
4044 
4045 	dsc->u_address = rcv_addr;
4046 	return mr;
4047 }
4048 
4049 static char *
ipc_kmsg_deflate_port_array_descriptor(char * udesc_end,const mach_msg_ool_ports_descriptor_t * kdesc,bool isU64)4050 ipc_kmsg_deflate_port_array_descriptor(
4051 	char                   *udesc_end,
4052 	const mach_msg_ool_ports_descriptor_t *kdesc,
4053 	bool                    isU64)
4054 {
4055 	if (isU64) {
4056 		mach_msg_ool_ports_descriptor64_t udesc = {
4057 			.address     = kdesc->u_address,
4058 			.count       = kdesc->count,
4059 			.deallocate  = true,
4060 			.copy        = MACH_MSG_VIRTUAL_COPY,
4061 			.disposition = kdesc->disposition,
4062 			.type        = kdesc->type,
4063 		};
4064 
4065 		return ipc_kmsg_deflate_put(udesc_end, &udesc);
4066 	} else {
4067 		mach_msg_ool_ports_descriptor32_t udesc = {
4068 			.address     = (uint32_t)kdesc->u_address,
4069 			.count       = kdesc->count,
4070 			.deallocate  = true,
4071 			.copy        = MACH_MSG_VIRTUAL_COPY,
4072 			.disposition = kdesc->disposition,
4073 			.type        = kdesc->type,
4074 		};
4075 
4076 		return ipc_kmsg_deflate_put(udesc_end, &udesc);
4077 	}
4078 }
4079 
4080 
/*
 * Copy out the port right carried by a guarded port descriptor.
 * If the receiver did not opt into guarded descriptors with
 * MACH_RCV_GUARDED_DESC, the right is destroyed instead (unless
 * this is a pseudo-receive, which always copies out).
 */
static mach_msg_return_t
ipc_kmsg_copyout_guarded_port_descriptor(
	mach_msg_guarded_port_descriptor_t *dsc,
	ipc_space_t             space,
	mach_msg_option64_t     option)
{
	mach_port_t             port    = dsc->name;
	mach_msg_type_name_t    disp    = dsc->disposition;
	mach_msg_guard_flags_t  flags   = dsc->flags;
	mach_port_name_t        name    = MACH_PORT_NULL;
	mach_msg_return_t       mr      = MACH_MSG_SUCCESS;
	mach_port_context_t     context = 0;

	/* Currently kernel_task doesn't support receiving guarded port descriptors */
	struct knote *kn = current_thread()->ith_knote;
	if ((kn != ITH_KNOTE_PSEUDO) && ((option & MACH_RCV_GUARDED_DESC) == 0)) {
#if DEVELOPMENT || DEBUG
		/*
		 * Simulated crash needed for debugging, notifies the receiver to opt into receiving
		 * guarded descriptors.
		 */
		mach_port_guard_exception(current_thread()->ith_receiver_name,
		    0, 0, kGUARD_EXC_RCV_GUARDED_DESC);
#endif
		KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_DESTROY_GUARDED_DESC),
		    current_thread()->ith_receiver_name,
		    VM_KERNEL_ADDRPERM(port), disp, flags);

		/* receiver did not opt in: the right is consumed, name stays NULL */
		ipc_object_destroy(ip_to_object(port), disp);
	} else {
		/* copyout fills in the name, and may update context/flags */
		mr = ipc_kmsg_copyout_object(space,
		    ip_to_object(port), disp, &context, &flags, &name);
	}

	dsc->u_name    = name;
	dsc->u_context = context;
	dsc->flags     = flags;
	return mr;
}
4120 
4121 static char *
ipc_kmsg_deflate_guarded_port_descriptor(char * udesc_end,const mach_msg_guarded_port_descriptor_t * kdesc,bool isU64)4122 ipc_kmsg_deflate_guarded_port_descriptor(
4123 	char                   *udesc_end,
4124 	const mach_msg_guarded_port_descriptor_t *kdesc,
4125 	bool                    isU64)
4126 {
4127 	if (isU64) {
4128 		mach_msg_guarded_port_descriptor64_t udesc = {
4129 			.context     = kdesc->u_context,
4130 			.flags       = kdesc->flags,
4131 			.disposition = kdesc->disposition,
4132 			.type        = kdesc->type,
4133 			.name        = kdesc->u_name,
4134 		};
4135 
4136 		return ipc_kmsg_deflate_put(udesc_end, &udesc);
4137 	} else {
4138 		mach_msg_guarded_port_descriptor32_t udesc = {
4139 			.context     = (uint32_t)kdesc->u_context,
4140 			.flags       = kdesc->flags,
4141 			.disposition = kdesc->disposition,
4142 			.type        = kdesc->type,
4143 			.name        = kdesc->u_name,
4144 		};
4145 
4146 		return ipc_kmsg_deflate_put(udesc_end, &udesc);
4147 	}
4148 }
4149 
4150 
4151 /*
4152  *	Routine:	ipc_kmsg_copyout_descriptors
4153  *	Purpose:
4154  *		"Copy-out" port rights and out-of-line memory
4155  *		in the body of a message.
4156  *
4157  *		The error codes are a combination of special bits.
4158  *		The copyout proceeds despite errors.
4159  *	Conditions:
4160  *		Nothing locked.
4161  *	Returns:
4162  *		MACH_MSG_SUCCESS	Successful copyout.
4163  *		MACH_MSG_IPC_SPACE	No room for port right in name space.
4164  *		MACH_MSG_VM_SPACE	No room for memory in address space.
4165  *		MACH_MSG_IPC_KERNEL	Resource shortage handling port right.
4166  *		MACH_MSG_VM_KERNEL	Resource shortage handling memory.
4167  *		MACH_MSG_INVALID_RT_DESCRIPTOR Descriptor incompatible with RT
4168  */
4169 
4170 static mach_msg_return_t
ipc_kmsg_copyout_descriptors(mach_msg_kdescriptor_t * kdesc,mach_msg_size_t dsc_count,ipc_space_t space,vm_map_t map,mach_msg_option64_t option)4171 ipc_kmsg_copyout_descriptors(
4172 	mach_msg_kdescriptor_t *kdesc,
4173 	mach_msg_size_t         dsc_count,
4174 	ipc_space_t             space,
4175 	vm_map_t                map,
4176 	mach_msg_option64_t     option)
4177 {
4178 	mach_msg_return_t mr = MACH_MSG_SUCCESS;
4179 
4180 	assert(current_task() != kernel_task);
4181 
4182 	for (mach_msg_size_t i = 0; i < dsc_count; i++, kdesc++) {
4183 		switch (mach_msg_kdescriptor_type(kdesc)) {
4184 		case MACH_MSG_PORT_DESCRIPTOR:
4185 			mr |= ipc_kmsg_copyout_port_descriptor(&kdesc->kdesc_port,
4186 			    space);
4187 			break;
4188 		case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
4189 		case MACH_MSG_OOL_DESCRIPTOR:
4190 			mr |= ipc_kmsg_copyout_ool_descriptor(&kdesc->kdesc_memory,
4191 			    map);
4192 			break;
4193 		case MACH_MSG_OOL_PORTS_DESCRIPTOR:
4194 			mr |= ipc_kmsg_copyout_ool_ports_descriptor(kdesc,
4195 			    map, space);
4196 			break;
4197 		case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
4198 			mr |= ipc_kmsg_copyout_guarded_port_descriptor(&kdesc->kdesc_guarded_port,
4199 			    space, option);
4200 			break;
4201 		default:
4202 			__ipc_kmsg_descriptor_invalid_type_panic(kdesc);
4203 		}
4204 	}
4205 
4206 	if (mr != MACH_MSG_SUCCESS) {
4207 		mr |= MACH_RCV_BODY_ERROR;
4208 	}
4209 	return mr;
4210 }
4211 
/*
 * Deflate an array of kernel descriptors into their (smaller) user
 * wire representations, in place.  The user forms are written from
 * the END of the kernel array backward so that each source
 * descriptor is read before its storage is overwritten; the deflate
 * context is then adjusted by how much the array shrank.
 */
static void
ipc_kmsg_deflate_descriptors(
	ikm_deflate_context_t  *dctx,
	mach_msg_kdescriptor_t *desc_array,
	mach_msg_size_t         desc_count)
{
	/* user descriptors are packed ending where the kernel array ends */
	char           *udesc = (char *)(desc_array + desc_count);
	mach_msg_body_t body  = {
		.msgh_descriptor_count = desc_count,
	};

	/* walk backward: deflate_put writes each user form before `udesc` */
	for (mach_msg_size_t i = desc_count; i-- > 0;) {
		const mach_msg_kdescriptor_t *kdesc = &desc_array[i];

		switch (mach_msg_kdescriptor_type(kdesc)) {
		case MACH_MSG_PORT_DESCRIPTOR:
			udesc = ipc_kmsg_deflate_port_descriptor(udesc,
			    &kdesc->kdesc_port);
			break;
		case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
		case MACH_MSG_OOL_DESCRIPTOR:
			udesc = ipc_kmsg_deflate_memory_descriptor(udesc,
			    &kdesc->kdesc_memory, dctx->dctx_isU64);
			break;
		case MACH_MSG_OOL_PORTS_DESCRIPTOR:
			udesc = ipc_kmsg_deflate_port_array_descriptor(udesc,
			    &kdesc->kdesc_port_array, dctx->dctx_isU64);
			break;
		case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
			udesc = ipc_kmsg_deflate_guarded_port_descriptor(udesc,
			    &kdesc->kdesc_guarded_port, dctx->dctx_isU64);
			break;
		default:
			__ipc_kmsg_descriptor_invalid_type_panic(kdesc);
		}
	}

	/* adjust the context with how much the descriptors contracted */
	dctx->dctx_uhdr      += udesc - (char *)desc_array;
	dctx->dctx_uhdr_size -= udesc - (char *)desc_array;

	/* update the descriptor count right before the array */
	udesc = ipc_kmsg_deflate_put(udesc, &body);
}
4256 
4257 static mach_msg_size_t
ipc_kmsg_descriptors_copyout_size(mach_msg_kdescriptor_t * kdesc,mach_msg_size_t count,vm_map_t map)4258 ipc_kmsg_descriptors_copyout_size(
4259 	mach_msg_kdescriptor_t *kdesc,
4260 	mach_msg_size_t         count,
4261 	vm_map_t                map)
4262 {
4263 	bool isU64 = (map->max_offset > VM_MAX_ADDRESS);
4264 	mach_msg_size_t size = 0;
4265 
4266 	for (mach_msg_size_t i = 0; i < count; i++) {
4267 		size += ikm_user_desc_size(kdesc[i].kdesc_header.type, isU64);
4268 	}
4269 
4270 	return size;
4271 }
4272 
4273 /*
4274  *	Routine:	ipc_kmsg_copyout_size
4275  *	Purpose:
4276  *		Compute the size of the message as copied out to the given
4277  *		map. If the destination map's pointers are a different size
4278  *		than the kernel's, we have to allow for expansion/
4279  *		contraction of the descriptors as appropriate.
4280  *	Conditions:
4281  *		Nothing locked.
4282  *	Returns:
4283  *		size of the message as it would be received.
4284  */
4285 
4286 mach_msg_size_t
ipc_kmsg_copyout_size(ipc_kmsg_t kmsg,vm_map_t map)4287 ipc_kmsg_copyout_size(
4288 	ipc_kmsg_t              kmsg,
4289 	vm_map_t                map)
4290 {
4291 	mach_msg_header_t *hdr   = ikm_header(kmsg);
4292 	mach_msg_size_t    size  = hdr->msgh_size - USER_HEADER_SIZE_DELTA;
4293 
4294 	if (hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
4295 		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);
4296 
4297 		size -= KERNEL_DESC_SIZE * kbase->msgb_dsc_count;
4298 		size += ipc_kmsg_descriptors_copyout_size(kbase->msgb_dsc_array,
4299 		    kbase->msgb_dsc_count, map);
4300 	}
4301 
4302 	return size;
4303 }
4304 
4305 /*
4306  *	Routine:	ipc_kmsg_copyout
4307  *	Purpose:
4308  *		"Copy-out" port rights and out-of-line memory
4309  *		in the message.
4310  *	Conditions:
4311  *		Nothing locked.
4312  *	Returns:
4313  *		MACH_MSG_SUCCESS	Copied out all rights and memory.
4314  *		MACH_RCV_HEADER_ERROR + special bits
4315  *			Rights and memory in the message are intact.
4316  *		MACH_RCV_BODY_ERROR + special bits
4317  *			The message header was successfully copied out.
4318  *			As much of the body was handled as possible.
4319  */
4320 
4321 mach_msg_return_t
ipc_kmsg_copyout(ipc_kmsg_t kmsg,ipc_space_t space,vm_map_t map,mach_msg_option64_t option)4322 ipc_kmsg_copyout(
4323 	ipc_kmsg_t              kmsg,
4324 	ipc_space_t             space,
4325 	vm_map_t                map,
4326 	mach_msg_option64_t     option)
4327 {
4328 	mach_msg_header_t *hdr = ikm_header(kmsg);
4329 	mach_msg_size_t    dsc_count;
4330 	mach_msg_return_t  mr;
4331 
4332 	dsc_count = ipc_kmsg_validate_signature(kmsg);
4333 
4334 	mr = ipc_kmsg_copyout_header(kmsg, hdr, space, option);
4335 	if (mr != MACH_MSG_SUCCESS) {
4336 		return mr;
4337 	}
4338 
4339 	if (dsc_count) {
4340 		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);
4341 
4342 		mr = ipc_kmsg_copyout_descriptors(kbase->msgb_dsc_array,
4343 		    dsc_count, space, map, option);
4344 	}
4345 
4346 	return mr;
4347 }
4348 
4349 /*
4350  *	Routine:	ipc_kmsg_copyout_pseudo
4351  *	Purpose:
4352  *		Does a pseudo-copyout of the message.
4353  *		This is like a regular copyout, except
4354  *		that the ports in the header are handled
4355  *		as if they are in the body.  They aren't reversed.
4356  *
4357  *		The error codes are a combination of special bits.
4358  *		The copyout proceeds despite errors.
4359  *	Conditions:
4360  *		Nothing locked.
4361  *	Returns:
4362  *		MACH_MSG_SUCCESS	Successful copyout.
4363  *		MACH_MSG_IPC_SPACE	No room for port right in name space.
4364  *		MACH_MSG_VM_SPACE	No room for memory in address space.
4365  *		MACH_MSG_IPC_KERNEL	Resource shortage handling port right.
4366  *		MACH_MSG_VM_KERNEL	Resource shortage handling memory.
4367  */
4368 
mach_msg_return_t
ipc_kmsg_copyout_pseudo(
	ipc_kmsg_t              kmsg,
	ipc_space_t             space,
	vm_map_t                map)
{
	mach_msg_header_t *hdr = ikm_header(kmsg);
	mach_msg_bits_t mbits = hdr->msgh_bits;
	ipc_object_t dest = ip_to_object(hdr->msgh_remote_port);
	ipc_object_t reply = ip_to_object(hdr->msgh_local_port);
	ipc_object_t voucher = ip_to_object(ipc_kmsg_get_voucher_port(kmsg));
	mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
	mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
	mach_msg_type_name_t voucher_type = MACH_MSGH_BITS_VOUCHER(mbits);
	mach_port_name_t voucher_name = hdr->msgh_voucher_port;
	mach_port_name_t dest_name, reply_name;
	mach_msg_return_t mr;
	mach_msg_size_t dsc_count;

	/* Set ith_knote to ITH_KNOTE_PSEUDO */
	current_thread()->ith_knote = ITH_KNOTE_PSEUDO;

	dsc_count = ipc_kmsg_validate_signature(kmsg);

	assert(IO_VALID(dest));

#if 0
	/*
	 * If we did this here, it looks like we wouldn't need the undo logic
	 * at the end of ipc_kmsg_send() in the error cases.  Not sure which
	 * would be more elegant to keep.
	 */
	ipc_importance_clean(kmsg);
#else
	/* just assert it is already clean */
	ipc_importance_assert_clean(kmsg);
#endif

	/*
	 * Pseudo-copyout: dest and reply are NOT reversed in the header;
	 * each is copied out with its original disposition.
	 */
	mr = ipc_kmsg_copyout_object(space, dest, dest_type, NULL, NULL, &dest_name);

	if (!IO_VALID(reply)) {
		reply_name = CAST_MACH_PORT_TO_NAME(reply);
	} else if (ip_is_reply_port(ip_object_to_port(reply))) {
		mach_msg_return_t reply_mr;
		/* reply ports: kernel swallows the send-once right and may
		 * rewrite the disposition (MOVE -> MAKE send-once) */
		reply_mr = ipc_kmsg_copyout_reply_object(space, reply, &reply_type, &reply_name);
		mr = mr | reply_mr;
		if (reply_mr == MACH_MSG_SUCCESS) {
			mbits = MACH_MSGH_BITS_SET(dest_type, reply_type, voucher_type, MACH_MSGH_BITS_OTHER(mbits));
		}
	} else {
		mr = mr | ipc_kmsg_copyout_object(space, reply, reply_type, NULL, NULL, &reply_name);
	}

	hdr->msgh_bits = mbits & MACH_MSGH_BITS_USER;
	hdr->msgh_remote_port = CAST_MACH_NAME_TO_PORT(dest_name);
	hdr->msgh_local_port = CAST_MACH_NAME_TO_PORT(reply_name);

	/* restore the voucher:
	 * If it was copied in via move-send, have to put back a voucher send right.
	 *
	 * If it was copied in via copy-send, the header still contains the old voucher name.
	 * Restore the type and discard the copied-in/pre-processed voucher.
	 */
	if (IO_VALID(voucher)) {
		assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND);
		if (kmsg->ikm_voucher_type == MACH_MSG_TYPE_MOVE_SEND) {
			mr |= ipc_kmsg_copyout_object(space, voucher, voucher_type, NULL, NULL, &voucher_name);
			hdr->msgh_voucher_port = voucher_name;
		} else {
			assert(kmsg->ikm_voucher_type == MACH_MSG_TYPE_COPY_SEND);
			hdr->msgh_bits = MACH_MSGH_BITS_SET(dest_type, reply_type, MACH_MSG_TYPE_COPY_SEND,
			    MACH_MSGH_BITS_OTHER(hdr->msgh_bits));
			ipc_object_destroy(voucher, voucher_type);
		}
		ipc_kmsg_clear_voucher_port(kmsg);
	}

	if (dsc_count) {
		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);

		/* rdar://120614480 this MACH64_MSG_OPTION_NONE is wrong */
		mr |= ipc_kmsg_copyout_descriptors(kbase->msgb_dsc_array,
		    dsc_count, space, map, MACH64_MSG_OPTION_NONE);
	}

	current_thread()->ith_knote = ITH_KNOTE_NULL;

	return mr;
}
4458 
4459 /*
4460  *	Routine:	ipc_kmsg_copyout_dest_to_user
4461  *	Purpose:
4462  *		Copies out the destination port in the message.
4463  *		Destroys all other rights and memory in the message.
4464  *		Transforms the message into a bare header with trailer.
4465  *	Conditions:
4466  *		Nothing locked.
4467  */
4468 
void
ipc_kmsg_copyout_dest_to_user(
	ipc_kmsg_t      kmsg,
	ipc_space_t     space)
{
	mach_msg_bits_t mbits;
	ipc_port_t dest;
	ipc_object_t reply;
	ipc_object_t voucher;
	mach_msg_type_name_t dest_type;
	mach_msg_type_name_t reply_type;
	mach_msg_type_name_t voucher_type;
	mach_port_name_t dest_name, reply_name, voucher_name;
	mach_msg_header_t *hdr;
	mach_msg_id_t msg_id;
	mach_msg_size_t aux_size;
	mach_msg_size_t dsc_count;

	/* validate the kmsg signature before trusting any of its contents */
	dsc_count = ipc_kmsg_validate_signature(kmsg);

	/*
	 * Snapshot everything we need from the header now:
	 * the message is torn down and rebuilt below.
	 */
	hdr = ikm_header(kmsg);
	mbits = hdr->msgh_bits;
	dest = hdr->msgh_remote_port;
	reply = ip_to_object(hdr->msgh_local_port);
	voucher = ip_to_object(ipc_kmsg_get_voucher_port(kmsg));
	voucher_name = hdr->msgh_voucher_port;
	msg_id = hdr->msgh_id;
	dest_type = MACH_MSGH_BITS_REMOTE(mbits);
	reply_type = MACH_MSGH_BITS_LOCAL(mbits);
	voucher_type = MACH_MSGH_BITS_VOUCHER(mbits);
	aux_size = kmsg->ikm_aux_size;

	assert(IP_VALID(dest));

	ipc_importance_assert_clean(kmsg);

	/*
	 * Copy out only the destination right; a port that died in the
	 * meantime is reported to the receiver as MACH_PORT_DEAD.
	 */
	ip_mq_lock(dest);
	if (ip_active(dest)) {
		ipc_object_copyout_dest(space, ip_to_object(dest),
		    dest_type, &dest_name);
		/* dest is unlocked */
	} else {
		ip_mq_unlock(dest);
		ip_release(dest);
		dest_name = MACH_PORT_DEAD;
	}

	/* the reply right (if any) is destroyed, not copied out */
	if (IO_VALID(reply)) {
		ipc_object_destroy(reply, reply_type);
		reply_name = MACH_PORT_NULL;
	} else {
		reply_name = CAST_MACH_PORT_TO_NAME(reply);
	}

	/* the voucher right (if any) is destroyed, not copied out */
	if (IO_VALID(voucher)) {
		assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND);
		ipc_object_destroy(voucher, voucher_type);
		ipc_kmsg_clear_voucher_port(kmsg);
		voucher_name = MACH_PORT_NULL;
	}

	/* destroy all rights and out-of-line memory carried in the body */
	if (mbits & MACH_MSGH_BITS_COMPLEX) {
		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);

		ipc_kmsg_clean_descriptors(kbase->msgb_dsc_array, dsc_count);
	}

	ipc_kmsg_free_allocations(kmsg);

	/* and now reconstruct a message anew */

	/*
	 * dest and reply swap places in the rebuilt header:
	 * dest becomes the local port, reply the remote one.
	 */
	mbits = MACH_MSGH_BITS_SET(reply_type, dest_type, voucher_type, mbits);
	*ikm_header(kmsg) = (mach_msg_header_t){
		.msgh_bits         = mbits,
		.msgh_size         = sizeof(mach_msg_header_t),
		.msgh_local_port   = CAST_MACH_NAME_TO_PORT(dest_name),
		.msgh_remote_port  = CAST_MACH_NAME_TO_PORT(reply_name),
		.msgh_voucher_port = voucher_name,
		.msgh_id           = msg_id,
	};
	ipc_kmsg_init_trailer_and_sign(kmsg, TASK_NULL);

	/* put a minimal aux header if there was one */
	if (aux_size) {
		kmsg->ikm_aux_size = sizeof(mach_msg_aux_header_t);
		*ikm_aux_header(kmsg) = (mach_msg_aux_header_t){
			.msgdh_size = sizeof(mach_msg_aux_header_t),
		};
	}
}
4559 
4560 /*
4561  *	Routine:	ipc_kmsg_copyout_dest_to_kernel
4562  *	Purpose:
4563  *		Copies out the destination and reply ports in the message.
4564  *		Leaves all other rights and memory in the message alone.
4565  *	Conditions:
4566  *		Nothing locked.
4567  *
4568  *	Derived from ipc_kmsg_copyout_dest_to_user.
4569  *	Use by mach_msg_rpc_from_kernel (which used to use copyout_dest).
4570  *	We really do want to save rights and memory.
4571  */
4572 
void
ipc_kmsg_copyout_dest_to_kernel(
	ipc_kmsg_t      kmsg,
	ipc_space_t     space)
{
	ipc_port_t dest;
	mach_port_t reply;
	mach_msg_type_name_t dest_type;
	mach_msg_type_name_t reply_type;
	mach_port_name_t dest_name;
	mach_msg_header_t *hdr;

	/* validate the kmsg signature before trusting any of its contents */
	(void)ipc_kmsg_validate_signature(kmsg);

	hdr = ikm_header(kmsg);
	dest = hdr->msgh_remote_port;
	reply = hdr->msgh_local_port;
	dest_type = MACH_MSGH_BITS_REMOTE(hdr->msgh_bits);
	reply_type = MACH_MSGH_BITS_LOCAL(hdr->msgh_bits);

	assert(IP_VALID(dest));

	/*
	 * Copy out the destination right; a port that died in the
	 * meantime is reported as MACH_PORT_DEAD.
	 */
	ip_mq_lock(dest);
	if (ip_active(dest)) {
		ipc_object_copyout_dest(space, ip_to_object(dest),
		    dest_type, &dest_name);
		/* dest is unlocked */
	} else {
		ip_mq_unlock(dest);
		ip_release(dest);
		dest_name = MACH_PORT_DEAD;
	}

	/*
	 * While MIG kernel users don't receive vouchers, the
	 * msgh_voucher_port field is intended to be round-tripped through the
	 * kernel if there is no voucher disposition set. Here we check for a
	 * non-zero voucher disposition, and consume the voucher send right as
	 * there is no possible way to specify MACH_RCV_VOUCHER semantics.
	 */
	mach_msg_type_name_t voucher_type;
	voucher_type = MACH_MSGH_BITS_VOUCHER(hdr->msgh_bits);
	if (voucher_type != MACH_MSGH_BITS_ZERO) {
		ipc_port_t voucher = ipc_kmsg_get_voucher_port(kmsg);

		assert(voucher_type == MACH_MSG_TYPE_MOVE_SEND);
		/*
		 * someone managed to send this kernel routine a message with
		 * a voucher in it. Cleanup the reference in
		 * kmsg->ikm_voucher.
		 */
		if (IP_VALID(voucher)) {
			ipc_port_release_send(voucher);
		}
		hdr->msgh_voucher_port = 0;
		ipc_kmsg_clear_voucher_port(kmsg);
	}

	/*
	 * Swap dest/reply in the header (dest becomes the local port),
	 * leaving the rest of the message (body, descriptors) untouched.
	 */
	hdr->msgh_bits =
	    (MACH_MSGH_BITS_OTHER(hdr->msgh_bits) |
	    MACH_MSGH_BITS(reply_type, dest_type));
	hdr->msgh_local_port =  CAST_MACH_NAME_TO_PORT(dest_name);
	hdr->msgh_remote_port = reply;
}
4637 
4638 static void
ipc_kmsg_deflate_header(ikm_deflate_context_t * dctx,mach_msg_header_t * hdr)4639 ipc_kmsg_deflate_header(
4640 	ikm_deflate_context_t  *dctx,
4641 	mach_msg_header_t      *hdr)
4642 {
4643 	mach_msg_user_header_t uhdr = {
4644 		.msgh_bits         = hdr->msgh_bits,
4645 		.msgh_size         = dctx->dctx_uhdr_size + dctx->dctx_udata_size,
4646 		.msgh_remote_port  = CAST_MACH_PORT_TO_NAME(hdr->msgh_remote_port),
4647 		.msgh_local_port   = CAST_MACH_PORT_TO_NAME(hdr->msgh_local_port),
4648 		.msgh_voucher_port = hdr->msgh_voucher_port,
4649 		.msgh_id           = hdr->msgh_id,
4650 	};
4651 
4652 	/* the header will contract, take it into account */
4653 	dctx->dctx_uhdr      += USER_HEADER_SIZE_DELTA;
4654 	dctx->dctx_uhdr_size -= USER_HEADER_SIZE_DELTA;
4655 	uhdr.msgh_size       -= USER_HEADER_SIZE_DELTA;
4656 	memcpy(dctx->dctx_uhdr, &uhdr, sizeof(uhdr));
4657 }
4658 
/*
 *	Convert the kernel (max) trailer in place into the form the user
 *	requested: fix up the advertised size and seqno, then rewrite the
 *	fields whose layout differs between 32-bit and 64-bit userspace.
 */
static void
ipc_kmsg_deflate_trailer(
	ikm_deflate_context_t  *dctx,
	mach_msg_recv_result_t *msgr)
{
	mach_msg_max_trailer_t   *trailer = dctx->dctx_trailer;
#ifdef __arm64__
	/* same storage viewed through the 32-bit and 64-bit user layouts */
	mach_msg_max_trailer32_t *out32  = (mach_msg_max_trailer32_t *)trailer;
	mach_msg_max_trailer64_t *out64  = (mach_msg_max_trailer64_t *)trailer;
#else
	mach_msg_max_trailer_t   *out32  = trailer;
	mach_msg_max_trailer_t   *out64  = trailer;
#endif /* __arm64__ */

#define trailer_assert_same_field(field) \
	static_assert(offsetof(typeof(*out32), field) == \
	    offsetof(typeof(*out64), field)); \
	static_assert(sizeof(out32->field) == sizeof(out64->field))

	/*
	 * These fields have been set by ipc_kmsg_init_trailer_and_sign(),
	 * but alias in both 32 and 64 bit forms and need no munging:
	 *
	 *   msgh_trailer_type, msgh_trailer_size, msgh_sender, msgh_audit
	 *
	 * Update the size with the user requested one,
	 * and update the message seqno.
	 *
	 * These cover:
	 * - mach_msg_trailer_t           (msgh_trailer_type + msgh_trailer_size)
	 * - mach_msg_seqno_trailer_t     (the above + msgh_seqno)
	 * - mach_msg_security_trailer_t  (the above + msgh_sender)
	 * - mach_msg_audit_trailer_t     (the above + msgh_audit)
	 */
	trailer_assert_same_field(msgh_trailer_type);
	trailer_assert_same_field(msgh_trailer_size);
	trailer_assert_same_field(msgh_seqno);
	trailer_assert_same_field(msgh_sender);
	trailer_assert_same_field(msgh_audit);

	trailer->msgh_trailer_size = dctx->dctx_trailer_size;
	trailer->msgh_seqno        = msgr->msgr_seqno;

	/*
	 * Lastly update fields that are 32bit versus 64bit dependent,
	 * which are all after msgh_context (including this field).
	 *
	 * These cover:
	 * - mach_msg_context_trailer_t   (the above + msgh_context)
	 * - mach_msg_mac_trailer_t       (the above + msg_ad + msgh_labels)
	 */

	/* scrub everything past the audit trailer before rewriting it */
	bzero((char *)trailer + sizeof(mach_msg_audit_trailer_t),
	    MAX_TRAILER_SIZE - sizeof(mach_msg_audit_trailer_t));

	if (dctx->dctx_isU64) {
		out64->msgh_context = msgr->msgr_context;
	} else {
		out32->msgh_context = (typeof(out32->msgh_context))msgr->msgr_context;
	}
#undef trailer_assert_same_field
}
4721 
/*
 *	Compute the copyout layout (header, optional udata, trailer, aux)
 *	for a kmsg and mangle it in place into its user representation.
 *	Returns the deflate context describing what to copy out and where.
 */
static ikm_deflate_context_t
ipc_kmsg_deflate(
	ipc_kmsg_t              kmsg,     /* scalar or vector */
	mach_msg_recv_result_t *msgr,
	mach_msg_option64_t     options,
	vm_map_t                map)
{
	mach_msg_header_t      *hdr  = ikm_header(kmsg);
	ikm_deflate_context_t   dctx = {
		.dctx_uhdr       = (char *)hdr,
		.dctx_uhdr_size  = hdr->msgh_size,

		.dctx_aux_hdr    = ikm_aux_header(kmsg),
		.dctx_aux_size   = kmsg->ikm_aux_size,

		/* a map reaching past VM_MAX_ADDRESS implies a 64-bit user task */
		.dctx_isU64      = (map->max_offset > VM_MAX_ADDRESS),
	};

	/*
	 * If we aren't pseudo-receiving, locate the trailer
	 * before the layout is mangled beyond recognition.
	 */
	if (msgr->msgr_recv_name != MSGR_PSEUDO_RECEIVE) {
		dctx.dctx_trailer      = ipc_kmsg_get_trailer(kmsg);
		dctx.dctx_trailer_size = ipc_kmsg_trailer_size(options, map);
	}

	/*
	 * If the message isn't linear,
	 * split into uhdr=header+descriptors and udata=body+trailer
	 */
	if (!ikm_is_linear(kmsg)) {
		mach_msg_size_t kdata_size = ikm_kdata_size(hdr);

		dctx.dctx_udata_size = dctx.dctx_uhdr_size - kdata_size;
		if (dctx.dctx_udata_size || dctx.dctx_trailer_size) {
			dctx.dctx_udata      = kmsg->ikm_udata;
			dctx.dctx_uhdr_size  = kdata_size;
		}
	}

	/*
	 * /!\ past this point, very few ipc_kmsg methods are allowed /!\
	 *
	 * The kmsg layout will be mangled in order to copy the bytes out,
	 * and once that is done, destroying the message is the only thing
	 * allowed.
	 */

	if (msgr->msgr_recv_name != MSGR_PSEUDO_RECEIVE) {
		ipc_kmsg_deflate_trailer(&dctx, msgr);
	}

	/* descriptors shrink to their user size before the header does */
	if (hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);

		ipc_kmsg_deflate_descriptors(&dctx,
		    kbase->msgb_dsc_array, kbase->msgb_dsc_count);
	}

	ipc_kmsg_deflate_header(&dctx, hdr);

	return dctx;
}
4786 
4787 
4788 /*
4789  *	Routine:	ipc_kmsg_put_to_user
4790  *	Purpose:
4791  *		Copies a scalar or vector message buffer to a user message.
4792  *		Frees the message buffer.
4793  *
4794  *		1. If user has allocated space for aux data,
4795  *		   mach_msg_validate_data_vectors() guarantees that
4796  *		   recv_aux_addr is non-zero, and recv_aux_size
4797  *		   is at least sizeof(mach_msg_aux_header_t).
4798  *
4799  *		   In case the kmsg is a scalar or a vector without auxiliary
4800  *		   data, copy out an empty aux header to recv_aux_addr
4801  *		   which serves as EOF.
4802  *
4803  *		2. If the user has not allocated space for aux data,
4804  *		   silently drop the aux payload on reception.
4805  *
4806  *		3. If MACH64_RCV_LINEAR_VECTOR is set, use recv_msg_addr as
4807  *		   the combined buffer for message proper and aux data.
4808  *		   recv_aux_addr and recv_aux_size must be passed as
4809  *		   zeros and are ignored.
4810  *
4811  *	Conditions:
4812  *		Nothing locked. kmsg is freed upon return.
4813  *
4814  *	Returns:
4815  *		MACH_RCV_INVALID_DATA    Couldn't copy to user message.
4816  *		the incoming "mr"        Copied data out of message buffer.
4817  */
4818 mach_msg_return_t
ipc_kmsg_put_to_user(ipc_kmsg_t kmsg,mach_msg_recv_bufs_t * recv_bufs,mach_msg_recv_result_t * msgr,mach_msg_option64_t options,vm_map_t map,mach_msg_return_t mr)4819 ipc_kmsg_put_to_user(
4820 	ipc_kmsg_t              kmsg,     /* scalar or vector */
4821 	mach_msg_recv_bufs_t   *recv_bufs,
4822 	mach_msg_recv_result_t *msgr,
4823 	mach_msg_option64_t     options,
4824 	vm_map_t                map,
4825 	mach_msg_return_t       mr)
4826 {
4827 	mach_msg_aux_header_t   eof_aux = { .msgdh_size = 0 };
4828 	mach_vm_address_t       msg_rcv_addr = recv_bufs->recv_msg_addr;
4829 	mach_vm_address_t       aux_rcv_addr = recv_bufs->recv_aux_addr;
4830 	mach_msg_size_t         usize = 0;
4831 	ikm_deflate_context_t   dctx;
4832 
4833 	/*
4834 	 * After this, the kmsg() is mangled beyond recognition,
4835 	 * and calling things like ikm_header() etc.. will have
4836 	 * undefined behavior.
4837 	 */
4838 	dctx = ipc_kmsg_deflate(kmsg, msgr, options, map);
4839 
4840 	msgr->msgr_msg_size     = dctx.dctx_uhdr_size + dctx.dctx_udata_size;
4841 	msgr->msgr_trailer_size = dctx.dctx_trailer_size;
4842 	msgr->msgr_aux_size     = dctx.dctx_aux_size;
4843 
4844 	usize = msgr->msgr_msg_size + msgr->msgr_trailer_size;
4845 
4846 	/*
4847 	 * Validate our parameters, and compute the actual copy out addresses
4848 	 */
4849 
4850 	if (options & MACH64_RCV_LINEAR_VECTOR) {
4851 		assert(options & MACH64_MSG_VECTOR);
4852 
4853 		if (usize + dctx.dctx_aux_size > recv_bufs->recv_msg_size) {
4854 			mr = MACH_RCV_INVALID_DATA;
4855 			goto out;
4856 		}
4857 		if (options & MACH64_RCV_STACK) {
4858 			msg_rcv_addr += recv_bufs->recv_msg_size -
4859 			    (usize + dctx.dctx_aux_size);
4860 		}
4861 		aux_rcv_addr = msg_rcv_addr + usize;
4862 	} else {
4863 		assert(!(options & MACH64_RCV_STACK));
4864 
4865 		if (msgr->msgr_msg_size > recv_bufs->recv_msg_size) {
4866 			mr = MACH_RCV_INVALID_DATA;
4867 			goto out;
4868 		}
4869 
4870 		/*
4871 		 * (81193887) some clients stomp their own stack due to mis-sized
4872 		 * combined send/receives where the receive buffer didn't account
4873 		 * for the trailer size.
4874 		 *
4875 		 * At the very least, avoid smashing their stack
4876 		 */
4877 		if (usize > recv_bufs->recv_msg_size) {
4878 			dctx.dctx_trailer_size -= recv_bufs->recv_msg_size - usize;
4879 			usize = recv_bufs->recv_msg_size;
4880 		}
4881 
4882 		/*
4883 		 * If user has a buffer for aux data, at least copy out
4884 		 * an empty header which serves as an EOF.
4885 		 *
4886 		 * We don't need to do so for linear vector because
4887 		 * it's used in kevent context and we will return
4888 		 * msgr_aux_size as 0 on ext[3] to signify empty aux data.
4889 		 *
4890 		 * See: filt_machportprocess().
4891 		 */
4892 		if (aux_rcv_addr && !dctx.dctx_aux_hdr) {
4893 			dctx.dctx_aux_hdr  = &eof_aux;
4894 			dctx.dctx_aux_size = sizeof(eof_aux);
4895 			msgr->msgr_aux_size  = sizeof(eof_aux);
4896 		}
4897 
4898 		/*
4899 		 * If a receiver tries to receive a message with an aux vector,
4900 		 * but didn't provide one, we silently drop it for backward
4901 		 * compatibility reasons.
4902 		 */
4903 		if (dctx.dctx_aux_size > recv_bufs->recv_aux_size) {
4904 			dctx.dctx_aux_hdr  = NULL;
4905 			dctx.dctx_aux_size = 0;
4906 			msgr->msgr_aux_size  = 0;
4907 			aux_rcv_addr         = 0;
4908 		}
4909 	}
4910 
4911 
4912 	/*
4913 	 * Now that we measured twice, time to copyout all pieces.
4914 	 */
4915 
4916 	if (dctx.dctx_udata) {
4917 		mach_msg_size_t uhdr_size = dctx.dctx_uhdr_size;
4918 
4919 		if (copyoutmsg(dctx.dctx_uhdr, msg_rcv_addr, uhdr_size) ||
4920 		    copyoutmsg(dctx.dctx_udata, msg_rcv_addr + uhdr_size,
4921 		    usize - uhdr_size)) {
4922 			mr = MACH_RCV_INVALID_DATA;
4923 			goto out;
4924 		}
4925 	} else {
4926 		if (copyoutmsg(dctx.dctx_uhdr, msg_rcv_addr, usize)) {
4927 			mr = MACH_RCV_INVALID_DATA;
4928 			goto out;
4929 		}
4930 	}
4931 
4932 	if (dctx.dctx_aux_size &&
4933 	    copyoutmsg(dctx.dctx_aux_hdr, aux_rcv_addr, dctx.dctx_aux_size)) {
4934 		mr = MACH_RCV_INVALID_DATA;
4935 		goto out;
4936 	}
4937 
4938 out:
4939 	if (mr == MACH_RCV_INVALID_DATA) {
4940 		msgr->msgr_msg_size     = 0;
4941 		msgr->msgr_trailer_size = 0;
4942 		msgr->msgr_aux_size     = 0;
4943 	}
4944 
4945 	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_LINK) | DBG_FUNC_NONE,
4946 	    recv_bufs->recv_msg_addr, VM_KERNEL_ADDRPERM((uintptr_t)kmsg),
4947 	    /* this is on the receive/copyout path */ 1, 0, 0);
4948 
4949 	ipc_kmsg_free(kmsg);
4950 
4951 	return mr;
4952 }
4953 
4954 /** @} */
4955 #pragma mark ipc_kmsg kernel interfaces (get/put, copyin_from_kernel, send)
4956 
4957 /*
4958  *	Routine:	ipc_kmsg_get_from_kernel
4959  *	Purpose:
4960  *		Allocates a new kernel message buffer.
4961  *		Copies a kernel message to the message buffer.
4962  *		Only resource errors are allowed.
4963  *	Conditions:
4964  *		Nothing locked.
4965  *		Ports in header are ipc_port_t.
4966  *	Returns:
4967  *		MACH_MSG_SUCCESS	Acquired a message buffer.
4968  *		MACH_SEND_NO_BUFFER	Couldn't allocate a message buffer.
4969  */
4970 
mach_msg_return_t
ipc_kmsg_get_from_kernel(
	mach_msg_header_t      *msg,
	mach_msg_size_t         size,
	mach_msg_option64_t     options,
	ipc_kmsg_t             *kmsgp)
{
	mach_msg_kbase_t  *src_base;
	ipc_kmsg_t         kmsg;
	mach_msg_header_t *hdr;
	mach_msg_size_t    desc_count, kdata_sz;

	assert(size >= sizeof(mach_msg_header_t));
	assert((size & 3) == 0);

	/* measure the kernel-data (header + descriptors) portion */
	if (msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
		src_base   = mach_msg_header_to_kbase(msg);
		desc_count = src_base->msgb_dsc_count;
		kdata_sz   = ikm_kdata_size(desc_count, true);
	} else {
		desc_count = 0;
		kdata_sz   = ikm_kdata_size(desc_count, false);
	}

	/* a message too small to hold its own descriptors is malformed */
	assert(size >= kdata_sz);
	if (size < kdata_sz) {
		return MACH_SEND_TOO_LARGE;
	}

	kmsg = ipc_kmsg_alloc(size, 0, desc_count, IPC_KMSG_ALLOC_KERNEL);
	/* kmsg can be non-linear */

	if (kmsg == IKM_NULL) {
		return MACH_SEND_NO_BUFFER;
	}

	/*
	 * Copy the source message in: either as one chunk (linear),
	 * or split between kdata (header+descriptors) and udata (body).
	 */
	hdr = ikm_header(kmsg);
	if (ikm_is_linear(kmsg)) {
		memcpy(hdr, msg, size);
	} else {
		memcpy(hdr, msg, kdata_sz);
		memcpy(kmsg->ikm_udata, (char *)msg + kdata_sz, size - kdata_sz);
	}
	hdr->msgh_size = size;

	if (desc_count) {
		mach_msg_kbase_t *dst_base = mach_msg_header_to_kbase(hdr);

		/* descriptors need (re)signing at their new location */
		if (options & MACH64_POLICY_KERNEL_EXTENSION) {
			ipc_kmsg_sign_descriptors(dst_base->msgb_dsc_array,
			    desc_count);
		} else {
			ipc_kmsg_relocate_descriptors(dst_base->msgb_dsc_array,
			    src_base->msgb_dsc_array, desc_count);
		}
	}

	*kmsgp = kmsg;
	return MACH_MSG_SUCCESS;
}
5031 
5032 static void
ipc_kmsg_copyin_port_from_kernel(mach_msg_header_t * hdr,ipc_port_t port,ipc_port_t remote,mach_msg_type_name_t disp)5033 ipc_kmsg_copyin_port_from_kernel(
5034 	mach_msg_header_t      *hdr,
5035 	ipc_port_t              port,
5036 	ipc_port_t              remote,
5037 	mach_msg_type_name_t    disp)
5038 {
5039 	ipc_object_copyin_from_kernel(ip_to_object(port), disp);
5040 	/*
5041 	 * avoid circularity when the destination is also
5042 	 * the kernel.  This check should be changed into an
5043 	 * assert when the new kobject model is in place since
5044 	 * ports will not be used in kernel to kernel chats
5045 	 */
5046 
5047 	/* do not lock remote port, use raw pointer comparison */
5048 	if (!ip_in_space_noauth(remote, ipc_space_kernel)) {
5049 		/* remote port could be dead, in-transit or in an ipc space */
5050 		if (disp == MACH_MSG_TYPE_MOVE_RECEIVE &&
5051 		    ipc_port_check_circularity(port, remote)) {
5052 			hdr->msgh_bits |= MACH_MSGH_BITS_CIRCULAR;
5053 		}
5054 	}
5055 }
5056 
5057 /*
5058  *	Routine:	ipc_kmsg_copyin_from_kernel
5059  *	Purpose:
5060  *		"Copy-in" port rights and out-of-line memory
5061  *		in a message sent from the kernel.
5062  *
5063  *		Because the message comes from the kernel,
5064  *		the implementation assumes there are no errors
5065  *		or peculiarities in the message.
5066  *	Conditions:
5067  *		Nothing locked.
5068  */
5069 
mach_msg_return_t
ipc_kmsg_copyin_from_kernel(
	ipc_kmsg_t      kmsg)
{
	mach_msg_header_t   *hdr = ikm_header(kmsg);
	mach_msg_bits_t      bits = hdr->msgh_bits;
	mach_msg_type_name_t rname = MACH_MSGH_BITS_REMOTE(bits);
	mach_msg_type_name_t lname = MACH_MSGH_BITS_LOCAL(bits);
	mach_msg_type_name_t vname = MACH_MSGH_BITS_VOUCHER(bits);
	ipc_port_t           remote = hdr->msgh_remote_port;
	ipc_object_t         local = ip_to_object(hdr->msgh_local_port);
	ipc_object_t         voucher = ip_to_object(ipc_kmsg_get_voucher_port(kmsg));

	/* translate the destination and reply ports */
	if (!IP_VALID(remote)) {
		return MACH_SEND_INVALID_DEST;
	}

	ipc_object_copyin_from_kernel(ip_to_object(remote), rname);
	if (IO_VALID(local)) {
		ipc_object_copyin_from_kernel(local, lname);
	}

	if (IO_VALID(voucher)) {
		ipc_object_copyin_from_kernel(voucher, vname);
	}

	/*
	 *	The common case is a complex message with no reply port,
	 *	because that is what the memory_object interface uses.
	 */

	if (bits == (MACH_MSGH_BITS_COMPLEX |
	    MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0))) {
		/* fast path: rewrite the one known bit pattern directly */
		bits = (MACH_MSGH_BITS_COMPLEX |
		    MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0));

		hdr->msgh_bits = bits;
	} else {
		/* general case: translate each disposition to its copied-in form */
		bits = (MACH_MSGH_BITS_OTHER(bits) |
		    MACH_MSGH_BITS_SET_PORTS(ipc_object_copyin_type(rname),
		    ipc_object_copyin_type(lname), ipc_object_copyin_type(vname)));

		hdr->msgh_bits = bits;
	}

	ipc_kmsg_set_qos_kernel(kmsg);

	/* Add trailer and signature to the message */
	ipc_kmsg_init_trailer_and_sign(kmsg, TASK_NULL);

	if (bits & MACH_MSGH_BITS_COMPLEX) {
		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(hdr);
		mach_msg_size_t   count = kbase->msgb_dsc_count;
		mach_msg_kdescriptor_t *kdesc = kbase->msgb_dsc_array;

		/*
		 * Check if the remote port accepts ports in the body.
		 */
		if (remote->ip_no_grant) {
			for (mach_msg_size_t i = 0; i < count; i++) {
				switch (mach_msg_kdescriptor_type(&kdesc[i])) {
				case MACH_MSG_PORT_DESCRIPTOR:
				case MACH_MSG_OOL_PORTS_DESCRIPTOR:
				case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
					/* no descriptors have been copied in yet */
					ipc_kmsg_clean_header(kmsg);
					return MACH_SEND_NO_GRANT_DEST;
				}
			}
		}

		/* copy in every port right carried by the descriptors */
		for (mach_msg_size_t i = 0; i < count; i++) {
			switch (mach_msg_kdescriptor_type(&kdesc[i])) {
			case MACH_MSG_PORT_DESCRIPTOR: {
				mach_msg_port_descriptor_t *dsc = &kdesc[i].kdesc_port;
				mach_msg_type_name_t disp = dsc->disposition;
				ipc_port_t           port = dsc->name;

				/* rewrite the disposition to its post-copyin form */
				dsc->disposition = ipc_object_copyin_type(disp);
				if (IP_VALID(port)) {
					ipc_kmsg_copyin_port_from_kernel(hdr,
					    port, remote, disp);
				}
				break;
			}
			case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
			case MACH_MSG_OOL_DESCRIPTOR: {
				/*
				 * The sender should supply ready-made memory, i.e.
				 * a vm_map_copy_t, so we don't need to do anything.
				 */
				break;
			}
			case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
				mach_msg_ool_ports_descriptor_t *dsc = &kdesc[i].kdesc_port_array;
				mach_msg_type_name_t disp  = dsc->disposition;
				mach_port_array_t    array = dsc->address;

				dsc->disposition = ipc_object_copyin_type(disp);

				/* copy in each valid port of the array in turn */
				for (mach_msg_size_t j = 0; j < dsc->count; j++) {
					ipc_port_t port = array[j].port;

					if (IP_VALID(port)) {
						ipc_kmsg_copyin_port_from_kernel(hdr,
						    port, remote, disp);
					}
				}
				break;
			}
			case MACH_MSG_GUARDED_PORT_DESCRIPTOR: {
				mach_msg_guarded_port_descriptor_t *dsc = &kdesc[i].kdesc_guarded_port;
				mach_msg_type_name_t disp = dsc->disposition;
				ipc_port_t           port = dsc->name;

				dsc->disposition = ipc_object_copyin_type(disp);
				/* kernel senders may not set guard flags */
				assert(dsc->flags == 0);

				if (IP_VALID(port)) {
					ipc_kmsg_copyin_port_from_kernel(hdr,
					    port, remote, disp);
				}
				break;
			}
			default:
				__ipc_kmsg_descriptor_invalid_type_panic(kdesc);
			}
		}
	}

	return MACH_MSG_SUCCESS;
}
5203 
5204 /*
5205  *	Routine:	ipc_kmsg_send
5206  *	Purpose:
5207  *		Send a message.  The message holds a reference
5208  *		for the destination port in the msgh_remote_port field.
5209  *
5210  *		If unsuccessful, the caller still has possession of
5211  *		the message and must do something with it.  If successful,
5212  *		the message is queued, given to a receiver, destroyed,
5213  *		or handled directly by the kernel via mach_msg.
5214  *	Conditions:
5215  *		Nothing locked.
5216  *	Returns:
5217  *		MACH_MSG_SUCCESS	       The message was accepted.
5218  *		MACH_SEND_TIMED_OUT	       Caller still has message.
5219  *		MACH_SEND_INTERRUPTED	   Caller still has message.
5220  *		MACH_SEND_INVALID_DEST	   Caller still has message.
5221  *      MACH_SEND_INVALID_OPTIONS  Caller still has message.
5222  */
mach_msg_return_t
ipc_kmsg_send(
	ipc_kmsg_t              kmsg,
	mach_msg_option64_t     options,
	mach_msg_timeout_t      send_timeout)
{
	ipc_port_t port;
	thread_t th = current_thread();
	mach_msg_return_t error = MACH_MSG_SUCCESS;
	boolean_t kernel_reply = FALSE;
	mach_msg_header_t *hdr;

	/* Check if honor qlimit flag is set on thread. */
	if ((th->options & TH_OPT_HONOR_QLIMIT) == TH_OPT_HONOR_QLIMIT) {
		/* Remove the MACH_SEND_ALWAYS flag to honor queue limit. */
		options &= (~MACH64_SEND_ALWAYS);
		/* Add the timeout flag since the message queue might be full. */
		options |= MACH64_SEND_TIMEOUT;
		/* the flag is one-shot: consume it */
		th->options &= (~TH_OPT_HONOR_QLIMIT);
	}

#if IMPORTANCE_INHERITANCE
	bool did_importance = false;
#if IMPORTANCE_TRACE
	mach_msg_id_t imp_msgh_id = -1;
	int           sender_pid  = -1;
#endif /* IMPORTANCE_TRACE */
#endif /* IMPORTANCE_INHERITANCE */

	hdr = ikm_header(kmsg);
	/* don't allow the creation of a circular loop */
	if (hdr->msgh_bits & MACH_MSGH_BITS_CIRCULAR) {
		ipc_kmsg_destroy(kmsg, IPC_KMSG_DESTROY_ALL);
		KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, MACH_MSGH_BITS_CIRCULAR);
		return MACH_MSG_SUCCESS;
	}

	ipc_voucher_send_preprocessing(kmsg);

	port = hdr->msgh_remote_port;
	assert(IP_VALID(port));
	ip_mq_lock(port);

	/*
	 * If the destination has been guarded with a reply context, and the
	 * sender is consuming a send-once right, then assume this is a reply
	 * to an RPC and we need to validate that this sender is currently in
	 * the correct context.
	 */
	if (enforce_strict_reply && port->ip_reply_context != 0 &&
	    ((options & MACH64_SEND_KERNEL) == 0) &&
	    MACH_MSGH_BITS_REMOTE(hdr->msgh_bits) == MACH_MSG_TYPE_PORT_SEND_ONCE) {
		error = ipc_kmsg_validate_reply_context_locked(options,
		    port, th->ith_voucher, th->ith_voucher_name);
		if (error != MACH_MSG_SUCCESS) {
			ip_mq_unlock(port);
			return error;
		}
	}

#if IMPORTANCE_INHERITANCE
	/* re-entered (with the port re-locked) after an importance donation */
retry:
#endif /* IMPORTANCE_INHERITANCE */
	/*
	 *	Can't deliver to a dead port.
	 *	However, we can pretend it got sent
	 *	and was then immediately destroyed.
	 */
	if (!ip_active(port)) {
		ip_mq_unlock(port);
#if MACH_FLIPC
		if (MACH_NODE_VALID(kmsg->ikm_node) && FPORT_VALID(port->ip_messages.imq_fport)) {
			flipc_msg_ack(kmsg->ikm_node, &port->ip_messages, FALSE);
		}
#endif
		if (did_importance) {
			/*
			 * We're going to pretend we delivered this message
			 * successfully, and just eat the kmsg. However, the
			 * kmsg is actually visible via the importance_task!
			 * We need to cleanup this linkage before we destroy
			 * the message, and more importantly before we set the
			 * msgh_remote_port to NULL. See: 34302571
			 */
			ipc_importance_clean(kmsg);
		}
		ip_release(port);  /* JMM - Future: release right, not just ref */
		ipc_kmsg_destroy(kmsg, IPC_KMSG_DESTROY_SKIP_REMOTE);
		KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, MACH_SEND_INVALID_DEST);
		return MACH_MSG_SUCCESS;
	}

	/* fast path: destination is a kernel (kobject) port */
	if (ip_in_space(port, ipc_space_kernel)) {
		require_ip_active(port);
		port->ip_messages.imq_seqno++;
		ip_mq_unlock(port);

		counter_inc(&current_task()->messages_sent);

		/*
		 * Call the server routine, and get the reply message to send.
		 */
		kmsg = ipc_kobject_server(port, kmsg, options);
		if (kmsg == IKM_NULL) {
			return MACH_MSG_SUCCESS;
		}
		/* reload hdr since kmsg changed */
		hdr = ikm_header(kmsg);

		ipc_kmsg_init_trailer_and_sign(kmsg, TASK_NULL);

		/* restart the KMSG_INFO tracing for the reply message */
		KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_START);
		port = hdr->msgh_remote_port;
		assert(IP_VALID(port));
		ip_mq_lock(port);
		/* fall thru with reply - same options */
		kernel_reply = TRUE;
		if (!ip_active(port)) {
			error = MACH_SEND_INVALID_DEST;
		}
	}

#if IMPORTANCE_INHERITANCE
	/*
	 * Need to see if this message needs importance donation and/or
	 * propagation.  That routine can drop the port lock temporarily.
	 * If it does we'll have to revalidate the destination.
	 */
	if (!did_importance) {
		did_importance = true;
		if (ipc_importance_send(kmsg, options)) {
			goto retry;
		}
	}
#endif /* IMPORTANCE_INHERITANCE */

	if (error != MACH_MSG_SUCCESS) {
		ip_mq_unlock(port);
	} else {
		/*
		 * We have a valid message and a valid reference on the port.
		 * call mqueue_send() on its message queue.
		 */
		ipc_special_reply_port_msg_sent(port);

		error = ipc_mqueue_send_locked(&port->ip_messages, kmsg,
		    options, send_timeout);
		/* port unlocked */
	}

#if IMPORTANCE_INHERITANCE
	if (did_importance) {
		__unused int importance_cleared = 0;
		switch (error) {
		case MACH_SEND_TIMED_OUT:
		case MACH_SEND_NO_BUFFER:
		case MACH_SEND_INTERRUPTED:
		case MACH_SEND_INVALID_DEST:
			/*
			 * We still have the kmsg and its
			 * reference on the port.  But we
			 * have to back out the importance
			 * boost.
			 *
			 * The port could have changed hands,
			 * be inflight to another destination,
			 * etc...  But in those cases our
			 * back-out will find the new owner
			 * (and all the operations that
			 * transferred the right should have
			 * applied their own boost adjustments
			 * to the old owner(s)).
			 */
			importance_cleared = 1;
			ipc_importance_clean(kmsg);
			break;

		case MACH_MSG_SUCCESS:
		default:
			break;
		}
#if IMPORTANCE_TRACE
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_SEND)) | DBG_FUNC_END,
		    task_pid(current_task()), sender_pid, imp_msgh_id, importance_cleared, 0);
#endif /* IMPORTANCE_TRACE */
	}
#endif /* IMPORTANCE_INHERITANCE */

	/*
	 * If the port has been destroyed while we wait, treat the message
	 * as a successful delivery (like we do for an inactive port).
	 */
	if (error == MACH_SEND_INVALID_DEST) {
#if MACH_FLIPC
		if (MACH_NODE_VALID(kmsg->ikm_node) && FPORT_VALID(port->ip_messages.imq_fport)) {
			flipc_msg_ack(kmsg->ikm_node, &port->ip_messages, FALSE);
		}
#endif
		ip_release(port); /* JMM - Future: release right, not just ref */
		ipc_kmsg_destroy(kmsg, IPC_KMSG_DESTROY_SKIP_REMOTE);
		KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, MACH_SEND_INVALID_DEST);
		return MACH_MSG_SUCCESS;
	}

	if (error != MACH_MSG_SUCCESS && kernel_reply) {
		/*
		 * Kernel reply messages that fail can't be allowed to
		 * pseudo-receive on error conditions. We need to just treat
		 * the message as a successful delivery.
		 */
#if MACH_FLIPC
		if (MACH_NODE_VALID(kmsg->ikm_node) && FPORT_VALID(port->ip_messages.imq_fport)) {
			flipc_msg_ack(kmsg->ikm_node, &port->ip_messages, FALSE);
		}
#endif
		ip_release(port); /* JMM - Future: release right, not just ref */
		ipc_kmsg_destroy(kmsg, IPC_KMSG_DESTROY_SKIP_REMOTE);
		KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, error);
		return MACH_MSG_SUCCESS;
	}
	return error;
}
5446 
5447 /*
5448  *	Routine:	ipc_kmsg_put_to_kernel
5449  *	Purpose:
5450  *		Copies a message buffer to a kernel message.
5451  *		Frees the message buffer.
5452  *		No errors allowed.
5453  *	Conditions:
5454  *		Nothing locked.
5455  */
5456 void
ipc_kmsg_put_to_kernel(mach_msg_header_t * msg,mach_msg_option64_t options,ipc_kmsg_t kmsg,mach_msg_size_t rcv_size)5457 ipc_kmsg_put_to_kernel(
5458 	mach_msg_header_t      *msg,
5459 	mach_msg_option64_t     options,
5460 	ipc_kmsg_t              kmsg,
5461 	mach_msg_size_t         rcv_size) /* includes trailer size */
5462 {
5463 	mach_msg_header_t *hdr = ikm_header(kmsg);
5464 	mach_msg_kbase_t  *src_base;
5465 	mach_msg_size_t    desc_count, kdata_sz;
5466 
5467 	assert(kmsg->ikm_aux_size == 0);
5468 	assert(rcv_size >= hdr->msgh_size);
5469 
5470 	if (hdr->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
5471 		src_base   = mach_msg_header_to_kbase(hdr);
5472 		desc_count = src_base->msgb_dsc_count;
5473 		kdata_sz   = ikm_kdata_size(desc_count, true);
5474 	} else {
5475 		desc_count = 0;
5476 		kdata_sz   = ikm_kdata_size(desc_count, false);
5477 	}
5478 
5479 	if (ikm_is_linear(kmsg)) {
5480 		memcpy(msg, hdr, rcv_size);
5481 	} else {
5482 		memcpy(msg, hdr, kdata_sz);
5483 		memcpy((char *)msg + kdata_sz,
5484 		    kmsg->ikm_udata, rcv_size - kdata_sz);
5485 	}
5486 
5487 	if (desc_count) {
5488 		mach_msg_kbase_t *dst_base = mach_msg_header_to_kbase(msg);
5489 
5490 		if (options & MACH64_POLICY_KERNEL_EXTENSION) {
5491 			ipc_kmsg_strip_descriptors(dst_base->msgb_dsc_array,
5492 			    src_base->msgb_dsc_array, desc_count);
5493 		} else {
5494 			ipc_kmsg_relocate_descriptors(dst_base->msgb_dsc_array,
5495 			    src_base->msgb_dsc_array, desc_count);
5496 		}
5497 	}
5498 
5499 	ipc_kmsg_free(kmsg);
5500 }
5501 
5502 /** @} */
#pragma mark ipc_kmsg tracing

/*
 * Property bits OR'ed into the flags word of the MACH_IPC_KMSG_INFO
 * tracepoint emitted by ipc_kmsg_trace_send() below.
 */
#define KMSG_TRACE_FLAG_TRACED     0x000001
#define KMSG_TRACE_FLAG_COMPLEX    0x000002
#define KMSG_TRACE_FLAG_OOLMEM     0x000004
#define KMSG_TRACE_FLAG_VCPY       0x000008
#define KMSG_TRACE_FLAG_PCPY       0x000010
#define KMSG_TRACE_FLAG_SND64      0x000020
#define KMSG_TRACE_FLAG_RAISEIMP   0x000040
#define KMSG_TRACE_FLAG_APP_SRC    0x000080
#define KMSG_TRACE_FLAG_APP_DST    0x000100
#define KMSG_TRACE_FLAG_DAEMON_SRC 0x000200
#define KMSG_TRACE_FLAG_DAEMON_DST 0x000400
#define KMSG_TRACE_FLAG_DST_NDFLTQ 0x000800
#define KMSG_TRACE_FLAG_SRC_NDFLTQ 0x001000
#define KMSG_TRACE_FLAG_DST_SONCE  0x002000
#define KMSG_TRACE_FLAG_SRC_SONCE  0x004000
#define KMSG_TRACE_FLAG_CHECKIN    0x008000
#define KMSG_TRACE_FLAG_ONEWAY     0x010000
#define KMSG_TRACE_FLAG_IOKIT      0x020000
#define KMSG_TRACE_FLAG_SNDRCV     0x040000
#define KMSG_TRACE_FLAG_DSTQFULL   0x080000
#define KMSG_TRACE_FLAG_VOUCHER    0x100000
#define KMSG_TRACE_FLAG_TIMER      0x200000
#define KMSG_TRACE_FLAG_SEMA       0x400000
#define KMSG_TRACE_FLAG_DTMPOWNER  0x800000
#define KMSG_TRACE_FLAG_GUARDED_DESC 0x1000000

/* 25 flag bits, packed above the port count in the 4th trace argument */
#define KMSG_TRACE_FLAGS_MASK      0x1ffffff
#define KMSG_TRACE_FLAGS_SHIFT     8

/* msgh_id occupies the high 32 bits of the 3rd trace argument */
#define KMSG_TRACE_ID_SHIFT        32

/* number of port rights carried, in the low byte of the 4th argument */
#define KMSG_TRACE_PORTS_MASK      0xff
#define KMSG_TRACE_PORTS_SHIFT     0
5538 
#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD)

/*
 *	Routine:	ipc_kmsg_trace_send
 *	Purpose:
 *		Emit the MACH_IPC_KMSG_INFO kdebug tracepoint for an
 *		outgoing message: sender/destination pids, msgh_id,
 *		payload size, port-right count, and KMSG_TRACE_FLAG_*
 *		property bits describing the message and endpoints.
 *	Conditions:
 *		Nothing locked on entry; briefly takes and drops the
 *		destination port lock while classifying the receiver.
 */
void
ipc_kmsg_trace_send(ipc_kmsg_t kmsg, mach_msg_option64_t option)
{
	task_t send_task = TASK_NULL;
	ipc_port_t dst_port, src_port;
	boolean_t is_task_64bit;
	mach_msg_header_t *msg;
	mach_msg_trailer_t *trailer;

	int kotype = 0;
	uint32_t msg_size = 0;
	uint64_t msg_flags = KMSG_TRACE_FLAG_TRACED;
	uint32_t num_ports = 0;
	uint32_t send_pid, dst_pid;

	/*
	 * check to see not only if ktracing is enabled, but if we will
	 * _actually_ emit the KMSG_INFO tracepoint. This saves us a
	 * significant amount of processing (and a port lock hold) in
	 * the non-tracing case.
	 */
	if (__probable((kdebug_enable & KDEBUG_TRACE) == 0)) {
		return;
	}
	if (!kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO))) {
		return;
	}

	msg = ikm_header(kmsg);

	dst_port = msg->msgh_remote_port;
	if (!IPC_PORT_VALID(dst_port)) {
		return;
	}

	/*
	 * Message properties / options
	 */
	if ((option & (MACH_SEND_MSG | MACH_RCV_MSG)) == (MACH_SEND_MSG | MACH_RCV_MSG)) {
		msg_flags |= KMSG_TRACE_FLAG_SNDRCV;
	}

	if (msg->msgh_id >= is_iokit_subsystem.start &&
	    msg->msgh_id < is_iokit_subsystem.end + 100) {
		msg_flags |= KMSG_TRACE_FLAG_IOKIT;
	}
	/* magic XPC checkin message id (XPC_MESSAGE_ID_CHECKIN) from libxpc */
	else if (msg->msgh_id == 0x77303074u /* w00t */) {
		msg_flags |= KMSG_TRACE_FLAG_CHECKIN;
	}

	if (msg->msgh_bits & MACH_MSGH_BITS_RAISEIMP) {
		msg_flags |= KMSG_TRACE_FLAG_RAISEIMP;
	}

	if (unsafe_convert_port_to_voucher(ipc_kmsg_get_voucher_port(kmsg))) {
		msg_flags |= KMSG_TRACE_FLAG_VOUCHER;
	}

	/*
	 * Sending task / port
	 */
	send_task = current_task();
	send_pid = task_pid(send_task);

	/* pid 0 is the kernel; daemon/app classification only applies to user tasks */
	if (send_pid != 0) {
		if (task_is_daemon(send_task)) {
			msg_flags |= KMSG_TRACE_FLAG_DAEMON_SRC;
		} else if (task_is_app(send_task)) {
			msg_flags |= KMSG_TRACE_FLAG_APP_SRC;
		}
	}

	is_task_64bit = (send_task->map->max_offset > VM_MAX_ADDRESS);
	if (is_task_64bit) {
		msg_flags |= KMSG_TRACE_FLAG_SND64;
	}

	/* a null local port means the sender expects no reply */
	src_port = msg->msgh_local_port;
	if (src_port) {
		if (src_port->ip_messages.imq_qlimit != MACH_PORT_QLIMIT_DEFAULT) {
			msg_flags |= KMSG_TRACE_FLAG_SRC_NDFLTQ;
		}
		switch (MACH_MSGH_BITS_LOCAL(msg->msgh_bits)) {
		case MACH_MSG_TYPE_MOVE_SEND_ONCE:
			msg_flags |= KMSG_TRACE_FLAG_SRC_SONCE;
			break;
		default:
			break;
		}
	} else {
		msg_flags |= KMSG_TRACE_FLAG_ONEWAY;
	}


	/*
	 * Destination task / port
	 *
	 * dst_pid sentinel values (0xfffffff0..0xfffffff3) mark the cases
	 * where no real receiving pid can be determined.
	 */
	ip_mq_lock(dst_port);
	if (!ip_active(dst_port)) {
		/* dst port is being torn down */
		dst_pid = (uint32_t)0xfffffff0;
	} else if (dst_port->ip_tempowner) {
		msg_flags |= KMSG_TRACE_FLAG_DTMPOWNER;
		if (IIT_NULL != ip_get_imp_task(dst_port)) {
			dst_pid = task_pid(dst_port->ip_imp_task->iit_task);
		} else {
			/* tempowner port with no importance task to attribute to */
			dst_pid = (uint32_t)0xfffffff1;
		}
	} else if (!ip_in_a_space(dst_port)) {
		/* dst_port is otherwise in-transit */
		dst_pid = (uint32_t)0xfffffff2;
	} else {
		if (ip_in_space(dst_port, ipc_space_kernel)) {
			dst_pid = 0;
		} else {
			ipc_space_t dst_space;
			dst_space = ip_get_receiver(dst_port);
			if (dst_space && is_active(dst_space)) {
				dst_pid = task_pid(dst_space->is_task);
				if (task_is_daemon(dst_space->is_task)) {
					msg_flags |= KMSG_TRACE_FLAG_DAEMON_DST;
				} else if (task_is_app(dst_space->is_task)) {
					msg_flags |= KMSG_TRACE_FLAG_APP_DST;
				}
			} else {
				/* receiving task is being torn down */
				dst_pid = (uint32_t)0xfffffff3;
			}
		}
	}

	if (dst_port->ip_messages.imq_qlimit != MACH_PORT_QLIMIT_DEFAULT) {
		msg_flags |= KMSG_TRACE_FLAG_DST_NDFLTQ;
	}
	if (imq_full(&dst_port->ip_messages)) {
		msg_flags |= KMSG_TRACE_FLAG_DSTQFULL;
	}

	kotype = ip_kotype(dst_port);

	ip_mq_unlock(dst_port);

	/* classify the destination by its kernel-object type (port lock not needed) */
	switch (kotype) {
	case IKOT_SEMAPHORE:
		msg_flags |= KMSG_TRACE_FLAG_SEMA;
		break;
	case IKOT_TIMER:
	case IKOT_CLOCK:
		msg_flags |= KMSG_TRACE_FLAG_TIMER;
		break;
	case IKOT_MAIN_DEVICE:
	case IKOT_IOKIT_CONNECT:
	case IKOT_IOKIT_OBJECT:
	case IKOT_IOKIT_IDENT:
	case IKOT_UEXT_OBJECT:
		msg_flags |= KMSG_TRACE_FLAG_IOKIT;
		break;
	default:
		break;
	}

	switch (MACH_MSGH_BITS_REMOTE(msg->msgh_bits)) {
	case MACH_MSG_TYPE_PORT_SEND_ONCE:
		msg_flags |= KMSG_TRACE_FLAG_DST_SONCE;
		break;
	default:
		break;
	}


	/*
	 * Message size / content
	 *
	 * Start from the header-less size; OOL regions are added and
	 * user-visible descriptor sizes subtracted below so msg_size
	 * approximates the payload the sender actually provided.
	 */
	msg_size = msg->msgh_size - sizeof(mach_msg_header_t);

	if (msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
		mach_msg_kbase_t *kbase = mach_msg_header_to_kbase(msg);
		mach_msg_kdescriptor_t *kdesc;
		mach_msg_descriptor_type_t dtype;

		msg_flags |= KMSG_TRACE_FLAG_COMPLEX;

		for (mach_msg_size_t i = 0; i < kbase->msgb_dsc_count; i++) {
			kdesc = &kbase->msgb_dsc_array[i];
			dtype = mach_msg_kdescriptor_type(kdesc);

			switch (dtype) {
			case MACH_MSG_PORT_DESCRIPTOR:
				num_ports++;
				break;
			case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
			case MACH_MSG_OOL_DESCRIPTOR: {
				mach_msg_ool_descriptor_t *dsc = &kdesc->kdesc_memory;

				msg_flags |= KMSG_TRACE_FLAG_OOLMEM;
				msg_size += dsc->size;
				/*
				 * Large physical copies without deallocate, and all
				 * small regions, count as physical copy; everything
				 * else is treated as virtual copy.
				 */
				if (dsc->size > msg_ool_size_small &&
				    (dsc->copy == MACH_MSG_PHYSICAL_COPY) &&
				    !dsc->deallocate) {
					msg_flags |= KMSG_TRACE_FLAG_PCPY;
				} else if (dsc->size <= msg_ool_size_small) {
					msg_flags |= KMSG_TRACE_FLAG_PCPY;
				} else {
					msg_flags |= KMSG_TRACE_FLAG_VCPY;
				}
			} break;
			case MACH_MSG_OOL_PORTS_DESCRIPTOR:
				num_ports += kdesc->kdesc_port_array.count;
				break;
			case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
				num_ports++;
				msg_flags |= KMSG_TRACE_FLAG_GUARDED_DESC;
				break;
			default:
				break;
			}
			msg_size -= ikm_user_desc_size(dtype, is_task_64bit);
		}
	}

	/*
	 * Trailer contents
	 */
	trailer = (mach_msg_trailer_t *)ipc_kmsg_get_trailer(kmsg);
	/*
	 * NOTE(review): the `<=` comparison looks inverted — reading
	 * msgh_sender would seem to require the trailer to be at least
	 * sizeof(mach_msg_security_trailer_t).  This matches upstream
	 * behavior; verify before changing.
	 */
	if (trailer->msgh_trailer_size <= sizeof(mach_msg_security_trailer_t)) {
		mach_msg_security_trailer_t *strailer;
		strailer = (mach_msg_security_trailer_t *)trailer;
		/*
		 * verify the sender PID: replies from the kernel often look
		 * like self-talk because the sending port is not reset.
		 */
		if (memcmp(&strailer->msgh_sender,
		    &KERNEL_SECURITY_TOKEN,
		    sizeof(KERNEL_SECURITY_TOKEN)) == 0) {
			send_pid = 0;
			msg_flags &= ~(KMSG_TRACE_FLAG_APP_SRC | KMSG_TRACE_FLAG_DAEMON_SRC);
		}
	}

	/* pack: arg3 = (msgh_id << 32) | payload size; arg4 = flags | port count */
	KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END,
	    (uintptr_t)send_pid,
	    (uintptr_t)dst_pid,
	    (uintptr_t)(((uint64_t)msg->msgh_id << KMSG_TRACE_ID_SHIFT) | msg_size),
	    (uintptr_t)(
		    ((msg_flags & KMSG_TRACE_FLAGS_MASK) << KMSG_TRACE_FLAGS_SHIFT) |
		    ((num_ports & KMSG_TRACE_PORTS_MASK) << KMSG_TRACE_PORTS_SHIFT)
		    )
	    );
}

#endif
5793