/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	ipc/ipc_pset.c
 *	Author:	Rich Draves
 *	Date:	1989
 *
 *	Functions to manipulate IPC port sets.
 */

#include <mach/port.h>
#include <mach/kern_return.h>
#include <mach/message.h>
#include <ipc/ipc_mqueue.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_policy.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_right.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_kmsg.h>
#include <kern/policy_internal.h>

#include <kern/kern_types.h>

#include <vm/vm_map.h>
#include <libkern/section_keywords.h>
#include <pthread/priority_private.h>

/* processor_set stole ipc_pset_init */
static void
ipc_port_set_init(ipc_pset_t pset, mach_port_name_t name, int policy)
{
	waitq_init(&pset->ips_wqset, WQT_PORT_SET, policy | SYNC_POLICY_FIFO);
	klist_init(&pset->ips_klist);
	pset->ips_wqset.wqset_index = MACH_PORT_INDEX(name);
}

/*
 *	Routine:	ipc_pset_alloc
 *	Purpose:
 *		Allocate a port set.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NO_SPACE		No room for an entry in the space.
 */

kern_return_t
ipc_pset_alloc(
	ipc_space_t             space,
	mach_port_name_t        *namep,
	ipc_pset_t              *psetp)
{
	ipc_pset_t pset;
	mach_port_name_t name;
	kern_return_t kr;

	kr = ipc_object_alloc(space, IOT_PORT_SET,
	    MACH_PORT_TYPE_PORT_SET, 0,
	    &name, (ipc_object_t *) &pset);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* space is locked */

	ipc_port_set_init(pset, name, SYNC_POLICY_INIT_LOCKED);
	/* port set is locked */

	is_write_unlock(space);

	*namep = name;
	*psetp = pset;
	return KERN_SUCCESS;
}
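
/*
 * For orientation, a minimal userspace sketch of the path that ends up here
 * (standard Mach APIs; variable names are illustrative, not from this file):
 *
 *	mach_port_name_t pset_name;
 *	kern_return_t kr = mach_port_allocate(mach_task_self(),
 *	    MACH_PORT_RIGHT_PORT_SET, &pset_name);
 *	// then move an existing receive right into the new set:
 *	kr = mach_port_insert_member(mach_task_self(), recv_name, pset_name);
 */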

/*
 *	Routine:	ipc_pset_alloc_name
 *	Purpose:
 *		Allocate a port set, with a specific name.
 *	Conditions:
 *		Nothing locked.  If successful, the port set is returned
 *		locked.  (The caller doesn't have a reference.)
 *	Returns:
 *		KERN_SUCCESS		The port set is allocated.
 *		KERN_INVALID_TASK	The space is dead.
 *		KERN_NAME_EXISTS	The name already denotes a right.
 */

kern_return_t
ipc_pset_alloc_name(
	ipc_space_t             space,
	mach_port_name_t        name,
	ipc_pset_t              *psetp)
{
	return ipc_object_alloc_name(space, IOT_PORT_SET,
	           MACH_PORT_TYPE_PORT_SET, 0,
	           name, (ipc_object_t *)psetp, ^(ipc_object_t object){
		ipc_port_set_init(ips_object_to_pset(object), name,
		SYNC_POLICY_INIT_LOCKED);
	});
}
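
/*
 * A hedged userspace-facing sketch (the 0x1103 name is made up): a caller
 * that picks its own name, e.g.
 *
 *	mach_port_allocate_name(mach_task_self(),
 *	    MACH_PORT_RIGHT_PORT_SET, 0x1103);
 *
 * lands in this routine with that caller-chosen name.
 */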


/*
 *	Routine:	ipc_pset_alloc_special
 *	Purpose:
 *		Allocate a port set in a special space.
 *		The new port set is returned with one ref.
 *		If unsuccessful, IPS_NULL is returned.
 *	Conditions:
 *		Nothing locked.
 */
ipc_pset_t
ipc_pset_alloc_special(
	__assert_only ipc_space_t space)
{
	ipc_pset_t pset;

	assert(space != IS_NULL);
	assert(!is_active(space));

	pset = ips_object_to_pset(io_alloc(IOT_PORT_SET, Z_WAITOK | Z_ZERO));
	if (pset == IPS_NULL) {
		return IPS_NULL;
	}

	os_atomic_init(&pset->ips_object.io_bits, io_makebits(IOT_PORT_SET));
	os_atomic_init(&pset->ips_object.io_references, 1);

	ipc_port_set_init(pset, MACH_PORT_SPECIAL_DEFAULT, 0);

	return pset;
}


/*
 *	Routine:	ipc_pset_destroy
 *	Purpose:
 *		Destroys a port_set.
 *	Conditions:
 *		The port_set is locked and alive.
 *		The caller has a reference, which is consumed.
 *		Afterwards, the port_set is unlocked and dead.
 */

void
ipc_pset_destroy(
	ipc_space_t     space,
	ipc_pset_t      pset)
{
	waitq_link_list_t free_l = { };

	assert(ips_active(pset));

	io_bits_andnot(ips_to_object(pset), IO_BITS_ACTIVE);

	/*
	 * Set all waiters on the portset running to
	 * discover the change.
	 *
	 * Then under the same lock hold, deinit the waitq-set,
	 * which will remove all the member message queues,
	 * linkages and clean up preposts.
	 */
	ipc_mqueue_changed(space, &pset->ips_wqset);
	waitq_invalidate(&pset->ips_wqset);
	waitq_set_unlink_all_locked(&pset->ips_wqset, &free_l);

	ips_mq_unlock(pset);

	ips_release(pset);      /* consume the ref our caller gave us */

	waitq_link_free_list(WQT_PORT_SET, &free_l);
}

/*
 *	Routine:	ipc_pset_finalize
 *	Purpose:
 *		Called on last reference deallocate to
 *		free any remaining data associated with the pset.
 *	Conditions:
 *		Nothing locked.
 */
void
ipc_pset_finalize(
	ipc_pset_t      pset)
{
	waitq_deinit(&pset->ips_wqset);
}


#pragma mark - kevent support

/*
 * Kqueue EVFILT_MACHPORT support
 *
 * - kn_ipc_{port,pset} points to the monitored ipc port or pset. If the knote
 *   is using a kqwl, it is eligible to participate in sync IPC overrides.
 *
 *   For the first such sync IPC message in the port, we set up the port's
 *   turnstile to directly push on the kqwl's turnstile (which is in turn set
 *   up during filt_machportattach). If userspace responds to the message, the
 *   turnstile push is severed at the point of reply. If userspace returns
 *   without responding to the message, we sever the turnstile push at the
 *   point of reenabling the knote to deliver the next message. This is why the
 *   knote needs to remember the port. For more details, see also
 *   filt_machport_turnstile_complete.
 *
 *   If there are multiple other sync IPC messages in the port, messages 2 to n
 *   redirect their turnstile push to the kqwl through an intermediary "knote"
 *   turnstile which, in turn, pushes on the kqwl turnstile. This knote
 *   turnstile is stored in the kn_hook. See also
 *   filt_machport_turnstile_prepare_lazily.
 *
 * - (in/out) ext[0] holds a mach_vm_address_t to a userspace buffer
 *   that can be used to direct-deliver messages when
 *   MACH_RCV_MSG is set in kn_sfflags
 *
 * - (in/out) ext[1] holds a mach_msg_size_t representing the size
 *   of the userspace buffer held in ext[0].
 *
 * - (out)    ext[2] is used to deliver qos information
 *   about the send queue to userspace.
 *
 * - (abused) ext[3] is used in kernel to hold a reference to the first port
 *   with a turnstile that participates in sync IPC override. For more details,
 *   see filt_machport_stash_port
 *
 * - kn_hook is optionally a "knote" turnstile. It is used as the inheritor
 *   of turnstiles for rights copied out as part of direct message delivery
 *   when they can participate in sync IPC override.
 *
 *   It is used to atomically neuter the sync IPC override when the knote is
 *   re-enabled.
 *
 */
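
/*
 * A minimal userspace sketch of how the ext[] conventions above are driven
 * (hedged: the buffer size, `recv_name`, and flag choices here are
 * illustrative, not mandated by this file):
 *
 *	#include <sys/event.h>
 *	#include <mach/mach.h>
 *
 *	int kq = kqueue();
 *	static char buf[2048];          // direct-delivery buffer
 *	struct kevent64_s ev;
 *	EV_SET64(&ev, recv_name, EVFILT_MACHPORT, EV_ADD | EV_DISPATCH,
 *	    MACH_RCV_MSG,               // ask for direct message delivery
 *	    0, 0,
 *	    (uint64_t)(uintptr_t)buf,   // ext[0]: userspace buffer address
 *	    sizeof(buf));               // ext[1]: userspace buffer size
 *	(void)kevent64(kq, &ev, 1, NULL, 0, 0, NULL);
 *	// On delivery, ext[1] reports the received size and ext[2] the
 *	// message QoS information described above.
 */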

#include <sys/event.h>
#include <sys/errno.h>

static int
filt_pset_filter_result(ipc_pset_t pset)
{
	ips_mq_lock_held(pset);

	if (!waitq_is_valid(&pset->ips_wqset)) {
		return 0;
	}

	return waitq_set_first_prepost(&pset->ips_wqset, WQS_PREPOST_PEEK) ?
	       FILTER_ACTIVE : 0;
}

static int
filt_port_filter_result(struct knote *kn, ipc_port_t port)
{
	struct kqueue *kqwl = knote_get_kq(kn);
	ipc_kmsg_t first;
	int result = 0;

	ip_mq_lock_held(port);

	if (kn->kn_sfflags & MACH_RCV_MSG) {
		result = FILTER_RESET_EVENT_QOS;
	}

	if (!waitq_is_valid(&port->ip_waitq)) {
		return result;
	}

	if (port->ip_kernel_iotier_override != kqueue_get_iotier_override(kqwl)) {
		kqueue_set_iotier_override(kqwl, port->ip_kernel_iotier_override);
		result |= FILTER_ADJUST_EVENT_IOTIER_BIT;
	}

	first = ipc_kmsg_queue_first(&port->ip_messages.imq_messages);
	if (!first) {
		return result;
	}

	result = FILTER_ACTIVE;
	if (kn->kn_sfflags & MACH_RCV_MSG) {
		result |= FILTER_ADJUST_EVENT_QOS(first->ikm_qos_override);
	}

#if CONFIG_PREADOPT_TG
	struct thread_group *tg = ipc_kmsg_get_thread_group(first);
	if (tg) {
		struct kqueue *kq = knote_get_kq(kn);
		kqueue_set_preadopted_thread_group(kq, tg,
		    first->ikm_qos_override);
	}
#endif

	return result;
}

struct turnstile *
filt_ipc_kqueue_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT || kn->kn_filter == EVFILT_WORKLOOP);
	return kqueue_turnstile(knote_get_kq(kn));
}

bool
filt_machport_kqueue_has_turnstile(struct knote *kn)
{
	assert(kn->kn_filter == EVFILT_MACHPORT);
	return ((kn->kn_sfflags & MACH_RCV_MSG) || (kn->kn_sfflags & MACH_RCV_SYNC_PEEK))
	       && (kn->kn_flags & EV_DISPATCH);
}

/*
 * Stashes a port that participates in sync IPC override on the knote until
 * the knote is re-enabled.
 *
 * It returns:
 * - the turnstile to use as an inheritor for the stashed port
 * - the kind of stash that happened as PORT_SYNC_* value among:
 *   o not stashed (no sync IPC support)
 *   o stashed in the knote (in kn_ext[3])
 *   o to be hooked to the kn_hook knote
 */
struct turnstile *
filt_machport_stash_port(struct knote *kn, ipc_port_t port, int *link)
{
	struct turnstile *ts = TURNSTILE_NULL;

	if (kn->kn_filter == EVFILT_WORKLOOP) {
		assert(kn->kn_ipc_port == NULL);
		kn->kn_ipc_port = port;
		ip_reference(port);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
		ts = filt_ipc_kqueue_turnstile(kn);
	} else if (!filt_machport_kqueue_has_turnstile(kn)) {
		if (link) {
			*link = PORT_SYNC_LINK_NO_LINKAGE;
		}
	} else if (kn->kn_ext[3] == 0) {
		ip_reference(port);
		kn->kn_ext[3] = (uintptr_t)port;
		ts = filt_ipc_kqueue_turnstile(kn);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_KNOTE;
		}
	} else {
		ts = (struct turnstile *)knote_kn_hook_get_raw(kn);
		if (link) {
			*link = PORT_SYNC_LINK_WORKLOOP_STASH;
		}
	}

	return ts;
}

/*
 * Lazily prepare a turnstile so that filt_machport_stash_port()
 * can be called with the mqueue lock held.
 *
 * It will allocate a turnstile in kn_hook if:
 * - the knote supports sync IPC override,
 * - we already stashed a port in kn_ext[3],
 * - the object that will be copied out has a chance to ask to be stashed.
 *
 * It is set up so that its inheritor is the workloop turnstile that has been
 * allocated when this knote was attached.
 */
void
filt_machport_turnstile_prepare_lazily(
	struct knote *kn,
	mach_msg_type_name_t msgt_name,
	ipc_port_t port)
{
	/* This is called from within filt_machportprocess */
	assert((kn->kn_status & KN_SUPPRESSED) && (kn->kn_status & KN_LOCKED));

	if (!filt_machport_kqueue_has_turnstile(kn)) {
		return;
	}

	if (kn->kn_ext[3] == 0 || knote_kn_hook_get_raw(kn)) {
		return;
	}

	struct turnstile *ts = filt_ipc_kqueue_turnstile(kn);
	if ((msgt_name == MACH_MSG_TYPE_PORT_SEND_ONCE && port->ip_specialreply) ||
	    (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE)) {
		struct turnstile *kn_ts = turnstile_alloc();
		struct turnstile *ts_store;
		kn_ts = turnstile_prepare((uintptr_t)kn, &ts_store, kn_ts, TURNSTILE_KNOTE);
		knote_kn_hook_set_raw(kn, ts_store);

		turnstile_update_inheritor(kn_ts, ts,
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE);
		turnstile_cleanup();
	}
}

static void
filt_machport_turnstile_complete_port(struct knote *kn, ipc_port_t port)
{
	struct turnstile *ts = TURNSTILE_NULL;

	ip_mq_lock(port);
	if (port->ip_specialreply) {
		/*
		 * If the reply has been sent to the special reply port already,
		 * then the special reply port may already be reused to do something
		 * entirely different.
		 *
		 * However, the only reason for it to still point to this knote is
		 * that it's still waiting for a reply, so when this is the case,
		 * neuter the linkage.
		 */
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    port->ip_sync_inheritor_knote == kn) {
			ipc_port_adjust_special_reply_port_locked(port, NULL,
			    (IPC_PORT_ADJUST_SR_NONE | IPC_PORT_ADJUST_SR_ENABLE_EVENT), FALSE);
			/* port unlocked */
		} else {
			ip_mq_unlock(port);
		}
	} else {
		/*
		 * For receive rights, if their IMQ_KNOTE() is still this
		 * knote, then sever the link.
		 */
		if (port->ip_sync_link_state == PORT_SYNC_LINK_WORKLOOP_KNOTE &&
		    port->ip_messages.imq_inheritor_knote == kn) {
			ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
			ts = port_send_turnstile(port);
		}
		if (ts) {
			turnstile_reference(ts);
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		ip_mq_unlock(port);

		if (ts) {
			turnstile_update_inheritor_complete(ts,
			    TURNSTILE_INTERLOCK_NOT_HELD);
			turnstile_deallocate(ts);
		}
	}

	ip_release(port);
}

void
filt_wldetach_sync_ipc(struct knote *kn)
{
	ipc_port_t port = kn->kn_ipc_port;
	filt_machport_turnstile_complete_port(kn, port);
	kn->kn_ipc_port = IP_NULL;
}

/*
 * Other half of filt_machport_turnstile_prepare_lazily()
 *
 * This is serialized by the knote state machine.
 */
static void
filt_machport_turnstile_complete(struct knote *kn)
{
	if (kn->kn_ext[3]) {
		ipc_port_t port = (ipc_port_t)kn->kn_ext[3];
		filt_machport_turnstile_complete_port(kn, port);
		kn->kn_ext[3] = 0;
	}

	struct turnstile *ts = knote_kn_hook_get_raw(kn);
	if (ts) {
		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
		    TURNSTILE_IMMEDIATE_UPDATE);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

		struct turnstile *ts_store = ts;
		turnstile_complete((uintptr_t)kn, (struct turnstile **)&ts_store, &ts, TURNSTILE_KNOTE);
		knote_kn_hook_set_raw(kn, ts_store);

		turnstile_cleanup();

		assert(ts);
		turnstile_deallocate(ts);
	}
}

static void
filt_machport_link(struct klist *klist, struct knote *kn)
{
	struct knote *hd = SLIST_FIRST(klist);

	if (hd && filt_machport_kqueue_has_turnstile(kn)) {
		SLIST_INSERT_AFTER(hd, kn, kn_selnext);
	} else {
		SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	}
}

static void
filt_machport_unlink(struct klist *klist, struct knote *kn)
{
	struct knote **knprev;

	KNOTE_DETACH(klist, kn);

	/* make sure the first knote is a knote we can push on */
	SLIST_FOREACH_PREVPTR(kn, knprev, klist, kn_selnext) {
		if (filt_machport_kqueue_has_turnstile(kn)) {
			*knprev = SLIST_NEXT(kn, kn_selnext);
			SLIST_INSERT_HEAD(klist, kn, kn_selnext);
			break;
		}
	}
}

int
filt_wlattach_sync_ipc(struct knote *kn)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	ipc_space_t space = current_space();
	ipc_entry_bits_t bits;
	ipc_object_t object;
	ipc_port_t port = IP_NULL;
	int error = 0;

	if (ipc_right_lookup_read(space, name, &bits, &object) != KERN_SUCCESS) {
		return ENOENT;
	}
	/* object is locked and active */

	if (bits & MACH_PORT_TYPE_RECEIVE) {
		port = ip_object_to_port(object);
		if (port->ip_specialreply || ip_is_kobject(port)) {
			error = ENOENT;
		}
	} else if (bits & MACH_PORT_TYPE_SEND_ONCE) {
		port = ip_object_to_port(object);
		if (!port->ip_specialreply) {
			error = ENOENT;
		}
	} else {
		error = ENOENT;
	}
	if (error) {
		io_unlock(object);
		return error;
	}

	if (port->ip_sync_link_state == PORT_SYNC_LINK_ANY) {
		io_unlock(object);
		/*
		 * We cannot start a sync IPC inheritance chain, only extend an
		 * existing one.
		 * Note: this can also happen if the inheritance chain broke
		 * because the original requestor died.
		 */
		return ENOENT;
	}

	if (port->ip_specialreply) {
		ipc_port_adjust_special_reply_port_locked(port, kn,
		    IPC_PORT_ADJUST_SR_LINK_WORKLOOP, FALSE);
	} else {
		ipc_port_adjust_port_locked(port, kn, FALSE);
	}

	/* make sure the port was stashed */
	assert(kn->kn_ipc_port == port);

	/* port has been unlocked by ipc_port_adjust_* */

	return 0;
}

static int
filt_psetattach(struct knote *kn, ipc_pset_t pset)
{
	int result = 0;

	ips_reference(pset);
	kn->kn_ipc_pset = pset;

	filt_machport_link(&pset->ips_klist, kn);
	result = filt_pset_filter_result(pset);
	ips_mq_unlock(pset);

	return result;
}

static int
filt_portattach(struct knote *kn, ipc_port_t port)
{
	struct turnstile *send_turnstile = TURNSTILE_NULL;
	int result = 0;

	if (port->ip_specialreply) {
		/*
		 * Registering for kevents on special reply ports
		 * isn't supported for two reasons:
		 *
		 * 1. it really makes very little sense for a port that
		 *    is supposed to be used synchronously
		 *
		 * 2. the port's ip_klist field will be used to
		 *    store the receive turnstile, so we can't possibly
		 *    attach a knote to it anyway.
		 */
		ip_mq_unlock(port);
		knote_set_error(kn, ENOTSUP);
		return 0;
	}

	ip_reference(port);
	kn->kn_ipc_port = port;
	if (port->ip_sync_link_state != PORT_SYNC_LINK_ANY) {
		/*
		 * We're attaching a port that used to have an IMQ_KNOTE,
		 * clobber this state, we'll fix up its turnstile inheritor below.
		 */
		ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
	}

	filt_machport_link(&port->ip_klist, kn);
	result = filt_port_filter_result(kn, port);

	/*
	 * Update the port's turnstile inheritor
	 *
	 * Unlike filt_machportdetach(), we don't have to care about races for
	 * turnstile_workloop_pusher_info(): filt_machport_link() doesn't affect
	 * already pushing knotes, and if the current one becomes the new
	 * pusher, it'll only be visible when turnstile_workloop_pusher_info()
	 * returns.
	 */
	send_turnstile = port_send_turnstile(port);
	if (send_turnstile) {
		turnstile_reference(send_turnstile);
		ipc_port_send_update_inheritor(port, send_turnstile,
		    TURNSTILE_IMMEDIATE_UPDATE);

		/*
		 * rdar://problem/48861190
		 *
		 * When a listener connection resumes a peer,
		 * updating the inheritor above has moved the push
		 * from the current thread to the workloop.
		 *
		 * However, we haven't told the workloop yet
		 * that it needs a thread request, and we risk
		 * being preempted as soon as we drop the port
		 * lock below.
		 *
		 * To avoid this, disable preemption and let kevent
		 * reenable it after it takes the kqlock.
		 */
		disable_preemption();
		result |= FILTER_THREADREQ_NODEFEER;
	}

	ip_mq_unlock(port);

	if (send_turnstile) {
		turnstile_update_inheritor_complete(send_turnstile,
		    TURNSTILE_INTERLOCK_NOT_HELD);
		turnstile_deallocate(send_turnstile);
	}

	return result;
}

static int
filt_machportattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
	mach_port_name_t name = (mach_port_name_t)kn->kn_id;
	ipc_space_t space = current_space();
	ipc_entry_bits_t bits;
	ipc_object_t object;
	kern_return_t kr;

	kn->kn_flags &= ~EV_EOF;
	kn->kn_ext[3] = 0;

	if (filt_machport_kqueue_has_turnstile(kn)) {
		/*
		 * If the filter is likely to support sync IPC override,
		 * and it happens to be attaching to a workloop,
		 * make sure the workloop has an allocated turnstile.
		 */
		kqueue_alloc_turnstile(knote_get_kq(kn));
	}

	kr = ipc_right_lookup_read(space, name, &bits, &object);

	if (kr != KERN_SUCCESS) {
		knote_set_error(kn, ENOENT);
		return 0;
	}
	/* object is locked and active */

	if (bits & MACH_PORT_TYPE_PORT_SET) {
		kn->kn_filtid = EVFILTID_MACH_PORT_SET;
		return filt_psetattach(kn, ips_object_to_pset(object));
	}

	if (bits & MACH_PORT_TYPE_RECEIVE) {
		kn->kn_filtid = EVFILTID_MACH_PORT;
		return filt_portattach(kn, ip_object_to_port(object));
	}

	io_unlock(object);
	knote_set_error(kn, ENOTSUP);
	return 0;
}

static void
filt_psetdetach(struct knote *kn)
{
	ipc_pset_t pset = kn->kn_ipc_pset;

	filt_machport_turnstile_complete(kn);

	ips_mq_lock(pset);

	if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) {
		/*
		 * ipc_mqueue_changed() already unhooked this knote from the waitq.
		 */
	} else {
		filt_machport_unlink(&pset->ips_klist, kn);
	}

	kn->kn_ipc_pset = IPS_NULL;
	ips_mq_unlock(pset);
	ips_release(pset);
}

static void
filt_portdetach(struct knote *kn)
{
	ipc_port_t port = kn->kn_ipc_port;
	struct turnstile *send_turnstile = TURNSTILE_NULL;

	filt_machport_turnstile_complete(kn);

	ip_mq_lock(port);
	if ((kn->kn_status & KN_VANISHED) || (kn->kn_flags & EV_EOF)) {
		/*
		 * ipc_mqueue_changed() already unhooked this knote from the waitq.
		 */
	} else {
		/*
		 * When the knote being detached is the first one in the list,
		 * then unlinking the knote *and* updating the turnstile inheritor
		 * need to happen atomically with respect to the callers of
		 * turnstile_workloop_pusher_info().
		 *
		 * The caller of turnstile_workloop_pusher_info() will use the kq req
		 * lock (and hence the kqlock), so we just need to hold the kqlock too.
		 */
		assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
		if (kn == SLIST_FIRST(&port->ip_klist)) {
			send_turnstile = port_send_turnstile(port);
		}
		filt_machport_unlink(&port->ip_klist, kn);
		struct kqueue *kq = knote_get_kq(kn);
		kqueue_set_iotier_override(kq, THROTTLE_LEVEL_END);
	}

	if (send_turnstile) {
		turnstile_reference(send_turnstile);
		ipc_port_send_update_inheritor(port, send_turnstile,
		    TURNSTILE_IMMEDIATE_UPDATE);
	}

	/* Clear the knote pointer once the knote has been removed from turnstile */
	kn->kn_ipc_port = IP_NULL;
	ip_mq_unlock(port);

	if (send_turnstile) {
		turnstile_update_inheritor_complete(send_turnstile,
		    TURNSTILE_INTERLOCK_NOT_HELD);
		turnstile_deallocate(send_turnstile);
	}

	ip_release(port);
}

/*
 * filt_{pset,port}event - deliver events into the mach port filter
 *
 * Mach port message arrival events are currently only posted via the
 * kqueue filter routine for ports.
 *
 * If there is a message at the head of the queue,
 * we indicate that the knote should go active.  If
 * the message is to be direct-received, we adjust the
 * QoS of the knote according to the requested and override
 * QoS of that first message.
 *
 * When the knote is for a port-set, the hint is non-zero
 * and is the waitq which is posting.
 */
static int
filt_psetevent(struct knote *kn __unused, long hint __assert_only)
{
	/*
	 * When called for a port-set,
	 * the posting port waitq is locked.
	 *
	 * waitq_set_first_prepost()
	 * in filt_pset_filter_result()
	 * would try to lock it and be very sad.
	 *
	 * Just trust what we know to be true.
	 */
	assert(hint != 0);
	return FILTER_ACTIVE;
}

static int
filt_portevent(struct knote *kn, long hint __assert_only)
{
	assert(hint == 0);
	return filt_port_filter_result(kn, kn->kn_ipc_port);
}

void
ipc_pset_prepost(struct waitq_set *wqs, struct waitq *waitq)
{
	KNOTE(&ips_from_waitq(wqs)->ips_klist, (long)waitq);
}

static void
filt_machporttouch(struct knote *kn, struct kevent_qos_s *kev)
{
	/*
	 * Specifying MACH_RCV_MSG or MACH_RCV_SYNC_PEEK during attach results in
	 * allocation of a turnstile. Modifying the filter flags to include these
	 * flags later, without a turnstile being allocated, leads to
	 * inconsistencies.
	 */
	if ((kn->kn_sfflags ^ kev->fflags) & (MACH_RCV_MSG | MACH_RCV_SYNC_PEEK)) {
		kev->flags |= EV_ERROR;
		kev->data = EINVAL;
		return;
	}

	/* copy in new settings and save off new input fflags */
	kn->kn_sfflags = kev->fflags;
	kn->kn_ext[0] = kev->ext[0];
	kn->kn_ext[1] = kev->ext[1];

	if (kev->flags & EV_ENABLE) {
		/*
		 * If the knote is being enabled, make sure there's no lingering
		 * IPC overrides from the previous message delivery.
		 */
		filt_machport_turnstile_complete(kn);
	}
}

static int
filt_psettouch(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_pset_t pset = kn->kn_ipc_pset;
	int result = 0;

	filt_machporttouch(kn, kev);
	if (kev->flags & EV_ERROR) {
		return 0;
	}

	ips_mq_lock(pset);
	result = filt_pset_filter_result(pset);
	ips_mq_unlock(pset);

	return result;
}

static int
filt_porttouch(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_port_t port = kn->kn_ipc_port;
	int result = 0;

	filt_machporttouch(kn, kev);
	if (kev->flags & EV_ERROR) {
		return 0;
	}

	ip_mq_lock(port);
	result = filt_port_filter_result(kn, port);
	ip_mq_unlock(port);

	return result;
}

static int
filt_machportprocess(
	struct knote            *kn,
	struct kevent_qos_s     *kev,
	ipc_object_t            object,
	ipc_object_type_t       otype)
{
	thread_t self = current_thread();
	kevent_ctx_t kectx = NULL;

	wait_result_t wresult;
	mach_msg_option64_t option64;
	mach_vm_address_t msg_addr;
	mach_msg_size_t max_msg_size;
	mach_msg_recv_result_t msgr;

	int result = FILTER_ACTIVE;

	/* Capture current state */
	knote_fill_kevent(kn, kev, MACH_PORT_NULL);

	/* Clear port reference, use ext3 as size of msg aux data */
	kev->ext[3] = 0;

	/* If already deallocated/moved return one last EOF event */
	if (kev->flags & EV_EOF) {
		return FILTER_ACTIVE | FILTER_RESET_EVENT_QOS;
	}

	/*
	 * Only honor supported receive options.  If no options are
	 * provided, just force a MACH_RCV_LARGE to detect the
	 * name of the port and the size of the waiting message.
	 *
	 * Extend kn_sfflags to 64 bits.
	 *
	 * Add MACH_RCV_TIMEOUT to never wait (in case someone concurrently
	 * dequeued the message that made this knote active already).
	 */
	option64 = kn->kn_sfflags & (MACH_RCV_MSG | MACH_RCV_LARGE |
	    MACH_RCV_LARGE_IDENTITY | MACH_RCV_TRAILER_MASK |
	    MACH_RCV_VOUCHER | MACH_MSG_STRICT_REPLY);
	option64 = ipc_current_user_policy(current_task(), option64);

	if (option64 & MACH_RCV_MSG) {
		msg_addr = (mach_vm_address_t) kn->kn_ext[0];
		max_msg_size = (mach_msg_size_t) kn->kn_ext[1];

		/*
		 * Copy out the incoming message as vector, and append aux data
		 * immediately after the message proper (if any) and report its
		 * size on ext3.
		 *
		 * Note: MACH64_RCV_LINEAR_VECTOR is how the receive machinery
		 *       knows this comes from kevent (see comment in
		 *       mach_msg_receive_too_large()).
		 */
		option64 |= (MACH64_MSG_VECTOR | MACH64_RCV_LINEAR_VECTOR);

		/*
		 * If the kevent didn't specify a buffer and length, carve a buffer
		 * from the filter processing data according to the flags.
		 */
		if (max_msg_size == 0) {
			kectx = kevent_get_context(self);
			msg_addr = (mach_vm_address_t)kectx->kec_data_out;
			max_msg_size = (mach_msg_size_t)kectx->kec_data_resid;
			option64 |= (MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY);
			/* Receive vector linearly onto stack */
			if (kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) {
				option64 |= MACH64_RCV_STACK;
			}
		}
	} else {
		/* just detect the port name (if a set) and size of the first message */
		option64 = MACH_RCV_LARGE;
		msg_addr = 0;
		max_msg_size = 0;
	}
	option64 |= MACH_RCV_TIMEOUT; /* never wait */

	/*
	 * Set up to receive a message or the notification of a
	 * too large message.  But never allow this call to wait.
	 * If the user provided additional options, like trailer
	 * options, pass those through here.  But we don't support
	 * scatter lists through this interface.
	 *
	 * Note: while in filt_machportprocess(),
	 * the knote has a reference on `object` that we can borrow.
	 */

	/* Set up message proper receive params on thread */
	bzero(&self->ith_receive, sizeof(self->ith_receive));
	self->ith_recv_bufs = (mach_msg_recv_bufs_t){
		.recv_msg_addr = msg_addr,
		.recv_msg_size = max_msg_size,
	};
	self->ith_object = object;
	self->ith_option = option64;
	self->ith_knote = kn;

	ipc_object_lock(object, otype);

	wresult = ipc_mqueue_receive_on_thread_and_unlock(io_waitq(object),
	    MACH_MSG_TIMEOUT_NONE, THREAD_INTERRUPTIBLE, self);
	/* port unlocked */

	/* If we timed out, or the process is exiting, just return zero. */
	if (wresult == THREAD_RESTART || self->ith_state == MACH_RCV_TIMED_OUT) {
		assert(self->turnstile != TURNSTILE_NULL);
		self->ith_knote = ITH_KNOTE_NULL;
		return 0;
	}

	assert(wresult == THREAD_NOT_WAITING);
	assert(self->ith_state != MACH_RCV_IN_PROGRESS);

	/*
	 * If we weren't attempting to receive a message
	 * directly, we need to return the port name in
	 * the kevent structure.
	 */
	if ((option64 & MACH_RCV_MSG) != MACH_RCV_MSG) {
		assert(self->ith_state == MACH_RCV_TOO_LARGE);
		assert(self->ith_kmsg == IKM_NULL);
		kev->data = self->ith_receiver_name;
		self->ith_knote = ITH_KNOTE_NULL;
		return result;
	}

#if CONFIG_PREADOPT_TG
	/*
	 * If we're the first EVFILT_MACHPORT knote that is being processed for this
	 * kqwl, then make sure to preadopt the thread group from the kmsg we're
	 * about to receive. This is to make sure that we fix up the preadoption
	 * thread group correctly on the receive side for the first message.
	 */
	struct kqueue *kq = knote_get_kq(kn);

	if (self->ith_kmsg) {
		struct thread_group *tg = ipc_kmsg_get_thread_group(self->ith_kmsg);

		kqueue_process_preadopt_thread_group(self, kq, tg);
	}
#endif
	if (otype == IOT_PORT) {
		ipc_port_t port = ip_object_to_port(object);
		struct kqueue *kqwl = knote_get_kq(kn);
		if (port->ip_kernel_iotier_override != kqueue_get_iotier_override(kqwl)) {
			/*
			 * Lock the port to make sure port->ip_kernel_iotier_override does
			 * not change while updating the kqueue override, else kqueue could
			 * have old iotier value.
			 */
			ip_mq_lock(port);
			kqueue_set_iotier_override(kqwl, port->ip_kernel_iotier_override);
			ip_mq_unlock(port);
			result |= FILTER_ADJUST_EVENT_IOTIER_BIT;
		}
	}

	/*
	 * Attempt to receive the message directly, returning
	 * the results in the fflags field.
	 */
	io_reference(object);
	kev->fflags = mach_msg_receive_results(&msgr);

	/* kmsg and object reference consumed */

	/*
	 * If the user asked for the identity of the port containing a
	 * too-large message, return it in the data field (as we
	 * do for messages we didn't try to receive).
	 */
	kev->ext[1] = msgr.msgr_msg_size + msgr.msgr_trailer_size;
	kev->ext[3] = msgr.msgr_aux_size;   /* Only lower 32 bits of ext3 are used */
	if (kev->fflags == MACH_RCV_TOO_LARGE &&
	    (option64 & MACH_RCV_LARGE_IDENTITY)) {
		kev->data = msgr.msgr_recv_name;
	} else {
		kev->data = MACH_PORT_NULL;
	}

	/*
	 * If we used a data buffer carved out from the filt_process data,
	 * store the address used in the knote and adjust the residual and
	 * other parameters for future use.
	 */
	if (kectx && kev->fflags != MACH_RCV_TOO_LARGE) {
		mach_vm_size_t size = msgr.msgr_msg_size +
		    msgr.msgr_trailer_size + msgr.msgr_aux_size;

		assert(kectx->kec_data_resid >= size);
		kectx->kec_data_resid -= size;
		if ((kectx->kec_process_flags & KEVENT_FLAG_STACK_DATA) == 0) {
			kev->ext[0] = kectx->kec_data_out;
			kectx->kec_data_out += size;
		} else {
			assert(option64 & MACH64_RCV_STACK);
			kev->ext[0] = kectx->kec_data_out + kectx->kec_data_resid;
		}
	}

	/*
	 * Apply message-based QoS values to output kevent as prescribed.
	 * The kev->ext[2] field gets (msg-qos << 32) | (override-qos).
	 */
	if (kev->fflags == MACH_MSG_SUCCESS) {
		kev->ext[2] = ((uint64_t)msgr.msgr_priority << 32) |
		    _pthread_priority_make_from_thread_qos(msgr.msgr_qos_ovrd, 0, 0);
	}

	self->ith_knote = ITH_KNOTE_NULL;
	return result;
}
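
/*
 * A hedged sketch of how a userspace consumer might decode the ext[2]
 * encoding produced above (the split mirrors the 32-bit shift used in
 * filt_machportprocess(); `ev` is assumed to be a received struct
 * kevent64_s):
 *
 *	uint32_t msg_priority = (uint32_t)(ev.ext[2] >> 32);
 *	pthread_priority_t override_pp =
 *	    (pthread_priority_t)(ev.ext[2] & 0xffffffff);
 */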

static int
filt_psetprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_object_t io = ips_to_object(kn->kn_ipc_pset);

	return filt_machportprocess(kn, kev, io, IOT_PORT_SET);
}

static int
filt_portprocess(struct knote *kn, struct kevent_qos_s *kev)
{
	ipc_object_t io = ip_to_object(kn->kn_ipc_port);

	return filt_machportprocess(kn, kev, io, IOT_PORT);
}

static void
filt_machportsanitizedcopyout(struct knote *kn, struct kevent_qos_s *kev)
{
	*kev = *(struct kevent_qos_s *)&kn->kn_kevent;

	/*
	 * We may have stashed the address of the port that is pushing on the
	 * sync IPC, so clear it out.
	 */
	kev->ext[3] = 0;
}

const struct filterops machport_attach_filtops = {
	.f_adjusts_qos = true,
	.f_extended_codes = true,
	.f_attach = filt_machportattach,
	.f_sanitized_copyout = filt_machportsanitizedcopyout,
};

const struct filterops mach_port_filtops = {
	.f_adjusts_qos = true,
	.f_extended_codes = true,
	.f_detach = filt_portdetach,
	.f_event = filt_portevent,
	.f_touch = filt_porttouch,
	.f_process = filt_portprocess,
	.f_sanitized_copyout = filt_machportsanitizedcopyout,
};

const struct filterops mach_port_set_filtops = {
	.f_adjusts_qos = true,
	.f_extended_codes = true,
	.f_detach = filt_psetdetach,
	.f_event = filt_psetevent,
	.f_touch = filt_psettouch,
	.f_process = filt_psetprocess,
	.f_sanitized_copyout = filt_machportsanitizedcopyout,
};