xref: /xnu-12377.41.6/bsd/kern/uipc_domain.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 1998-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. All advertising materials mentioning features or use of this software
42  *    must display the following acknowledgement:
43  *	This product includes software developed by the University of
44  *	California, Berkeley and its contributors.
45  * 4. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)uipc_domain.c	8.3 (Berkeley) 2/14/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/socket.h>
66 #include <sys/protosw.h>
67 #include <sys/domain.h>
68 #include <sys/mcache.h>
69 #include <sys/mbuf.h>
70 #include <sys/time.h>
71 #include <sys/kernel.h>
72 #include <sys/systm.h>
73 #include <sys/proc_internal.h>
74 #include <sys/sysctl.h>
75 #include <sys/syslog.h>
76 #include <sys/queue.h>
77 
78 #include <kern/uipc_domain.h>
79 
80 #include <net/dlil.h>
81 #include <net/nwk_wq.h>
82 #include <net/sockaddr_utils.h>
83 
84 #include <mach/boolean.h>
85 #include <pexpert/pexpert.h>
86 
87 #include <net/sockaddr_utils.h>
88 
89 #if __has_ptrcheck
90 #include <machine/trap.h> /* Needed by bound-checks-soft when enabled. */
91 #endif /* __has_ptrcheck */
92 
93 /* Eventhandler context for protocol events */
94 struct eventhandler_lists_ctxt protoctl_evhdlr_ctxt;
95 
96 static void pr_init_old(struct protosw *, struct domain *);
97 static void init_proto(struct protosw *, struct domain *);
98 static void attach_proto(struct protosw *, struct domain *);
99 static void detach_proto(struct protosw *, struct domain *);
100 static void dom_init_old(struct domain *);
101 static void init_domain(struct domain *);
102 static void attach_domain(struct domain *);
103 static void detach_domain(struct domain *);
104 static struct protosw *pffindprotonotype_locked(int, int, int);
105 static struct domain *pffinddomain_locked(int);
106 
107 static boolean_t domain_timeout_run;    /* domain timer is scheduled to run */
108 static boolean_t domain_draining;
109 static void domain_sched_timeout(void);
110 static void domain_timeout(void *);
111 
112 static LCK_GRP_DECLARE(domain_proto_mtx_grp, "domain");
113 static LCK_ATTR_DECLARE(domain_proto_mtx_attr, 0, 0);
114 static LCK_MTX_DECLARE_ATTR(domain_proto_mtx,
115     &domain_proto_mtx_grp, &domain_proto_mtx_attr);
116 static LCK_MTX_DECLARE_ATTR(domain_timeout_mtx,
117     &domain_proto_mtx_grp, &domain_proto_mtx_attr);
118 
119 uint64_t _net_uptime;
120 uint64_t _net_uptime_ms;
121 uint64_t _net_uptime_us;
122 
123 #if (DEVELOPMENT || DEBUG)
124 
125 SYSCTL_DECL(_kern_ipc);
126 
127 static int sysctl_do_drain_domains SYSCTL_HANDLER_ARGS;
128 
129 SYSCTL_PROC(_kern_ipc, OID_AUTO, do_drain_domains,
130     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
131     0, 0,
132     sysctl_do_drain_domains, "I", "force manual drain domains");
133 
134 #endif /* DEVELOPMENT || DEBUG */
135 
static void
pr_init_old(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	/*
	 * Compatibility shim: adapts the legacy protosw_old pr_init
	 * callback (which takes no arguments) to the current two-argument
	 * pr_init signature.  Installed only on protocols attached via
	 * net_add_proto_old(), hence the PR_OLD assertion.
	 */
	VERIFY(pp->pr_flags & PR_OLD);
	VERIFY(pp->pr_old != NULL);

	if (pp->pr_old->pr_init != NULL) {
		pp->pr_old->pr_init();
	}
}
147 
static void
init_proto(struct protosw *pp, struct domain *dp)
{
	VERIFY(pp->pr_flags & PR_ATTACHED);

	/*
	 * Initialize an attached protocol exactly once: set up its filter
	 * list, invoke the protocol's own pr_init (if any), then mark it
	 * PR_INITIALIZED so repeated calls are no-ops.
	 */
	if (!(pp->pr_flags & PR_INITIALIZED)) {
		TAILQ_INIT(&pp->pr_filter_head);
		if (pp->pr_init != NULL) {
			pp->pr_init(pp, dp);
		}
		pp->pr_flags |= PR_INITIALIZED;
	}
}
161 
static void
attach_proto(struct protosw *pp, struct domain *dp)
{
	/*
	 * Link a protocol switch entry onto its domain's protosw list and
	 * set up its back-pointers.  Caller must hold the global domain
	 * mutex; the entry must not already be attached.
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(!(pp->pr_flags & PR_ATTACHED));
	VERIFY(pp->pr_domain == NULL);
	VERIFY(pp->pr_protosw == NULL);

	TAILQ_INSERT_TAIL(&dp->dom_protosw, pp, pr_entry);
	pp->pr_flags |= PR_ATTACHED;
	pp->pr_domain = dp;
	pp->pr_protosw = pp;

	/* do some cleaning up on user request callbacks */
	pru_sanitize(pp->pr_usrreqs);
}
178 
static void
detach_proto(struct protosw *pp, struct domain *dp)
{
	/*
	 * Inverse of attach_proto(): unlink the entry from its domain's
	 * protosw list and clear its attached state and back-pointers.
	 * Caller must hold the global domain mutex.  Note: does not free
	 * the protosw; callers handle PR_OLD/PRUF_OLD deallocation.
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(pp->pr_flags & PR_ATTACHED);
	VERIFY(pp->pr_domain == dp);
	VERIFY(pp->pr_protosw == pp);

	TAILQ_REMOVE(&dp->dom_protosw, pp, pr_entry);
	pp->pr_flags &= ~PR_ATTACHED;
	pp->pr_domain = NULL;
	pp->pr_protosw = NULL;
}
192 
static void
dom_init_old(struct domain *dp)
{
	/*
	 * Compatibility shim: adapts the legacy domain_old dom_init
	 * callback (no arguments) to the current one-argument dom_init
	 * signature.  Installed only on domains added via
	 * net_add_domain_old(), hence the DOM_OLD assertion.
	 */
	VERIFY(dp->dom_flags & DOM_OLD);
	VERIFY(dp->dom_old != NULL);

	if (dp->dom_old->dom_init != NULL) {
		dp->dom_old->dom_init();
	}
}
203 
static void
init_domain(struct domain *dp)
{
	VERIFY(dp->dom_flags & DOM_ATTACHED);

	/*
	 * One-time initialization: create the per-domain mutex, set up the
	 * (initially empty) protosw list, and invoke the domain's own
	 * dom_init before marking it DOM_INITIALIZED.
	 */
	if (!(dp->dom_flags & DOM_INITIALIZED)) {
		lck_mtx_init(&dp->dom_mtx_s, &domain_proto_mtx_grp,
		    &domain_proto_mtx_attr);
		dp->dom_mtx = &dp->dom_mtx_s;
		TAILQ_INIT(&dp->dom_protosw);
		if (dp->dom_init != NULL) {
			dp->dom_init(dp);
		}
		dp->dom_flags |= DOM_INITIALIZED;
	}

	/* Recompute for new protocol */
	if (max_linkhdr < 16) {        /* XXX - Sheesh; everything's ether? */
		max_linkhdr = 16;
	}
	/* Keep the global header-length maxima 32-bit aligned. */
	max_linkhdr = (int)P2ROUNDUP(max_linkhdr, sizeof(uint32_t));

	if (dp->dom_protohdrlen > max_protohdr) {
		max_protohdr = dp->dom_protohdrlen;
	}
	max_protohdr = (int)P2ROUNDUP(max_protohdr, sizeof(uint32_t));

	/* Derived: total header reservation and remaining mbuf payload. */
	max_hdr = max_linkhdr + max_protohdr;
	max_datalen = MHLEN - max_hdr;
}
234 
static void
attach_domain(struct domain *dp)
{
	/*
	 * Append a domain to the global domains list and flag it attached.
	 * Caller must hold the global domain mutex; the domain must not
	 * already be attached.
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(!(dp->dom_flags & DOM_ATTACHED));

	TAILQ_INSERT_TAIL(&domains, dp, dom_entry);
	dp->dom_flags |= DOM_ATTACHED;
}
244 
static void
detach_domain(struct domain *dp)
{
	/*
	 * Inverse of attach_domain(): unlink a domain from the global list.
	 * For legacy (DOM_OLD) domains, also sever the domain_old's links
	 * back into the kernel's internal state so the caller's structure
	 * no longer references a freed mutex or list entry.
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(dp->dom_flags & DOM_ATTACHED);

	TAILQ_REMOVE(&domains, dp, dom_entry);
	dp->dom_flags &= ~DOM_ATTACHED;

	if (dp->dom_flags & DOM_OLD) {
		struct domain_old *odp = dp->dom_old;

		VERIFY(odp != NULL);
		odp->dom_next = NULL;
		odp->dom_mtx = NULL;
	}
}
262 
/*
 * Exported (private) routine, indirection of net_add_domain.
 *
 * Wraps a legacy domain_old in a freshly allocated internal struct
 * domain (flagged DOM_OLD), attaches and initializes it, then points
 * the caller's dom_mtx at the internal domain's mutex.  Panics on any
 * duplicate or malformed registration, since callers of this legacy
 * interface cannot handle failure.
 */
void
net_add_domain_old(struct domain_old *odp)
{
	struct domain *dp;
	domain_guard_t guard __single;

	VERIFY(odp != NULL);

	guard = domain_guard_deploy();
	if ((dp = pffinddomain_locked(odp->dom_family)) != NULL) {
		/*
		 * There is really nothing better than to panic here,
		 * as the caller would not have been able to handle
		 * any failures otherwise.
		 */
		panic("%s: domain (%d,%s) already exists for %s", __func__,
		    dp->dom_family, dp->dom_name, odp->dom_name);
		/* NOTREACHED */
	}

	/* Make sure nothing is currently pointing to the odp. */
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		if (dp->dom_old == odp) {
			panic("%s: domain %p (%d,%s) is already "
			    "associated with %p (%d,%s)\n", __func__,
			    odp, odp->dom_family, odp->dom_name, dp,
			    dp->dom_family, dp->dom_name);
			/* NOTREACHED */
		}
	}

	/* Protocols must be registered one at a time, not preloaded. */
	if (odp->dom_protosw != NULL) {
		panic("%s: domain (%d,%s) protocols need to added "
		    "via net_add_proto\n", __func__, odp->dom_family,
		    odp->dom_name);
		/* NOTREACHED */
	}

	dp = kalloc_type(struct domain, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/* Copy everything but dom_init, dom_mtx, dom_next and dom_refs */
	dp->dom_family          = odp->dom_family;
	dp->dom_flags           = (odp->dom_flags & DOMF_USERFLAGS) | DOM_OLD;
	dp->dom_name            = odp->dom_name;
	dp->dom_init            = dom_init_old;
	dp->dom_externalize     = odp->dom_externalize;
	dp->dom_dispose         = odp->dom_dispose;
	dp->dom_rtattach        = odp->dom_rtattach;
	dp->dom_rtoffset        = odp->dom_rtoffset;
	dp->dom_maxrtkey        = odp->dom_maxrtkey;
	dp->dom_protohdrlen     = odp->dom_protohdrlen;
	dp->dom_old             = odp;

	attach_domain(dp);
	init_domain(dp);

	/* Point the mutex back to the internal structure's */
	odp->dom_mtx            = dp->dom_mtx;
	domain_guard_release(guard);
}
326 
/*
 * Exported (private) routine, indirection of net_del_domain.
 *
 * Removes the internal struct domain shadowing the given legacy
 * domain_old, first detaching (and, for legacy entries, freeing) every
 * protocol still attached to it.
 *
 * Returns 0 on success, EBUSY if the domain still has references, or
 * EPFNOSUPPORT if no matching legacy domain is registered.
 */
int
net_del_domain_old(struct domain_old *odp)
{
	struct domain *dp1 __single, *dp2 __single;
	int error = 0;
	domain_guard_t guard __single;

	VERIFY(odp != NULL);

	guard = domain_guard_deploy();
	if (odp->dom_refs != 0) {
		error = EBUSY;
		goto done;
	}

	/* Locate the internal domain that shadows this legacy one. */
	TAILQ_FOREACH_SAFE(dp1, &domains, dom_entry, dp2) {
		if (!(dp1->dom_flags & DOM_OLD)) {
			continue;
		}
		VERIFY(dp1->dom_old != NULL);
		if (odp == dp1->dom_old) {
			break;
		}
	}
	if (dp1 != NULL) {
		struct protosw *pp1 __single, *pp2 __single;

		VERIFY(dp1->dom_flags & DOM_OLD);
		VERIFY(dp1->dom_old == odp);

		/* Remove all protocols attached to this domain */
		TAILQ_FOREACH_SAFE(pp1, &dp1->dom_protosw, pr_entry, pp2) {
			detach_proto(pp1, dp1);
			/* Shadow structures were allocated by the kernel
			 * on behalf of legacy registrations; free them. */
			if (pp1->pr_usrreqs->pru_flags & PRUF_OLD) {
				kfree_type(struct pr_usrreqs, pp1->pr_usrreqs);
			}
			if (pp1->pr_flags & PR_OLD) {
				kfree_type(struct protosw, pp1);
			}
		}

		detach_domain(dp1);
		kfree_type(struct domain, dp1);
	} else {
		error = EPFNOSUPPORT;
	}
done:
	domain_guard_release(guard);
	return error;
}
380 
/*
 * Internal routine, not exported.
 *
 * net_add_proto - link a protosw into a domain's protosw chain
 *
 * Returns 0 on success or EEXIST if a protocol with the same
 * (type, protocol) pair is already attached to the domain.  When
 * doinit is nonzero the protocol is also initialized immediately.
 *
 * NOTE: Caller must have acquired domain_proto_mtx
 */
int
net_add_proto(struct protosw *pp, struct domain *dp, int doinit)
{
	struct protosw *pp1;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(!(pp->pr_flags & PR_ATTACHED));

	/* pr_domain is set only after the protocol is attached */
	if (pp->pr_domain != NULL) {
		panic("%s: domain (%d,%s), proto %d has non-NULL pr_domain!",
		    __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
		/* NOTREACHED */
	}

	if (pp->pr_usrreqs == NULL) {
		panic("%s: domain (%d,%s), proto %d has no usrreqs!",
		    __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
		/* NOTREACHED */
	}

	/* Reject duplicate (type, protocol) registrations. */
	TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
		if (pp1->pr_type == pp->pr_type &&
		    pp1->pr_protocol == pp->pr_protocol) {
			return EEXIST;
		}
	}

	attach_proto(pp, dp);
	if (doinit) {
		net_init_proto(pp, dp);
	}

	return 0;
}
427 
/*
 * Initialize an already-attached protocol; thin, assert-guarded wrapper
 * around init_proto().  Caller must hold domain_proto_mtx.
 */
void
net_init_proto(struct protosw *pp, struct domain *dp)
{
	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).  The protocol
	 * must have been attached via net_addr_protosw() by now.
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(pp->pr_flags & PR_ATTACHED);

	init_proto(pp, dp);
}
441 
/*
 * Exported (private) routine, indirection of net_add_proto.
 *
 * Wraps a legacy protosw_old (and its pr_usrreqs_old) in freshly
 * allocated internal shadow structures flagged PR_OLD/PRUF_OLD, then
 * attaches and initializes the protocol on the internal domain that
 * shadows the given legacy domain_old.
 *
 * Returns 0 on success, EINVAL if the legacy domain is not registered,
 * or EEXIST if the (type, protocol) pair is already attached.  On
 * failure the shadow allocations are freed before returning.
 */
int
net_add_proto_old(struct protosw_old *opp, struct domain_old *odp)
{
	struct pr_usrreqs_old *opru;
	struct pr_usrreqs *pru __single = NULL;
	struct protosw *pp __single = NULL, *pp1;
	int error = 0;
	struct domain *dp;
	domain_guard_t guard __single;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	guard = domain_guard_deploy();

	/* Make sure the domain has been added via net_add_domain */
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		if (!(dp->dom_flags & DOM_OLD)) {
			continue;
		}
		if (dp->dom_old == odp) {
			break;
		}
	}
	if (dp == NULL) {
		error = EINVAL;
		goto done;
	}

	/* Reject duplicate (type, protocol) registrations. */
	TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
		if (pp1->pr_type == opp->pr_type &&
		    pp1->pr_protocol == opp->pr_protocol) {
			error = EEXIST;
			goto done;
		}
	}

	if ((opru = opp->pr_usrreqs) == NULL) {
		panic("%s: domain (%d,%s), proto %d has no usrreqs!",
		    __func__, odp->dom_family, odp->dom_name, opp->pr_protocol);
		/* NOTREACHED */
	}

	/* Build the shadow pr_usrreqs from the legacy callback table. */
	pru = kalloc_type(struct pr_usrreqs, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	pru->pru_flags          = PRUF_OLD;
	pru->pru_abort          = opru->pru_abort;
	pru->pru_accept         = opru->pru_accept;
	pru->pru_attach         = opru->pru_attach;
	pru->pru_bind           = opru->pru_bind;
	pru->pru_connect        = opru->pru_connect;
	pru->pru_connect2       = opru->pru_connect2;
	pru->pru_control        = opru->pru_control;
	pru->pru_detach         = opru->pru_detach;
	pru->pru_disconnect     = opru->pru_disconnect;
	pru->pru_listen         = opru->pru_listen;
	pru->pru_peeraddr       = opru->pru_peeraddr;
	pru->pru_rcvd           = opru->pru_rcvd;
	pru->pru_rcvoob         = opru->pru_rcvoob;
	pru->pru_send           = opru->pru_send;
	pru->pru_sense          = opru->pru_sense;
	pru->pru_shutdown       = opru->pru_shutdown;
	pru->pru_sockaddr       = opru->pru_sockaddr;
	pru->pru_sosend         = opru->pru_sosend;
	pru->pru_soreceive      = opru->pru_soreceive;
	pru->pru_sopoll         = opru->pru_sopoll;

	pp = kalloc_type(struct protosw, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/*
	 * Protocol fast and slow timers are now deprecated.
	 */
	if (opp->pr_unused != NULL) {
		printf("%s: domain (%d,%s), proto %d: pr_fasttimo is "
		    "deprecated and won't be called\n", __func__,
		    odp->dom_family, odp->dom_name, opp->pr_protocol);
	}
	if (opp->pr_unused2 != NULL) {
		printf("%s: domain (%d,%s), proto %d: pr_slowtimo is "
		    "deprecated and won't be called\n", __func__,
		    odp->dom_family, odp->dom_name, opp->pr_protocol);
	}

	/* Copy everything but pr_init, pr_next, pr_domain, pr_protosw */
	pp->pr_type             = opp->pr_type;
	pp->pr_protocol         = opp->pr_protocol;
	pp->pr_flags            = (opp->pr_flags & PRF_USERFLAGS) | PR_OLD;
	pp->pr_input            = opp->pr_input;
	pp->pr_output           = opp->pr_output;
	pp->pr_ctlinput         = opp->pr_ctlinput;
	pp->pr_ctloutput        = opp->pr_ctloutput;
	pp->pr_usrreqs          = pru;
	pp->pr_init             = pr_init_old;
	pp->pr_drain            = opp->pr_drain;
	pp->pr_lock             = opp->pr_lock;
	pp->pr_unlock           = opp->pr_unlock;
	pp->pr_getlock          = opp->pr_getlock;
	pp->pr_old              = opp;

	/* attach as well as initialize */
	attach_proto(pp, dp);
	net_init_proto(pp, dp);
done:
	if (error != 0) {
		printf("%s: domain (%d,%s), proto %d: failed to attach, "
		    "error %d\n", __func__, odp->dom_family,
		    odp->dom_name, opp->pr_protocol, error);

		/* kfree_type is a no-op on NULL; safe on all error paths. */
		kfree_type(struct pr_usrreqs, pru);
		kfree_type(struct protosw, pp);
	}

	domain_guard_release(guard);
	return error;
}
561 
/*
 * Internal routine, not exported.
 *
 * net_del_proto - remove a protosw from a domain's protosw chain.
 * Search the protosw chain for the element with matching data.
 * Then unlink and return.
 *
 * Returns 0 on success or ENXIO if no (type, protocol) match is found.
 * Shadow structures created for legacy registrations (PR_OLD/PRUF_OLD)
 * are freed here.
 *
 * NOTE: Caller must have acquired domain_proto_mtx
 */
int
net_del_proto(int type, int protocol, struct domain *dp)
{
	struct protosw *pp __single;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	domain_proto_mtx_lock_assert_held();

	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
		if (pp->pr_type == type && pp->pr_protocol == protocol) {
			break;
		}
	}
	if (pp == NULL) {
		return ENXIO;
	}

	detach_proto(pp, dp);
	if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
		kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
	}
	if (pp->pr_flags & PR_OLD) {
		kfree_type(struct protosw, pp);
	}

	return 0;
}
601 
/*
 * Exported (private) routine, indirection of net_del_proto.
 *
 * Locates the internal domain shadowing the given legacy domain_old,
 * then detaches (and frees, for legacy shadow structures) the protocol
 * matching (type, protocol).  Returns 0 on success or ENXIO if either
 * the domain or the protocol is not found.
 */
int
net_del_proto_old(int type, int protocol, struct domain_old *odp)
{
	int error = 0;
	struct protosw *pp __single;
	struct domain *dp;
	domain_guard_t guard __single;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	guard = domain_guard_deploy();

	/* Make sure the domain has been added via net_add_domain */
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		if (!(dp->dom_flags & DOM_OLD)) {
			continue;
		}
		if (dp->dom_old == odp) {
			break;
		}
	}
	if (dp == NULL) {
		error = ENXIO;
		goto done;
	}

	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
		if (pp->pr_type == type && pp->pr_protocol == protocol) {
			break;
		}
	}
	if (pp == NULL) {
		error = ENXIO;
		goto done;
	}
	detach_proto(pp, dp);
	/* Free the kernel-owned shadow structures for legacy entries. */
	if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
		kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
	}
	if (pp->pr_flags & PR_OLD) {
		kfree_type(struct protosw, pp);
	}

done:
	domain_guard_release(guard);
	return error;
}
654 
static void
domain_sched_timeout(void)
{
	LCK_MTX_ASSERT(&domain_timeout_mtx, LCK_MTX_ASSERT_OWNED);

	/*
	 * Arm the drain timer only when it is not already pending and
	 * there is drain work queued; it fires after hz ticks (~1s).
	 */
	if (!domain_timeout_run && domain_draining) {
		domain_timeout_run = TRUE;
		timeout(domain_timeout, NULL, hz);
	}
}
665 
/*
 * Request an asynchronous drain of all domains: mark draining and
 * schedule domain_timeout() (which invokes each protocol's pr_drain).
 */
void
net_drain_domains(void)
{
	lck_mtx_lock(&domain_timeout_mtx);
	domain_draining = TRUE;
	domain_sched_timeout();
	lck_mtx_unlock(&domain_timeout_mtx);
}
674 
675 extern struct domain inet6domain_s;
676 #if IPSEC
677 extern struct domain keydomain_s;
678 #endif
679 
680 extern struct domain routedomain_s, ndrvdomain_s, inetdomain_s;
681 extern struct domain systemdomain_s, localdomain_s;
682 extern struct domain vsockdomain_s;
683 
684 #if MULTIPATH
685 extern struct domain mpdomain_s;
686 #endif /* MULTIPATH */
687 
/*
 * Timer callback armed by domain_sched_timeout().  If a drain was
 * requested, walks every protocol of every domain and invokes its
 * pr_drain callback.  The timeout mutex is dropped while draining
 * (the domain guard is held instead), then retaken to re-arm the
 * timer if more work was queued in the meantime.
 */
static void
domain_timeout(void *arg)
{
#pragma unused(arg)
	struct protosw *pp;
	struct domain *dp;
	domain_guard_t guard __single;

	lck_mtx_lock(&domain_timeout_mtx);
	if (domain_draining) {
		domain_draining = FALSE;
		lck_mtx_unlock(&domain_timeout_mtx);

		guard = domain_guard_deploy();
		TAILQ_FOREACH(dp, &domains, dom_entry) {
			TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
				if (pp->pr_drain != NULL) {
					(*pp->pr_drain)();
				}
			}
		}
		domain_guard_release(guard);

		lck_mtx_lock(&domain_timeout_mtx);
	}

	/* re-arm the timer if there's work to do */
	domain_timeout_run = FALSE;
	domain_sched_timeout();
	lck_mtx_unlock(&domain_timeout_mtx);
}
719 
/*
 * Boot-time registration and initialization of all statically linked
 * protocol domains.  Attach order matters: the route domain must be
 * last (see comment below).
 */
void
domaininit(void)
{
	struct domain *dp;
	domain_guard_t guard __single;

	eventhandler_lists_ctxt_init(&protoctl_evhdlr_ctxt);

	guard = domain_guard_deploy();
	/*
	 * Add all the static domains to the domains list.  route domain
	 * gets added and initialized last, since we need it to attach
	 * rt_tables[] to everything that's already there.  This also
	 * means that domains added after this point won't get their
	 * dom_rtattach() called on rt_tables[].
	 */
	attach_domain(&inetdomain_s);
	attach_domain(&inet6domain_s);
#if MULTIPATH
	attach_domain(&mpdomain_s);
#endif /* MULTIPATH */
	attach_domain(&systemdomain_s);
	attach_domain(&localdomain_s);
#if IPSEC
	attach_domain(&keydomain_s);
#endif /* IPSEC */
	attach_domain(&ndrvdomain_s);
	attach_domain(&vsockdomain_s);
	attach_domain(&routedomain_s);  /* must be last domain */

	/*
	 * Now ask them all to init (XXX including the routing domain,
	 * see above)
	 */
	TAILQ_FOREACH(dp, &domains, dom_entry)
	init_domain(dp);

	domain_guard_release(guard);
}
759 
760 static __inline__ struct domain *
pffinddomain_locked(int pf)761 pffinddomain_locked(int pf)
762 {
763 	struct domain *dp;
764 
765 	domain_proto_mtx_lock_assert_held();
766 
767 	TAILQ_FOREACH(dp, &domains, dom_entry) {
768 		if (dp->dom_family == pf) {
769 			break;
770 		}
771 	}
772 	return dp;
773 }
774 
/*
 * Find the first protocol of the given non-zero socket type within the
 * given protocol family.  Returns NULL if the family is unknown or no
 * protocol of that type is attached.
 */
struct protosw *
pffindtype(int family, int type)
{
	struct protosw *pp = NULL;
	struct domain *dp;
	domain_guard_t guard __single;

	guard = domain_guard_deploy();
	if ((dp = pffinddomain_locked(family)) == NULL) {
		goto done;
	}

	/* pp is NULL when the loop runs to completion without a match. */
	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
		if (pp->pr_type != 0 && pp->pr_type == type) {
			goto done;
		}
	}
done:
	domain_guard_release(guard);
	return pp;
}
796 
/*
 * Internal routine, not exported.
 *
 * Guarded wrapper around pffinddomain_locked(): look up the domain for
 * protocol family 'pf'.  Returns NULL if not found.
 */
struct domain *
pffinddomain(int pf)
{
	struct domain *dp;
	domain_guard_t guard __single;

	guard = domain_guard_deploy();
	dp = pffinddomain_locked(pf);
	domain_guard_release(guard);
	return dp;
}
811 
/*
 * Exported (private) routine, indirection of pffinddomain.
 *
 * Returns the legacy domain_old for family 'pf', or NULL if the family
 * is unknown or was not registered through the legacy interface.
 */
struct domain_old *
pffinddomain_old(int pf)
{
	struct domain_old *odp = NULL;
	struct domain *dp;
	domain_guard_t guard __single;

	guard = domain_guard_deploy();
	if ((dp = pffinddomain_locked(pf)) != NULL && (dp->dom_flags & DOM_OLD)) {
		odp = dp->dom_old;
	}
	domain_guard_release(guard);
	return odp;
}
829 
/*
 * Internal routine, not exported.
 *
 * Guarded wrapper around pffindproto_locked(): look up a protocol by
 * (family, protocol, type).  Returns NULL if not found.
 */
struct protosw *
pffindproto(int family, int protocol, int type)
{
	struct protosw *pp;
	domain_guard_t guard __single;

	guard = domain_guard_deploy();
	pp = pffindproto_locked(family, protocol, type);
	domain_guard_release(guard);
	return pp;
}
844 
845 struct protosw *
pffindproto_locked(int family,int protocol,int type)846 pffindproto_locked(int family, int protocol, int type)
847 {
848 	struct protosw *maybe = NULL;
849 	struct protosw *pp;
850 	struct domain *dp;
851 
852 	domain_proto_mtx_lock_assert_held();
853 
854 	if (family == 0) {
855 		return 0;
856 	}
857 
858 	dp = pffinddomain_locked(family);
859 	if (dp == NULL) {
860 		return NULL;
861 	}
862 
863 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
864 		if ((pp->pr_protocol == protocol) && (pp->pr_type == type)) {
865 			return pp;
866 		}
867 
868 		if (type == SOCK_RAW && pp->pr_type == SOCK_RAW &&
869 		    pp->pr_protocol == 0 && maybe == NULL) {
870 			maybe = pp;
871 		}
872 	}
873 	return maybe;
874 }
875 
/*
 * Exported (private) routine, indirection of pffindproto.
 *
 * Returns the legacy protosw_old for (family, protocol, type), or NULL
 * if no match exists or the match was not registered through the
 * legacy interface.
 */
struct protosw_old *
pffindproto_old(int family, int protocol, int type)
{
	struct protosw_old *opr = NULL;
	struct protosw *pp;
	domain_guard_t guard __single;

	guard = domain_guard_deploy();
	if ((pp = pffindproto_locked(family, protocol, type)) != NULL &&
	    (pp->pr_flags & PR_OLD)) {
		opr = pp->pr_old;
	}
	domain_guard_release(guard);
	return opr;
}
894 
895 static struct protosw *
pffindprotonotype_locked(int family,int protocol,int type)896 pffindprotonotype_locked(int family, int protocol, int type)
897 {
898 #pragma unused(type)
899 	struct domain *dp;
900 	struct protosw *pp;
901 
902 	domain_proto_mtx_lock_assert_held();
903 
904 	if (family == 0) {
905 		return 0;
906 	}
907 
908 	dp = pffinddomain_locked(family);
909 	if (dp == NULL) {
910 		return NULL;
911 	}
912 
913 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
914 		if (pp->pr_protocol == protocol) {
915 			return pp;
916 		}
917 	}
918 	return NULL;
919 }
920 
/*
 * Guarded wrapper around pffindprotonotype_locked(): look up a protocol
 * by (family, protocol) regardless of socket type.  A zero protocol is
 * rejected up front (it would otherwise match wildcard entries).
 * Returns NULL if not found.
 */
struct protosw *
pffindprotonotype(int family, int protocol)
{
	struct protosw *pp;
	domain_guard_t guard __single;

	if (protocol == 0) {
		return NULL;
	}

	guard = domain_guard_deploy();
	pp = pffindprotonotype_locked(family, protocol, 0);
	domain_guard_release(guard);
	return pp;
}
936 
/*
 * Broadcast a control-input event to all protocols; convenience form of
 * pfctlinput2() with no parameter payload.
 */
void
pfctlinput(int cmd, struct sockaddr *sa)
{
	pfctlinput2(cmd, sa, NULL);
}
942 
/*
 * Broadcast a control-input event (cmd, address, optional parameter) to
 * every protocol of every domain that installs a pr_ctlinput handler.
 * A NULL address is a no-op.  Runs under the domain guard.
 */
void
pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
{
	struct domain *dp;
	struct protosw *pp;
	domain_guard_t guard __single;

	if (sa == NULL) {
		return;
	}

	guard = domain_guard_deploy();
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
			if (pp->pr_ctlinput != NULL) {
				(*pp->pr_ctlinput)(cmd, sa, ctlparam, NULL);
			}
		}
	}
	domain_guard_release(guard);
}
964 
965 void
net_update_uptime_with_time(const struct timeval * tvp)966 net_update_uptime_with_time(const struct timeval *tvp)
967 {
968 	uint64_t tmp;
969 	uint64_t seconds = tvp->tv_sec;;
970 	uint64_t milliseconds = ((uint64_t)tvp->tv_sec * 1000) + ((uint64_t)tvp->tv_usec / 1000);
971 	uint64_t microseconds = ((uint64_t)tvp->tv_sec * USEC_PER_SEC) + (uint64_t)tvp->tv_usec;
972 
973 	/*
974 	 * Round up the timer to the nearest integer value because otherwise
975 	 * we might setup networking timers that are off by almost 1 second.
976 	 */
977 	if (tvp->tv_usec > 500000) {
978 		seconds++;
979 	}
980 
981 	tmp = os_atomic_load(&_net_uptime, relaxed);
982 	if (tmp < seconds) {
983 		os_atomic_cmpxchg(&_net_uptime, tmp, seconds, relaxed);
984 
985 		/*
986 		 * No loop needed. If we are racing with another thread, let's give
987 		 * the other one the priority.
988 		 */
989 	}
990 
991 	/* update milliseconds variant */
992 	tmp = os_atomic_load(&_net_uptime_ms, relaxed);
993 	if (tmp < milliseconds) {
994 		os_atomic_cmpxchg(&_net_uptime_ms, tmp, milliseconds, relaxed);
995 	}
996 
997 	/* update microseconds variant */
998 	tmp = os_atomic_load(&_net_uptime_us, relaxed);
999 	if (tmp < microseconds) {
1000 		os_atomic_cmpxchg(&_net_uptime_us, tmp, microseconds, relaxed);
1001 	}
1002 }
1003 
/*
 * Refresh the cached network uptime counters from the current
 * microuptime() reading.
 */
void
net_update_uptime(void)
{
	struct timeval tv;

	microuptime(&tv);

	net_update_uptime_with_time(&tv);
}
1013 
1014 /*
1015  * Convert our uin64_t net_uptime to a struct timeval.
1016  */
1017 void
net_uptime2timeval(struct timeval * tv)1018 net_uptime2timeval(struct timeval *tv)
1019 {
1020 	if (tv == NULL) {
1021 		return;
1022 	}
1023 
1024 	tv->tv_usec = 0;
1025 	tv->tv_sec = (time_t)net_uptime();
1026 }
1027 
/* Debug assertion: the calling thread must own the global domain mutex. */
void
domain_proto_mtx_lock_assert_held(void)
{
	LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
}
1033 
/* Debug assertion: the calling thread must NOT own the global domain mutex. */
void
domain_proto_mtx_lock_assert_notheld(void)
{
	LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
}
1039 
/*
 * Acquire the global domain mutex in a recursion-safe manner.  A thread
 * mark tracks whether this thread already holds the lock: if the push
 * returns a real mark, this call takes the lock and the returned guard
 * owns it (release via domain_guard_release()); if it returns the
 * "none" mark, the lock is already owned (asserted below) and the
 * returned guard is a no-op.
 */
domain_guard_t
domain_guard_deploy(void)
{
	net_thread_marks_t marks __single;

	marks = net_thread_marks_push(NET_THREAD_HELD_DOMAIN);
	if (marks != net_thread_marks_none) {
		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
		lck_mtx_lock(&domain_proto_mtx);
	} else {
		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
	}

	return (domain_guard_t)(const void*)marks;
}
1055 
1056 void
domain_guard_release(domain_guard_t guard)1057 domain_guard_release(domain_guard_t guard)
1058 {
1059 	net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)guard;
1060 
1061 	if (marks != net_thread_marks_none) {
1062 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1063 		lck_mtx_unlock(&domain_proto_mtx);
1064 		net_thread_marks_pop(marks);
1065 	} else {
1066 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1067 	}
1068 }
1069 
1070 domain_unguard_t
domain_unguard_deploy(void)1071 domain_unguard_deploy(void)
1072 {
1073 	net_thread_marks_t marks __single;
1074 
1075 	marks = net_thread_unmarks_push(NET_THREAD_HELD_DOMAIN);
1076 	if (marks != net_thread_marks_none) {
1077 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1078 		lck_mtx_unlock(&domain_proto_mtx);
1079 	} else {
1080 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1081 	}
1082 
1083 	return (domain_unguard_t)(const void*)marks;
1084 }
1085 
1086 void
domain_unguard_release(domain_unguard_t unguard)1087 domain_unguard_release(domain_unguard_t unguard)
1088 {
1089 	net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)unguard;
1090 
1091 	if (marks != net_thread_marks_none) {
1092 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1093 		lck_mtx_lock(&domain_proto_mtx);
1094 		net_thread_unmarks_pop(marks);
1095 	} else {
1096 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1097 	}
1098 }
1099 
#if SKYWALK
/* The following is used to enqueue work items for interface events */
struct protoctl_event {
	struct ifnet *ifp;              /* interface the event was seen on */
	union sockaddr_in_4_6 laddr;    /* local address (IPv4 or IPv6) */
	union sockaddr_in_4_6 raddr;    /* remote address (IPv4 or IPv6) */
	uint32_t protoctl_event_code;   /* protocol-control event code */
	struct protoctl_ev_val val;     /* event-specific value payload */
	uint16_t lport;                 /* local port */
	uint16_t rport;                 /* remote port */
	uint8_t protocol;               /* transport protocol identifier */
};

/*
 * Work-queue wrapper: embeds a generic nwk_wq_entry so the full event
 * can be recovered from the queue entry via __container_of() in the
 * callback.
 */
struct protoctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct protoctl_event protoctl_ev_arg;
};
1117 
/*
 * Work-queue callback: recover the enqueued protoctl event, dispatch it
 * to all registered protoctl_event handlers, then free the entry that
 * protoctl_event_enqueue_nwk_wq_entry() allocated.
 */
static void
protoctl_event_callback(struct nwk_wq_entry *nwk_item)
{
	struct protoctl_event_nwk_wq_entry *p_ev __single = NULL;

	/* Recover the enclosing entry from the embedded nwk_wqe member. */
	p_ev = __unsafe_forge_single(struct protoctl_event_nwk_wq_entry *,
	    __container_of(nwk_item, struct protoctl_event_nwk_wq_entry, nwk_wqe));

	/* Call this before we walk the tree */
	EVENTHANDLER_INVOKE(&protoctl_evhdlr_ctxt, protoctl_event,
	    p_ev->protoctl_ev_arg.ifp, SA(&p_ev->protoctl_ev_arg.laddr),
	    SA(&p_ev->protoctl_ev_arg.raddr),
	    p_ev->protoctl_ev_arg.lport, p_ev->protoctl_ev_arg.rport,
	    p_ev->protoctl_ev_arg.protocol, p_ev->protoctl_ev_arg.protoctl_event_code,
	    &p_ev->protoctl_ev_arg.val);

	/* Ownership of the entry ends here; it was heap-allocated by the enqueuer. */
	kfree_type(struct protoctl_event_nwk_wq_entry, p_ev);
}
1136 
1137 /* XXX Some PRC events needs extra verification like sequence number checking */
1138 void
protoctl_event_enqueue_nwk_wq_entry(struct ifnet * ifp,struct sockaddr * p_laddr,struct sockaddr * p_raddr,uint16_t lport,uint16_t rport,uint8_t protocol,uint32_t protoctl_event_code,struct protoctl_ev_val * p_protoctl_ev_val)1139 protoctl_event_enqueue_nwk_wq_entry(struct ifnet *ifp, struct sockaddr *p_laddr,
1140     struct sockaddr *p_raddr, uint16_t lport, uint16_t rport, uint8_t protocol,
1141     uint32_t protoctl_event_code, struct protoctl_ev_val *p_protoctl_ev_val)
1142 {
1143 	struct protoctl_event_nwk_wq_entry *p_protoctl_ev = NULL;
1144 
1145 	evhlog(debug, "%s: eventhandler enqueuing event of type=protoctl_event event_code=%d",
1146 	    __func__, protocol);
1147 
1148 	p_protoctl_ev = kalloc_type(struct protoctl_event_nwk_wq_entry,
1149 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
1150 
1151 	p_protoctl_ev->protoctl_ev_arg.ifp = ifp;
1152 
1153 	if (p_laddr != NULL) {
1154 		VERIFY(p_laddr->sa_len <= sizeof(p_protoctl_ev->protoctl_ev_arg.laddr));
1155 		struct sockaddr_in6 *dst __single = &p_protoctl_ev->protoctl_ev_arg.laddr.sin6;
1156 		SOCKADDR_COPY(SIN6(p_laddr), dst, p_laddr->sa_len);
1157 	}
1158 
1159 	if (p_raddr != NULL) {
1160 		VERIFY(p_raddr->sa_len <= sizeof(p_protoctl_ev->protoctl_ev_arg.raddr));
1161 		struct sockaddr_in6 *dst __single = &p_protoctl_ev->protoctl_ev_arg.raddr.sin6;
1162 		SOCKADDR_COPY(SIN6(p_raddr), dst, p_raddr->sa_len);
1163 	}
1164 
1165 	p_protoctl_ev->protoctl_ev_arg.lport = lport;
1166 	p_protoctl_ev->protoctl_ev_arg.rport = rport;
1167 	p_protoctl_ev->protoctl_ev_arg.protocol = protocol;
1168 	p_protoctl_ev->protoctl_ev_arg.protoctl_event_code = protoctl_event_code;
1169 
1170 	if (p_protoctl_ev_val != NULL) {
1171 		bcopy(p_protoctl_ev_val, &(p_protoctl_ev->protoctl_ev_arg.val),
1172 		    sizeof(*p_protoctl_ev_val));
1173 	}
1174 	p_protoctl_ev->nwk_wqe.func = protoctl_event_callback;
1175 
1176 	nwk_wq_enqueue(&p_protoctl_ev->nwk_wqe);
1177 }
1178 #endif /* SKYWALK */
1179 
1180 #if (DEVELOPMENT || DEBUG)
1181 
1182 static int
1183 sysctl_do_drain_domains SYSCTL_HANDLER_ARGS
1184 {
1185 #pragma unused(arg1, arg2)
1186 	int error;
1187 	int dummy = 0;
1188 
1189 	error = sysctl_handle_int(oidp, &dummy, 0, req);
1190 	if (error || req->newptr == USER_ADDR_NULL) {
1191 		return error;
1192 	}
1193 
1194 	net_drain_domains();
1195 
1196 	return 0;
1197 }
1198 
1199 #endif /* DEVELOPMENT || DEBUG */
1200