xref: /xnu-11215.41.3/bsd/kern/uipc_domain.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 1998-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. All advertising materials mentioning features or use of this software
42  *    must display the following acknowledgement:
43  *	This product includes software developed by the University of
44  *	California, Berkeley and its contributors.
45  * 4. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)uipc_domain.c	8.3 (Berkeley) 2/14/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/socket.h>
66 #include <sys/protosw.h>
67 #include <sys/domain.h>
68 #include <sys/mcache.h>
69 #include <sys/mbuf.h>
70 #include <sys/time.h>
71 #include <sys/kernel.h>
72 #include <sys/systm.h>
73 #include <sys/proc_internal.h>
74 #include <sys/sysctl.h>
75 #include <sys/syslog.h>
76 #include <sys/queue.h>
77 
78 #include <net/dlil.h>
79 #include <net/nwk_wq.h>
80 #include <net/sockaddr_utils.h>
81 
82 #include <mach/boolean.h>
83 #include <pexpert/pexpert.h>
84 
85 #include <net/sockaddr_utils.h>
86 
87 #if __has_ptrcheck
88 #include <machine/trap.h> /* Needed by bound-checks-soft when enabled. */
89 #endif /* __has_ptrcheck */
90 
91 /* Eventhandler context for protocol events */
92 struct eventhandler_lists_ctxt protoctl_evhdlr_ctxt;
93 
94 static void pr_init_old(struct protosw *, struct domain *);
95 static void init_proto(struct protosw *, struct domain *);
96 static void attach_proto(struct protosw *, struct domain *);
97 static void detach_proto(struct protosw *, struct domain *);
98 static void dom_init_old(struct domain *);
99 static void init_domain(struct domain *);
100 static void attach_domain(struct domain *);
101 static void detach_domain(struct domain *);
102 static struct protosw *pffindprotonotype_locked(int, int, int);
103 static struct domain *pffinddomain_locked(int);
104 
105 static boolean_t domain_timeout_run;    /* domain timer is scheduled to run */
106 static boolean_t domain_draining;
107 static void domain_sched_timeout(void);
108 static void domain_timeout(void *);
109 
110 static LCK_GRP_DECLARE(domain_proto_mtx_grp, "domain");
111 static LCK_ATTR_DECLARE(domain_proto_mtx_attr, 0, 0);
112 static LCK_MTX_DECLARE_ATTR(domain_proto_mtx,
113     &domain_proto_mtx_grp, &domain_proto_mtx_attr);
114 static LCK_MTX_DECLARE_ATTR(domain_timeout_mtx,
115     &domain_proto_mtx_grp, &domain_proto_mtx_attr);
116 
117 uint64_t _net_uptime;
118 uint64_t _net_uptime_ms;
119 uint64_t _net_uptime_us;
120 
121 #if (DEVELOPMENT || DEBUG)
122 
123 SYSCTL_DECL(_kern_ipc);
124 
125 static int sysctl_do_drain_domains SYSCTL_HANDLER_ARGS;
126 
127 SYSCTL_PROC(_kern_ipc, OID_AUTO, do_drain_domains,
128     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
129     0, 0,
130     sysctl_do_drain_domains, "I", "force manual drain domains");
131 
132 #endif /* DEVELOPMENT || DEBUG */
133 
134 static void
pr_init_old(struct protosw * pp,struct domain * dp)135 pr_init_old(struct protosw *pp, struct domain *dp)
136 {
137 #pragma unused(dp)
138 	VERIFY(pp->pr_flags & PR_OLD);
139 	VERIFY(pp->pr_old != NULL);
140 
141 	if (pp->pr_old->pr_init != NULL) {
142 		pp->pr_old->pr_init();
143 	}
144 }
145 
146 static void
init_proto(struct protosw * pp,struct domain * dp)147 init_proto(struct protosw *pp, struct domain *dp)
148 {
149 	VERIFY(pp->pr_flags & PR_ATTACHED);
150 
151 	if (!(pp->pr_flags & PR_INITIALIZED)) {
152 		TAILQ_INIT(&pp->pr_filter_head);
153 		if (pp->pr_init != NULL) {
154 			pp->pr_init(pp, dp);
155 		}
156 		pp->pr_flags |= PR_INITIALIZED;
157 	}
158 }
159 
160 static void
attach_proto(struct protosw * pp,struct domain * dp)161 attach_proto(struct protosw *pp, struct domain *dp)
162 {
163 	domain_proto_mtx_lock_assert_held();
164 	VERIFY(!(pp->pr_flags & PR_ATTACHED));
165 	VERIFY(pp->pr_domain == NULL);
166 	VERIFY(pp->pr_protosw == NULL);
167 
168 	TAILQ_INSERT_TAIL(&dp->dom_protosw, pp, pr_entry);
169 	pp->pr_flags |= PR_ATTACHED;
170 	pp->pr_domain = dp;
171 	pp->pr_protosw = pp;
172 
173 	/* do some cleaning up on user request callbacks */
174 	pru_sanitize(pp->pr_usrreqs);
175 }
176 
177 static void
detach_proto(struct protosw * pp,struct domain * dp)178 detach_proto(struct protosw *pp, struct domain *dp)
179 {
180 	domain_proto_mtx_lock_assert_held();
181 	VERIFY(pp->pr_flags & PR_ATTACHED);
182 	VERIFY(pp->pr_domain == dp);
183 	VERIFY(pp->pr_protosw == pp);
184 
185 	TAILQ_REMOVE(&dp->dom_protosw, pp, pr_entry);
186 	pp->pr_flags &= ~PR_ATTACHED;
187 	pp->pr_domain = NULL;
188 	pp->pr_protosw = NULL;
189 }
190 
191 static void
dom_init_old(struct domain * dp)192 dom_init_old(struct domain *dp)
193 {
194 	VERIFY(dp->dom_flags & DOM_OLD);
195 	VERIFY(dp->dom_old != NULL);
196 
197 	if (dp->dom_old->dom_init != NULL) {
198 		dp->dom_old->dom_init();
199 	}
200 }
201 
202 static void
init_domain(struct domain * dp)203 init_domain(struct domain *dp)
204 {
205 	VERIFY(dp->dom_flags & DOM_ATTACHED);
206 
207 	if (!(dp->dom_flags & DOM_INITIALIZED)) {
208 		lck_mtx_init(&dp->dom_mtx_s, &domain_proto_mtx_grp,
209 		    &domain_proto_mtx_attr);
210 		dp->dom_mtx = &dp->dom_mtx_s;
211 		TAILQ_INIT(&dp->dom_protosw);
212 		if (dp->dom_init != NULL) {
213 			dp->dom_init(dp);
214 		}
215 		dp->dom_flags |= DOM_INITIALIZED;
216 	}
217 
218 	/* Recompute for new protocol */
219 	if (max_linkhdr < 16) {        /* XXX - Sheesh; everything's ether? */
220 		max_linkhdr = 16;
221 	}
222 	max_linkhdr = (int)P2ROUNDUP(max_linkhdr, sizeof(uint32_t));
223 
224 	if (dp->dom_protohdrlen > max_protohdr) {
225 		max_protohdr = dp->dom_protohdrlen;
226 	}
227 	max_protohdr = (int)P2ROUNDUP(max_protohdr, sizeof(uint32_t));
228 
229 	max_hdr = max_linkhdr + max_protohdr;
230 	max_datalen = MHLEN - max_hdr;
231 }
232 
233 static void
attach_domain(struct domain * dp)234 attach_domain(struct domain *dp)
235 {
236 	domain_proto_mtx_lock_assert_held();
237 	VERIFY(!(dp->dom_flags & DOM_ATTACHED));
238 
239 	TAILQ_INSERT_TAIL(&domains, dp, dom_entry);
240 	dp->dom_flags |= DOM_ATTACHED;
241 }
242 
243 static void
detach_domain(struct domain * dp)244 detach_domain(struct domain *dp)
245 {
246 	domain_proto_mtx_lock_assert_held();
247 	VERIFY(dp->dom_flags & DOM_ATTACHED);
248 
249 	TAILQ_REMOVE(&domains, dp, dom_entry);
250 	dp->dom_flags &= ~DOM_ATTACHED;
251 
252 	if (dp->dom_flags & DOM_OLD) {
253 		struct domain_old *odp = dp->dom_old;
254 
255 		VERIFY(odp != NULL);
256 		odp->dom_next = NULL;
257 		odp->dom_mtx = NULL;
258 	}
259 }
260 
261 /*
262  * Exported (private) routine, indirection of net_add_domain.
263  */
264 void
net_add_domain_old(struct domain_old * odp)265 net_add_domain_old(struct domain_old *odp)
266 {
267 	struct domain *dp;
268 	domain_guard_t guard __single;
269 
270 	VERIFY(odp != NULL);
271 
272 	guard = domain_guard_deploy();
273 	if ((dp = pffinddomain_locked(odp->dom_family)) != NULL) {
274 		/*
275 		 * There is really nothing better than to panic here,
276 		 * as the caller would not have been able to handle
277 		 * any failures otherwise.
278 		 */
279 		panic("%s: domain (%d,%s) already exists for %s", __func__,
280 		    dp->dom_family, dp->dom_name, odp->dom_name);
281 		/* NOTREACHED */
282 	}
283 
284 	/* Make sure nothing is currently pointing to the odp. */
285 	TAILQ_FOREACH(dp, &domains, dom_entry) {
286 		if (dp->dom_old == odp) {
287 			panic("%s: domain %p (%d,%s) is already "
288 			    "associated with %p (%d,%s)\n", __func__,
289 			    odp, odp->dom_family, odp->dom_name, dp,
290 			    dp->dom_family, dp->dom_name);
291 			/* NOTREACHED */
292 		}
293 	}
294 
295 	if (odp->dom_protosw != NULL) {
296 		panic("%s: domain (%d,%s) protocols need to added "
297 		    "via net_add_proto\n", __func__, odp->dom_family,
298 		    odp->dom_name);
299 		/* NOTREACHED */
300 	}
301 
302 	dp = kalloc_type(struct domain, Z_WAITOK | Z_ZERO | Z_NOFAIL);
303 
304 	/* Copy everything but dom_init, dom_mtx, dom_next and dom_refs */
305 	dp->dom_family          = odp->dom_family;
306 	dp->dom_flags           = (odp->dom_flags & DOMF_USERFLAGS) | DOM_OLD;
307 	dp->dom_name            = odp->dom_name;
308 	dp->dom_init            = dom_init_old;
309 	dp->dom_externalize     = odp->dom_externalize;
310 	dp->dom_dispose         = odp->dom_dispose;
311 	dp->dom_rtattach        = odp->dom_rtattach;
312 	dp->dom_rtoffset        = odp->dom_rtoffset;
313 	dp->dom_maxrtkey        = odp->dom_maxrtkey;
314 	dp->dom_protohdrlen     = odp->dom_protohdrlen;
315 	dp->dom_old             = odp;
316 
317 	attach_domain(dp);
318 	init_domain(dp);
319 
320 	/* Point the mutex back to the internal structure's */
321 	odp->dom_mtx            = dp->dom_mtx;
322 	domain_guard_release(guard);
323 }
324 
325 /*
326  * Exported (private) routine, indirection of net_del_domain.
327  */
328 int
net_del_domain_old(struct domain_old * odp)329 net_del_domain_old(struct domain_old *odp)
330 {
331 	struct domain *dp1 __single, *dp2 __single;
332 	int error = 0;
333 	domain_guard_t guard __single;
334 
335 	VERIFY(odp != NULL);
336 
337 	guard = domain_guard_deploy();
338 	if (odp->dom_refs != 0) {
339 		error = EBUSY;
340 		goto done;
341 	}
342 
343 	TAILQ_FOREACH_SAFE(dp1, &domains, dom_entry, dp2) {
344 		if (!(dp1->dom_flags & DOM_OLD)) {
345 			continue;
346 		}
347 		VERIFY(dp1->dom_old != NULL);
348 		if (odp == dp1->dom_old) {
349 			break;
350 		}
351 	}
352 	if (dp1 != NULL) {
353 		struct protosw *pp1 __single, *pp2 __single;
354 
355 		VERIFY(dp1->dom_flags & DOM_OLD);
356 		VERIFY(dp1->dom_old == odp);
357 
358 		/* Remove all protocols attached to this domain */
359 		TAILQ_FOREACH_SAFE(pp1, &dp1->dom_protosw, pr_entry, pp2) {
360 			detach_proto(pp1, dp1);
361 			if (pp1->pr_usrreqs->pru_flags & PRUF_OLD) {
362 				kfree_type(struct pr_usrreqs, pp1->pr_usrreqs);
363 			}
364 			if (pp1->pr_flags & PR_OLD) {
365 				kfree_type(struct protosw, pp1);
366 			}
367 		}
368 
369 		detach_domain(dp1);
370 		kfree_type(struct domain, dp1);
371 	} else {
372 		error = EPFNOSUPPORT;
373 	}
374 done:
375 	domain_guard_release(guard);
376 	return error;
377 }
378 
379 /*
380  * Internal routine, not exported.
381  *
382  * net_add_proto - link a protosw into a domain's protosw chain
383  *
384  * NOTE: Caller must have acquired domain_proto_mtx
385  */
386 int
net_add_proto(struct protosw * pp,struct domain * dp,int doinit)387 net_add_proto(struct protosw *pp, struct domain *dp, int doinit)
388 {
389 	struct protosw *pp1;
390 
391 	/*
392 	 * This could be called as part of initializing the domain,
393 	 * and thus DOM_INITIALIZED may not be set (yet).
394 	 */
395 	domain_proto_mtx_lock_assert_held();
396 	VERIFY(!(pp->pr_flags & PR_ATTACHED));
397 
398 	/* pr_domain is set only after the protocol is attached */
399 	if (pp->pr_domain != NULL) {
400 		panic("%s: domain (%d,%s), proto %d has non-NULL pr_domain!",
401 		    __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
402 		/* NOTREACHED */
403 	}
404 
405 	if (pp->pr_usrreqs == NULL) {
406 		panic("%s: domain (%d,%s), proto %d has no usrreqs!",
407 		    __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
408 		/* NOTREACHED */
409 	}
410 
411 	TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
412 		if (pp1->pr_type == pp->pr_type &&
413 		    pp1->pr_protocol == pp->pr_protocol) {
414 			return EEXIST;
415 		}
416 	}
417 
418 	attach_proto(pp, dp);
419 	if (doinit) {
420 		net_init_proto(pp, dp);
421 	}
422 
423 	return 0;
424 }
425 
426 void
net_init_proto(struct protosw * pp,struct domain * dp)427 net_init_proto(struct protosw *pp, struct domain *dp)
428 {
429 	/*
430 	 * This could be called as part of initializing the domain,
431 	 * and thus DOM_INITIALIZED may not be set (yet).  The protocol
432 	 * must have been attached via net_addr_protosw() by now.
433 	 */
434 	domain_proto_mtx_lock_assert_held();
435 	VERIFY(pp->pr_flags & PR_ATTACHED);
436 
437 	init_proto(pp, dp);
438 }
439 
440 /*
441  * Exported (private) routine, indirection of net_add_proto.
442  */
443 int
net_add_proto_old(struct protosw_old * opp,struct domain_old * odp)444 net_add_proto_old(struct protosw_old *opp, struct domain_old *odp)
445 {
446 	struct pr_usrreqs_old *opru;
447 	struct pr_usrreqs *pru __single = NULL;
448 	struct protosw *pp __single = NULL, *pp1;
449 	int error = 0;
450 	struct domain *dp;
451 	domain_guard_t guard __single;
452 
453 	/*
454 	 * This could be called as part of initializing the domain,
455 	 * and thus DOM_INITIALIZED may not be set (yet).
456 	 */
457 	guard = domain_guard_deploy();
458 
459 	/* Make sure the domain has been added via net_add_domain */
460 	TAILQ_FOREACH(dp, &domains, dom_entry) {
461 		if (!(dp->dom_flags & DOM_OLD)) {
462 			continue;
463 		}
464 		if (dp->dom_old == odp) {
465 			break;
466 		}
467 	}
468 	if (dp == NULL) {
469 		error = EINVAL;
470 		goto done;
471 	}
472 
473 	TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
474 		if (pp1->pr_type == opp->pr_type &&
475 		    pp1->pr_protocol == opp->pr_protocol) {
476 			error = EEXIST;
477 			goto done;
478 		}
479 	}
480 
481 	if ((opru = opp->pr_usrreqs) == NULL) {
482 		panic("%s: domain (%d,%s), proto %d has no usrreqs!",
483 		    __func__, odp->dom_family, odp->dom_name, opp->pr_protocol);
484 		/* NOTREACHED */
485 	}
486 
487 	pru = kalloc_type(struct pr_usrreqs, Z_WAITOK | Z_ZERO | Z_NOFAIL);
488 
489 	pru->pru_flags          = PRUF_OLD;
490 	pru->pru_abort          = opru->pru_abort;
491 	pru->pru_accept         = opru->pru_accept;
492 	pru->pru_attach         = opru->pru_attach;
493 	pru->pru_bind           = opru->pru_bind;
494 	pru->pru_connect        = opru->pru_connect;
495 	pru->pru_connect2       = opru->pru_connect2;
496 	pru->pru_control        = opru->pru_control;
497 	pru->pru_detach         = opru->pru_detach;
498 	pru->pru_disconnect     = opru->pru_disconnect;
499 	pru->pru_listen         = opru->pru_listen;
500 	pru->pru_peeraddr       = opru->pru_peeraddr;
501 	pru->pru_rcvd           = opru->pru_rcvd;
502 	pru->pru_rcvoob         = opru->pru_rcvoob;
503 	pru->pru_send           = opru->pru_send;
504 	pru->pru_sense          = opru->pru_sense;
505 	pru->pru_shutdown       = opru->pru_shutdown;
506 	pru->pru_sockaddr       = opru->pru_sockaddr;
507 	pru->pru_sosend         = opru->pru_sosend;
508 	pru->pru_soreceive      = opru->pru_soreceive;
509 	pru->pru_sopoll         = opru->pru_sopoll;
510 
511 	pp = kalloc_type(struct protosw, Z_WAITOK | Z_ZERO | Z_NOFAIL);
512 
513 	/*
514 	 * Protocol fast and slow timers are now deprecated.
515 	 */
516 	if (opp->pr_unused != NULL) {
517 		printf("%s: domain (%d,%s), proto %d: pr_fasttimo is "
518 		    "deprecated and won't be called\n", __func__,
519 		    odp->dom_family, odp->dom_name, opp->pr_protocol);
520 	}
521 	if (opp->pr_unused2 != NULL) {
522 		printf("%s: domain (%d,%s), proto %d: pr_slowtimo is "
523 		    "deprecated and won't be called\n", __func__,
524 		    odp->dom_family, odp->dom_name, opp->pr_protocol);
525 	}
526 
527 	/* Copy everything but pr_init, pr_next, pr_domain, pr_protosw */
528 	pp->pr_type             = opp->pr_type;
529 	pp->pr_protocol         = opp->pr_protocol;
530 	pp->pr_flags            = (opp->pr_flags & PRF_USERFLAGS) | PR_OLD;
531 	pp->pr_input            = opp->pr_input;
532 	pp->pr_output           = opp->pr_output;
533 	pp->pr_ctlinput         = opp->pr_ctlinput;
534 	pp->pr_ctloutput        = opp->pr_ctloutput;
535 	pp->pr_usrreqs          = pru;
536 	pp->pr_init             = pr_init_old;
537 	pp->pr_drain            = opp->pr_drain;
538 	pp->pr_sysctl           = opp->pr_sysctl;
539 	pp->pr_lock             = opp->pr_lock;
540 	pp->pr_unlock           = opp->pr_unlock;
541 	pp->pr_getlock          = opp->pr_getlock;
542 	pp->pr_old              = opp;
543 
544 	/* attach as well as initialize */
545 	attach_proto(pp, dp);
546 	net_init_proto(pp, dp);
547 done:
548 	if (error != 0) {
549 		printf("%s: domain (%d,%s), proto %d: failed to attach, "
550 		    "error %d\n", __func__, odp->dom_family,
551 		    odp->dom_name, opp->pr_protocol, error);
552 
553 		kfree_type(struct pr_usrreqs, pru);
554 		kfree_type(struct protosw, pp);
555 	}
556 
557 	domain_guard_release(guard);
558 	return error;
559 }
560 
561 /*
562  * Internal routine, not exported.
563  *
564  * net_del_proto - remove a protosw from a domain's protosw chain.
565  * Search the protosw chain for the element with matching data.
566  * Then unlink and return.
567  *
568  * NOTE: Caller must have acquired domain_proto_mtx
569  */
570 int
net_del_proto(int type,int protocol,struct domain * dp)571 net_del_proto(int type, int protocol, struct domain *dp)
572 {
573 	struct protosw *pp __single;
574 
575 	/*
576 	 * This could be called as part of initializing the domain,
577 	 * and thus DOM_INITIALIZED may not be set (yet).
578 	 */
579 	domain_proto_mtx_lock_assert_held();
580 
581 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
582 		if (pp->pr_type == type && pp->pr_protocol == protocol) {
583 			break;
584 		}
585 	}
586 	if (pp == NULL) {
587 		return ENXIO;
588 	}
589 
590 	detach_proto(pp, dp);
591 	if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
592 		kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
593 	}
594 	if (pp->pr_flags & PR_OLD) {
595 		kfree_type(struct protosw, pp);
596 	}
597 
598 	return 0;
599 }
600 
601 /*
602  * Exported (private) routine, indirection of net_del_proto.
603  */
604 int
net_del_proto_old(int type,int protocol,struct domain_old * odp)605 net_del_proto_old(int type, int protocol, struct domain_old *odp)
606 {
607 	int error = 0;
608 	struct protosw *pp __single;
609 	struct domain *dp;
610 	domain_guard_t guard __single;
611 
612 	/*
613 	 * This could be called as part of initializing the domain,
614 	 * and thus DOM_INITIALIZED may not be set (yet).
615 	 */
616 	guard = domain_guard_deploy();
617 
618 	/* Make sure the domain has been added via net_add_domain */
619 	TAILQ_FOREACH(dp, &domains, dom_entry) {
620 		if (!(dp->dom_flags & DOM_OLD)) {
621 			continue;
622 		}
623 		if (dp->dom_old == odp) {
624 			break;
625 		}
626 	}
627 	if (dp == NULL) {
628 		error = ENXIO;
629 		goto done;
630 	}
631 
632 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
633 		if (pp->pr_type == type && pp->pr_protocol == protocol) {
634 			break;
635 		}
636 	}
637 	if (pp == NULL) {
638 		error = ENXIO;
639 		goto done;
640 	}
641 	detach_proto(pp, dp);
642 	if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
643 		kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
644 	}
645 	if (pp->pr_flags & PR_OLD) {
646 		kfree_type(struct protosw, pp);
647 	}
648 
649 done:
650 	domain_guard_release(guard);
651 	return error;
652 }
653 
654 static void
domain_sched_timeout(void)655 domain_sched_timeout(void)
656 {
657 	LCK_MTX_ASSERT(&domain_timeout_mtx, LCK_MTX_ASSERT_OWNED);
658 
659 	if (!domain_timeout_run && domain_draining) {
660 		domain_timeout_run = TRUE;
661 		timeout(domain_timeout, NULL, hz);
662 	}
663 }
664 
665 void
net_drain_domains(void)666 net_drain_domains(void)
667 {
668 	lck_mtx_lock(&domain_timeout_mtx);
669 	domain_draining = TRUE;
670 	domain_sched_timeout();
671 	lck_mtx_unlock(&domain_timeout_mtx);
672 }
673 
/*
 * Statically defined domains, attached by domaininit().
 */
extern struct domain inetdomain_s;
extern struct domain inet6domain_s;
#if MULTIPATH
extern struct domain mpdomain_s;
#endif /* MULTIPATH */
extern struct domain systemdomain_s;
extern struct domain localdomain_s;
#if IPSEC
extern struct domain keydomain_s;
#endif /* IPSEC */
extern struct domain ndrvdomain_s;
extern struct domain vsockdomain_s;
extern struct domain routedomain_s;
686 
687 static void
domain_timeout(void * arg)688 domain_timeout(void *arg)
689 {
690 #pragma unused(arg)
691 	struct protosw *pp;
692 	struct domain *dp;
693 	domain_guard_t guard __single;
694 
695 	lck_mtx_lock(&domain_timeout_mtx);
696 	if (domain_draining) {
697 		domain_draining = FALSE;
698 		lck_mtx_unlock(&domain_timeout_mtx);
699 
700 		guard = domain_guard_deploy();
701 		TAILQ_FOREACH(dp, &domains, dom_entry) {
702 			TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
703 				if (pp->pr_drain != NULL) {
704 					(*pp->pr_drain)();
705 				}
706 			}
707 		}
708 		domain_guard_release(guard);
709 
710 		lck_mtx_lock(&domain_timeout_mtx);
711 	}
712 
713 	/* re-arm the timer if there's work to do */
714 	domain_timeout_run = FALSE;
715 	domain_sched_timeout();
716 	lck_mtx_unlock(&domain_timeout_mtx);
717 }
718 
719 void
domaininit(void)720 domaininit(void)
721 {
722 	struct domain *dp;
723 	domain_guard_t guard __single;
724 
725 	eventhandler_lists_ctxt_init(&protoctl_evhdlr_ctxt);
726 
727 	guard = domain_guard_deploy();
728 	/*
729 	 * Add all the static domains to the domains list.  route domain
730 	 * gets added and initialized last, since we need it to attach
731 	 * rt_tables[] to everything that's already there.  This also
732 	 * means that domains added after this point won't get their
733 	 * dom_rtattach() called on rt_tables[].
734 	 */
735 	attach_domain(&inetdomain_s);
736 	attach_domain(&inet6domain_s);
737 #if MULTIPATH
738 	attach_domain(&mpdomain_s);
739 #endif /* MULTIPATH */
740 	attach_domain(&systemdomain_s);
741 	attach_domain(&localdomain_s);
742 #if IPSEC
743 	attach_domain(&keydomain_s);
744 #endif /* IPSEC */
745 	attach_domain(&ndrvdomain_s);
746 	attach_domain(&vsockdomain_s);
747 	attach_domain(&routedomain_s);  /* must be last domain */
748 
749 	/*
750 	 * Now ask them all to init (XXX including the routing domain,
751 	 * see above)
752 	 */
753 	TAILQ_FOREACH(dp, &domains, dom_entry)
754 	init_domain(dp);
755 
756 	domain_guard_release(guard);
757 }
758 
759 static __inline__ struct domain *
pffinddomain_locked(int pf)760 pffinddomain_locked(int pf)
761 {
762 	struct domain *dp;
763 
764 	domain_proto_mtx_lock_assert_held();
765 
766 	TAILQ_FOREACH(dp, &domains, dom_entry) {
767 		if (dp->dom_family == pf) {
768 			break;
769 		}
770 	}
771 	return dp;
772 }
773 
774 struct protosw *
pffindtype(int family,int type)775 pffindtype(int family, int type)
776 {
777 	struct protosw *pp = NULL;
778 	struct domain *dp;
779 	domain_guard_t guard __single;
780 
781 	guard = domain_guard_deploy();
782 	if ((dp = pffinddomain_locked(family)) == NULL) {
783 		goto done;
784 	}
785 
786 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
787 		if (pp->pr_type != 0 && pp->pr_type == type) {
788 			goto done;
789 		}
790 	}
791 done:
792 	domain_guard_release(guard);
793 	return pp;
794 }
795 
796 /*
797  * Internal routine, not exported.
798  */
799 struct domain *
pffinddomain(int pf)800 pffinddomain(int pf)
801 {
802 	struct domain *dp;
803 	domain_guard_t guard __single;
804 
805 	guard = domain_guard_deploy();
806 	dp = pffinddomain_locked(pf);
807 	domain_guard_release(guard);
808 	return dp;
809 }
810 
811 /*
812  * Exported (private) routine, indirection of pffinddomain.
813  */
814 struct domain_old *
pffinddomain_old(int pf)815 pffinddomain_old(int pf)
816 {
817 	struct domain_old *odp = NULL;
818 	struct domain *dp;
819 	domain_guard_t guard __single;
820 
821 	guard = domain_guard_deploy();
822 	if ((dp = pffinddomain_locked(pf)) != NULL && (dp->dom_flags & DOM_OLD)) {
823 		odp = dp->dom_old;
824 	}
825 	domain_guard_release(guard);
826 	return odp;
827 }
828 
829 /*
830  * Internal routine, not exported.
831  */
832 struct protosw *
pffindproto(int family,int protocol,int type)833 pffindproto(int family, int protocol, int type)
834 {
835 	struct protosw *pp;
836 	domain_guard_t guard __single;
837 
838 	guard = domain_guard_deploy();
839 	pp = pffindproto_locked(family, protocol, type);
840 	domain_guard_release(guard);
841 	return pp;
842 }
843 
844 struct protosw *
pffindproto_locked(int family,int protocol,int type)845 pffindproto_locked(int family, int protocol, int type)
846 {
847 	struct protosw *maybe = NULL;
848 	struct protosw *pp;
849 	struct domain *dp;
850 
851 	domain_proto_mtx_lock_assert_held();
852 
853 	if (family == 0) {
854 		return 0;
855 	}
856 
857 	dp = pffinddomain_locked(family);
858 	if (dp == NULL) {
859 		return NULL;
860 	}
861 
862 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
863 		if ((pp->pr_protocol == protocol) && (pp->pr_type == type)) {
864 			return pp;
865 		}
866 
867 		if (type == SOCK_RAW && pp->pr_type == SOCK_RAW &&
868 		    pp->pr_protocol == 0 && maybe == NULL) {
869 			maybe = pp;
870 		}
871 	}
872 	return maybe;
873 }
874 
875 /*
876  * Exported (private) routine, indirection of pffindproto.
877  */
878 struct protosw_old *
pffindproto_old(int family,int protocol,int type)879 pffindproto_old(int family, int protocol, int type)
880 {
881 	struct protosw_old *opr = NULL;
882 	struct protosw *pp;
883 	domain_guard_t guard __single;
884 
885 	guard = domain_guard_deploy();
886 	if ((pp = pffindproto_locked(family, protocol, type)) != NULL &&
887 	    (pp->pr_flags & PR_OLD)) {
888 		opr = pp->pr_old;
889 	}
890 	domain_guard_release(guard);
891 	return opr;
892 }
893 
894 static struct protosw *
pffindprotonotype_locked(int family,int protocol,int type)895 pffindprotonotype_locked(int family, int protocol, int type)
896 {
897 #pragma unused(type)
898 	struct domain *dp;
899 	struct protosw *pp;
900 
901 	domain_proto_mtx_lock_assert_held();
902 
903 	if (family == 0) {
904 		return 0;
905 	}
906 
907 	dp = pffinddomain_locked(family);
908 	if (dp == NULL) {
909 		return NULL;
910 	}
911 
912 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
913 		if (pp->pr_protocol == protocol) {
914 			return pp;
915 		}
916 	}
917 	return NULL;
918 }
919 
920 struct protosw *
pffindprotonotype(int family,int protocol)921 pffindprotonotype(int family, int protocol)
922 {
923 	struct protosw *pp;
924 	domain_guard_t guard __single;
925 
926 	if (protocol == 0) {
927 		return NULL;
928 	}
929 
930 	guard = domain_guard_deploy();
931 	pp = pffindprotonotype_locked(family, protocol, 0);
932 	domain_guard_release(guard);
933 	return pp;
934 }
935 
936 void
pfctlinput(int cmd,struct sockaddr * sa)937 pfctlinput(int cmd, struct sockaddr *sa)
938 {
939 	pfctlinput2(cmd, sa, NULL);
940 }
941 
942 void
pfctlinput2(int cmd,struct sockaddr * sa,void * ctlparam)943 pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
944 {
945 	struct domain *dp;
946 	struct protosw *pp;
947 	domain_guard_t guard __single;
948 
949 	if (sa == NULL) {
950 		return;
951 	}
952 
953 	guard = domain_guard_deploy();
954 	TAILQ_FOREACH(dp, &domains, dom_entry) {
955 		TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
956 			if (pp->pr_ctlinput != NULL) {
957 				(*pp->pr_ctlinput)(cmd, sa, ctlparam, NULL);
958 			}
959 		}
960 	}
961 	domain_guard_release(guard);
962 }
963 
964 void
net_update_uptime_with_time(const struct timeval * tvp)965 net_update_uptime_with_time(const struct timeval *tvp)
966 {
967 	uint64_t tmp;
968 	uint64_t seconds = tvp->tv_sec;;
969 	uint64_t milliseconds = ((uint64_t)tvp->tv_sec * 1000) + ((uint64_t)tvp->tv_usec / 1000);
970 	uint64_t microseconds = ((uint64_t)tvp->tv_sec * USEC_PER_SEC) + (uint64_t)tvp->tv_usec;
971 
972 	/*
973 	 * Round up the timer to the nearest integer value because otherwise
974 	 * we might setup networking timers that are off by almost 1 second.
975 	 */
976 	if (tvp->tv_usec > 500000) {
977 		seconds++;
978 	}
979 
980 	tmp = os_atomic_load(&_net_uptime, relaxed);
981 	if (tmp < seconds) {
982 		os_atomic_cmpxchg(&_net_uptime, tmp, seconds, relaxed);
983 
984 		/*
985 		 * No loop needed. If we are racing with another thread, let's give
986 		 * the other one the priority.
987 		 */
988 	}
989 
990 	/* update milliseconds variant */
991 	tmp = os_atomic_load(&_net_uptime_ms, relaxed);
992 	if (tmp < milliseconds) {
993 		os_atomic_cmpxchg(&_net_uptime_ms, tmp, milliseconds, relaxed);
994 	}
995 
996 	/* update microseconds variant */
997 	tmp = os_atomic_load(&_net_uptime_us, relaxed);
998 	if (tmp < microseconds) {
999 		os_atomic_cmpxchg(&_net_uptime_us, tmp, microseconds, relaxed);
1000 	}
1001 }
1002 
1003 void
net_update_uptime(void)1004 net_update_uptime(void)
1005 {
1006 	struct timeval tv;
1007 
1008 	microuptime(&tv);
1009 
1010 	net_update_uptime_with_time(&tv);
1011 }
1012 
1013 /*
1014  * Convert our uin64_t net_uptime to a struct timeval.
1015  */
1016 void
net_uptime2timeval(struct timeval * tv)1017 net_uptime2timeval(struct timeval *tv)
1018 {
1019 	if (tv == NULL) {
1020 		return;
1021 	}
1022 
1023 	tv->tv_usec = 0;
1024 	tv->tv_sec = (time_t)net_uptime();
1025 }
1026 
1027 /*
1028  * An alternative way to obtain the coarse-grained uptime (in seconds)
1029  * for networking code which do not require high-precision timestamp,
1030  * as this is significantly cheaper than microuptime().
1031  */
1032 uint64_t
net_uptime(void)1033 net_uptime(void)
1034 {
1035 	if (_net_uptime == 0) {
1036 		net_update_uptime();
1037 	}
1038 
1039 	return _net_uptime;
1040 }
1041 
1042 uint64_t
net_uptime_ms(void)1043 net_uptime_ms(void)
1044 {
1045 	if (_net_uptime_ms == 0) {
1046 		net_update_uptime();
1047 	}
1048 
1049 	return _net_uptime_ms;
1050 }
1051 
1052 uint64_t
net_uptime_us(void)1053 net_uptime_us(void)
1054 {
1055 	if (_net_uptime_us == 0) {
1056 		net_update_uptime();
1057 	}
1058 
1059 	return _net_uptime_us;
1060 }
1061 
1062 void
domain_proto_mtx_lock_assert_held(void)1063 domain_proto_mtx_lock_assert_held(void)
1064 {
1065 	LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1066 }
1067 
1068 void
domain_proto_mtx_lock_assert_notheld(void)1069 domain_proto_mtx_lock_assert_notheld(void)
1070 {
1071 	LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1072 }
1073 
1074 domain_guard_t
domain_guard_deploy(void)1075 domain_guard_deploy(void)
1076 {
1077 	net_thread_marks_t marks __single;
1078 
1079 	marks = net_thread_marks_push(NET_THREAD_HELD_DOMAIN);
1080 	if (marks != net_thread_marks_none) {
1081 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1082 		lck_mtx_lock(&domain_proto_mtx);
1083 	} else {
1084 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1085 	}
1086 
1087 	return (domain_guard_t)(const void*)marks;
1088 }
1089 
1090 void
domain_guard_release(domain_guard_t guard)1091 domain_guard_release(domain_guard_t guard)
1092 {
1093 	net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)guard;
1094 
1095 	if (marks != net_thread_marks_none) {
1096 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1097 		lck_mtx_unlock(&domain_proto_mtx);
1098 		net_thread_marks_pop(marks);
1099 	} else {
1100 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1101 	}
1102 }
1103 
1104 domain_unguard_t
domain_unguard_deploy(void)1105 domain_unguard_deploy(void)
1106 {
1107 	net_thread_marks_t marks __single;
1108 
1109 	marks = net_thread_unmarks_push(NET_THREAD_HELD_DOMAIN);
1110 	if (marks != net_thread_marks_none) {
1111 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1112 		lck_mtx_unlock(&domain_proto_mtx);
1113 	} else {
1114 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1115 	}
1116 
1117 	return (domain_unguard_t)(const void*)marks;
1118 }
1119 
1120 void
domain_unguard_release(domain_unguard_t unguard)1121 domain_unguard_release(domain_unguard_t unguard)
1122 {
1123 	net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)unguard;
1124 
1125 	if (marks != net_thread_marks_none) {
1126 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1127 		lck_mtx_lock(&domain_proto_mtx);
1128 		net_thread_unmarks_pop(marks);
1129 	} else {
1130 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1131 	}
1132 }
1133 
1134 #if SKYWALK
1135 /* The following is used to enqueue work items for interface events */
1136 struct protoctl_event {
1137 	struct ifnet *ifp;
1138 	union sockaddr_in_4_6 laddr;
1139 	union sockaddr_in_4_6 raddr;
1140 	uint32_t protoctl_event_code;
1141 	struct protoctl_ev_val val;
1142 	uint16_t lport;
1143 	uint16_t rport;
1144 	uint8_t protocol;
1145 };
1146 
1147 struct protoctl_event_nwk_wq_entry {
1148 	struct nwk_wq_entry nwk_wqe;
1149 	struct protoctl_event protoctl_ev_arg;
1150 };
1151 
1152 static void
protoctl_event_callback(struct nwk_wq_entry * nwk_item)1153 protoctl_event_callback(struct nwk_wq_entry *nwk_item)
1154 {
1155 	struct protoctl_event_nwk_wq_entry *p_ev __single = NULL;
1156 
1157 	p_ev = __unsafe_forge_single(struct protoctl_event_nwk_wq_entry *,
1158 	    __container_of(nwk_item, struct protoctl_event_nwk_wq_entry, nwk_wqe));
1159 
1160 	/* Call this before we walk the tree */
1161 	EVENTHANDLER_INVOKE(&protoctl_evhdlr_ctxt, protoctl_event,
1162 	    p_ev->protoctl_ev_arg.ifp, SA(&p_ev->protoctl_ev_arg.laddr),
1163 	    SA(&p_ev->protoctl_ev_arg.raddr),
1164 	    p_ev->protoctl_ev_arg.lport, p_ev->protoctl_ev_arg.rport,
1165 	    p_ev->protoctl_ev_arg.protocol, p_ev->protoctl_ev_arg.protoctl_event_code,
1166 	    &p_ev->protoctl_ev_arg.val);
1167 
1168 	kfree_type(struct protoctl_event_nwk_wq_entry, p_ev);
1169 }
1170 
1171 /* XXX Some PRC events needs extra verification like sequence number checking */
1172 void
protoctl_event_enqueue_nwk_wq_entry(struct ifnet * ifp,struct sockaddr * p_laddr,struct sockaddr * p_raddr,uint16_t lport,uint16_t rport,uint8_t protocol,uint32_t protoctl_event_code,struct protoctl_ev_val * p_protoctl_ev_val)1173 protoctl_event_enqueue_nwk_wq_entry(struct ifnet *ifp, struct sockaddr *p_laddr,
1174     struct sockaddr *p_raddr, uint16_t lport, uint16_t rport, uint8_t protocol,
1175     uint32_t protoctl_event_code, struct protoctl_ev_val *p_protoctl_ev_val)
1176 {
1177 	struct protoctl_event_nwk_wq_entry *p_protoctl_ev = NULL;
1178 
1179 	evhlog(debug, "%s: eventhandler enqueuing event of type=protoctl_event event_code=%d",
1180 	    __func__, protocol);
1181 
1182 	p_protoctl_ev = kalloc_type(struct protoctl_event_nwk_wq_entry,
1183 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
1184 
1185 	p_protoctl_ev->protoctl_ev_arg.ifp = ifp;
1186 
1187 	if (p_laddr != NULL) {
1188 		VERIFY(p_laddr->sa_len <= sizeof(p_protoctl_ev->protoctl_ev_arg.laddr));
1189 		struct sockaddr_in6 *dst __single = &p_protoctl_ev->protoctl_ev_arg.laddr.sin6;
1190 		SOCKADDR_COPY(SIN6(p_laddr), dst, p_laddr->sa_len);
1191 	}
1192 
1193 	if (p_raddr != NULL) {
1194 		VERIFY(p_raddr->sa_len <= sizeof(p_protoctl_ev->protoctl_ev_arg.raddr));
1195 		struct sockaddr_in6 *dst __single = &p_protoctl_ev->protoctl_ev_arg.raddr.sin6;
1196 		SOCKADDR_COPY(SIN6(p_raddr), dst, p_raddr->sa_len);
1197 	}
1198 
1199 	p_protoctl_ev->protoctl_ev_arg.lport = lport;
1200 	p_protoctl_ev->protoctl_ev_arg.rport = rport;
1201 	p_protoctl_ev->protoctl_ev_arg.protocol = protocol;
1202 	p_protoctl_ev->protoctl_ev_arg.protoctl_event_code = protoctl_event_code;
1203 
1204 	if (p_protoctl_ev_val != NULL) {
1205 		bcopy(p_protoctl_ev_val, &(p_protoctl_ev->protoctl_ev_arg.val),
1206 		    sizeof(*p_protoctl_ev_val));
1207 	}
1208 	p_protoctl_ev->nwk_wqe.func = protoctl_event_callback;
1209 
1210 	nwk_wq_enqueue(&p_protoctl_ev->nwk_wqe);
1211 }
1212 #endif /* SKYWALK */
1213 
1214 #if (DEVELOPMENT || DEBUG)
1215 
1216 static int
1217 sysctl_do_drain_domains SYSCTL_HANDLER_ARGS
1218 {
1219 #pragma unused(arg1, arg2)
1220 	int error;
1221 	int dummy = 0;
1222 
1223 	error = sysctl_handle_int(oidp, &dummy, 0, req);
1224 	if (error || req->newptr == USER_ADDR_NULL) {
1225 		return error;
1226 	}
1227 
1228 	net_drain_domains();
1229 
1230 	return 0;
1231 }
1232 
1233 #endif /* DEVELOPMENT || DEBUG */
1234