xref: /xnu-10002.41.9/bsd/kern/uipc_domain.c (revision 699cd48037512bf4380799317ca44ca453c82f57)
1 /*
2  * Copyright (c) 1998-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. All advertising materials mentioning features or use of this software
42  *    must display the following acknowledgement:
43  *	This product includes software developed by the University of
44  *	California, Berkeley and its contributors.
45  * 4. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)uipc_domain.c	8.3 (Berkeley) 2/14/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/socket.h>
66 #include <sys/protosw.h>
67 #include <sys/domain.h>
68 #include <sys/mcache.h>
69 #include <sys/mbuf.h>
70 #include <sys/time.h>
71 #include <sys/kernel.h>
72 #include <sys/systm.h>
73 #include <sys/proc_internal.h>
74 #include <sys/sysctl.h>
75 #include <sys/syslog.h>
76 #include <sys/queue.h>
77 
78 #include <net/dlil.h>
79 #include <net/nwk_wq.h>
80 #include <net/sockaddr_utils.h>
81 
82 #include <mach/boolean.h>
83 #include <pexpert/pexpert.h>
84 
85 #if __has_ptrcheck
86 #include <machine/trap.h> /* Needed by bound-checks-soft when enabled. */
87 #endif /* __has_ptrcheck */
88 
/* Eventhandler context for protocol events */
struct eventhandler_lists_ctxt protoctl_evhdlr_ctxt;

/* Forward declarations for the attach/detach/init helpers defined below */
static void pr_init_old(struct protosw *, struct domain *);
static void init_proto(struct protosw *, struct domain *);
static void attach_proto(struct protosw *, struct domain *);
static void detach_proto(struct protosw *, struct domain *);
static void dom_init_old(struct domain *);
static void init_domain(struct domain *);
static void attach_domain(struct domain *);
static void detach_domain(struct domain *);
static struct protosw *pffindprotonotype_locked(int, int, int);
static struct domain *pffinddomain_locked(int);

static boolean_t domain_timeout_run;    /* domain timer is scheduled to run */
static boolean_t domain_draining;       /* a drain pass has been requested */
static void domain_sched_timeout(void);
static void domain_timeout(void *);

/*
 * domain_proto_mtx serializes access to the global `domains' list and
 * each domain's protosw chain (see domain_guard_deploy() below);
 * domain_timeout_mtx protects the drain-timer flags above.  Both share
 * one lock group and attribute.
 */
static LCK_GRP_DECLARE(domain_proto_mtx_grp, "domain");
static LCK_ATTR_DECLARE(domain_proto_mtx_attr, 0, 0);
static LCK_MTX_DECLARE_ATTR(domain_proto_mtx,
    &domain_proto_mtx_grp, &domain_proto_mtx_attr);
static LCK_MTX_DECLARE_ATTR(domain_timeout_mtx,
    &domain_proto_mtx_grp, &domain_proto_mtx_attr);

/* Coarse-grained network uptime; see net_uptime()/net_uptime_ms() */
u_int64_t _net_uptime;
u_int64_t _net_uptime_ms;

#if (DEVELOPMENT || DEBUG)

SYSCTL_DECL(_kern_ipc);

/* Handler defined later in this file (outside this view) */
static int sysctl_do_drain_domains SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_ipc, OID_AUTO, do_drain_domains,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0,
    sysctl_do_drain_domains, "I", "force manual drain domains");

#endif /* DEVELOPMENT || DEBUG */
131 static void
pr_init_old(struct protosw * pp,struct domain * dp)132 pr_init_old(struct protosw *pp, struct domain *dp)
133 {
134 #pragma unused(dp)
135 	VERIFY(pp->pr_flags & PR_OLD);
136 	VERIFY(pp->pr_old != NULL);
137 
138 	if (pp->pr_old->pr_init != NULL) {
139 		pp->pr_old->pr_init();
140 	}
141 }
142 
143 static void
init_proto(struct protosw * pp,struct domain * dp)144 init_proto(struct protosw *pp, struct domain *dp)
145 {
146 	VERIFY(pp->pr_flags & PR_ATTACHED);
147 
148 	if (!(pp->pr_flags & PR_INITIALIZED)) {
149 		TAILQ_INIT(&pp->pr_filter_head);
150 		if (pp->pr_init != NULL) {
151 			pp->pr_init(pp, dp);
152 		}
153 		pp->pr_flags |= PR_INITIALIZED;
154 	}
155 }
156 
/*
 * Link a protosw onto its domain's protosw chain and mark it attached.
 * Caller must hold domain_proto_mtx.  The protocol must not already be
 * attached (pr_domain/pr_protosw are set only here).
 */
static void
attach_proto(struct protosw *pp, struct domain *dp)
{
	domain_proto_mtx_lock_assert_held();
	VERIFY(!(pp->pr_flags & PR_ATTACHED));
	VERIFY(pp->pr_domain == NULL);
	VERIFY(pp->pr_protosw == NULL);

	TAILQ_INSERT_TAIL(&dp->dom_protosw, pp, pr_entry);
	pp->pr_flags |= PR_ATTACHED;
	pp->pr_domain = dp;
	pp->pr_protosw = pp;

	/* do some cleaning up on user request callbacks */
	pru_sanitize(pp->pr_usrreqs);
}
173 
/*
 * Inverse of attach_proto(): unlink a protosw from its domain's chain
 * and clear the attachment state.  Caller must hold domain_proto_mtx,
 * and the protocol must currently be attached to exactly this domain.
 * Note: does not free pp; callers free it if it was dynamically
 * allocated (PR_OLD).
 */
static void
detach_proto(struct protosw *pp, struct domain *dp)
{
	domain_proto_mtx_lock_assert_held();
	VERIFY(pp->pr_flags & PR_ATTACHED);
	VERIFY(pp->pr_domain == dp);
	VERIFY(pp->pr_protosw == pp);

	TAILQ_REMOVE(&dp->dom_protosw, pp, pr_entry);
	pp->pr_flags &= ~PR_ATTACHED;
	pp->pr_domain = NULL;
	pp->pr_protosw = NULL;
}
187 
188 static void
dom_init_old(struct domain * dp)189 dom_init_old(struct domain *dp)
190 {
191 	VERIFY(dp->dom_flags & DOM_OLD);
192 	VERIFY(dp->dom_old != NULL);
193 
194 	if (dp->dom_old->dom_init != NULL) {
195 		dp->dom_old->dom_init();
196 	}
197 }
198 
/*
 * One-time initialization of an attached domain (mutex, protosw list,
 * dom_init callback), followed by a recomputation of the global mbuf
 * header-size limits.  The recompute part runs on every call because a
 * new domain can raise max_protohdr.
 */
static void
init_domain(struct domain *dp)
{
	VERIFY(dp->dom_flags & DOM_ATTACHED);

	if (!(dp->dom_flags & DOM_INITIALIZED)) {
		lck_mtx_init(&dp->dom_mtx_s, &domain_proto_mtx_grp,
		    &domain_proto_mtx_attr);
		/* dom_mtx points at the embedded mutex storage */
		dp->dom_mtx = &dp->dom_mtx_s;
		TAILQ_INIT(&dp->dom_protosw);
		if (dp->dom_init != NULL) {
			dp->dom_init(dp);
		}
		dp->dom_flags |= DOM_INITIALIZED;
	}

	/* Recompute for new protocol */
	if (max_linkhdr < 16) {        /* XXX - Sheesh; everything's ether? */
		max_linkhdr = 16;
	}
	/* keep both limits rounded up to a 4-byte boundary */
	max_linkhdr = (int)P2ROUNDUP(max_linkhdr, sizeof(uint32_t));

	if (dp->dom_protohdrlen > max_protohdr) {
		max_protohdr = dp->dom_protohdrlen;
	}
	max_protohdr = (int)P2ROUNDUP(max_protohdr, sizeof(uint32_t));

	max_hdr = max_linkhdr + max_protohdr;
	/* payload that still fits in an mbuf header after worst-case headers */
	max_datalen = MHLEN - max_hdr;
}
229 
/*
 * Append a domain to the global `domains' list and mark it attached.
 * Caller must hold domain_proto_mtx; the domain must not already be
 * attached.
 */
static void
attach_domain(struct domain *dp)
{
	domain_proto_mtx_lock_assert_held();
	VERIFY(!(dp->dom_flags & DOM_ATTACHED));

	TAILQ_INSERT_TAIL(&domains, dp, dom_entry);
	dp->dom_flags |= DOM_ATTACHED;
}
239 
/*
 * Remove a domain from the global `domains' list.  Caller must hold
 * domain_proto_mtx.  For legacy (DOM_OLD) domains, also sever the
 * back-pointers that net_add_domain_old() set up in the caller-owned
 * domain_old shadow structure.  Does not free dp.
 */
static void
detach_domain(struct domain *dp)
{
	domain_proto_mtx_lock_assert_held();
	VERIFY(dp->dom_flags & DOM_ATTACHED);

	TAILQ_REMOVE(&domains, dp, dom_entry);
	dp->dom_flags &= ~DOM_ATTACHED;

	if (dp->dom_flags & DOM_OLD) {
		struct domain_old *odp = dp->dom_old;

		VERIFY(odp != NULL);
		odp->dom_next = NULL;
		odp->dom_mtx = NULL;
	}
}
257 
258 /*
259  * Exported (private) routine, indirection of net_add_domain.
260  */
/*
 * Register a legacy domain: validate that the family is new and the
 * shadow structure isn't already linked, then allocate an internal
 * struct domain, copy the user-visible fields over, and attach and
 * initialize it.  Panics (rather than failing) on misuse, since legacy
 * callers have no error path.  Caller retains ownership of odp.
 */
void
net_add_domain_old(struct domain_old *odp)
{
	struct domain *dp;
	domain_guard_t guard __single;

	VERIFY(odp != NULL);

	guard = domain_guard_deploy();
	if ((dp = pffinddomain_locked(odp->dom_family)) != NULL) {
		/*
		 * There is really nothing better than to panic here,
		 * as the caller would not have been able to handle
		 * any failures otherwise.
		 */
		panic("%s: domain (%d,%s) already exists for %s", __func__,
		    dp->dom_family, dp->dom_name, odp->dom_name);
		/* NOTREACHED */
	}

	/* Make sure nothing is currently pointing to the odp. */
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		if (dp->dom_old == odp) {
			panic("%s: domain %p (%d,%s) is already "
			    "associated with %p (%d,%s)\n", __func__,
			    odp, odp->dom_family, odp->dom_name, dp,
			    dp->dom_family, dp->dom_name);
			/* NOTREACHED */
		}
	}

	/* protocols must be registered individually via net_add_proto_old() */
	if (odp->dom_protosw != NULL) {
		panic("%s: domain (%d,%s) protocols need to added "
		    "via net_add_proto\n", __func__, odp->dom_family,
		    odp->dom_name);
		/* NOTREACHED */
	}

	dp = kalloc_type(struct domain, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/* Copy everything but dom_init, dom_mtx, dom_next and dom_refs */
	dp->dom_family          = odp->dom_family;
	dp->dom_flags           = (odp->dom_flags & DOMF_USERFLAGS) | DOM_OLD;
	dp->dom_name            = odp->dom_name;
	dp->dom_init            = dom_init_old; /* trampoline to odp->dom_init */
	dp->dom_externalize     = odp->dom_externalize;
	dp->dom_dispose         = odp->dom_dispose;
	dp->dom_rtattach        = odp->dom_rtattach;
	dp->dom_rtoffset        = odp->dom_rtoffset;
	dp->dom_maxrtkey        = odp->dom_maxrtkey;
	dp->dom_protohdrlen     = odp->dom_protohdrlen;
	dp->dom_old             = odp;

	attach_domain(dp);
	init_domain(dp);

	/* Point the mutex back to the internal structure's */
	odp->dom_mtx            = dp->dom_mtx;
	domain_guard_release(guard);
}
321 
322 /*
323  * Exported (private) routine, indirection of net_del_domain.
324  */
/*
 * Unregister a legacy domain previously added via net_add_domain_old().
 * Fails with EBUSY while references remain, or EPFNOSUPPORT if no
 * internal domain is associated with odp.  On success, detaches and
 * frees every protocol still attached to the domain (freeing only the
 * pieces this file allocated, i.e. PRUF_OLD usrreqs and PR_OLD
 * protosws), then detaches and frees the internal domain itself.
 */
int
net_del_domain_old(struct domain_old *odp)
{
	struct domain *dp1 __single, *dp2 __single;
	int error = 0;
	domain_guard_t guard __single;

	VERIFY(odp != NULL);

	guard = domain_guard_deploy();
	if (odp->dom_refs != 0) {
		error = EBUSY;
		goto done;
	}

	/* dp1 is NULL after the loop if no match was found */
	TAILQ_FOREACH_SAFE(dp1, &domains, dom_entry, dp2) {
		if (!(dp1->dom_flags & DOM_OLD)) {
			continue;
		}
		VERIFY(dp1->dom_old != NULL);
		if (odp == dp1->dom_old) {
			break;
		}
	}
	if (dp1 != NULL) {
		struct protosw *pp1 __single, *pp2 __single;

		VERIFY(dp1->dom_flags & DOM_OLD);
		VERIFY(dp1->dom_old == odp);

		/* Remove all protocols attached to this domain */
		TAILQ_FOREACH_SAFE(pp1, &dp1->dom_protosw, pr_entry, pp2) {
			detach_proto(pp1, dp1);
			if (pp1->pr_usrreqs->pru_flags & PRUF_OLD) {
				kfree_type(struct pr_usrreqs, pp1->pr_usrreqs);
			}
			if (pp1->pr_flags & PR_OLD) {
				kfree_type(struct protosw, pp1);
			}
		}

		detach_domain(dp1);
		kfree_type(struct domain, dp1);
	} else {
		error = EPFNOSUPPORT;
	}
done:
	domain_guard_release(guard);
	return error;
}
375 
376 /*
377  * Internal routine, not exported.
378  *
379  * net_add_proto - link a protosw into a domain's protosw chain
380  *
381  * NOTE: Caller must have acquired domain_proto_mtx
382  */
/*
 * Attach a protosw to a domain's chain, optionally initializing it.
 * Returns EEXIST if a protocol with the same (type, protocol) pair is
 * already present; panics on structurally invalid input (pre-set
 * pr_domain, missing usrreqs), since those indicate caller bugs.
 *
 * NOTE: Caller must have acquired domain_proto_mtx.
 */
int
net_add_proto(struct protosw *pp, struct domain *dp, int doinit)
{
	struct protosw *pp1;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(!(pp->pr_flags & PR_ATTACHED));

	/* pr_domain is set only after the protocol is attached */
	if (pp->pr_domain != NULL) {
		panic("%s: domain (%d,%s), proto %d has non-NULL pr_domain!",
		    __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
		/* NOTREACHED */
	}

	if (pp->pr_usrreqs == NULL) {
		panic("%s: domain (%d,%s), proto %d has no usrreqs!",
		    __func__, dp->dom_family, dp->dom_name, pp->pr_protocol);
		/* NOTREACHED */
	}

	/* reject duplicate (type, protocol) registrations */
	TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
		if (pp1->pr_type == pp->pr_type &&
		    pp1->pr_protocol == pp->pr_protocol) {
			return EEXIST;
		}
	}

	attach_proto(pp, dp);
	if (doinit) {
		net_init_proto(pp, dp);
	}

	return 0;
}
422 
/*
 * Run one-time initialization for an already-attached protocol; thin,
 * assertion-bearing wrapper around init_proto().
 */
void
net_init_proto(struct protosw *pp, struct domain *dp)
{
	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).  The protocol
	 * must have been attached (e.g. via net_add_proto()) by now.
	 */
	domain_proto_mtx_lock_assert_held();
	VERIFY(pp->pr_flags & PR_ATTACHED);

	init_proto(pp, dp);
}
436 
437 /*
438  * Exported (private) routine, indirection of net_add_proto.
439  */
/*
 * Register a legacy protocol under a legacy domain: allocate internal
 * pr_usrreqs and protosw shadows, copy the user-visible callbacks over,
 * then attach and initialize.  Returns EINVAL if the domain was never
 * added via net_add_domain_old(), EEXIST on a duplicate (type,
 * protocol) pair; panics if the protocol has no usrreqs.  On error the
 * (possibly still-NULL) allocations are freed before returning.
 */
int
net_add_proto_old(struct protosw_old *opp, struct domain_old *odp)
{
	struct pr_usrreqs_old *opru;
	struct pr_usrreqs *pru __single = NULL;
	struct protosw *pp __single = NULL, *pp1;
	int error = 0;
	struct domain *dp;
	domain_guard_t guard __single;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	guard = domain_guard_deploy();

	/* Make sure the domain has been added via net_add_domain */
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		if (!(dp->dom_flags & DOM_OLD)) {
			continue;
		}
		if (dp->dom_old == odp) {
			break;
		}
	}
	if (dp == NULL) {
		error = EINVAL;
		goto done;
	}

	/* reject duplicate (type, protocol) registrations */
	TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) {
		if (pp1->pr_type == opp->pr_type &&
		    pp1->pr_protocol == opp->pr_protocol) {
			error = EEXIST;
			goto done;
		}
	}

	if ((opru = opp->pr_usrreqs) == NULL) {
		panic("%s: domain (%d,%s), proto %d has no usrreqs!",
		    __func__, odp->dom_family, odp->dom_name, opp->pr_protocol);
		/* NOTREACHED */
	}

	/* internal copy of the user request callbacks; freed in net_del_proto* */
	pru = kalloc_type(struct pr_usrreqs, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	pru->pru_flags          = PRUF_OLD;
	pru->pru_abort          = opru->pru_abort;
	pru->pru_accept         = opru->pru_accept;
	pru->pru_attach         = opru->pru_attach;
	pru->pru_bind           = opru->pru_bind;
	pru->pru_connect        = opru->pru_connect;
	pru->pru_connect2       = opru->pru_connect2;
	pru->pru_control        = opru->pru_control;
	pru->pru_detach         = opru->pru_detach;
	pru->pru_disconnect     = opru->pru_disconnect;
	pru->pru_listen         = opru->pru_listen;
	pru->pru_peeraddr       = opru->pru_peeraddr;
	pru->pru_rcvd           = opru->pru_rcvd;
	pru->pru_rcvoob         = opru->pru_rcvoob;
	pru->pru_send           = opru->pru_send;
	pru->pru_sense          = opru->pru_sense;
	pru->pru_shutdown       = opru->pru_shutdown;
	pru->pru_sockaddr       = opru->pru_sockaddr;
	pru->pru_sosend         = opru->pru_sosend;
	pru->pru_soreceive      = opru->pru_soreceive;
	pru->pru_sopoll         = opru->pru_sopoll;

	pp = kalloc_type(struct protosw, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/*
	 * Protocol fast and slow timers are now deprecated.
	 */
	if (opp->pr_unused != NULL) {
		printf("%s: domain (%d,%s), proto %d: pr_fasttimo is "
		    "deprecated and won't be called\n", __func__,
		    odp->dom_family, odp->dom_name, opp->pr_protocol);
	}
	if (opp->pr_unused2 != NULL) {
		printf("%s: domain (%d,%s), proto %d: pr_slowtimo is "
		    "deprecated and won't be called\n", __func__,
		    odp->dom_family, odp->dom_name, opp->pr_protocol);
	}

	/* Copy everything but pr_init, pr_next, pr_domain, pr_protosw */
	pp->pr_type             = opp->pr_type;
	pp->pr_protocol         = opp->pr_protocol;
	pp->pr_flags            = (opp->pr_flags & PRF_USERFLAGS) | PR_OLD;
	pp->pr_input            = opp->pr_input;
	pp->pr_output           = opp->pr_output;
	pp->pr_ctlinput         = opp->pr_ctlinput;
	pp->pr_ctloutput        = opp->pr_ctloutput;
	pp->pr_usrreqs          = pru;
	pp->pr_init             = pr_init_old;  /* trampoline to opp->pr_init */
	pp->pr_drain            = opp->pr_drain;
	pp->pr_sysctl           = opp->pr_sysctl;
	pp->pr_lock             = opp->pr_lock;
	pp->pr_unlock           = opp->pr_unlock;
	pp->pr_getlock          = opp->pr_getlock;
	pp->pr_old              = opp;

	/* attach as well as initialize */
	attach_proto(pp, dp);
	net_init_proto(pp, dp);
done:
	if (error != 0) {
		printf("%s: domain (%d,%s), proto %d: failed to attach, "
		    "error %d\n", __func__, odp->dom_family,
		    odp->dom_name, opp->pr_protocol, error);

		/* pru/pp may still be NULL here; kfree_type tolerates that */
		kfree_type(struct pr_usrreqs, pru);
		kfree_type(struct protosw, pp);
	}

	domain_guard_release(guard);
	return error;
}
557 
558 /*
559  * Internal routine, not exported.
560  *
561  * net_del_proto - remove a protosw from a domain's protosw chain.
562  * Search the protosw chain for the element with matching data.
563  * Then unlink and return.
564  *
565  * NOTE: Caller must have acquired domain_proto_mtx
566  */
/*
 * net_del_proto - remove a protosw from a domain's protosw chain.
 * Search the protosw chain for the element with matching data.
 * Then unlink and return.  Frees only the pieces this file allocated
 * for legacy registrations (PRUF_OLD usrreqs, PR_OLD protosw).
 * Returns ENXIO if no (type, protocol) match exists.
 *
 * NOTE: Caller must have acquired domain_proto_mtx.
 */
int
net_del_proto(int type, int protocol, struct domain *dp)
{
	struct protosw *pp __single;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	domain_proto_mtx_lock_assert_held();

	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
		if (pp->pr_type == type && pp->pr_protocol == protocol) {
			break;
		}
	}
	if (pp == NULL) {
		return ENXIO;
	}

	detach_proto(pp, dp);
	if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
		kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
	}
	if (pp->pr_flags & PR_OLD) {
		kfree_type(struct protosw, pp);
	}

	return 0;
}
597 
598 /*
599  * Exported (private) routine, indirection of net_del_proto.
600  */
/*
 * Legacy counterpart of net_del_proto(): locates the internal domain
 * associated with odp, then removes the (type, protocol) entry from
 * it, freeing the internally-allocated shadows.  Returns ENXIO if
 * either the domain or the protocol cannot be found.  Takes
 * domain_proto_mtx itself via the guard.
 */
int
net_del_proto_old(int type, int protocol, struct domain_old *odp)
{
	int error = 0;
	struct protosw *pp __single;
	struct domain *dp;
	domain_guard_t guard __single;

	/*
	 * This could be called as part of initializing the domain,
	 * and thus DOM_INITIALIZED may not be set (yet).
	 */
	guard = domain_guard_deploy();

	/* Make sure the domain has been added via net_add_domain */
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		if (!(dp->dom_flags & DOM_OLD)) {
			continue;
		}
		if (dp->dom_old == odp) {
			break;
		}
	}
	if (dp == NULL) {
		error = ENXIO;
		goto done;
	}

	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
		if (pp->pr_type == type && pp->pr_protocol == protocol) {
			break;
		}
	}
	if (pp == NULL) {
		error = ENXIO;
		goto done;
	}
	detach_proto(pp, dp);
	if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
		kfree_type(struct pr_usrreqs, pp->pr_usrreqs);
	}
	if (pp->pr_flags & PR_OLD) {
		kfree_type(struct protosw, pp);
	}

done:
	domain_guard_release(guard);
	return error;
}
650 
/*
 * Arm the one-shot drain timer (one tick out) if a drain has been
 * requested and the timer isn't already pending.  Caller must hold
 * domain_timeout_mtx, which protects both flags.
 */
static void
domain_sched_timeout(void)
{
	LCK_MTX_ASSERT(&domain_timeout_mtx, LCK_MTX_ASSERT_OWNED);

	if (!domain_timeout_run && domain_draining) {
		domain_timeout_run = TRUE;
		timeout(domain_timeout, NULL, hz);
	}
}
661 
/*
 * Request an asynchronous drain pass over every protocol's pr_drain
 * callback; the actual work happens later in domain_timeout().
 */
void
net_drain_domains(void)
{
	lck_mtx_lock(&domain_timeout_mtx);
	domain_draining = TRUE;
	domain_sched_timeout();
	lck_mtx_unlock(&domain_timeout_mtx);
}
670 
/*
 * Statically-linked domain definitions, owned by their respective
 * protocol modules; attached to the global list in domaininit().
 */
extern struct domain inet6domain_s;
#if IPSEC
extern struct domain keydomain_s;
#endif

extern struct domain routedomain_s, ndrvdomain_s, inetdomain_s;
extern struct domain systemdomain_s, localdomain_s;
extern struct domain vsockdomain_s;

#if MULTIPATH
extern struct domain mpdomain_s;
#endif /* MULTIPATH */
683 
/*
 * Drain-timer callback.  If a drain was requested, clear the request,
 * drop domain_timeout_mtx (the domain walk must be done under the
 * domain guard instead), and invoke every protocol's pr_drain.  Then,
 * back under domain_timeout_mtx, clear the timer-pending flag and
 * re-arm if another drain was requested in the meantime.
 */
static void
domain_timeout(void *arg)
{
#pragma unused(arg)
	struct protosw *pp;
	struct domain *dp;
	domain_guard_t guard __single;

	lck_mtx_lock(&domain_timeout_mtx);
	if (domain_draining) {
		domain_draining = FALSE;
		lck_mtx_unlock(&domain_timeout_mtx);

		guard = domain_guard_deploy();
		TAILQ_FOREACH(dp, &domains, dom_entry) {
			TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
				if (pp->pr_drain != NULL) {
					(*pp->pr_drain)();
				}
			}
		}
		domain_guard_release(guard);

		lck_mtx_lock(&domain_timeout_mtx);
	}

	/* re-arm the timer if there's work to do */
	domain_timeout_run = FALSE;
	domain_sched_timeout();
	lck_mtx_unlock(&domain_timeout_mtx);
}
715 
716 void
domaininit(void)717 domaininit(void)
718 {
719 	struct domain *dp;
720 	domain_guard_t guard __single;
721 
722 	eventhandler_lists_ctxt_init(&protoctl_evhdlr_ctxt);
723 
724 	guard = domain_guard_deploy();
725 	/*
726 	 * Add all the static domains to the domains list.  route domain
727 	 * gets added and initialized last, since we need it to attach
728 	 * rt_tables[] to everything that's already there.  This also
729 	 * means that domains added after this point won't get their
730 	 * dom_rtattach() called on rt_tables[].
731 	 */
732 	attach_domain(&inetdomain_s);
733 	attach_domain(&inet6domain_s);
734 #if MULTIPATH
735 	attach_domain(&mpdomain_s);
736 #endif /* MULTIPATH */
737 	attach_domain(&systemdomain_s);
738 	attach_domain(&localdomain_s);
739 #if IPSEC
740 	attach_domain(&keydomain_s);
741 #endif /* IPSEC */
742 	attach_domain(&ndrvdomain_s);
743 	attach_domain(&vsockdomain_s);
744 	attach_domain(&routedomain_s);  /* must be last domain */
745 
746 	/*
747 	 * Now ask them all to init (XXX including the routing domain,
748 	 * see above)
749 	 */
750 	TAILQ_FOREACH(dp, &domains, dom_entry)
751 	init_domain(dp);
752 
753 	domain_guard_release(guard);
754 }
755 
756 static __inline__ struct domain *
pffinddomain_locked(int pf)757 pffinddomain_locked(int pf)
758 {
759 	struct domain *dp;
760 
761 	domain_proto_mtx_lock_assert_held();
762 
763 	TAILQ_FOREACH(dp, &domains, dom_entry) {
764 		if (dp->dom_family == pf) {
765 			break;
766 		}
767 	}
768 	return dp;
769 }
770 
771 struct protosw *
pffindtype(int family,int type)772 pffindtype(int family, int type)
773 {
774 	struct protosw *pp = NULL;
775 	struct domain *dp;
776 	domain_guard_t guard __single;
777 
778 	guard = domain_guard_deploy();
779 	if ((dp = pffinddomain_locked(family)) == NULL) {
780 		goto done;
781 	}
782 
783 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
784 		if (pp->pr_type != 0 && pp->pr_type == type) {
785 			goto done;
786 		}
787 	}
788 done:
789 	domain_guard_release(guard);
790 	return pp;
791 }
792 
793 /*
794  * Internal routine, not exported.
795  */
796 struct domain *
pffinddomain(int pf)797 pffinddomain(int pf)
798 {
799 	struct domain *dp;
800 	domain_guard_t guard __single;
801 
802 	guard = domain_guard_deploy();
803 	dp = pffinddomain_locked(pf);
804 	domain_guard_release(guard);
805 	return dp;
806 }
807 
808 /*
809  * Exported (private) routine, indirection of pffinddomain.
810  */
811 struct domain_old *
pffinddomain_old(int pf)812 pffinddomain_old(int pf)
813 {
814 	struct domain_old *odp = NULL;
815 	struct domain *dp;
816 	domain_guard_t guard __single;
817 
818 	guard = domain_guard_deploy();
819 	if ((dp = pffinddomain_locked(pf)) != NULL && (dp->dom_flags & DOM_OLD)) {
820 		odp = dp->dom_old;
821 	}
822 	domain_guard_release(guard);
823 	return odp;
824 }
825 
826 /*
827  * Internal routine, not exported.
828  */
829 struct protosw *
pffindproto(int family,int protocol,int type)830 pffindproto(int family, int protocol, int type)
831 {
832 	struct protosw *pp;
833 	domain_guard_t guard __single;
834 
835 	guard = domain_guard_deploy();
836 	pp = pffindproto_locked(family, protocol, type);
837 	domain_guard_release(guard);
838 	return pp;
839 }
840 
841 struct protosw *
pffindproto_locked(int family,int protocol,int type)842 pffindproto_locked(int family, int protocol, int type)
843 {
844 	struct protosw *maybe = NULL;
845 	struct protosw *pp;
846 	struct domain *dp;
847 
848 	domain_proto_mtx_lock_assert_held();
849 
850 	if (family == 0) {
851 		return 0;
852 	}
853 
854 	dp = pffinddomain_locked(family);
855 	if (dp == NULL) {
856 		return NULL;
857 	}
858 
859 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
860 		if ((pp->pr_protocol == protocol) && (pp->pr_type == type)) {
861 			return pp;
862 		}
863 
864 		if (type == SOCK_RAW && pp->pr_type == SOCK_RAW &&
865 		    pp->pr_protocol == 0 && maybe == NULL) {
866 			maybe = pp;
867 		}
868 	}
869 	return maybe;
870 }
871 
872 /*
873  * Exported (private) routine, indirection of pffindproto.
874  */
875 struct protosw_old *
pffindproto_old(int family,int protocol,int type)876 pffindproto_old(int family, int protocol, int type)
877 {
878 	struct protosw_old *opr = NULL;
879 	struct protosw *pp;
880 	domain_guard_t guard __single;
881 
882 	guard = domain_guard_deploy();
883 	if ((pp = pffindproto_locked(family, protocol, type)) != NULL &&
884 	    (pp->pr_flags & PR_OLD)) {
885 		opr = pp->pr_old;
886 	}
887 	domain_guard_release(guard);
888 	return opr;
889 }
890 
891 static struct protosw *
pffindprotonotype_locked(int family,int protocol,int type)892 pffindprotonotype_locked(int family, int protocol, int type)
893 {
894 #pragma unused(type)
895 	struct domain *dp;
896 	struct protosw *pp;
897 
898 	domain_proto_mtx_lock_assert_held();
899 
900 	if (family == 0) {
901 		return 0;
902 	}
903 
904 	dp = pffinddomain_locked(family);
905 	if (dp == NULL) {
906 		return NULL;
907 	}
908 
909 	TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
910 		if (pp->pr_protocol == protocol) {
911 			return pp;
912 		}
913 	}
914 	return NULL;
915 }
916 
917 struct protosw *
pffindprotonotype(int family,int protocol)918 pffindprotonotype(int family, int protocol)
919 {
920 	struct protosw *pp;
921 	domain_guard_t guard __single;
922 
923 	if (protocol == 0) {
924 		return NULL;
925 	}
926 
927 	guard = domain_guard_deploy();
928 	pp = pffindprotonotype_locked(family, protocol, 0);
929 	domain_guard_release(guard);
930 	return pp;
931 }
932 
/*
 * Broadcast a control-input event to every protocol; convenience
 * wrapper around pfctlinput2() with no control parameter.
 */
void
pfctlinput(int cmd, struct sockaddr *sa)
{
	pfctlinput2(cmd, sa, NULL);
}
938 
/*
 * Deliver (cmd, sa, ctlparam) to the pr_ctlinput handler of every
 * protocol in every domain, under the domain guard.  Silently ignores
 * a NULL sockaddr.  The trailing NULL argument is the ifp parameter of
 * pr_ctlinput (no originating interface here).
 */
void
pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam)
{
	struct domain *dp;
	struct protosw *pp;
	domain_guard_t guard __single;

	if (sa == NULL) {
		return;
	}

	guard = domain_guard_deploy();
	TAILQ_FOREACH(dp, &domains, dom_entry) {
		TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) {
			if (pp->pr_ctlinput != NULL) {
				(*pp->pr_ctlinput)(cmd, sa, ctlparam, NULL);
			}
		}
	}
	domain_guard_release(guard);
}
960 
/*
 * Refresh the cached network uptime globals from a timeval.  The
 * seconds value is rounded (ties toward zero: exactly 500000us rounds
 * down); the milliseconds value is computed exactly.  The two globals
 * are updated without locking, so readers may briefly see one updated
 * and not the other.
 */
void
net_update_uptime_with_time(const struct timeval *tvp)
{
	_net_uptime = tvp->tv_sec;
	/*
	 * Round up the timer to the nearest integer value because otherwise
	 * we might setup networking timers that are off by almost 1 second.
	 */
	if (tvp->tv_usec > 500000) {
		_net_uptime++;
	}

	/* update milliseconds variant */
	_net_uptime_ms = (((u_int64_t)tvp->tv_sec * 1000) +
	    ((u_int64_t)tvp->tv_usec / 1000));
}
977 
978 void
net_update_uptime(void)979 net_update_uptime(void)
980 {
981 	struct timeval tv;
982 
983 	microuptime(&tv);
984 
985 	net_update_uptime_with_time(&tv);
986 }
987 
988 /*
989  * Convert our uin64_t net_uptime to a struct timeval.
990  */
991 void
net_uptime2timeval(struct timeval * tv)992 net_uptime2timeval(struct timeval *tv)
993 {
994 	if (tv == NULL) {
995 		return;
996 	}
997 
998 	tv->tv_usec = 0;
999 	tv->tv_sec = (time_t)net_uptime();
1000 }
1001 
1002 /*
1003  * An alternative way to obtain the coarse-grained uptime (in seconds)
1004  * for networking code which do not require high-precision timestamp,
1005  * as this is significantly cheaper than microuptime().
1006  */
1007 u_int64_t
net_uptime(void)1008 net_uptime(void)
1009 {
1010 	if (_net_uptime == 0) {
1011 		net_update_uptime();
1012 	}
1013 
1014 	return _net_uptime;
1015 }
1016 
1017 u_int64_t
net_uptime_ms(void)1018 net_uptime_ms(void)
1019 {
1020 	if (_net_uptime_ms == 0) {
1021 		net_update_uptime();
1022 	}
1023 
1024 	return _net_uptime_ms;
1025 }
1026 
/* Assert (DEBUG builds) that the calling thread holds domain_proto_mtx. */
void
domain_proto_mtx_lock_assert_held(void)
{
	LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
}
1032 
/* Assert (DEBUG builds) that the calling thread does NOT hold domain_proto_mtx. */
void
domain_proto_mtx_lock_assert_notheld(void)
{
	LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
}
1038 
/*
 * Acquire domain_proto_mtx unless this thread already holds it, as
 * tracked by the NET_THREAD_HELD_DOMAIN per-thread mark.  Returns an
 * opaque guard to pass to domain_guard_release(): a non-"none" guard
 * means this call took the lock (and release will drop it); the "none"
 * guard means the lock was already held by this thread.
 */
domain_guard_t
domain_guard_deploy(void)
{
	net_thread_marks_t marks __single;

	marks = net_thread_marks_push(NET_THREAD_HELD_DOMAIN);
	if (marks != net_thread_marks_none) {
		/* mark was newly set: this thread did not hold the lock yet */
		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
		lck_mtx_lock(&domain_proto_mtx);
	} else {
		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
	}

	return (domain_guard_t)(const void*)marks;
}
1054 
/*
 * Counterpart of domain_guard_deploy(): drop domain_proto_mtx and pop
 * the per-thread mark, but only if the matching deploy actually took
 * the lock (non-"none" guard).
 */
void
domain_guard_release(domain_guard_t guard)
{
	net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)guard;

	if (marks != net_thread_marks_none) {
		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
		lck_mtx_unlock(&domain_proto_mtx);
		net_thread_marks_pop(marks);
	} else {
		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
	}
}
1068 
1069 domain_unguard_t
domain_unguard_deploy(void)1070 domain_unguard_deploy(void)
1071 {
1072 	net_thread_marks_t marks __single;
1073 
1074 	marks = net_thread_unmarks_push(NET_THREAD_HELD_DOMAIN);
1075 	if (marks != net_thread_marks_none) {
1076 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1077 		lck_mtx_unlock(&domain_proto_mtx);
1078 	} else {
1079 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1080 	}
1081 
1082 	return (domain_unguard_t)(const void*)marks;
1083 }
1084 
1085 void
domain_unguard_release(domain_unguard_t unguard)1086 domain_unguard_release(domain_unguard_t unguard)
1087 {
1088 	net_thread_marks_t marks __single = (net_thread_marks_t)(const void*)unguard;
1089 
1090 	if (marks != net_thread_marks_none) {
1091 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED);
1092 		lck_mtx_lock(&domain_proto_mtx);
1093 		net_thread_unmarks_pop(marks);
1094 	} else {
1095 		LCK_MTX_ASSERT(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
1096 	}
1097 }
1098 
1099 #if SKYWALK
/* The following is used to enqueue work items for interface events */
struct protoctl_event {
	struct ifnet *ifp;              /* interface the event pertains to */
	union sockaddr_in_4_6 laddr;    /* local address (IPv4 or IPv6) */
	union sockaddr_in_4_6 raddr;    /* remote address (IPv4 or IPv6) */
	uint32_t protoctl_event_code;   /* protocol-control (PRC) event code */
	struct protoctl_ev_val val;     /* event-specific value payload */
	uint16_t lport;                 /* local port of the affected flow */
	uint16_t rport;                 /* remote port of the affected flow */
	uint8_t protocol;               /* transport protocol of the flow */
};

/*
 * Work-queue wrapper: embeds the generic nwk_wq_entry linkage plus the
 * event payload handed to protoctl_event_callback().
 */
struct protoctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;         /* embedded work-queue entry (callback + linkage) */
	struct protoctl_event protoctl_ev_arg; /* event data consumed by the callback */
};
1116 
1117 static void
protoctl_event_callback(struct nwk_wq_entry * nwk_item)1118 protoctl_event_callback(struct nwk_wq_entry *nwk_item)
1119 {
1120 	struct protoctl_event_nwk_wq_entry *p_ev __single = NULL;
1121 
1122 	p_ev = __unsafe_forge_single(struct protoctl_event_nwk_wq_entry *,
1123 	    __container_of(nwk_item, struct protoctl_event_nwk_wq_entry, nwk_wqe));
1124 
1125 	/* Call this before we walk the tree */
1126 	EVENTHANDLER_INVOKE(&protoctl_evhdlr_ctxt, protoctl_event,
1127 	    p_ev->protoctl_ev_arg.ifp, (struct sockaddr *)&(p_ev->protoctl_ev_arg.laddr),
1128 	    (struct sockaddr *)&(p_ev->protoctl_ev_arg.raddr),
1129 	    p_ev->protoctl_ev_arg.lport, p_ev->protoctl_ev_arg.rport,
1130 	    p_ev->protoctl_ev_arg.protocol, p_ev->protoctl_ev_arg.protoctl_event_code,
1131 	    &p_ev->protoctl_ev_arg.val);
1132 
1133 	kfree_type(struct protoctl_event_nwk_wq_entry, p_ev);
1134 }
1135 
1136 /* XXX Some PRC events needs extra verification like sequence number checking */
1137 void
protoctl_event_enqueue_nwk_wq_entry(struct ifnet * ifp,struct sockaddr * p_laddr,struct sockaddr * p_raddr,uint16_t lport,uint16_t rport,uint8_t protocol,uint32_t protoctl_event_code,struct protoctl_ev_val * p_protoctl_ev_val)1138 protoctl_event_enqueue_nwk_wq_entry(struct ifnet *ifp, struct sockaddr *p_laddr,
1139     struct sockaddr *p_raddr, uint16_t lport, uint16_t rport, uint8_t protocol,
1140     uint32_t protoctl_event_code, struct protoctl_ev_val *p_protoctl_ev_val)
1141 {
1142 	struct protoctl_event_nwk_wq_entry *p_protoctl_ev = NULL;
1143 
1144 	p_protoctl_ev = kalloc_type(struct protoctl_event_nwk_wq_entry,
1145 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
1146 
1147 	p_protoctl_ev->protoctl_ev_arg.ifp = ifp;
1148 
1149 	if (p_laddr != NULL) {
1150 		SOCKADDR_COPY(p_laddr,
1151 		    &p_protoctl_ev->protoctl_ev_arg.laddr);
1152 	}
1153 
1154 	if (p_raddr != NULL) {
1155 		SOCKADDR_COPY(p_raddr,
1156 		    &p_protoctl_ev->protoctl_ev_arg.raddr);
1157 	}
1158 
1159 	p_protoctl_ev->protoctl_ev_arg.lport = lport;
1160 	p_protoctl_ev->protoctl_ev_arg.rport = rport;
1161 	p_protoctl_ev->protoctl_ev_arg.protocol = protocol;
1162 	p_protoctl_ev->protoctl_ev_arg.protoctl_event_code = protoctl_event_code;
1163 
1164 	if (p_protoctl_ev_val != NULL) {
1165 		bcopy(p_protoctl_ev_val, &(p_protoctl_ev->protoctl_ev_arg.val),
1166 		    sizeof(*p_protoctl_ev_val));
1167 	}
1168 	p_protoctl_ev->nwk_wqe.func = protoctl_event_callback;
1169 
1170 	nwk_wq_enqueue(&p_protoctl_ev->nwk_wqe);
1171 }
1172 #endif /* SKYWALK */
1173 
1174 #if (DEVELOPMENT || DEBUG)
1175 
1176 static int
1177 sysctl_do_drain_domains SYSCTL_HANDLER_ARGS
1178 {
1179 #pragma unused(arg1, arg2)
1180 	int error;
1181 	int dummy = 0;
1182 
1183 	error = sysctl_handle_int(oidp, &dummy, 0, req);
1184 	if (error || req->newptr == USER_ADDR_NULL) {
1185 		return error;
1186 	}
1187 
1188 	net_drain_domains();
1189 
1190 	return 0;
1191 }
1192 
1193 #endif /* DEVELOPMENT || DEBUG */
1194