xref: /xnu-8019.80.24/bsd/skywalk/nexus/nexus.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/nexus/netif/nx_netif.h>
31 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
32 #include <sys/sdt.h>
33 
34 static uint32_t disable_nxctl_check = 0;
35 #if (DEVELOPMENT || DEBUG)
36 SYSCTL_UINT(_kern_skywalk, OID_AUTO, disable_nxctl_check,
37     CTLFLAG_RW | CTLFLAG_LOCKED, &disable_nxctl_check, 0, "");
38 #endif
39 
40 LCK_GRP_DECLARE(nexus_lock_group, "sk_nx_lock");
41 LCK_GRP_DECLARE(nexus_mbq_lock_group, "sk_nx_mbq_lock");
42 LCK_GRP_DECLARE(nexus_pktq_lock_group, "sk_nx_pktq_lock");
43 LCK_ATTR_DECLARE(nexus_lock_attr, 0, 0);
44 
45 static STAILQ_HEAD(, nxctl) nxctl_head =
46     STAILQ_HEAD_INITIALIZER(nxctl_head);
47 static STAILQ_HEAD(, kern_nexus_provider) nxprov_head =
48     STAILQ_HEAD_INITIALIZER(nxprov_head);
49 
50 static int nx_cmp(const struct kern_nexus *, const struct kern_nexus *);
51 RB_HEAD(kern_nexus_tree, kern_nexus);
52 RB_PROTOTYPE_SC(static, kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
53 RB_GENERATE(kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
54 static struct kern_nexus_tree   nx_head;
55 
56 static int nxctl_get_nexus_prov_list(struct nxctl *, struct sockopt *);
57 static int nxctl_get_nexus_prov_entry(struct nxctl *, struct sockopt *);
58 static int nxctl_get_nexus_list(struct nxctl *, struct sockopt *);
59 static int nxctl_nexus_bind(struct nxctl *, struct sockopt *);
60 static int nxctl_nexus_unbind(struct nxctl *, struct sockopt *);
61 static int nxctl_nexus_config(struct nxctl *, struct sockopt *);
62 static int nxctl_get_channel_list(struct nxctl *, struct sockopt *);
63 static void nxctl_retain_locked(struct nxctl *);
64 static int nxctl_release_locked(struct nxctl *);
65 static void nxctl_init(struct nxctl *, struct proc *, struct fileproc *);
66 static struct nxctl *nxctl_alloc(struct proc *, struct fileproc *, zalloc_flags_t);
67 static void nxctl_free(struct nxctl *);
68 
69 static struct kern_nexus_provider *nxprov_create_common(struct nxctl *,
70     struct kern_nexus_domain_provider *, struct nxprov_reg *,
71     const struct kern_nexus_provider_init *init, int *);
72 static void nxprov_detach(struct kern_nexus_provider *, boolean_t);
73 static void nxprov_retain_locked(struct kern_nexus_provider *);
74 static int nxprov_release_locked(struct kern_nexus_provider *);
75 static struct kern_nexus_provider *nxprov_alloc(
76 	struct kern_nexus_domain_provider *, zalloc_flags_t);
77 static void nxprov_free(struct kern_nexus_provider *);
78 
79 static int nx_init_rings(struct kern_nexus *, struct kern_channel *);
80 static void nx_fini_rings(struct kern_nexus *, struct kern_channel *);
81 static int nx_init_slots(struct kern_nexus *, struct __kern_channel_ring *);
82 static void nx_fini_slots(struct kern_nexus *, struct __kern_channel_ring *);
83 static struct kern_nexus *nx_alloc(zalloc_flags_t);
84 static void nx_free(struct kern_nexus *);
85 
86 static ZONE_DECLARE(nxctl_zone, SKMEM_ZONE_PREFIX ".nx.ctl",
87     sizeof(struct nxctl), ZC_ZFREE_CLEARMEM);
88 
89 static ZONE_DECLARE(nxbind_zone, SKMEM_ZONE_PREFIX ".nx.bind",
90     sizeof(struct nxbind), ZC_ZFREE_CLEARMEM);
91 
92 static ZONE_DECLARE(nxprov_zone, SKMEM_ZONE_PREFIX ".nx.kern.prov",
93     sizeof(struct kern_nexus_provider), ZC_ZFREE_CLEARMEM);
94 
95 static ZONE_DECLARE(nxprov_params_zone, SKMEM_ZONE_PREFIX ".nx.kern.prov.params",
96     sizeof(struct nxprov_params), ZC_ZFREE_CLEARMEM);
97 
98 static ZONE_DECLARE(nx_zone, SKMEM_ZONE_PREFIX ".nx",
99     sizeof(struct kern_nexus), ZC_ZFREE_CLEARMEM);
100 
101 static int __nx_inited = 0;
102 
103 #define SKMEM_TAG_NX_KEY        "com.apple.skywalk.nexus.key"
104 kern_allocation_name_t skmem_tag_nx_key;
105 
106 #define SKMEM_TAG_NX_MIB        "com.apple.skywalk.nexus.mib"
107 static kern_allocation_name_t skmem_tag_nx_mib;
108 
109 #define SKMEM_TAG_NX_PORT        "com.apple.skywalk.nexus.port"
110 kern_allocation_name_t skmem_tag_nx_port;
111 
112 #define SKMEM_TAG_NX_PORT_INFO        "com.apple.skywalk.nexus.port.info"
113 kern_allocation_name_t skmem_tag_nx_port_info;
114 
115 /*
116  * Special nexus controller handle for Skywalk internal use.  Unlike all
117  * other nexus controller handles that are created by userland or kernel
118  * clients, this one never gets closed or freed.  It is also not part of
119  * the global nxctl_head list.
120  */
121 static struct nxctl _kernnxctl;
122 struct nexus_controller kernnxctl = { .ncd_nxctl = &_kernnxctl };
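/*
 * For illustration, a built-in Skywalk component registers a kernel
 * provider through this handle roughly as follows (a minimal sketch;
 * nxdom_prov and reg are assumed to be set up by the caller):
 *
 *	struct nxctl *nxctl = kernnxctl.ncd_nxctl;
 *	struct kern_nexus_provider *nxprov;
 *	int err;
 *
 *	lck_mtx_lock(&nxctl->nxctl_lock);
 *	SK_LOCK();
 *	nxprov = nxprov_create_kern(nxctl, nxdom_prov, &reg, NULL, &err);
 *	SK_UNLOCK();
 *	lck_mtx_unlock(&nxctl->nxctl_lock);
 *
 * Since this handle is never closed, no matching nxctl_close() or
 * final nxctl_release() ever runs against it.
 */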
123 
124 int
125 nexus_init(void)
126 {
127 	SK_LOCK_ASSERT_HELD();
128 	ASSERT(!__nx_inited);
129 
130 	RB_INIT(&nx_head);
131 
132 	na_init();
133 
134 	/* attach system built-in domains and domain providers */
135 	nxdom_attach_all();
136 
137 	/*
138 	 * Initialize private kernel nexus controller handle; this is used
139 	 * internally for creating nexus providers and nexus instances from
140 	 * within the Skywalk code (e.g. netif_compat).
141 	 */
142 	nxctl_init(&_kernnxctl, kernproc, NULL);
143 	nxctl_retain_locked(&_kernnxctl);       /* one for us */
144 
145 	ASSERT(skmem_tag_nx_key == NULL);
146 	skmem_tag_nx_key = kern_allocation_name_allocate(SKMEM_TAG_NX_KEY, 0);
147 	ASSERT(skmem_tag_nx_key != NULL);
148 
149 	ASSERT(skmem_tag_nx_mib == NULL);
150 	skmem_tag_nx_mib = kern_allocation_name_allocate(SKMEM_TAG_NX_MIB, 0);
151 	ASSERT(skmem_tag_nx_mib != NULL);
152 
153 	ASSERT(skmem_tag_nx_port == NULL);
154 	skmem_tag_nx_port = kern_allocation_name_allocate(SKMEM_TAG_NX_PORT, 0);
155 	ASSERT(skmem_tag_nx_port != NULL);
156 
157 	ASSERT(skmem_tag_nx_port_info == NULL);
158 	skmem_tag_nx_port_info = kern_allocation_name_allocate(
159 		SKMEM_TAG_NX_PORT_INFO, 0);
160 	ASSERT(skmem_tag_nx_port_info != NULL);
161 
162 	__nx_inited = 1;
163 
164 	return 0;
165 }
166 
167 void
168 nexus_fini(void)
169 {
170 	SK_LOCK_ASSERT_HELD();
171 
172 	if (__nx_inited) {
173 		nxctl_release_locked(&_kernnxctl);
174 
175 		/* tell all domains they're going away */
176 		nxdom_detach_all();
177 
178 		ASSERT(RB_EMPTY(&nx_head));
179 
180 		if (skmem_tag_nx_key != NULL) {
181 			kern_allocation_name_release(skmem_tag_nx_key);
182 			skmem_tag_nx_key = NULL;
183 		}
184 		if (skmem_tag_nx_mib != NULL) {
185 			kern_allocation_name_release(skmem_tag_nx_mib);
186 			skmem_tag_nx_mib = NULL;
187 		}
188 		if (skmem_tag_nx_port != NULL) {
189 			kern_allocation_name_release(skmem_tag_nx_port);
190 			skmem_tag_nx_port = NULL;
191 		}
192 		if (skmem_tag_nx_port_info != NULL) {
193 			kern_allocation_name_release(skmem_tag_nx_port_info);
194 			skmem_tag_nx_port_info = NULL;
195 		}
196 		na_fini();
197 
198 		__nx_inited = 0;
199 	}
200 }
201 
202 struct nxctl *
203 nxctl_create(struct proc *p, struct fileproc *fp, const uuid_t nxctl_uuid,
204     int *err)
205 {
206 	struct nxctl *nxctl = NULL;
207 
208 	ASSERT(!uuid_is_null(nxctl_uuid));
209 
210 	/* privilege checks would be done when performing nxctl operations */
211 
212 	SK_LOCK();
213 
214 	nxctl = nxctl_alloc(p, fp, Z_WAITOK);
215 
216 	STAILQ_INSERT_TAIL(&nxctl_head, nxctl, nxctl_link);
217 	nxctl->nxctl_flags |= NEXUSCTLF_ATTACHED;
218 	uuid_copy(nxctl->nxctl_uuid, nxctl_uuid);
219 
220 	nxctl_retain_locked(nxctl);     /* one for being in the list */
221 	nxctl_retain_locked(nxctl);     /* one for the caller */
222 
223 #if SK_LOG
224 	uuid_string_t uuidstr;
225 	SK_D("nxctl 0x%llx UUID %s", SK_KVA(nxctl),
226 	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr));
227 #endif /* SK_LOG */
228 
229 	SK_UNLOCK();
230 
231 	if (*err != 0) {
232 		nxctl_free(nxctl);
233 		nxctl = NULL;
234 	}
235 	return nxctl;
236 }
237 
238 void
239 nxctl_close(struct nxctl *nxctl)
240 {
241 	struct kern_nexus_provider *nxprov = NULL, *tnxprov;
242 
243 	lck_mtx_lock(&nxctl->nxctl_lock);
244 	SK_LOCK();
245 
246 	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL));
247 
248 #if SK_LOG
249 	uuid_string_t uuidstr;
250 	SK_D("nxctl 0x%llx UUID %s flags 0x%b", SK_KVA(nxctl),
251 	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr),
252 	    nxctl->nxctl_flags, NEXUSCTLF_BITS);
253 #endif /* SK_LOG */
254 
255 	if (!(nxctl->nxctl_flags & NEXUSCTLF_NOFDREF)) {
256 		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
257 		nxctl->nxctl_fp = NULL;
258 	}
259 
260 	/* may be called as part of failure cleanup, so check */
261 	if (nxctl->nxctl_flags & NEXUSCTLF_ATTACHED) {
262 		/* caller must hold an extra ref */
263 		ASSERT(nxctl->nxctl_refcnt > 1);
264 		(void) nxctl_release_locked(nxctl);
265 
266 		STAILQ_REMOVE(&nxctl_head, nxctl, nxctl, nxctl_link);
267 		nxctl->nxctl_flags &= ~NEXUSCTLF_ATTACHED;
268 	}
269 
270 repeat:
271 	STAILQ_FOREACH_SAFE(nxprov, &nxprov_head, nxprov_link, tnxprov) {
272 		/*
273 		 * Close provider only for those which are owned by
274 		 * this control instance.  Note that if we close the
275 		 * provider, we need to repeat this search as the
276 		 * list might have been changed by another thread.
277 		 * That's possible since SK_UNLOCK() may be called
278 		 * as a result of calling nxprov_close().
279 		 */
280 		if (!(nxprov->nxprov_flags & NXPROVF_CLOSED) &&
281 		    nxprov->nxprov_ctl == nxctl) {
282 			nxprov_retain_locked(nxprov);
283 			(void) nxprov_close(nxprov, TRUE);
284 			(void) nxprov_release_locked(nxprov);
285 			goto repeat;
286 		}
287 	}
288 
289 	SK_UNLOCK();
290 	lck_mtx_unlock(&nxctl->nxctl_lock);
291 }
292 
293 int
294 nxctl_set_opt(struct nxctl *nxctl, struct sockopt *sopt)
295 {
296 #pragma unused(nxctl)
297 	int err = 0;
298 
299 	NXCTL_LOCK_ASSERT_HELD(nxctl);
300 
301 	if (sopt->sopt_dir != SOPT_SET) {
302 		sopt->sopt_dir = SOPT_SET;
303 	}
304 
305 	switch (sopt->sopt_name) {
306 	case NXOPT_NEXUS_BIND:
307 		err = nxctl_nexus_bind(nxctl, sopt);
308 		break;
309 
310 	case NXOPT_NEXUS_UNBIND:
311 		err = nxctl_nexus_unbind(nxctl, sopt);
312 		break;
313 
314 	case NXOPT_NEXUS_CONFIG:
315 		err = nxctl_nexus_config(nxctl, sopt);
316 		break;
317 
318 	default:
319 		err = ENOPROTOOPT;
320 		break;
321 	}
322 
323 	return err;
324 }
325 
326 int
327 nxctl_get_opt(struct nxctl *nxctl, struct sockopt *sopt)
328 {
329 #pragma unused(nxctl)
330 	int err = 0;
331 
332 	NXCTL_LOCK_ASSERT_HELD(nxctl);
333 
334 	if (sopt->sopt_dir != SOPT_GET) {
335 		sopt->sopt_dir = SOPT_GET;
336 	}
337 
338 	switch (sopt->sopt_name) {
339 	case NXOPT_NEXUS_PROV_LIST:
340 		err = nxctl_get_nexus_prov_list(nxctl, sopt);
341 		break;
342 
343 	case NXOPT_NEXUS_PROV_ENTRY:
344 		err = nxctl_get_nexus_prov_entry(nxctl, sopt);
345 		break;
346 
347 	case NXOPT_NEXUS_LIST:
348 		err = nxctl_get_nexus_list(nxctl, sopt);
349 		break;
350 
351 	case NXOPT_CHANNEL_LIST:
352 		err = nxctl_get_channel_list(nxctl, sopt);
353 		break;
354 
355 	default:
356 		err = ENOPROTOOPT;
357 		break;
358 	}
359 
360 	return err;
361 }
362 
363 /* Upper bound on # of nrl_num_regs that we'd return to user space */
364 #define MAX_NUM_REG_ENTRIES     256
365 
366 /* Hoisted out of line to reduce kernel stack footprint */
367 SK_NO_INLINE_ATTRIBUTE
368 static int
369 nxctl_get_nexus_prov_list(struct nxctl *nxctl, struct sockopt *sopt)
370 {
371 	user_addr_t tmp_ptr = USER_ADDR_NULL;
372 	struct nxprov_reg_ent *pnre, *nres = NULL;
373 	struct nxprov_list_req nrlr;
374 	struct kern_nexus_provider *nxprov = NULL;
375 	uint32_t nregs = 0, ncregs = 0;
376 	int err = 0, observeall;
377 	size_t nres_sz;
378 
379 	NXCTL_LOCK_ASSERT_HELD(nxctl);
380 
381 	ASSERT(sopt->sopt_p != NULL);
382 	if (sopt->sopt_val == USER_ADDR_NULL) {
383 		return EINVAL;
384 	}
385 
386 	err = sooptcopyin(sopt, &nrlr, sizeof(nrlr), sizeof(nrlr));
387 	if (err != 0) {
388 		return err;
389 	}
390 
391 	if ((size_t)nrlr.nrl_num_regs > MAX_NUM_REG_ENTRIES) {
392 		nrlr.nrl_num_regs = MAX_NUM_REG_ENTRIES;
393 	}
394 
395 	/*
396 	 * If the caller specified a buffer, copy out the Nexus provider
397 	 * entries to the caller gracefully.  We copy out at most the
398 	 * number of entries the caller asked for, and report back how
399 	 * many entries were actually copied.
400 	 */
401 	tmp_ptr = nrlr.nrl_regs;
402 	if (tmp_ptr != USER_ADDR_NULL && nrlr.nrl_num_regs > 0) {
403 		nres_sz = (size_t)nrlr.nrl_num_regs * sizeof(*nres);
404 		nres = sk_alloc_data(nres_sz, Z_WAITOK, skmem_tag_sysctl_buf);
405 		if (__improbable(nres == NULL)) {
406 			return ENOBUFS;
407 		}
408 	}
409 
410 	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
411 	    PRIV_SKYWALK_OBSERVE_ALL) == 0);
412 
413 	SK_LOCK();
414 	/*
415 	 * Walk the provider list.  If buffer space exists and
416 	 * remains, copy out provider entries.
417 	 */
418 	nregs = nrlr.nrl_num_regs;
419 	pnre = nres;
420 
421 	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
422 		/*
423 		 * Return only entries that are visible to the caller,
424 		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
425 		 */
426 		if (nxprov->nxprov_ctl != nxctl && !observeall) {
427 			continue;
428 		}
429 
430 		if (nres != NULL && nregs > 0) {
431 			uuid_copy(pnre->npre_prov_uuid, nxprov->nxprov_uuid);
432 			bcopy(nxprov->nxprov_params, &pnre->npre_prov_params,
433 			    sizeof(struct nxprov_params));
434 			--nregs;
435 			++pnre;
436 			++ncregs;
437 		}
438 	}
439 	SK_UNLOCK();
440 
441 	if (ncregs == 0) {
442 		err = ENOENT;
443 	}
444 
445 	if (nres != NULL) {
446 		if (err == 0 && tmp_ptr != USER_ADDR_NULL) {
447 			if (sopt->sopt_p != kernproc) {
448 				err = copyout(nres, tmp_ptr,
449 				    ncregs * sizeof(*nres));
450 			} else {
451 				bcopy(nres, CAST_DOWN(caddr_t, tmp_ptr),
452 				    ncregs * sizeof(*nres));
453 			}
454 		}
455 		sk_free_data(nres, nres_sz);
456 		nres = NULL;
457 	}
458 
459 	if (err == 0) {
460 		nrlr.nrl_num_regs = ncregs;
461 		err = sooptcopyout(sopt, &nrlr, sizeof(nrlr));
462 	}
463 
464 	return err;
465 }
466 
467 /* Hoisted out of line to reduce kernel stack footprint */
468 SK_NO_INLINE_ATTRIBUTE
469 static int
470 nxctl_get_nexus_prov_entry(struct nxctl *nxctl, struct sockopt *sopt)
471 {
472 	struct nxprov_reg_ent nre;
473 	struct kern_nexus_provider *nxprov = NULL;
474 	int err = 0;
475 
476 	NXCTL_LOCK_ASSERT_HELD(nxctl);
477 
478 	ASSERT(sopt->sopt_p != NULL);
479 	if (sopt->sopt_val == USER_ADDR_NULL) {
480 		return EINVAL;
481 	}
482 
483 	bzero(&nre, sizeof(nre));
484 	err = sooptcopyin(sopt, &nre, sizeof(nre), sizeof(nre));
485 	if (err != 0) {
486 		return err;
487 	}
488 
489 	if (uuid_is_null(nre.npre_prov_uuid)) {
490 		return EINVAL;
491 	}
492 
493 	SK_LOCK();
494 	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
495 		if (uuid_compare(nxprov->nxprov_uuid,
496 		    nre.npre_prov_uuid) == 0) {
497 			/*
498 			 * Return only entries that are visible to the caller,
499 			 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
500 			 */
501 			if (nxprov->nxprov_ctl != nxctl) {
502 				if (skywalk_priv_check_cred(sopt->sopt_p,
503 				    nxctl->nxctl_cred,
504 				    PRIV_SKYWALK_OBSERVE_ALL) != 0) {
505 					nxprov = NULL;
506 					break;
507 				}
508 			}
509 
510 			bcopy(nxprov->nxprov_params, &nre.npre_prov_params,
511 			    sizeof(struct nxprov_params));
512 			break;
513 		}
514 	}
515 	SK_UNLOCK();
516 
517 	if (nxprov != NULL) {
518 		err = sooptcopyout(sopt, &nre, sizeof(nre));
519 	} else {
520 		err = ENOENT;
521 	}
522 
523 	return err;
524 }
525 
526 /* Upper bound on # of nl_num_nx_uuids that we'd return to user space */
527 #define MAX_NUM_NX_UUIDS        4096
528 
529 /* Hoisted out of line to reduce kernel stack footprint */
530 SK_NO_INLINE_ATTRIBUTE
531 static int
532 nxctl_get_nexus_list(struct nxctl *nxctl, struct sockopt *sopt)
533 {
534 	user_addr_t tmp_ptr = USER_ADDR_NULL;
535 	uint32_t nuuids = 0, ncuuids = 0;
536 	uuid_t *puuid, *uuids = NULL;
537 	size_t uuids_sz;
538 	struct nx_list_req nlr;
539 	struct kern_nexus_provider *nxprov = NULL;
540 	struct kern_nexus *nx = NULL;
541 	int err = 0, observeall;
542 
543 	NXCTL_LOCK_ASSERT_HELD(nxctl);
544 
545 	ASSERT(sopt->sopt_p != NULL);
546 	if (sopt->sopt_val == USER_ADDR_NULL) {
547 		return EINVAL;
548 	}
549 
550 	err = sooptcopyin(sopt, &nlr, sizeof(nlr), sizeof(nlr));
551 	if (err != 0) {
552 		return err;
553 	}
554 
555 	if (uuid_is_null(nlr.nl_prov_uuid)) {
556 		return EINVAL;
557 	} else if ((size_t)nlr.nl_num_nx_uuids > MAX_NUM_NX_UUIDS) {
558 		nlr.nl_num_nx_uuids = MAX_NUM_NX_UUIDS;
559 	}
560 
561 	/*
562 	 * If the caller specified a buffer, copy out the Nexus UUIDs to
563 	 * the caller gracefully.  We only copy out the number of UUIDs
564 	 * that the caller asked for, but we always report back how big
565 	 * the buffer really needs to be.
566 	 */
567 	tmp_ptr = nlr.nl_nx_uuids;
568 	if (tmp_ptr != USER_ADDR_NULL && nlr.nl_num_nx_uuids > 0) {
569 		uuids_sz = (size_t)nlr.nl_num_nx_uuids * sizeof(uuid_t);
570 		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
571 		if (__improbable(uuids == NULL)) {
572 			return ENOBUFS;
573 		}
574 	}
575 
576 	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
577 	    PRIV_SKYWALK_OBSERVE_ALL) == 0);
578 
579 	SK_LOCK();
580 	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
581 		/*
582 		 * Return only entries that are visible to the caller,
583 		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
584 		 */
585 		if (nxprov->nxprov_ctl != nxctl && !observeall) {
586 			continue;
587 		}
588 
589 		if (uuid_compare(nxprov->nxprov_uuid, nlr.nl_prov_uuid) == 0) {
590 			break;
591 		}
592 	}
593 
594 	if (nxprov != NULL) {
595 		/*
596 		 * Count the number of nexus instances.  If buffer space
597 		 * exists and remains, copy out the Nexus UUIDs.
598 		 */
599 		nuuids = nlr.nl_num_nx_uuids;
600 		puuid = uuids;
601 
602 		STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
603 			++ncuuids;
604 			if (uuids != NULL && nuuids > 0) {
605 				uuid_copy(*puuid, nx->nx_uuid);
606 				--nuuids;
607 				++puuid;
608 			}
609 		}
610 	} else {
611 		err = ENOENT;
612 	}
613 	SK_UNLOCK();
614 
615 	if (uuids != NULL) {
616 		if (err == 0 && nxprov != NULL && tmp_ptr != USER_ADDR_NULL) {
617 			uintptr_t cnt_uuid;
618 
619 			/* Note: Pointer arithmetic */
620 			cnt_uuid = (uintptr_t)(puuid - uuids);
621 			if (cnt_uuid > 0) {
622 				if (sopt->sopt_p != kernproc) {
623 					err = copyout(uuids, tmp_ptr,
624 					    cnt_uuid * sizeof(uuid_t));
625 				} else {
626 					bcopy(uuids,
627 					    CAST_DOWN(caddr_t, tmp_ptr),
628 					    cnt_uuid * sizeof(uuid_t));
629 				}
630 			}
631 		}
632 		sk_free_data(uuids, uuids_sz);
633 		uuids = NULL;
634 	}
635 
636 	if (err == 0) {
637 		nlr.nl_num_nx_uuids = ncuuids;
638 		err = sooptcopyout(sopt, &nlr, sizeof(nlr));
639 	}
640 
641 	return err;
642 }
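/*
 * Usage sketch for the sizing handshake above (illustrative; assumes
 * the request reaches this function via NXOPT_NEXUS_LIST on a nexus
 * controller descriptor):
 *
 *	struct nx_list_req nlr;
 *
 *	bzero(&nlr, sizeof(nlr));
 *	uuid_copy(nlr.nl_prov_uuid, prov_uuid);
 *	nlr.nl_nx_uuids = USER_ADDR_NULL;	// pass 1: sizing only
 *	// ... issue NXOPT_NEXUS_LIST; on return nl_num_nx_uuids holds
 *	// the total number of nexus instances for the provider ...
 *	nlr.nl_nx_uuids = buf;			// pass 2: fetch UUIDs
 *	// the input count is capped at MAX_NUM_NX_UUIDS by the kernel
 */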
643 
644 /* Hoisted out of line to reduce kernel stack footprint */
645 SK_NO_INLINE_ATTRIBUTE
646 static int
647 nxctl_nexus_bind(struct nxctl *nxctl, struct sockopt *sopt)
648 {
649 	boolean_t m_pid, m_exec_uuid, m_key;
650 	struct nx_bind_req nbr;
651 	struct proc *p = PROC_NULL;
652 	struct nxbind *nxb = NULL;
653 	uint64_t p_uniqueid = -1;
654 	pid_t p_pid = -1;
655 	struct kern_nexus *nx = NULL;
656 #if SK_LOG
657 	uuid_string_t exec_uuidstr;
658 #endif /* SK_LOG */
659 	uuid_t p_uuid;
660 	void *key = NULL;
661 	int err = 0;
662 
663 	NXCTL_LOCK_ASSERT_HELD(nxctl);
664 
665 	if (sopt->sopt_val == USER_ADDR_NULL) {
666 		return EINVAL;
667 	}
668 
669 	uuid_clear(p_uuid);
670 	bzero(&nbr, sizeof(nbr));
671 	err = sooptcopyin(sopt, &nbr, sizeof(nbr), sizeof(nbr));
672 	if (err != 0) {
673 		return err;
674 	}
675 
676 	if (uuid_is_null(nbr.nb_nx_uuid)) {
677 		err = EINVAL;
678 		goto done_unlocked;
679 	}
680 
681 	nbr.nb_flags &= NBR_MATCH_MASK;
682 	if (nbr.nb_flags == 0) {
683 		/* must choose one of the match criteria */
684 		err = EINVAL;
685 		goto done_unlocked;
686 	}
687 	m_pid = !!(nbr.nb_flags & NBR_MATCH_PID);
688 	m_exec_uuid = !!(nbr.nb_flags & NBR_MATCH_EXEC_UUID);
689 	m_key = !!(nbr.nb_flags & NBR_MATCH_KEY);
690 
691 	if (m_pid || m_exec_uuid) {
692 		/*
693 		 * Validate process ID.  A valid PID is needed when we're
694 		 * asked to match by PID, or if asked to match by executable
695 		 * UUID with a NULL nb_exec_uuid supplied.  The latter is
696 		 * to support the case when a userland Nexus provider isn't
697 		 * able to acquire its client's executable UUID, but is
698 		 * able to identify it via PID.
699 		 */
700 		if ((m_pid || uuid_is_null(nbr.nb_exec_uuid)) &&
701 		    (p = proc_find(nbr.nb_pid)) == PROC_NULL) {
702 			err = ESRCH;
703 			goto done_unlocked;
704 		}
705 		/* exclude kernel from the match criteria */
706 		if (p == kernproc) {
707 			err = EACCES;
708 			goto done_unlocked;
709 		} else if (p != PROC_NULL) {
710 			proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
711 			p_uniqueid = proc_uniqueid(p);
712 			p_pid = proc_pid(p);
713 		} else {
714 			uuid_copy(p_uuid, nbr.nb_exec_uuid);
715 		}
716 	}
717 
718 	if (m_key) {
719 		if (nbr.nb_key_len == 0 || nbr.nb_key_len > NEXUS_MAX_KEY_LEN ||
720 		    nbr.nb_key == USER_ADDR_NULL) {
721 			err = EINVAL;
722 			goto done_unlocked;
723 		}
724 
725 		key = sk_alloc_data(nbr.nb_key_len, Z_WAITOK, skmem_tag_nx_key);
726 		if (__improbable(key == NULL)) {
727 			err = ENOMEM;
728 			goto done_unlocked;
729 		}
730 
731 		if (sopt->sopt_p != kernproc) {
732 			err = copyin(nbr.nb_key, key, nbr.nb_key_len);
733 			if (err != 0) {
734 				goto done_unlocked;
735 			}
736 		} else {
737 			bcopy((void *)nbr.nb_key, key, nbr.nb_key_len);
738 		}
739 	}
740 
741 	SK_LOCK();
742 	nx = nx_find(nbr.nb_nx_uuid, TRUE);
743 	if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
744 	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
745 		err = ENOENT;
746 		goto done;
747 	}
748 
749 	/* bind isn't applicable on anonymous nexus provider */
750 	if (NX_ANONYMOUS_PROV(nx)) {
751 		err = ENXIO;
752 		goto done;
753 	}
754 
755 	/* port must be within the domain's range */
756 	if (nbr.nb_port != NEXUS_PORT_ANY &&
757 	    nbr.nb_port >= NXDOM_MAX(NX_DOM(nx), ports)) {
758 		err = EDOM;
759 		goto done;
760 	} else if (nbr.nb_port == NEXUS_PORT_ANY) {
761 		/* for now, this is allowed only for kernel clients */
762 		if (sopt->sopt_p != kernproc) {
763 			err = EPERM;
764 			goto done;
765 		}
766 	}
767 
768 	nxb = nxb_alloc(Z_WAITOK);
769 
770 	if (m_pid) {
771 		nxb->nxb_flags |= NXBF_MATCH_UNIQUEID;
772 		nxb->nxb_uniqueid = p_uniqueid;
773 		nxb->nxb_pid = p_pid;
774 	}
775 	if (m_exec_uuid) {
776 		nxb->nxb_flags |= NXBF_MATCH_EXEC_UUID;
777 		ASSERT(!uuid_is_null(p_uuid));
778 		uuid_copy(nxb->nxb_exec_uuid, p_uuid);
779 	}
780 	if (m_key) {
781 		nxb->nxb_flags |= NXBF_MATCH_KEY;
782 		ASSERT(key != NULL);
783 		nxb->nxb_key = key;
784 		key = NULL;     /* let nxb_free() free it */
785 		ASSERT(nbr.nb_key_len != 0 &&
786 		    nbr.nb_key_len <= NEXUS_MAX_KEY_LEN);
787 		nxb->nxb_key_len = nbr.nb_key_len;
788 	}
789 
790 	/*
791 	 * Bind the creds to the nexus port.  If client doesn't have a port,
792 	 * find one, claim it, and associate the creds to it.  Upon success,
793 	 * the nexus may move the nxbind contents (including the key) to
794 	 * its own nxbind instance; in that case, nxb_free() below will not
795 	 * be freeing the key within.
796 	 */
797 	err = NX_DOM(nx)->nxdom_bind_port(nx, &nbr.nb_port, nxb, NULL);
798 	if (err != 0) {
799 		goto done;
800 	}
801 
802 	ASSERT(nbr.nb_port != NEXUS_PORT_ANY);
803 	(void) sooptcopyout(sopt, &nbr, sizeof(nbr));
804 
805 	SK_D("nexus 0x%llx nxb 0x%llx port %u flags 0x%b pid %d "
806 	    "(uniqueid %llu) exec_uuid %s key 0x%llx key_len %u",
807 	    SK_KVA(nx), SK_KVA(nxb), nbr.nb_port, nxb->nxb_flags,
808 	    NXBF_BITS, nxb->nxb_pid, nxb->nxb_uniqueid,
809 	    sk_uuid_unparse(nxb->nxb_exec_uuid, exec_uuidstr),
810 	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0,
811 	    nxb->nxb_key_len);
812 
813 done:
814 	if (nx != NULL) {
815 		(void) nx_release_locked(nx);
816 		nx = NULL;
817 	}
818 	SK_UNLOCK();
819 
820 done_unlocked:
821 	ASSERT(nx == NULL);
822 
823 	if (nxb != NULL) {
824 		nxb_free(nxb);
825 		nxb = NULL;
826 	}
827 	if (key != NULL) {
828 		sk_free_data(key, nbr.nb_key_len);
829 		key = NULL;
830 	}
831 	if (p != PROC_NULL) {
832 		proc_rele(p);
833 	}
834 
835 	return err;
836 }
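/*
 * Example bind request (a sketch; client_pid and nx_uuid are assumed
 * to be known to the caller):
 *
 *	struct nx_bind_req nbr;
 *
 *	bzero(&nbr, sizeof(nbr));
 *	uuid_copy(nbr.nb_nx_uuid, nx_uuid);
 *	nbr.nb_port = NEXUS_PORT_ANY;	// kernel callers only; userland
 *					// must name an in-range port
 *	nbr.nb_flags = NBR_MATCH_PID;	// match the client by PID
 *	nbr.nb_pid = client_pid;
 *	// ... issue NXOPT_NEXUS_BIND via nxctl_set_opt(); on success the
 *	// copyout above returns the claimed port in nbr.nb_port ...
 */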
837 
838 /* Hoisted out of line to reduce kernel stack footprint */
839 SK_NO_INLINE_ATTRIBUTE
840 static int
841 nxctl_nexus_unbind(struct nxctl *nxctl, struct sockopt *sopt)
842 {
843 	struct nx_unbind_req nur;
844 	struct kern_nexus *nx = NULL;
845 	int err = 0;
846 
847 	NXCTL_LOCK_ASSERT_HELD(nxctl);
848 
849 	if (sopt->sopt_val == USER_ADDR_NULL) {
850 		return EINVAL;
851 	}
852 
853 	bzero(&nur, sizeof(nur));
854 	err = sooptcopyin(sopt, &nur, sizeof(nur), sizeof(nur));
855 	if (err != 0) {
856 		return err;
857 	}
858 
859 	if (uuid_is_null(nur.nu_nx_uuid)) {
860 		return EINVAL;
861 	}
862 
863 	SK_LOCK();
864 	nx = nx_find(nur.nu_nx_uuid, TRUE);
865 	if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
866 	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
867 		err = ENOENT;
868 		goto done;
869 	}
870 
871 	/* unbind isn't applicable on anonymous nexus provider */
872 	if (NX_ANONYMOUS_PROV(nx)) {
873 		err = ENXIO;
874 		goto done;
875 	}
876 
877 	if (nur.nu_port == NEXUS_PORT_ANY) {
878 		err = EINVAL;
879 		goto done;
880 	}
881 
882 	err = NX_DOM(nx)->nxdom_unbind_port(nx, nur.nu_port);
883 
884 done:
885 	if (nx != NULL) {
886 		(void) nx_release_locked(nx);
887 		nx = NULL;
888 	}
889 	SK_UNLOCK();
890 
891 	return err;
892 }
893 
894 /* Hoisted out of line to reduce kernel stack footprint */
895 SK_NO_INLINE_ATTRIBUTE
896 static int
897 nxctl_nexus_config(struct nxctl *nxctl, struct sockopt *sopt)
898 {
899 	struct kern_nexus *nx = NULL;
900 	struct nx_cfg_req ncr;
901 	int err = 0;
902 
903 	NXCTL_LOCK_ASSERT_HELD(nxctl);
904 
905 	if (sopt->sopt_val == USER_ADDR_NULL) {
906 		return EINVAL;
907 	}
908 
909 	bzero(&ncr, sizeof(ncr));
910 	err = sooptcopyin(sopt, &ncr, sizeof(ncr), sizeof(ncr));
911 	if (err != 0) {
912 		return err;
913 	}
914 
915 	if (uuid_is_null(ncr.nc_nx_uuid)) {
916 		return EINVAL;
917 	}
918 
919 	SK_LOCK();
920 	nx = nx_find(ncr.nc_nx_uuid, TRUE);
921 	if (nx == NULL || (disable_nxctl_check == 0 &&
922 	    nx->nx_prov->nxprov_ctl != nxctl &&
923 	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
924 		err = ENOENT;
925 		goto done;
926 	}
927 
928 	if (NX_DOM_PROV(nx)->nxdom_prov_config != NULL) {
929 		err = NX_DOM_PROV(nx)->nxdom_prov_config(NX_DOM_PROV(nx),
930 		    nx, &ncr, sopt->sopt_dir, sopt->sopt_p, nxctl->nxctl_cred);
931 	} else {
932 		err = EPERM;
933 	}
934 
935 	if (err == 0) {
936 		(void) sooptcopyout(sopt, &ncr, sizeof(ncr));
937 	}
938 done:
939 	if (nx != NULL) {
940 		(void) nx_release_locked(nx);
941 		nx = NULL;
942 	}
943 	SK_UNLOCK();
944 
945 	return err;
946 }
947 
948 struct nxbind *
949 nxb_alloc(zalloc_flags_t how)
950 {
951 	struct nxbind *nxb = zalloc_flags(nxbind_zone, how | Z_ZERO);
952 
953 	if (nxb) {
954 		SK_DF(SK_VERB_MEM, "nxb 0x%llx ALLOC", SK_KVA(nxb));
955 	}
956 	return nxb;
957 }
958 
959 void
960 nxb_free(struct nxbind *nxb)
961 {
962 	SK_DF(SK_VERB_MEM, "nxb 0x%llx key 0x%llx FREE", SK_KVA(nxb),
963 	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0);
964 
965 	if (nxb->nxb_key != NULL) {
966 		sk_free_data(nxb->nxb_key, nxb->nxb_key_len);
967 		nxb->nxb_key = NULL;
968 	}
969 	zfree(nxbind_zone, nxb);
970 }
971 
972 /*
973  * nxb0 is assumed to possess the truth, compare nxb1 against it.
974  */
975 boolean_t
976 nxb_is_equal(struct nxbind *nxb0, struct nxbind *nxb1)
977 {
978 	ASSERT(nxb0 != NULL && nxb1 != NULL);
979 	ASSERT(nxb0 != nxb1);
980 
981 	/* we always compare using uniqueid and not pid */
982 	if ((nxb0->nxb_flags & NXBF_MATCH_UNIQUEID) &&
983 	    nxb1->nxb_uniqueid != nxb0->nxb_uniqueid) {
984 		return FALSE;
985 	}
986 
987 	if ((nxb0->nxb_flags & NXBF_MATCH_EXEC_UUID) &&
988 	    uuid_compare(nxb1->nxb_exec_uuid, nxb0->nxb_exec_uuid) != 0) {
989 		return FALSE;
990 	}
991 
992 	ASSERT(!(nxb0->nxb_flags & NXBF_MATCH_KEY) ||
993 	    (nxb0->nxb_key_len != 0 && nxb0->nxb_key != NULL));
994 
995 	if ((nxb0->nxb_flags & NXBF_MATCH_KEY) &&
996 	    (nxb0->nxb_key_len != nxb1->nxb_key_len ||
997 	    nxb1->nxb_key == NULL || timingsafe_bcmp(nxb1->nxb_key, nxb0->nxb_key,
998 	    nxb1->nxb_key_len) != 0)) {
999 		return FALSE;
1000 	}
1001 
1002 	return TRUE;
1003 }
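/*
 * Illustrative use (a sketch; nxb_bound is the binding installed via
 * NXOPT_NEXUS_BIND, nxb_client describes the connecting client):
 *
 *	if (nxb_bound != NULL &&
 *	    (nxb_client == NULL || !nxb_is_equal(nxb_bound, nxb_client))) {
 *		return EACCES;		// credentials don't match
 *	}
 *
 * The key check uses timingsafe_bcmp() so that a caller probing the
 * port with candidate keys cannot recover the bound key via timing.
 */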
1004 
1005 void
1006 nxb_move(struct nxbind *snxb, struct nxbind *dnxb)
1007 {
1008 	ASSERT(!(snxb->nxb_flags & NXBF_MATCH_KEY) ||
1009 	    (snxb->nxb_key_len != 0 && snxb->nxb_key != NULL));
1010 
1011 	/* in case the destination has a key attached, free it first */
1012 	if (dnxb->nxb_key != NULL) {
1013 		sk_free_data(dnxb->nxb_key, dnxb->nxb_key_len);
1014 		dnxb->nxb_key = NULL;
1015 	}
1016 
1017 	/* move everything from src to dst, and then wipe out src */
1018 	bcopy(snxb, dnxb, sizeof(*dnxb));
1019 	bzero(snxb, sizeof(*snxb));
1020 }
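/*
 * Ownership of nxb_key moves with the struct; because the source is
 * zeroed, freeing it afterwards is safe (sketch):
 *
 *	nxb_move(snxb, dnxb);	// dnxb now owns the key, if any
 *	nxb_free(snxb);		// no-op on the key: snxb->nxb_key is NULL
 */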
1021 
1022 /* Upper bound on # of cl_num_ch_uuids that we'd return to user space */
1023 #define MAX_NUM_CH_UUIDS        4096
1024 
1025 /* Hoisted out of line to reduce kernel stack footprint */
1026 SK_NO_INLINE_ATTRIBUTE
1027 static int
1028 nxctl_get_channel_list(struct nxctl *nxctl, struct sockopt *sopt)
1029 {
1030 	user_addr_t tmp_ptr = USER_ADDR_NULL;
1031 	uint32_t nuuids = 0, ncuuids = 0;
1032 	uuid_t *puuid, *uuids = NULL;
1033 	size_t uuids_sz;
1034 	struct ch_list_req clr;
1035 	struct kern_channel *ch = NULL;
1036 	struct kern_nexus *nx = NULL;
1037 	struct kern_nexus find;
1038 	int err = 0, observeall;
1039 
1040 	NXCTL_LOCK_ASSERT_HELD(nxctl);
1041 
1042 	ASSERT(sopt->sopt_p != NULL);
1043 	if (sopt->sopt_val == USER_ADDR_NULL) {
1044 		return EINVAL;
1045 	}
1046 
1047 	err = sooptcopyin(sopt, &clr, sizeof(clr), sizeof(clr));
1048 	if (err != 0) {
1049 		return err;
1050 	}
1051 
1052 	if (uuid_is_null(clr.cl_nx_uuid)) {
1053 		return EINVAL;
1054 	} else if ((size_t)clr.cl_num_ch_uuids > MAX_NUM_CH_UUIDS) {
1055 		clr.cl_num_ch_uuids = MAX_NUM_CH_UUIDS;
1056 	}
1057 
1058 	/*
1059 	 * If the caller specified a buffer, copy out the Channel UUIDs to
1060 	 * the caller gracefully.  We only copy out the number of UUIDs
1061 	 * that the caller asked for, but we always report back how big
1062 	 * the buffer really needs to be.
1063 	 */
1064 	tmp_ptr = clr.cl_ch_uuids;
1065 	if (tmp_ptr != USER_ADDR_NULL && clr.cl_num_ch_uuids > 0) {
1066 		uuids_sz = (size_t)clr.cl_num_ch_uuids * sizeof(uuid_t);
1067 		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
1068 		if (uuids == NULL) {
1069 			return ENOBUFS;
1070 		}
1071 	}
1072 
1073 	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
1074 	    PRIV_SKYWALK_OBSERVE_ALL) == 0);
1075 
1076 	SK_LOCK();
1077 	uuid_copy(find.nx_uuid, clr.cl_nx_uuid);
1078 	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
1079 	if (nx != NULL && NX_PROV(nx)->nxprov_ctl != nxctl && !observeall) {
1080 		/*
1081 		 * Return only entries that are visible to the caller,
1082 		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
1083 		 */
1084 		nx = NULL;
1085 	}
1086 	if (nx != NULL) {
1087 		/*
1088 		 * Count number of Channels.  If buffer space exists
1089 		 * Count the number of channels.  If buffer space exists
1090 		 * and remains, copy out the Channel UUIDs.
1091 		nuuids = clr.cl_num_ch_uuids;
1092 		puuid = uuids;
1093 
1094 		STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
1095 			++ncuuids;
1096 			if (uuids != NULL && nuuids > 0) {
1097 				uuid_copy(*puuid, ch->ch_info->cinfo_ch_id);
1098 				--nuuids;
1099 				++puuid;
1100 			}
1101 		}
1102 	} else {
1103 		err = ENOENT;
1104 	}
1105 	SK_UNLOCK();
1106 
1107 	if (uuids != NULL) {
1108 		if (err == 0 && nx != NULL && tmp_ptr != USER_ADDR_NULL) {
1109 			uintptr_t cnt_uuid;
1110 
1111 			/* Note: Pointer arithmetic */
1112 			cnt_uuid = (uintptr_t)(puuid - uuids);
1113 			ASSERT(cnt_uuid > 0);
1114 
1115 			if (sopt->sopt_p != kernproc) {
1116 				err = copyout(uuids, tmp_ptr,
1117 				    cnt_uuid * sizeof(uuid_t));
1118 			} else {
1119 				bcopy(uuids, CAST_DOWN(caddr_t, tmp_ptr),
1120 				    cnt_uuid * sizeof(uuid_t));
1121 			}
1122 		}
1123 		sk_free_data(uuids, uuids_sz);
1124 		uuids = NULL;
1125 	}
1126 
1127 	if (err == 0) {
1128 		clr.cl_num_ch_uuids = ncuuids;
1129 		err = sooptcopyout(sopt, &clr, sizeof(clr));
1130 	}
1131 
1132 	return err;
1133 }
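/*
 * A note on the RB_FIND lookup above: a stack-allocated kern_nexus
 * with only nx_uuid filled in serves as the search key, which works
 * because nx_cmp() orders the tree by nx_uuid alone (sketch):
 *
 *	struct kern_nexus find;
 *
 *	uuid_copy(find.nx_uuid, some_uuid);
 *	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);	// NULL if absent
 */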
1134 
1135 static void
1136 nxctl_init(struct nxctl *nxctl, struct proc *p, struct fileproc *fp)
1137 {
1138 	uuid_t p_uuid;
1139 
1140 	bzero(nxctl, sizeof(*nxctl));
1141 
1142 	proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
1143 
1144 	lck_mtx_init(&nxctl->nxctl_lock, &nexus_lock_group, &nexus_lock_attr);
1145 	uuid_copy(nxctl->nxctl_proc_uuid, p_uuid);
1146 	nxctl->nxctl_proc_uniqueid = proc_uniqueid(p);
1147 	nxctl->nxctl_cred = kauth_cred_proc_ref(p);
1148 	nxctl->nxctl_fp = fp;
1149 	if (nxctl == &_kernnxctl) {
1150 		ASSERT(p == kernproc);
1151 		nxctl->nxctl_flags |= NEXUSCTLF_KERNEL;
1152 	}
1153 	if (fp == NULL) {
1154 		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
1155 	}
1156 }
1157 
1158 static struct nxctl *
1159 nxctl_alloc(struct proc *p, struct fileproc *fp, zalloc_flags_t how)
1160 {
1161 	struct nxctl *nxctl = zalloc_flags(nxctl_zone, how);
1162 
1163 	if (nxctl != NULL) {
1164 		nxctl_init(nxctl, p, fp);
1165 	}
1166 	return nxctl;
1167 }
1168 
1169 static void
1170 nxctl_free(struct nxctl *nxctl)
1171 {
1172 	ASSERT(nxctl->nxctl_refcnt == 0);
1173 	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_ATTACHED));
1174 	kauth_cred_unref(&nxctl->nxctl_cred);
1175 	lck_mtx_destroy(&nxctl->nxctl_lock, &nexus_lock_group);
1176 	SK_D("nxctl 0x%llx FREE", SK_KVA(nxctl));
1177 	if (!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL)) {
1178 		zfree(nxctl_zone, nxctl);
1179 	}
1180 }
1181 
1182 static void
1183 nxctl_retain_locked(struct nxctl *nxctl)
1184 {
1185 	SK_LOCK_ASSERT_HELD();
1186 
1187 	nxctl->nxctl_refcnt++;
1188 	ASSERT(nxctl->nxctl_refcnt != 0);
1189 }
1190 
1191 void
1192 nxctl_retain(struct nxctl *nxctl)
1193 {
1194 	SK_LOCK();
1195 	nxctl_retain_locked(nxctl);
1196 	SK_UNLOCK();
1197 }
1198 
1199 static int
1200 nxctl_release_locked(struct nxctl *nxctl)
1201 {
1202 	int oldref = nxctl->nxctl_refcnt;
1203 
1204 	SK_LOCK_ASSERT_HELD();
1205 
1206 	ASSERT(nxctl->nxctl_refcnt != 0);
1207 	if (--nxctl->nxctl_refcnt == 0) {
1208 		nxctl_free(nxctl);
1209 	}
1210 
1211 	return oldref == 1;
1212 }
1213 
1214 int
1215 nxctl_release(struct nxctl *nxctl)
1216 {
1217 	int lastref;
1218 
1219 	SK_LOCK();
1220 	lastref = nxctl_release_locked(nxctl);
1221 	SK_UNLOCK();
1222 
1223 	return lastref;
1224 }
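/*
 * The return value tells the caller whether it dropped the last
 * reference, e.g. (sketch):
 *
 *	if (nxctl_release(nxctl)) {
 *		// nxctl has been freed by nxctl_free(); don't touch it
 *	}
 */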
1225 
1226 void
1227 nxctl_dtor(void *arg)
1228 {
1229 	struct nxctl *nxctl = arg;
1230 
1231 	nxctl_close(nxctl);
1232 	SK_LOCK();
1233 	(void) nxctl_release_locked(nxctl);
1234 	SK_UNLOCK();
1235 }
1236 
1237 int
1238 nxprov_advise_connect(struct kern_nexus *nx, struct kern_channel *ch,
1239     struct proc *p)
1240 {
1241 	struct kern_nexus_provider *nxprov = NX_PROV(nx);
1242 	int err = 0;
1243 
1244 	ASSERT(!(ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)));
1245 	ASSERT(ch->ch_ctx == NULL);
1246 
1247 	SK_LOCK_ASSERT_HELD();
1248 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1249 
1250 	/* monitor channels aren't externally visible/usable, so ignore */
1251 	if ((ch->ch_info->cinfo_ch_mode & CHMODE_MONITOR) ||
1252 	    (ch->ch_flags & CHANF_EXT_SKIP) ||
1253 	    (nxprov->nxprov_ext.nxpi_pre_connect == NULL &&
1254 	    nxprov->nxprov_ext.nxpi_connected == NULL)) {
1255 		return 0;
1256 	}
1257 
1258 	ch_retain_locked(ch);
1259 	lck_mtx_unlock(&ch->ch_lock);
1260 	SK_UNLOCK();
1261 	lck_mtx_lock(&ch->ch_lock);
1262 
1263 	err = nxprov->nxprov_ext.nxpi_pre_connect(nxprov, p, nx,
1264 	    ch->ch_info->cinfo_nx_port, ch, &ch->ch_ctx);
1265 	if (err != 0) {
1266 		SK_D("ch 0x%llx flags %b nx 0x%llx pre_connect "
1267 		    "error %d", SK_KVA(ch), ch->ch_flags,
1268 		    CHANF_BITS, SK_KVA(nx), err);
1269 		ch->ch_ctx = NULL;
1270 		goto done;
1271 	}
1272 	/*
1273 	 * Upon ring/slot init failure, this is cleared
1274 	 * by nxprov_advise_disconnect() below.
1275 	 */
1276 	atomic_bitset_32(&ch->ch_flags, CHANF_EXT_PRECONNECT);
1277 	if (NXPROV_LLINK(nxprov)) {
1278 		err = nx_netif_llink_ext_init_default_queues(nx);
1279 	} else {
1280 		err = nx_init_rings(nx, ch);
1281 	}
1282 	if (err != 0) {
1283 		goto done;
1284 	}
1285 	ASSERT(err == 0);
1286 	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT |
1287 	    CHANF_EXT_CONNECTED)) == CHANF_EXT_PRECONNECT);
1288 
1289 	err = nxprov->nxprov_ext.nxpi_connected(nxprov, nx, ch);
1290 	if (err != 0) {
1291 		SK_D("ch 0x%llx flags %b nx 0x%llx connected error %d",
1292 		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx), err);
1293 		goto done;
1294 	}
1295 	atomic_bitset_32(&ch->ch_flags, CHANF_EXT_CONNECTED);
1296 	SK_D("ch 0x%llx flags %b nx 0x%llx connected",
1297 	    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));
1298 
1299 
1300 done:
1301 	lck_mtx_unlock(&ch->ch_lock);
1302 	SK_LOCK();
1303 	lck_mtx_lock(&ch->ch_lock);
1304 	if ((err != 0) &&
1305 	    (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT))) {
1306 		nxprov_advise_disconnect(nx, ch);
1307 	}
1308 	/* caller is expected to hold one, in addition to ourselves */
1309 	VERIFY(ch->ch_refcnt >= 2);
1310 	ch_release_locked(ch);
1311 
1312 	return err;
1313 }
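/*
 * Provider-side view of the sequence above (a sketch; the callback
 * prototypes are inferred from the call sites in this file):
 *
 *	static int
 *	my_pre_connect(struct kern_nexus_provider *nxprov, struct proc *p,
 *	    struct kern_nexus *nx, nexus_port_t port,
 *	    struct kern_channel *ch, void **ch_ctx)
 *	{
 *		*ch_ctx = my_alloc_ctx();	// becomes ch->ch_ctx
 *		return 0;
 *	}
 *
 *	static int
 *	my_connected(struct kern_nexus_provider *nxprov,
 *	    struct kern_nexus *nx, struct kern_channel *ch)
 *	{
 *		return 0;	// rings and slots are initialized by now
 *	}
 *
 * A nonzero return from either callback unwinds the connect via
 * nxprov_advise_disconnect() below.
 */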
1314 
1315 void
1316 nxprov_advise_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
1317 {
1318 	struct kern_nexus_provider *nxprov = NX_PROV(nx);
1319 
1320 	SK_LOCK_ASSERT_HELD();
1321 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1322 
1323 	/* check as we might be called in the error handling path */
1324 	if (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)) {
1325 		ch_retain_locked(ch);
1326 		lck_mtx_unlock(&ch->ch_lock);
1327 		SK_UNLOCK();
1328 		lck_mtx_lock(&ch->ch_lock);
1329 
1330 		ASSERT(!(ch->ch_flags & CHANF_EXT_SKIP));
1331 		if (ch->ch_flags & CHANF_EXT_CONNECTED) {
1332 			nxprov->nxprov_ext.nxpi_pre_disconnect(nxprov, nx, ch);
1333 			atomic_bitclear_32(&ch->ch_flags, CHANF_EXT_CONNECTED);
1334 		}
1335 
1336 		/*
1337 		 * Inform the external domain provider that the rings
1338 		 * and slots for this channel are no longer valid.
1339 		 */
1340 		if (NXPROV_LLINK(nxprov)) {
1341 			nx_netif_llink_ext_fini_default_queues(nx);
1342 		} else {
1343 			nx_fini_rings(nx, ch);
1344 		}
1345 
1346 		ASSERT(ch->ch_flags & CHANF_EXT_PRECONNECT);
1347 		nxprov->nxprov_ext.nxpi_disconnected(nxprov, nx, ch);
1348 		atomic_bitclear_32(&ch->ch_flags, CHANF_EXT_PRECONNECT);
1349 
1350 		SK_D("ch 0x%llx flags %b nx 0x%llx disconnected",
1351 		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));
1352 
1353 		/* We're done with this channel */
1354 		ch->ch_ctx = NULL;
1355 
1356 		lck_mtx_unlock(&ch->ch_lock);
1357 		SK_LOCK();
1358 		lck_mtx_lock(&ch->ch_lock);
1359 		/* caller is expected to hold one, in addition to ourselves */
1360 		VERIFY(ch->ch_refcnt >= 2);
1361 		ch_release_locked(ch);
1362 	}
1363 	ASSERT(!(ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)));
1364 	ASSERT(ch->ch_ctx == NULL);
1365 }
1366 
1367 static struct kern_nexus_provider *
1368 nxprov_create_common(struct nxctl *nxctl,
1369     struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
1370     const struct kern_nexus_provider_init *init, int *err)
1371 {
1372 	struct skmem_region_params srp[SKMEM_REGIONS];
1373 	struct kern_nexus_provider *nxprov = NULL;
1374 	struct skmem_region_params *bsrp;
1375 	struct nxprov_params nxp;
1376 	uint32_t override = 0;
1377 	int i;
1378 
1379 	_CASSERT(sizeof(*init) == sizeof(nxprov->nxprov_ext));
1380 	_CASSERT(sizeof(*init) >=
1381 	    sizeof(struct kern_nexus_netif_provider_init));
1382 
1383 	SK_LOCK_ASSERT_HELD();
1384 	ASSERT(nxctl != NULL && reg != NULL && nxdom_prov != NULL);
1385 
1386 	/* process and validate provider parameters */
1387 	if ((*err = nxdom_prov_validate_params(nxdom_prov, reg,
1388 	    &nxp, srp, override)) != 0) {
1389 		goto done;
1390 	}
1391 
1392 	nxprov = nxprov_alloc(nxdom_prov, Z_WAITOK);
1393 	ASSERT(nxprov->nxprov_dom_prov == nxdom_prov);
1394 
1395 	STAILQ_INIT(&nxprov->nxprov_nx_head);
1396 	STAILQ_INSERT_TAIL(&nxprov_head, nxprov, nxprov_link);
1397 	nxprov->nxprov_flags |= NXPROVF_ATTACHED;
1398 	nxprov->nxprov_ctl = nxctl;
1399 	uuid_generate_random(nxprov->nxprov_uuid);
1400 	bcopy(&nxp, nxprov->nxprov_params, sizeof(struct nxprov_params));
1401 
1402 	if (init != NULL) {
1403 		if (init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF) {
1404 			ASSERT(NXPROV_LLINK(nxprov));
1405 			bcopy(init, &nxprov->nxprov_netif_ext,
1406 			    sizeof(nxprov->nxprov_netif_ext));
1407 		} else {
1408 			ASSERT(!NXPROV_LLINK(nxprov));
1409 			ASSERT(init->nxpi_version ==
1410 			    KERN_NEXUS_PROVIDER_CURRENT_VERSION);
1411 			bcopy(init, &nxprov->nxprov_ext, sizeof(*init));
1412 		}
1413 		nxprov->nxprov_flags |= NXPROVF_EXTERNAL;
1414 	}
1415 
1416 	/* store validated region parameters to the provider */
1417 	for (i = 0; i < SKMEM_REGIONS; i++) {
1418 		nxprov->nxprov_region_params[i] = srp[i];
1419 	}
1420 
1421 	bsrp = &nxprov->nxprov_region_params[SKMEM_REGION_BUF];
1422 	/*
1423 	 * Special handling for external nexus providers; similar
1424 	 * logic to what's done in kern_pbufpool_create().
1425 	 */
1426 	if (nxprov->nxprov_flags & NXPROVF_EXTERNAL) {
1427 		uint32_t nxpi_flags = nxprov->nxprov_ext.nxpi_flags;
1428 		/*
1429 		 * Set SKMEM_REGION_CR_MONOLITHIC if the provider does
1430 		 * not want more than a single segment for entire region.
1431 		 */
1432 		if (nxpi_flags & NXPIF_MONOLITHIC) {
1433 			bsrp->srp_cflags |= SKMEM_REGION_CR_MONOLITHIC;
1434 		} else {
1435 			bsrp->srp_cflags &= ~SKMEM_REGION_CR_MONOLITHIC;
1436 		}
1437 
1438 		if (nxpi_flags & NXPIF_INHIBIT_CACHE) {
1439 			bsrp->srp_cflags |= SKMEM_REGION_CR_NOCACHE;
1440 		} else {
1441 			bsrp->srp_cflags &= ~SKMEM_REGION_CR_NOCACHE;
1442 		}
1443 
1444 		/* recalculate what's done by nxprov_params_adjust() earlier */
1445 		skmem_region_params_config(bsrp);
1446 
1447 		if (nxpi_flags & NXPIF_VIRTUAL_DEVICE) {
1448 			nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
1449 		}
1450 	} else if (nxdom_prov->nxdom_prov_dom->nxdom_type !=
1451 	    NEXUS_TYPE_NET_IF) {
1452 		/*
1453 		 * Treat non-netif built-in nexus providers as those
1454 		 * meant for inter-process communications, i.e. there
1455 		 * is no actual networking hardware involved.
1456 		 */
1457 		nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
1458 	}
1459 
1460 	if (nxdom_prov->nxdom_prov_dom->nxdom_type == NEXUS_TYPE_NET_IF) {
1461 		struct skmem_region_params *kmd_srp =
1462 		    &nxprov->nxprov_region_params[SKMEM_REGION_KMD];
1463 		struct skmem_region_params *umd_srp =
1464 		    &nxprov->nxprov_region_params[SKMEM_REGION_UMD];
1465 
1466 		kmd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
1467 		umd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
1468 		skmem_region_params_config(kmd_srp);
1469 		skmem_region_params_config(umd_srp);
1470 	}
1471 
1472 	nxprov_retain_locked(nxprov);   /* one for being in the list */
1473 	nxprov_retain_locked(nxprov);   /* one for the caller */
1474 
1475 #if SK_LOG
1476 	uuid_string_t uuidstr;
1477 	SK_D("nxprov 0x%llx UUID %s", SK_KVA(nxprov),
1478 	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr));
1479 #endif /* SK_LOG */
1480 
1481 done:
1482 	return nxprov;
1483 }
1484 
1485 struct kern_nexus_provider *
1486 nxprov_create(struct proc *p, struct nxctl *nxctl, struct nxprov_reg *reg,
1487     int *err)
1488 {
1489 	struct nxprov_params *nxp = &reg->nxpreg_params;
1490 	struct kern_nexus_domain_provider *nxdom_prov = NULL;
1491 	struct kern_nexus_provider *nxprov = NULL;
1492 
1493 	NXCTL_LOCK_ASSERT_HELD(nxctl);
1494 
1495 	ASSERT(nxctl->nxctl_cred != proc_ucred(kernproc));
1496 	*err = 0;
1497 
1498 	switch (nxp->nxp_type) {
1499 	case NEXUS_TYPE_USER_PIPE:      /* only for userland */
1500 		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
1501 		    PRIV_SKYWALK_REGISTER_USER_PIPE);
1502 		break;
1503 
1504 	case NEXUS_TYPE_FLOW_SWITCH:    /* allowed for userland */
1505 		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
1506 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH);
1507 		break;
1508 
1509 	case NEXUS_TYPE_NET_IF:         /* allowed for userland */
1510 		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
1511 		    PRIV_SKYWALK_REGISTER_NET_IF);
1512 		break;
1513 
1514 	case NEXUS_TYPE_KERNEL_PIPE:    /* only for kernel */
1515 	case NEXUS_TYPE_MONITOR:        /* invalid */
1516 	default:
1517 		*err = EINVAL;
1518 		goto done;
1519 	}
1520 
1521 	if (*err != 0) {
1522 		goto done;
1523 	}
1524 
1525 	ASSERT(nxp->nxp_type < NEXUS_TYPE_MAX);
1526 	if ((nxdom_prov = nxdom_prov_default[nxp->nxp_type]) == NULL) {
1527 		*err = ENXIO;
1528 		goto done;
1529 	}
1530 
1531 #if CONFIG_NEXUS_NETIF
1532 	/* make sure netif_compat is the default here */
1533 	ASSERT(nxp->nxp_type != NEXUS_TYPE_NET_IF ||
1534 	    strcmp(nxdom_prov->nxdom_prov_name,
1535 	    NEXUS_PROVIDER_NET_IF_COMPAT) == 0);
1536 #endif /* CONFIG_NEXUS_NETIF */
1537 
1538 	SK_LOCK();
1539 	/* callee holds a reference for our caller upon success */
1540 	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, NULL, err);
1541 	SK_UNLOCK();
1542 done:
1543 	return nxprov;
1544 }
1545 
1546 struct kern_nexus_provider *
1547 nxprov_create_kern(struct nxctl *nxctl,
1548     struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
1549     const struct kern_nexus_provider_init *init, int *err)
1550 {
1551 	struct nxprov_params *nxp = &reg->nxpreg_params;
1552 	struct kern_nexus_provider *nxprov = NULL;
1553 
1554 	NXCTL_LOCK_ASSERT_HELD(nxctl);
1555 	SK_LOCK_ASSERT_HELD();
1556 
1557 	ASSERT(nxctl->nxctl_cred == proc_ucred(kernproc));
1558 	ASSERT(nxp->nxp_type == nxdom_prov->nxdom_prov_dom->nxdom_type);
1559 	ASSERT(init == NULL ||
1560 	    init->nxpi_version == KERN_NEXUS_PROVIDER_CURRENT_VERSION ||
1561 	    init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF);
1562 
1563 	*err = 0;
1564 
1565 	switch (nxp->nxp_type) {
1566 	case NEXUS_TYPE_NET_IF:
1567 		break;
1568 	case NEXUS_TYPE_KERNEL_PIPE:
1569 		if (init == NULL) {
1570 			*err = EINVAL;
1571 			goto done;
1572 		}
1573 		break;
1574 	case NEXUS_TYPE_FLOW_SWITCH:
1575 		if (init != NULL) {
1576 			*err = EINVAL;
1577 			goto done;
1578 		}
1579 		break;
1580 
1581 	case NEXUS_TYPE_USER_PIPE:      /* only for userland */
1582 	case NEXUS_TYPE_MONITOR:        /* invalid */
1583 	default:
1584 		*err = EINVAL;
1585 		goto done;
1586 	}
1587 
1588 	/* callee holds a reference for our caller upon success */
1589 	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, init, err);
1590 
1591 done:
1592 	return nxprov;
1593 }
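/*
 * Registration sketch for a kernel-pipe provider (illustrative; the
 * my_* callbacks are placeholders supplied by the caller, and only a
 * subset of the init fields is shown):
 *
 *	struct kern_nexus_provider_init init = {
 *		.nxpi_version = KERN_NEXUS_PROVIDER_CURRENT_VERSION,
 *		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
 *		.nxpi_pre_connect = my_pre_connect,
 *		.nxpi_connected = my_connected,
 *		.nxpi_pre_disconnect = my_pre_disconnect,
 *		.nxpi_disconnected = my_disconnected,
 *	};
 *
 *	nxprov = nxprov_create_kern(nxctl, nxdom_prov, &reg, &init, &err);
 *
 * Per the switch above, an init is mandatory for NEXUS_TYPE_KERNEL_PIPE
 * and must be NULL for NEXUS_TYPE_FLOW_SWITCH.
 */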
1594 
1595 int
1596 nxprov_destroy(struct nxctl *nxctl, const uuid_t nxprov_uuid)
1597 {
1598 	struct kern_nexus_provider *nxprov = NULL;
1599 	int err = 0;
1600 
1601 	NXCTL_LOCK_ASSERT_HELD(nxctl);
1602 
1603 	SK_LOCK();
1604 
1605 	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
1606 		if (nxctl == nxprov->nxprov_ctl &&
1607 		    uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
1608 			nxprov_retain_locked(nxprov);
1609 			break;
1610 		}
1611 	}
1612 
1613 	if (nxprov == NULL) {
1614 		err = ENOENT;
1615 	} else {
1616 		err = nxprov_close(nxprov, TRUE);
1617 	}
1618 
1619 	if (nxprov != NULL) {
1620 		(void) nxprov_release_locked(nxprov);
1621 	}
1622 
1623 	SK_UNLOCK();
1624 
1625 	return err;
1626 }
1627 
1628 int
1629 nxprov_close(struct kern_nexus_provider *nxprov, boolean_t locked)
1630 {
1631 	int err = 0;
1632 
1633 	if (!locked) {
1634 		SK_LOCK();
1635 	}
1636 
1637 	SK_LOCK_ASSERT_HELD();
1638 
1639 #if SK_LOG
1640 	uuid_string_t uuidstr;
1641 	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
1642 	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
1643 	    nxprov->nxprov_flags, NXPROVF_BITS);
1644 #endif /* SK_LOG */
1645 
1646 	if (nxprov->nxprov_flags & NXPROVF_CLOSED) {
1647 		err = EALREADY;
1648 	} else {
1649 		struct kern_nexus *nx, *tnx;
1650 
1651 		nxprov->nxprov_ctl = NULL;
1652 
1653 		STAILQ_FOREACH_SAFE(nx, &nxprov->nxprov_nx_head,
1654 		    nx_prov_link, tnx) {
1655 			nx_retain_locked(nx);
1656 			(void) nx_close(nx, TRUE);
1657 			(void) nx_release_locked(nx);
1658 		}
1659 
1660 		if (STAILQ_EMPTY(&nxprov->nxprov_nx_head)) {
1661 			/* no nexus created on this, so detach now */
1662 			nxprov_detach(nxprov, TRUE);
1663 		} else {
1664 			/* detach when last nexus is destroyed */
1665 			ASSERT(nxprov->nxprov_refcnt > 1);
1666 			nxprov->nxprov_flags |= NXPROVF_CLOSED;
1667 		}
1668 	}
1669 
1670 	if (!locked) {
1671 		SK_UNLOCK();
1672 	}
1673 
1674 	return err;
1675 }
1676 
1677 static void
1678 nxprov_detach(struct kern_nexus_provider *nxprov, boolean_t locked)
1679 {
1680 	if (!locked) {
1681 		SK_LOCK();
1682 	}
1683 
1684 	SK_LOCK_ASSERT_HELD();
1685 
1686 #if SK_LOG
1687 	uuid_string_t uuidstr;
1688 	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
1689 	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
1690 	    nxprov->nxprov_flags, NXPROVF_BITS);
1691 #endif /* SK_LOG */
1692 
1693 	ASSERT(nxprov->nxprov_flags & NXPROVF_ATTACHED);
1694 	STAILQ_REMOVE(&nxprov_head, nxprov, kern_nexus_provider, nxprov_link);
1695 	nxprov->nxprov_flags &= ~NXPROVF_ATTACHED;
1696 
1697 	/* caller must hold an extra ref */
1698 	ASSERT(nxprov->nxprov_refcnt > 1);
1699 	(void) nxprov_release_locked(nxprov);
1700 
1701 	if (!locked) {
1702 		SK_UNLOCK();
1703 	}
1704 }
1705 
1706 static struct kern_nexus_provider *
1707 nxprov_alloc(struct kern_nexus_domain_provider *nxdom_prov, zalloc_flags_t how)
1708 {
1709 	struct kern_nexus_provider *nxprov;
1710 	struct nxprov_params *nxp;
1711 
1712 	ASSERT(nxdom_prov != NULL);
1713 
1714 	nxp = nxprov_params_alloc(how);
1715 	if (nxp == NULL) {
1716 		SK_ERR("Failed to allocate nxprov_params");
1717 		return NULL;
1718 	}
1719 
1720 	nxprov = zalloc_flags(nxprov_zone, how | Z_ZERO);
1721 	if (nxprov == NULL) {
1722 		SK_ERR("Failed to allocate nxprov");
1723 		nxprov_params_free(nxp);
1724 		return NULL;
1725 	}
1726 
1727 	nxprov->nxprov_dom_prov = nxdom_prov;
1728 	nxprov->nxprov_params = nxp;
1729 	/* hold a reference for nxprov */
1730 	nxdom_prov_retain_locked(nxdom_prov);
1731 
1732 	return nxprov;
1733 }
1734 
1735 static void
1736 nxprov_free(struct kern_nexus_provider *nxprov)
1737 {
1738 	struct kern_nexus_domain_provider *nxdom_prov =
1739 	    nxprov->nxprov_dom_prov;
1740 
1741 	SK_LOCK_ASSERT_HELD();
1742 
1743 	ASSERT(nxdom_prov != NULL);
1744 	(void) nxdom_prov_release_locked(nxdom_prov);
1745 	nxprov->nxprov_dom_prov = NULL;
1746 	ASSERT(nxprov->nxprov_params != NULL);
1747 	nxprov_params_free(nxprov->nxprov_params);
1748 	nxprov->nxprov_params = NULL;
1749 	ASSERT(!(nxprov->nxprov_flags & NXPROVF_ATTACHED));
1750 	SK_DF(SK_VERB_MEM, "nxprov 0x%llx FREE", SK_KVA(nxprov));
1751 	zfree(nxprov_zone, nxprov);
1752 }
1753 
1754 static void
1755 nxprov_retain_locked(struct kern_nexus_provider *nxprov)
1756 {
1757 	SK_LOCK_ASSERT_HELD();
1758 
1759 	nxprov->nxprov_refcnt++;
1760 	ASSERT(nxprov->nxprov_refcnt != 0);
1761 }
1762 
1763 void
1764 nxprov_retain(struct kern_nexus_provider *nxprov)
1765 {
1766 	SK_LOCK();
1767 	nxprov_retain_locked(nxprov);
1768 	SK_UNLOCK();
1769 }
1770 
1771 static int
1772 nxprov_release_locked(struct kern_nexus_provider *nxprov)
1773 {
1774 	int oldref = nxprov->nxprov_refcnt;
1775 
1776 	SK_LOCK_ASSERT_HELD();
1777 
1778 	ASSERT(nxprov->nxprov_refcnt != 0);
1779 	if (--nxprov->nxprov_refcnt == 0) {
1780 		nxprov_free(nxprov);
1781 	}
1782 
1783 	return oldref == 1;
1784 }
1785 
1786 int
1787 nxprov_release(struct kern_nexus_provider *nxprov)
1788 {
1789 	int lastref;
1790 
1791 	SK_LOCK();
1792 	lastref = nxprov_release_locked(nxprov);
1793 	SK_UNLOCK();
1794 
1795 	return lastref;
1796 }
1797 
1798 struct nxprov_params *
1799 nxprov_params_alloc(zalloc_flags_t how)
1800 {
1801 	return zalloc_flags(nxprov_params_zone, how | Z_ZERO);
1802 }
1803 
1804 void
1805 nxprov_params_free(struct nxprov_params *nxp)
1806 {
1807 	SK_DF(SK_VERB_MEM, "nxp 0x%llx FREE", SK_KVA(nxp));
1808 	zfree(nxprov_params_zone, nxp);
1809 }
1810 
1811 static int
1812 nx_check_pp(struct kern_nexus_provider *nxprov, struct kern_pbufpool *pp)
1813 {
1814 	struct kern_nexus_domain_provider *nxdom_prov = nxprov->nxprov_dom_prov;
1815 
1816 	if ((pp->pp_flags & (PPF_EXTERNAL | PPF_CLOSED)) != PPF_EXTERNAL) {
1817 		SK_ERR("Rejecting \"%s\" built-in pp", pp->pp_name);
1818 		return ENOTSUP;
1819 	}
1820 
1821 	/*
1822 	 * Require that the nexus domain metadata type and the
1823 	 * metadata type of the caller-provided pbufpool match.
1824 	 */
1825 	if (nxdom_prov->nxdom_prov_dom->nxdom_md_type !=
1826 	    pp->pp_md_type ||
1827 	    nxdom_prov->nxdom_prov_dom->nxdom_md_subtype !=
1828 	    pp->pp_md_subtype) {
1829 		SK_ERR("Mismatch in metadata type/subtype "
1830 		    "(%u/%u != %u/%u)", pp->pp_md_type,
1831 		    nxdom_prov->nxdom_prov_dom->nxdom_md_type,
1832 		    pp->pp_md_subtype,
1833 		    nxdom_prov->nxdom_prov_dom->nxdom_md_subtype);
1834 		return EINVAL;
1835 	}
1836 
1837 	/*
1838 	 * Require that the nexus provider memory configuration
1839 	 * has the same impedance as the caller-provided one:
1840 	 * both flags must be either set or clear; if one is
1841 	 * set and the other isn't, then we bail.
1842 	 */
1843 	if (!!(pp->pp_buf_region->skr_mode & SKR_MODE_MONOLITHIC) ^
1844 	    !!(nxprov->nxprov_ext.nxpi_flags & NXPIF_MONOLITHIC)) {
1845 		SK_ERR("Memory config mismatch: monolithic mode");
1846 		return EINVAL;
1847 	}
1848 
1849 	return 0;
1850 }
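
/*
 * Worked example (editorial note, not part of the original source):
 * the XOR test above accepts a caller-provided pool only when both
 * sides agree on the monolithic memory mode:
 *
 *	pp SKR_MODE_MONOLITHIC | prov NXPIF_MONOLITHIC | result
 *	-----------------------+-----------------------+---------
 *	           0           |           0           | 0 (OK)
 *	           0           |           1           | EINVAL
 *	           1           |           0           | EINVAL
 *	           1           |           1           | 0 (OK)
 */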
1851 
1852 struct kern_nexus *
1853 nx_create(struct nxctl *nxctl, const uuid_t nxprov_uuid,
1854     const nexus_type_t dom_type, const void *nx_ctx,
1855     nexus_ctx_release_fn_t nx_ctx_release, struct kern_pbufpool *tx_pp,
1856     struct kern_pbufpool *rx_pp, int *err)
1857 {
1858 	struct kern_nexus_domain_provider *nxdom_prov;
1859 	struct kern_nexus_provider *nxprov = NULL;
1860 	struct kern_nexus *nx = NULL;
1861 #if SK_LOG
1862 	uuid_string_t uuidstr;
1863 #endif /* SK_LOG */
1864 
1865 	NXCTL_LOCK_ASSERT_HELD(nxctl);
1866 
1867 	ASSERT(dom_type < NEXUS_TYPE_MAX);
1868 	ASSERT(!uuid_is_null(nxprov_uuid));
1869 	*err = 0;
1870 
1871 	SK_LOCK();
1872 
1873 	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
1874 		if (nxctl == nxprov->nxprov_ctl &&
1875 		    uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
1876 			break;
1877 		}
1878 	}
1879 
1880 	if (nxprov == NULL || (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
1881 		SK_ERR("Provider not found or has been closed");
1882 		*err = ENOENT;
1883 		goto done;
1884 	}
1885 
1886 	nxdom_prov = nxprov->nxprov_dom_prov;
1887 	if (dom_type != NEXUS_TYPE_UNDEFINED &&
1888 	    (nxdom_prov->nxdom_prov_dom->nxdom_type != dom_type)) {
1889 		SK_ERR("Mismatch in domain type (0x%x != 0x%x)",
1890 		    dom_type, nxdom_prov->nxdom_prov_dom->nxdom_type);
1891 		nxdom_prov = NULL;
1892 		nxprov = NULL;
1893 		*err = ENODEV;
1894 		goto done;
1895 	}
1896 
1897 	if ((dom_type == NEXUS_TYPE_NET_IF) && NXPROV_LLINK(nxprov) &&
1898 	    (!tx_pp || !rx_pp)) {
1899 #if SK_LOG
1900 		SK_ERR("TX/RX packet pool is required for netif logical link "
1901 		    "nexus provider UUID: %s",
1902 		    sk_uuid_unparse(nxprov_uuid, uuidstr));
1903 #endif /* SK_LOG */
1904 		nxdom_prov = NULL;
1905 		nxprov = NULL;
1906 		*err = EINVAL;
1907 		goto done;
1908 	}
1909 
1910 	if ((tx_pp != NULL && (*err = nx_check_pp(nxprov, tx_pp)) != 0) ||
1911 	    (rx_pp != NULL && (*err = nx_check_pp(nxprov, rx_pp)) != 0)) {
1912 		goto done;
1913 	}
1914 
1915 	nx = nx_alloc(Z_WAITOK);
1916 
1917 	STAILQ_INIT(&nx->nx_ch_head);
1918 	STAILQ_INIT(&nx->nx_ch_nonxref_head);
1919 	lck_rw_init(&nx->nx_ch_if_adv_lock, &nexus_lock_group,
1920 	    &nexus_lock_attr);
1921 	STAILQ_INIT(&nx->nx_ch_if_adv_head);
1922 	uuid_generate_random(nx->nx_uuid);
1923 	nx->nx_prov = nxprov;
1924 	nx->nx_ctx = (void *)(uintptr_t)nx_ctx;
1925 	nx->nx_ctx_release = nx_ctx_release;
1926 	nx->nx_id = nxdom_prov->nxdom_prov_gencnt++;
1927 
1928 	if (tx_pp != NULL) {
1929 		nx->nx_tx_pp = tx_pp;
1930 		pp_retain(tx_pp);       /* released by nx_free */
1931 	}
1932 
1933 	if (rx_pp != NULL) {
1934 		nx->nx_rx_pp = rx_pp;
1935 		pp_retain(rx_pp);       /* released by nx_free */
1936 	}
1937 
1938 	/* this nexus is alive; tell the nexus constructor to set it up */
1939 	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor != NULL) {
1940 		*err = nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor(nx);
1941 		if (*err != 0) {
1942 			nx->nx_prov = NULL;
1943 			goto done;
1944 		}
1945 	}
1946 
1947 	nxprov_retain_locked(nxprov);   /* hold a ref on the nexus reg */
1948 
1949 	STAILQ_INSERT_TAIL(&nxprov->nxprov_nx_head, nx, nx_prov_link);
1950 	nxprov->nxprov_nx_count++;
1951 	RB_INSERT(kern_nexus_tree, &nx_head, nx);
1952 	atomic_bitset_32(&nx->nx_flags, NXF_ATTACHED);
1953 
1954 	nx_retain_locked(nx);   /* one for the provider list */
1955 	nx_retain_locked(nx);   /* one for the global list */
1956 	nx_retain_locked(nx);   /* one for the caller */
1957 
1958 #if SK_LOG
1959 	SK_D("nexus 0x%llx (%s:%s) UUID %s", SK_KVA(nx),
1960 	    nxdom_prov->nxdom_prov_dom->nxdom_name,
1961 	    nxdom_prov->nxdom_prov_name, sk_uuid_unparse(nx->nx_uuid, uuidstr));
1962 #endif /* SK_LOG */
1963 done:
1964 	SK_UNLOCK();
1965 
1966 	if (*err != 0) {
1967 		if (nx != NULL) {
1968 			nx_free(nx);
1969 			nx = NULL;
1970 		}
1971 	}
1972 	return nx;
1973 }
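
/*
 * Usage sketch (illustrative only; error handling elided).  A caller
 * holding the nxctl lock instantiates a registered provider and later
 * tears the instance down.  nx_create() returns the nexus with one
 * reference held for the caller, which must be dropped separately
 * from nx_destroy():
 *
 *	int err;
 *	struct kern_nexus *nx;
 *
 *	nx = nx_create(nxctl, prov_uuid, NEXUS_TYPE_UNDEFINED, ctx,
 *	    ctx_release, NULL, NULL, &err);
 *	if (nx != NULL) {
 *		...
 *		(void) nx_destroy(nxctl, nx->nx_uuid);
 *		(void) nx_release(nx);
 *	}
 */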
1974 
1975 int
1976 nx_destroy(struct nxctl *nxctl, const uuid_t nx_uuid)
1977 {
1978 	struct kern_nexus *nx = NULL;
1979 	struct kern_nexus find;
1980 	int err = 0;
1981 
1982 	NXCTL_LOCK_ASSERT_HELD(nxctl);
1983 
1984 	SK_LOCK();
1985 
1986 	uuid_copy(find.nx_uuid, nx_uuid);
1987 	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
1988 	if (nx != NULL && nxctl != NX_PROV(nx)->nxprov_ctl) {
1989 		nx = NULL;
1990 	}
1991 
1992 	if (nx != NULL) {
1993 		nx_retain_locked(nx);
1994 	}
1995 
1996 	if (nx == NULL) {
1997 		err = ENOENT;
1998 	} else {
1999 		err = nx_close(nx, TRUE);
2000 		(void) nx_release_locked(nx);
2001 	}
2002 
2003 	SK_UNLOCK();
2004 
2005 	return err;
2006 }
2007 
2008 static inline int
2009 nx_cmp(const struct kern_nexus *a, const struct kern_nexus *b)
2010 {
2011 	return uuid_compare(a->nx_uuid, b->nx_uuid);
2012 }
2013 
2014 struct kern_nexus *
2015 nx_find(const uuid_t nx_uuid, boolean_t locked)
2016 {
2017 	struct kern_nexus *nx = NULL;
2018 	struct kern_nexus find;
2019 
2020 	if (!locked) {
2021 		SK_LOCK();
2022 	}
2023 
2024 	SK_LOCK_ASSERT_HELD();
2025 
2026 	uuid_copy(find.nx_uuid, nx_uuid);
2027 	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
2028 	if (nx != NULL && (nx->nx_flags & NXF_CLOSED)) {
2029 		nx = NULL;
2030 	}
2031 
2032 	/* return reference to caller */
2033 	if (nx != NULL) {
2034 		nx_retain_locked(nx);
2035 	}
2036 
2037 	if (!locked) {
2038 		SK_UNLOCK();
2039 	}
2040 
2041 	return nx;
2042 }
2043 
2044 int
2045 nx_close(struct kern_nexus *nx, boolean_t locked)
2046 {
2047 	int err = 0;
2048 
2049 	if (!locked) {
2050 		SK_LOCK();
2051 	}
2052 
2053 	SK_LOCK_ASSERT_HELD();
2054 
2055 
2056 	if (nx->nx_flags & NXF_CLOSED) {
2057 		err = EALREADY;
2058 	} else {
2059 #if SK_LOG
2060 		uuid_string_t uuidstr;
2061 		SK_D("nexus 0x%llx (%s:%s) UUID %s flags 0x%b", SK_KVA(nx),
2062 		    NX_DOM(nx)->nxdom_name, NX_DOM_PROV(nx)->nxdom_prov_name,
2063 		    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags,
2064 		    NXF_BITS);
2065 #endif /* SK_LOG */
2066 
2067 		if (STAILQ_EMPTY(&nx->nx_ch_head)) {
2068 			/* no regular channels open to it, so detach now */
2069 			nx_detach(nx);
2070 		} else {
2071 			/* detach when the last channel closes */
2072 			ASSERT(nx->nx_refcnt > 3);
2073 			atomic_bitset_32(&nx->nx_flags, NXF_CLOSED);
2074 		}
2075 	}
2076 
2077 	if (!locked) {
2078 		SK_UNLOCK();
2079 	}
2080 
2081 	return err;
2082 }
2083 
2084 void
2085 nx_stop(struct kern_nexus *nx)
2086 {
2087 	struct kern_nexus_provider *nxprov = nx->nx_prov;
2088 
2089 	SK_LOCK_ASSERT_HELD();
2090 
2091 	/* send a stop message */
2092 	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_stop != NULL) {
2093 		nxprov->nxprov_dom_prov->nxdom_prov_nx_stop(nx);
2094 	}
2095 }
2096 
2097 void
2098 nx_detach(struct kern_nexus *nx)
2099 {
2100 	struct kern_nexus_provider *nxprov = nx->nx_prov;
2101 
2102 	SK_LOCK_ASSERT_HELD();
2103 
2104 #if SK_LOG
2105 	uuid_string_t uuidstr;
2106 	SK_D("nexus 0x%llx UUID %s flags 0x%b", SK_KVA(nx),
2107 	    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags, NXF_BITS);
2108 #endif /* SK_LOG */
2109 
2110 	/* Caller must hold extra refs, on top of the two in reg/global lists */
2111 	ASSERT(nx->nx_refcnt >= 3);
2112 	ASSERT(nx->nx_flags & NXF_ATTACHED);
2113 
2114 	/* this nexus is done; let the nexus destructor do final cleanups */
2115 	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor != NULL) {
2116 		nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor(nx);
2117 	}
2118 
2119 	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
2120 	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));
2121 
2122 	STAILQ_REMOVE(&nxprov->nxprov_nx_head, nx, kern_nexus, nx_prov_link);
2123 	nxprov->nxprov_nx_count--;
2124 	RB_REMOVE(kern_nexus_tree, &nx_head, nx);
2125 	atomic_bitclear_32(&nx->nx_flags, NXF_ATTACHED);
2126 	nx->nx_prov = NULL;
2127 	if (nx->nx_ctx_release != NULL) {
2128 		nx->nx_ctx_release(nx->nx_ctx);
2129 	}
2130 	nx->nx_ctx = NULL;
2131 
2132 	(void) nx_release_locked(nx);   /* one for the reg list */
2133 	(void) nx_release_locked(nx);   /* one for the global list */
2134 
2135 	/*
2136 	 * If this was the last nexus and the provider has been closed,
2137 	 * detach the provider and finish up the postponed job.
2138 	 */
2139 	if (STAILQ_EMPTY(&nxprov->nxprov_nx_head) &&
2140 	    (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
2141 		nxprov_detach(nxprov, TRUE);
2142 	}
2143 	(void) nxprov_release_locked(nxprov);
2144 }
2145 
2146 int
2147 nx_advisory_alloc(struct kern_nexus *nx, const char *name,
2148     struct skmem_region_params *srp_nexusadv, nexus_advisory_type_t type)
2149 {
2150 	struct __kern_nexus_adv_metadata *adv_md;
2151 
2152 	_CASSERT(sizeof(struct __kern_nexus_adv_metadata) == sizeof(uint64_t));
2153 	_CASSERT((sizeof(struct sk_nexusadv) +
2154 	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
2155 	_CASSERT((sizeof(struct netif_nexus_advisory) +
2156 	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
2157 	ASSERT(nx->nx_adv.nxv_reg == NULL);
2158 	ASSERT(nx->nx_adv.nxv_adv == NULL);
2159 	ASSERT(type == NEXUS_ADVISORY_TYPE_FLOWSWITCH ||
2160 	    type == NEXUS_ADVISORY_TYPE_NETIF);
2161 
2162 	if ((nx->nx_adv.nxv_reg = skmem_region_create(name, srp_nexusadv,
2163 	    NULL, NULL, NULL)) == NULL) {
2164 		return ENOMEM;
2165 	}
2166 
2167 	nx->nx_adv.nxv_adv = skmem_region_alloc(nx->nx_adv.nxv_reg, NULL,
2168 	    NULL, NULL, (SKMEM_NOSLEEP | SKMEM_PANIC));
2169 	adv_md = nx->nx_adv.nxv_adv;
2170 	adv_md->knam_version = NX_ADVISORY_MD_CURRENT_VERSION;
2171 	adv_md->knam_type = type;
2172 	adv_md->__reserved = 0;
2173 	nx->nx_adv.nxv_adv_type = type;
2174 	nx->nx_adv.flowswitch_nxv_adv = (void *)(adv_md + 1);
2175 	if (type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
2176 		nx->nx_adv.flowswitch_nxv_adv->nxadv_ver =
2177 		    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION;
2178 	} else {
2179 		nx->nx_adv.netif_nxv_adv->nna_version =
2180 		    NX_NETIF_ADVISORY_CURRENT_VERSION;
2181 	}
2182 	return 0;
2183 }
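
/*
 * Layout note (editorial): the advisory region begins with the 8-byte
 * __kern_nexus_adv_metadata header, immediately followed by the
 * type-specific advisory structure, which is why the code above points
 * flowswitch_nxv_adv at (adv_md + 1) and why the _CASSERTs bound both
 * layouts by NX_NEXUSADV_MAX_SZ:
 *
 *	+-------------------------------------+ <- nxv_adv
 *	| struct __kern_nexus_adv_metadata    |    (8 bytes)
 *	+-------------------------------------+ <- (adv_md + 1)
 *	| struct sk_nexusadv or               |
 *	| struct netif_nexus_advisory         |
 *	+-------------------------------------+
 */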
2184 
2185 void
2186 nx_advisory_free(struct kern_nexus *nx)
2187 {
2188 	if (nx->nx_adv.nxv_reg != NULL) {
2189 		ASSERT(nx->nx_adv.nxv_adv != NULL);
2190 		skmem_region_free(nx->nx_adv.nxv_reg,
2191 		    nx->nx_adv.nxv_adv, NULL);
2192 		nx->nx_adv.nxv_adv = NULL;
2193 		nx->nx_adv.nxv_adv_type = NEXUS_ADVISORY_TYPE_INVALID;
2194 		nx->nx_adv.flowswitch_nxv_adv = NULL;
2195 		skmem_region_release(nx->nx_adv.nxv_reg);
2196 		nx->nx_adv.nxv_reg = NULL;
2197 	}
2198 
2199 	ASSERT(nx->nx_adv.nxv_reg == NULL);
2200 	ASSERT(nx->nx_adv.nxv_adv == NULL);
2201 	ASSERT(nx->nx_adv.nxv_adv_type == NEXUS_ADVISORY_TYPE_INVALID);
2202 	ASSERT(nx->nx_adv.flowswitch_nxv_adv == NULL);
2203 }
2204 
2205 static struct kern_nexus *
2206 nx_alloc(zalloc_flags_t how)
2207 {
2208 	SK_LOCK_ASSERT_HELD();
2209 
2210 	return zalloc_flags(nx_zone, how | Z_ZERO);
2211 }
2212 
2213 static void
2214 nx_free(struct kern_nexus *nx)
2215 {
2216 	ASSERT(!(nx->nx_flags & NXF_ATTACHED) && nx->nx_prov == NULL);
2217 	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
2218 	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));
2219 
2220 	nx_port_free_all(nx);
2221 
2222 	if (nx->nx_tx_pp != NULL) {
2223 		pp_release(nx->nx_tx_pp);
2224 		nx->nx_tx_pp = NULL;
2225 	}
2226 	if (nx->nx_rx_pp != NULL) {
2227 		pp_release(nx->nx_rx_pp);
2228 		nx->nx_rx_pp = NULL;
2229 	}
2230 
2231 	ASSERT(STAILQ_EMPTY(&nx->nx_ch_if_adv_head));
2232 	lck_rw_destroy(&nx->nx_ch_if_adv_lock, &nexus_lock_group);
2233 
2234 	SK_DF(SK_VERB_MEM, "nexus 0x%llx FREE", SK_KVA(nx));
2235 	zfree(nx_zone, nx);
2236 }
2237 
2238 void
2239 nx_retain_locked(struct kern_nexus *nx)
2240 {
2241 	SK_LOCK_ASSERT_HELD();
2242 
2243 	nx->nx_refcnt++;
2244 	VERIFY(nx->nx_refcnt > 0);
2245 }
2246 
2247 void
2248 nx_retain(struct kern_nexus *nx)
2249 {
2250 	SK_LOCK();
2251 	nx_retain_locked(nx);
2252 	SK_UNLOCK();
2253 }
2254 
2255 int
2256 nx_release_locked(struct kern_nexus *nx)
2257 {
2258 	int oldref = nx->nx_refcnt;
2259 
2260 	SK_LOCK_ASSERT_HELD();
2261 
2262 	VERIFY(nx->nx_refcnt > 0);
2263 	if (--nx->nx_refcnt == 0) {
2264 		nx_free(nx);
2265 	}
2266 
2267 	return oldref == 1;
2268 }
2269 
2270 int
2271 nx_release(struct kern_nexus *nx)
2272 {
2273 	int lastref;
2274 
2275 	SK_LOCK_ASSERT_NOTHELD();
2276 
2277 	SK_LOCK();
2278 	lastref = nx_release_locked(nx);
2279 	SK_UNLOCK();
2280 
2281 	return lastref;
2282 }
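
/*
 * Reference-counting sketch (illustrative): nx_find() returns a
 * retained nexus, and the release routines report whether the caller
 * dropped the final reference:
 *
 *	struct kern_nexus *nx = nx_find(uuid, FALSE);
 *	if (nx != NULL) {
 *		...
 *		if (nx_release(nx)) {
 *			... that was the final reference; nx is freed ...
 *		}
 *	}
 */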
2283 
2284 static int
2285 nx_init_rings(struct kern_nexus *nx, struct kern_channel *ch)
2286 {
2287 	struct kern_nexus_provider *nxprov = NX_PROV(nx);
2288 	struct nexus_adapter *na = ch->ch_na;
2289 	boolean_t undo = FALSE;
2290 	int ksd_retains = 0;
2291 	enum txrx t;
2292 	int err = 0;
2293 
2294 	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)) ==
2295 	    CHANF_EXT_PRECONNECT);
2296 
2297 	if (nxprov->nxprov_ext.nxpi_ring_init == NULL) {
2298 		return 0;
2299 	}
2300 
2301 	for_rx_tx(t) {
2302 		uint32_t i;
2303 
2304 		for (i = 0; i < na_get_nrings(na, t); i++) {
2305 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2306 
2307 			/* skip host rings */
2308 			if (kring->ckr_flags & CKRF_HOST) {
2309 				continue;
2310 			}
2311 
2312 			if ((err = nxprov->nxprov_ext.nxpi_ring_init(
2313 				    nxprov, nx, ch, kring, (kring->ckr_tx == NR_TX),
2314 				    &kring->ckr_ctx)) != 0) {
2315 				SK_D("ch 0x%llx flags %b nx 0x%llx kr \"%s\" "
2316 				    "(0x%llx) krflags %b ring_init error %d",
2317 				    SK_KVA(ch), ch->ch_flags, CHANF_BITS,
2318 				    SK_KVA(nx), kring->ckr_name, SK_KVA(kring),
2319 				    kring->ckr_flags, CKRF_BITS, err);
2320 				kring->ckr_ctx = NULL;
2321 				undo = TRUE;
2322 				break;
2323 			}
2324 			kring->ckr_flags |= CKRF_EXT_RING_INITED;
2325 
2326 			if ((err = nx_init_slots(nx, kring)) != 0) {
2327 				undo = TRUE;
2328 				break;
2329 			}
2330 
2331 			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
2332 				++ksd_retains;
2333 			}
2334 		}
2335 		if (undo) {
2336 			break;
2337 		}
2338 	}
2339 
2340 	/*
2341 	 * Note: retain the KSD region even in case of error, since
2342 	 * CKRF_EXT_SLOTS_INITED has been set for some of the rings;
2343 	 * nx_fini_rings() will release it based on that flag.
2344 	 */
2345 	if (ksd_retains != 0) {
2346 		/*
2347 		 * Mark the kernel slot descriptor region as busy; this
2348 		 * prevents it from being torn-down at channel defunct
2349 		 * time, as we need to invoke the slot_fini() callback
2350 		 * for each slot and we need the descriptors until then.
2351 		 */
2352 		skmem_arena_nexus_sd_set_noidle(skmem_arena_nexus(na->na_arena),
2353 		    ksd_retains);
2354 	}
2355 
2356 	if (err != 0) {
2357 		ASSERT(undo);
2358 		nx_fini_rings(nx, ch);
2359 	}
2360 
2361 	return err;
2362 }
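
/*
 * Provider callback sketch (hypothetical; my_ring_ctx, mrc_is_tx and
 * my_ring_ctx_alloc() are illustrative names, assuming the
 * nxpi_ring_init signature used at the call site above).  The callback
 * either returns 0 and deposits a per-ring context, or returns an
 * error, in which case nx_init_rings() unwinds via nx_fini_rings():
 *
 *	static errno_t
 *	my_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nx,
 *	    kern_channel_t ch, kern_channel_ring_t ring, boolean_t is_tx,
 *	    void **ring_ctx)
 *	{
 *		struct my_ring_ctx *rc = my_ring_ctx_alloc();
 *
 *		if (rc == NULL) {
 *			return ENOMEM;
 *		}
 *		rc->mrc_is_tx = is_tx;
 *		*ring_ctx = rc;
 *		return 0;
 *	}
 */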
2363 
2364 static void
2365 nx_fini_rings(struct kern_nexus *nx, struct kern_channel *ch)
2366 {
2367 	struct kern_nexus_provider *nxprov = NX_PROV(nx);
2368 	struct nexus_adapter *na = ch->ch_na;
2369 	int ksd_releases = 0;
2370 	enum txrx t;
2371 
2372 	for_rx_tx(t) {
2373 		uint32_t i;
2374 
2375 		for (i = 0; i < na_get_nrings(na, t); i++) {
2376 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2377 
2378 			if (!(kring->ckr_flags & CKRF_EXT_RING_INITED)) {
2379 				continue;
2380 			}
2381 
2382 			ASSERT(!(kring->ckr_flags & CKRF_HOST));
2383 			ASSERT(nxprov->nxprov_ext.nxpi_ring_fini != NULL);
2384 			nxprov->nxprov_ext.nxpi_ring_fini(nxprov, nx, kring);
2385 			kring->ckr_flags &= ~CKRF_EXT_RING_INITED;
2386 
2387 			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
2388 				++ksd_releases;
2389 			}
2390 
2391 			/*
2392 			 * Undo the work done in nx_init_slots() and inform
2393 			 * the external domain provider, if applicable, that
2394 			 * the slots for this ring are no longer valid.
2395 			 */
2396 			nx_fini_slots(nx, kring);
2397 			kring->ckr_ctx = NULL;
2398 		}
2399 	}
2400 
2401 	if (ksd_releases != 0) {
2402 		/*
2403 		 * Now that we've finished invoking the slot_fini()
2404 		 * callbacks, release the busy retain counts held
2405 		 * earlier in nx_init_rings().  This will allow the
2406 		 * kernel slot descriptor region to be torn down.
2407 		 */
2408 		skmem_arena_nexus_sd_set_noidle(
2409 			skmem_arena_nexus(na->na_arena), -ksd_releases);
2410 	}
2411 }
2412 
2413 static int
2414 nx_init_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
2415 {
2416 	struct kern_nexus_provider *nxprov = NX_PROV(nx);
2417 	struct __slot_desc *slot = kring->ckr_ksds;
2418 	int err = 0;
2419 	uint32_t i;
2420 
2421 	/*
2422 	 * If the slot init callback was not provided, or if the
2423 	 * kring was not created to hold any slot contexts, don't
2424 	 * go any further.
2425 	 */
2426 	if (nxprov->nxprov_ext.nxpi_slot_init == NULL ||
2427 	    kring->ckr_slot_ctxs == NULL) {
2428 		return 0;
2429 	}
2430 
2431 	ASSERT(kring->ckr_slot_ctxs_set == 0);
2432 	ASSERT(slot != NULL);
2433 
2434 	for (i = 0; i < kring->ckr_num_slots; i++) {
2435 		struct kern_slot_prop *slot_ctx_prop = NULL;
2436 		void *slot_ctx_arg = NULL;
2437 
2438 		ASSERT(&slot[i] <= kring->ckr_ksds_last);
2439 		if ((err = nxprov->nxprov_ext.nxpi_slot_init(nxprov, nx, kring,
2440 		    &slot[i], i, &slot_ctx_prop, &slot_ctx_arg)) != 0) {
2441 			SK_D("nx 0x%llx kr \"%s\" (0x%llx) krflags %b slot %u "
2442 			    "slot_init error %d", SK_KVA(nx), kring->ckr_name,
2443 			    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, i, err);
2444 			break;
2445 		}
2446 		/* we don't want this to be used by the client, so verify here */
2447 		ASSERT(slot_ctx_prop == NULL);
2448 		kring->ckr_slot_ctxs[i].slot_ctx_arg =
2449 		    (mach_vm_address_t)slot_ctx_arg;
2450 		kring->ckr_slot_ctxs_set++;
2451 	}
2452 
2453 	if (err != 0) {
2454 		nx_fini_slots(nx, kring);
2455 	} else {
2456 		kring->ckr_flags |= CKRF_EXT_SLOTS_INITED;
2457 	}
2458 
2459 	return err;
2460 }
2461 
2462 static void
2463 nx_fini_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
2464 {
2465 	struct kern_nexus_provider *nxprov = NX_PROV(nx);
2466 	struct __slot_desc *slot = kring->ckr_ksds;
2467 	uint32_t i;
2468 
2469 	ASSERT(!(kring->ckr_flags & CKRF_EXT_SLOTS_INITED) ||
2470 	    nxprov->nxprov_ext.nxpi_slot_fini != NULL);
2471 	ASSERT(slot != NULL || !(kring->ckr_flags & CKRF_EXT_SLOTS_INITED));
2472 
2473 	for (i = 0; i < kring->ckr_slot_ctxs_set; i++) {
2474 		ASSERT(slot != NULL && &slot[i] <= kring->ckr_ksds_last);
2475 		if (nxprov->nxprov_ext.nxpi_slot_fini != NULL) {
2476 			nxprov->nxprov_ext.nxpi_slot_fini(nxprov, nx,
2477 			    kring, &slot[i], i);
2478 		}
2479 		if (kring->ckr_slot_ctxs != NULL) {
2480 			kring->ckr_slot_ctxs[i].slot_ctx_arg = 0;
2481 		}
2482 	}
2483 	kring->ckr_slot_ctxs_set = 0;
2484 
2485 	/* We're done with this kring */
2486 	kring->ckr_flags &= ~CKRF_EXT_SLOTS_INITED;
2487 }
2488 
2489 
2490 /* 64-bit mask with range */
2491 #define BMASK64(_beg, _end)     \
2492 	((NX_PORT_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))
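
/*
 * Worked example (assuming NX_PORT_CHUNK_FREE is the all-ones 64-bit
 * pattern, per the set-bit-means-free convention used below):
 *
 *	BMASK64(2, 4) = (~0ULL >> (63 - 4)) & ~((1ULL << 2) - 1)
 *	              = 0x1f & ~0x3
 *	              = 0x1c, i.e. bits 2..4 inclusive are set
 */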
2493 
2494 int
2495 nx_port_find(struct kern_nexus *nx, nexus_port_t first,
2496     nexus_port_t last, nexus_port_t *nx_port)
2497 {
2498 	int err = 0;
2499 
2500 	ASSERT(first < last);
2501 	*nx_port = NEXUS_PORT_ANY;
2502 
2503 	if (nx->nx_num_ports == 0 || (first + 1) >= nx->nx_num_ports) {
2504 		/*
2505 		 * Left edge of the range is beyond the current map;
2506 		 * let nx_port_alloc() handle the growing later.
2507 		 */
2508 		*nx_port = first;
2509 	} else {
2510 		uint32_t fc = (first / NX_PORT_CHUNK);
2511 		uint32_t lc = (MIN(last, nx->nx_num_ports) / NX_PORT_CHUNK);
2512 		uint32_t lim = (nx->nx_num_ports / NX_PORT_CHUNK);
2513 		uint32_t i, j;
2514 		bitmap_t *bmap;
2515 
2516 		/*
2517 		 * The right edge of the range is either within or
2518 		 * beyond the current map; scan thru the current
2519 		 * map and find the first available port.
2520 		 */
2521 		for (i = fc; i <= lc; i++) {
2522 			bitmap_t mask;
2523 			uint32_t beg = 0, end = 63;
2524 
2525 			if (i == fc) {
2526 				beg = (first % NX_PORT_CHUNK);
2527 			}
2528 			if (i == (last / NX_PORT_CHUNK)) {
2529 				end = (last % NX_PORT_CHUNK);
2530 			}
2531 
2532 			if (i < lim) {
2533 				bmap = &nx->nx_ports_bmap[i];
2534 				mask = BMASK64(beg, end);
2535 
2536 				j = ffsll((*bmap) & mask);
2537 				if (j == 0) {
2538 					continue;
2539 				}
2540 
2541 				--j;
2542 				*nx_port = (i * NX_PORT_CHUNK) + j;
2543 			}
2544 			break;
2545 		}
2546 
2547 		/*
2548 		 * If the requested range is within the current map and we
2549 		 * couldn't find a port, return an err.  Otherwise, return
2550 		 * the next port index to trigger growing later.
2551 		 */
2552 		if (*nx_port == NEXUS_PORT_ANY) {
2553 			if (lc == (last / NX_PORT_CHUNK)) {
2554 				err = EBUSY;
2555 				SK_ERR("port unavail in [%u, %u)", first, last);
2556 			} else {
2557 				*nx_port = nx->nx_num_ports;
2558 			}
2559 		}
2560 	}
2561 
2562 	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d (err %d)", SK_KVA(nx),
2563 	    (int)*nx_port, err);
2564 
2565 	return err;
2566 }
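
/*
 * Usage sketch (illustrative): callers typically probe a range with
 * nx_port_find() and then claim the result with nx_port_alloc().  A
 * returned index at or beyond nx_num_ports is deliberate; it makes
 * nx_port_alloc() grow the port map on demand:
 *
 *	nexus_port_t port;
 *
 *	if (nx_port_find(nx, first, last, &port) == 0) {
 *		err = nx_port_alloc(nx, port, nxb, &na, p);
 *	}
 */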
2567 
2568 static int
2569 nx_port_grow(struct kern_nexus *nx, uint32_t grow)
2570 {
2571 	nexus_port_t dom_port_max = NXDOM_MAX(NX_DOM(nx), ports);
2572 	struct nx_port_info *ports;
2573 	size_t limit;
2574 	uint32_t i, num_ports, old_num_ports;
2575 	bitmap_t *bmap;
2576 
2577 	ASSERT(grow > 0 && (grow % NX_PORT_CHUNK) == 0);
2578 	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2579 	_CASSERT((sizeof(*bmap) * 8) == NX_PORT_CHUNK);
2580 	ASSERT(powerof2(dom_port_max));
2581 	ASSERT(dom_port_max % NX_PORT_CHUNK == 0);
2582 
2583 	old_num_ports = nx->nx_num_ports;
2584 	num_ports = nx->nx_num_ports + grow;
2585 	limit = P2ROUNDUP(dom_port_max, NX_PORT_CHUNK);
2586 	if (num_ports > limit) {
2587 		SK_ERR("can't grow, total %u grow %u (new %u > dom_max %u)",
2588 		    nx->nx_num_ports, grow, num_ports, limit);
2589 		return EDOM;
2590 	}
2591 
2592 	if ((bmap = sk_realloc_data(nx->nx_ports_bmap,
2593 	    (old_num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
2594 	    (num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
2595 	    Z_WAITOK, skmem_tag_nx_port)) == NULL) {
2596 		SK_ERR("bmap alloc failed, num_port %u", num_ports);
2597 		return ENOMEM;
2598 	}
2599 	nx->nx_ports_bmap = bmap;
2600 
2601 	if ((ports = sk_realloc_data(nx->nx_ports, old_num_ports * sizeof(*ports),
2602 	    num_ports * sizeof(*ports), Z_WAITOK, skmem_tag_nx_port)) == NULL) {
2603 		/* can't free bmap here, otherwise nexus won't work */
2604 		SK_ERR("nx_ports alloc failed, num_port %u", num_ports);
2605 		return ENOMEM;
2606 	}
2607 
2608 	/* initialize the additional new ports */
2609 	bzero(&ports[nx->nx_num_ports], (grow * sizeof(*ports)));
2610 	nx->nx_ports = ports;
2611 
2612 	/* initialize new bitmaps (set all bits) */
2613 	for (i = (nx->nx_num_ports / NX_PORT_CHUNK);
2614 	    i < (num_ports / NX_PORT_CHUNK); i++) {
2615 		bmap[i] = NX_PORT_CHUNK_FREE;
2616 	}
2617 
2618 	nx->nx_num_ports = num_ports;
2619 
2620 	SK_DF(SK_VERB_NXPORT, "!!! nx 0x%llx ports %u/%u, %u ports added",
2621 	    SK_KVA(nx), nx->nx_active_ports, nx->nx_num_ports, grow);
2622 
2623 	return 0;
2624 }
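
/*
 * Worked example (editorial): with NX_PORT_CHUNK == 64, requesting
 * port 65 on a nexus that currently has 64 ports computes
 * g = P2ROUNDUP(66 - 64, 64) = 64 in nx_port_alloc(), so the map grows
 * to 128 ports (two bitmap_t words); the new word is initialized to
 * NX_PORT_CHUNK_FREE, i.e. all 64 added ports start out free.
 */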
2625 
2626 int
2627 nx_port_alloc(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb,
2628     struct nexus_adapter **na, struct proc *p)
2629 {
2630 	struct nx_port_info *npi = NULL;
2631 	struct nxbind *nxb0;
2632 	size_t g;
2633 	uint32_t i, j;
2634 	bitmap_t *bmap;
2635 	bool refonly = false;
2636 	int err = 0;
2637 
2638 	ASSERT(nx_port != NEXUS_PORT_ANY);
2639 	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2640 
2641 	/* port is zero-based, so adjust here */
2642 	if ((nx_port + 1) > nx->nx_num_ports) {
2643 		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
2644 		VERIFY(g <= UINT32_MAX);
2645 		if ((err = nx_port_grow(nx, (uint32_t)g)) != 0) {
2646 			goto done;
2647 		}
2648 	}
2649 	ASSERT(err == 0);
2650 	ASSERT(nx_port < nx->nx_num_ports);
2651 	npi = &nx->nx_ports[nx_port];
2652 	nxb0 = npi->npi_nxb;
2653 	i = nx_port / NX_PORT_CHUNK;
2654 	j = nx_port % NX_PORT_CHUNK;
2655 	bmap = &nx->nx_ports_bmap[i];
2656 
2657 	if (bit_test(*bmap, j)) {
2658 		/* port is not (yet) bound or allocated */
2659 		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
2660 		if (p != kernproc && !NX_ANONYMOUS_PROV(nx)) {
2661 			/*
2662 			 * If the port allocation is requested by userland
2663 			 * and the nexus is non-anonymous, then fail the
2664 			 * request.
2665 			 */
2666 			err = EACCES;
2667 			SK_ERR("user proc alloc on named nexus needs binding");
2668 		} else if (na != NULL && *na != NULL) {
2669 			/*
2670 			 * Otherwise claim it (clear bit) if the caller
2671 			 * supplied an adapter for this port; else, it
2672 			 * is just an existence check, so there's no
2673 			 * action needed at this point (we'll skip the
2674 			 * init below since the supplied adapter is NULL).
2675 			 */
2676 			bit_clear(*bmap, j);
2677 		}
2678 	} else {
2679 		/* if port is bound, check if credentials match */
2680 		if (nxb0 != NULL && p != kernproc && !NX_ANONYMOUS_PROV(nx) &&
2681 		    (nxb == NULL || !nxb_is_equal(nxb0, nxb))) {
2682 			SK_ERR("nexus binding mismatch");
2683 			err = EACCES;
2684 		} else {
2685 			/*
2686 			 * If port is already occupied by an adapter,
2687 			 * see if the client is requesting a reference
2688 			 * to it; if so, return the adapter.  Otherwise,
2689 			 * if unoccupied and the supplied adapter is non-NULL,
2690 			 * associate it with this nexus port via the init below.
2691 			 */
2692 			if (NPI_NA(npi) != NULL) {
2693 				if (na != NULL && *na == NULL) {
2694 					*na = NPI_NA(npi);
2695 					na_retain_locked(*na);
2696 					/* skip the init below */
2697 					refonly = true;
2698 				} else {
2699 					/*
2700 					 * If the client supplied an adapter
2701 					 * (regardless of its value) for a
2702 					 * nexus port that's already occupied,
2703 					 * then we fail the request.
2704 					 */
2705 					SK_ERR("nexus adapter exists");
2706 					err = EEXIST;
2707 				}
2708 			}
2709 		}
2710 	}
2711 
2712 done:
2713 	/* initialize the nexus port and the adapter occupying it */
2714 	if (err == 0 && na != NULL && *na != NULL && !refonly) {
2715 		ASSERT(nx_port < nx->nx_num_ports);
2716 		ASSERT(npi->npi_nah == 0);
2717 		ASSERT(nx->nx_active_ports < nx->nx_num_ports);
2718 		ASSERT(!bit_test(nx->nx_ports_bmap[nx_port / NX_PORT_CHUNK],
2719 		    (nx_port % NX_PORT_CHUNK)));
2720 
2721 		nx->nx_active_ports++;
2722 		npi->npi_nah = NPI_NA_ENCODE(*na, NEXUS_PORT_STATE_WORKING);
2723 		(*na)->na_nx_port = nx_port;
2724 	}
2725 
2726 	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d, ports %u/%u (err %d)",
2727 	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports,
2728 	    err);
2729 
2730 	return err;
2731 }
2732 
2733 void
2734 nx_port_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
2735 {
2736 	struct nx_port_info *npi = &nx->nx_ports[nx_port];
2737 
2738 	npi->npi_nah = NPI_NA_ENCODE(npi->npi_nah,
2739 	    NEXUS_PORT_STATE_DEFUNCT);
2740 }
2741 
2742 void
2743 nx_port_free(struct kern_nexus *nx, nexus_port_t nx_port)
2744 {
2745 	struct nx_port_info *npi = NULL;
2746 	bitmap_t *bmap;
2747 	uint32_t i, j;
2748 
2749 	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2750 	ASSERT(nx_port != NEXUS_PORT_ANY && nx_port < nx->nx_num_ports);
2751 	ASSERT(nx->nx_active_ports != 0);
2752 
2753 	i = nx_port / NX_PORT_CHUNK;
2754 	j = nx_port % NX_PORT_CHUNK;
2755 	bmap = &nx->nx_ports_bmap[i];
2756 	ASSERT(!bit_test(*bmap, j));
2757 
2758 	npi = &nx->nx_ports[nx_port];
2759 	npi->npi_nah = 0;
2760 	if (npi->npi_nxb == NULL) {
2761 		/* it's vacant, release it (set bit) */
2762 		bit_set(*bmap, j);
2763 	}
2764 
2765 	nx->nx_active_ports--;
2766 
2767 	//XXX [email protected] --- try to shrink bitmap & nx_ports ???
2768 
2769 	SK_DF(SK_VERB_NXPORT, "--- nx 0x%llx nx_port %d, ports %u/%u",
2770 	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports);
2771 }
2772 
2773 int
2774 nx_port_bind_info(struct kern_nexus *nx, nexus_port_t nx_port,
2775     struct nxbind *nxb0, void *info)
2776 {
2777 	struct nx_port_info *npi = NULL;
2778 	size_t g;
2779 	uint32_t i, j;
2780 	bitmap_t *bmap;
2781 	int err = 0;
2782 
2783 	ASSERT(nx_port != NEXUS_PORT_ANY);
2784 	ASSERT(nx_port < NXDOM_MAX(NX_DOM(nx), ports));
2785 	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2786 	ASSERT(nxb0 != NULL);
2787 
2788 	if ((nx_port + 1) > nx->nx_num_ports) {
2789 		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
2790 		VERIFY(g <= UINT32_MAX);
2791 		if ((err = nx_port_grow(nx, (uint32_t)g)) != 0) {
2792 			goto done;
2793 		}
2794 	}
2795 	ASSERT(err == 0);
2796 
2797 	npi = &nx->nx_ports[nx_port];
2798 	i = nx_port / NX_PORT_CHUNK;
2799 	j = nx_port % NX_PORT_CHUNK;
2800 	bmap = &nx->nx_ports_bmap[i];
2801 	if (bit_test(*bmap, j)) {
2802 		/* port is not (yet) bound or allocated */
2803 		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
2804 
2806 		struct nxbind *nxb = nxb_alloc(Z_WAITOK);
2807 		nxb_move(nxb0, nxb);
2808 		npi->npi_nxb = nxb;
2809 		npi->npi_info = info;
2810 		/* claim it (clear bit) */
2811 		bit_clear(*bmap, j);
2812 		ASSERT(err == 0);
2813 	} else {
2814 		/* port is already taken */
2815 		ASSERT(NPI_NA(npi) != NULL || npi->npi_nxb != NULL);
2816 		err = EEXIST;
2817 	}
2818 done:
2819 
2820 	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
2821 	    "+++ nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
2822 	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);
2823 
2824 	return err;
2825 }
2826 
2827 int
2828 nx_port_bind(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb0)
2829 {
2830 	return nx_port_bind_info(nx, nx_port, nxb0, NULL);
2831 }
2832 
2833 static int
2834 nx_port_info_size(void *info, size_t *sz)
2835 {
2836 	struct nx_port_info_header *hdr = info;
2837 
2838 	switch (hdr->ih_type) {
2839 	case NX_PORT_INFO_TYPE_NETIF:
2840 		break;
2841 	default:
2842 		return EINVAL;
2843 	}
2844 	*sz = hdr->ih_size;
2845 	return 0;
2846 }
2847 
2848 int
2849 nx_port_unbind(struct kern_nexus *nx, nexus_port_t nx_port)
2850 {
2851 	struct nx_port_info *npi = NULL;
2852 	struct nxbind *nxb;
2853 	uint32_t i, j;
2854 	bitmap_t *bmap;
2855 	int err = 0;
2856 
2857 	ASSERT(nx_port != NEXUS_PORT_ANY);
2858 
2859 	if (nx_port >= nx->nx_num_ports) {
2860 		err = EDOM;
2861 		goto done;
2862 	}
2863 
2864 	npi = &nx->nx_ports[nx_port];
2865 	i = nx_port / NX_PORT_CHUNK;
2866 	j = nx_port % NX_PORT_CHUNK;
2867 	bmap = &nx->nx_ports_bmap[i];
2868 
2869 	if ((nxb = npi->npi_nxb) == NULL) {
2870 		/* must be either free or allocated */
2871 		ASSERT(NPI_NA(npi) == NULL ||
2872 		    (!bit_test(*bmap, j) && nx->nx_active_ports > 0));
2873 		err = ENOENT;
2874 	} else {
2875 		nxb_free(nxb);
2876 		npi->npi_nxb = NULL;
2877 		if (npi->npi_info != NULL) {
2878 			size_t sz;
2879 
2880 			VERIFY(nx_port_info_size(npi->npi_info, &sz) == 0);
2881 			sk_free_data(npi->npi_info, sz);
2882 			npi->npi_info = NULL;
2883 		}
2884 		ASSERT(!bit_test(*bmap, j));
2885 		if (NPI_NA(npi) == NULL) {
2886 			/* it's vacant, release it (set bit) */
2887 			bit_set(*bmap, j);
2888 		}
2889 	}
2890 
2891 done:
2892 	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
2893 	    "--- nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
2894 	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);
2895 
2896 	return err;
2897 }
2898 
2899 struct nexus_adapter *
2900 nx_port_get_na(struct kern_nexus *nx, nexus_port_t nx_port)
2901 {
2902 	if (nx->nx_ports != NULL && nx->nx_num_ports > nx_port) {
2903 		return NPI_NA(&nx->nx_ports[nx_port]);
2904 	} else {
2905 		return NULL;
2906 	}
2907 }
2908 
2909 int
2910 nx_port_get_info(struct kern_nexus *nx, nexus_port_t port,
2911     nx_port_info_type_t type, void *info, uint32_t len)
2912 {
2913 	struct nx_port_info *npi;
2914 	struct nx_port_info_header *hdr;
2915 
2916 	if (nx->nx_ports == NULL || port >= nx->nx_num_ports) {
2917 		return ENXIO;
2918 	}
2919 	npi = &nx->nx_ports[port];
2920 	hdr = npi->npi_info;
2921 	if (hdr == NULL) {
2922 		return ENOENT;
2923 	}
2924 
2925 	if (hdr->ih_type != type) {
2926 		return EINVAL;
2927 	}
2928 
2929 	bcopy(npi->npi_info, info, len);
2930 	return 0;
2931 }
2932 
2933 bool
2934 nx_port_is_valid(struct kern_nexus *nx, nexus_port_t nx_port)
2935 {
2936 	return nx_port < nx->nx_num_ports;
2937 }
2938 
2939 bool
2940 nx_port_is_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
2941 {
2942 	ASSERT(nx_port_is_valid(nx, nx_port));
2943 
2944 	return NPI_IS_DEFUNCT(&nx->nx_ports[nx_port]);
2945 }
2946 
2947 void
2948 nx_port_free_all(struct kern_nexus *nx)
2949 {
2950 	uint32_t num_ports;
2951 
2952 	/* uncrustify doesn't handle C blocks properly */
2953 	/* BEGIN IGNORE CODESTYLE */
2954 	nx_port_foreach(nx, ^(nexus_port_t p) {
2955 		struct nxbind *nxb;
2956 		void *info;
2957 		nxb = nx->nx_ports[p].npi_nxb;
2958 		info = nx->nx_ports[p].npi_info;
2959 		if (nxb != NULL) {
2960 			nxb_free(nxb);
2961 			nx->nx_ports[p].npi_nxb = NULL;
2962 		}
2963 		if (info != NULL) {
2964 			size_t sz;
2965 
2966 			VERIFY(nx_port_info_size(info, &sz) == 0);
2967 			skn_free_data(info, info, sz);
2968 			nx->nx_ports[p].npi_info = NULL;
2969 		}
2970 	});
2971 	/* END IGNORE CODESTYLE */
2972 
2973 	num_ports = nx->nx_num_ports;
2974 	nx->nx_num_ports = 0;
2975 	nx->nx_active_ports = 0;
2976 	skn_free_data(ports_bmap,
2977 	    nx->nx_ports_bmap, (num_ports / NX_PORT_CHUNK) * sizeof(bitmap_t));
2978 	nx->nx_ports_bmap = NULL;
2979 	skn_free_data(ports,
2980 	    nx->nx_ports, num_ports * sizeof(struct nx_port_info));
2981 	nx->nx_ports = NULL;
2982 }
2983 
2984 void
2985 nx_port_foreach(struct kern_nexus *nx,
2986     void (^port_handle)(nexus_port_t nx_port))
2987 {
2988 	for (uint32_t i = 0; i < (nx->nx_num_ports / NX_PORT_CHUNK); i++) {
2989 		bitmap_t bmap = nx->nx_ports_bmap[i];
2990 
2991 		if (bmap == NX_PORT_CHUNK_FREE) {
2992 			continue;
2993 		}
2994 
2995 		for (uint32_t j = 0; j < NX_PORT_CHUNK; j++) {
2996 			if (bit_test(bmap, j)) {
2997 				continue;
2998 			}
2999 			port_handle((i * NX_PORT_CHUNK) + j);
3000 		}
3001 	}
3002 }
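
/*
 * Usage sketch (illustrative): the block is invoked only for ports
 * whose bitmap bit is clear, i.e. ports that are bound and/or
 * occupied; fully-free chunks are skipped wholesale:
 *
 *	nx_port_foreach(nx, ^(nexus_port_t p) {
 *		SK_DF(SK_VERB_NXPORT, "port %u in use", p);
 *	});
 */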
3003 
3004 /*
3005  * sysctl interfaces
3006  */
3007 static int nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS;
3008 static int nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS;
3009 static int nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS;
3010 
3011 SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_provider_list,
3012     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3013     0, 0, nexus_provider_list_sysctl, "S,nexus_provider_info_t", "");
3014 
3015 SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_channel_list,
3016     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3017     0, 0, nexus_channel_list_sysctl, "S,nexus_channel_entry_t", "");
3018 
3019 SYSCTL_PROC(_kern_skywalk, OID_AUTO, llink_list,
3020     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3021     0, NXMIB_LLINK_LIST, nexus_mib_get_sysctl, "S,nx_llink_info",
3022     "A list of logical links");
3023 
3024 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow,
3025     CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
3026     0, NXMIB_FLOW, nexus_mib_get_sysctl, "S,sk_stats_flow",
3027     "Nexus inet flows with stats collected in kernel");
3028 
3029 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_owner,
3030     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3031     0, NXMIB_FLOW_OWNER, nexus_mib_get_sysctl, "S,sk_stats_flow_owner",
3032     "Nexus flow owners");
3033 
3034 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_route,
3035     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3036     0, NXMIB_FLOW_ROUTE, nexus_mib_get_sysctl, "S,sk_stats_flow_route",
3037     "Nexus flow routes");
3038 
3039 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, net_if,
3040     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3041     0, NXMIB_NETIF_STATS, nexus_mib_get_sysctl, "S,sk_stats_net_if",
3042     "Nexus netif statistics collected in kernel");
3043 
3044 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_switch,
3045     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3046     0, NXMIB_FSW_STATS, nexus_mib_get_sysctl, "S,sk_stats_flow_switch",
3047     "Nexus flowswitch statistics collected in kernel");
3048 
3049 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, userstack,
3050     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3051     0, NXMIB_USERSTACK_STATS, nexus_mib_get_sysctl, "S,sk_stats_userstack",
3052     "Nexus userstack statistics counter");
3053 
3054 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_adv,
3055     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
3056     0, NXMIB_FLOW_ADV, nexus_mib_get_sysctl, "S,sk_stats_flow_adv",
3057     "Nexus flow advisory dump");
3058 
3059 /*
3060  * Provider list sysctl
3061  */
3062 static void
3063 nexus_provider_info_populate(struct kern_nexus_provider *nxprov,
3064     nexus_provider_info_t info)
3065 {
3066 	struct kern_nexus *nx;
3067 	uuid_t *uuids;
3068 
3069 	SK_LOCK_ASSERT_HELD();
3070 
3071 	/* provider UUID + params */
3072 	uuid_copy(info->npi_prov_uuid, nxprov->nxprov_uuid);
3073 	bcopy(nxprov->nxprov_params, &info->npi_prov_params,
3074 	    sizeof(struct nxprov_params));
3075 	info->npi_instance_uuids_count = nxprov->nxprov_nx_count;
3076 
3077 	/* instance UUID list */
3078 	uuids = info->npi_instance_uuids;
3079 	STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
3080 		uuid_copy(*uuids, nx->nx_uuid);
3081 		uuids++;
3082 	}
3083 }
3084 
3085 static int
3086 nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS
3087 {
3088 #pragma unused(arg1, arg2, oidp)
3089 	size_t actual_space;
3090 	caddr_t buffer = NULL;
3091 	size_t buffer_space;
3092 	size_t allocated_space;
3093 	int out_error;
3094 	int error = 0;
3095 	struct kern_nexus_provider *nxprov;
3096 	caddr_t scan;
3097 
3098 	if (!kauth_cred_issuser(kauth_cred_get())) {
3099 		return EPERM;
3100 	}
3101 
3102 	net_update_uptime();
3103 	buffer_space = req->oldlen;
3104 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
3105 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
3106 			buffer_space = SK_SYSCTL_ALLOC_MAX;
3107 		}
3108 		allocated_space = buffer_space;
3109 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
3110 		if (__improbable(buffer == NULL)) {
3111 			return ENOBUFS;
3112 		}
3113 	} else if (req->oldptr == USER_ADDR_NULL) {
3114 		buffer_space = 0;
3115 	}
3116 	actual_space = 0;
3117 	scan = buffer;
3118 	SK_LOCK();
3119 	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
3120 		size_t                  info_size;
3121 
3122 		info_size
3123 		        = NEXUS_PROVIDER_INFO_SIZE(nxprov->nxprov_nx_count);
3124 		if (scan != NULL) {
3125 			if (buffer_space < info_size) {
3126 				/* supplied buffer too small, stop copying */
3127 				error = ENOMEM;
3128 				break;
3129 			}
3130 			nexus_provider_info_populate(nxprov, (void *)scan);
3131 			scan += info_size;
3132 			buffer_space -= info_size;
3133 		}
3134 		actual_space += info_size;
3135 	}
3136 	SK_UNLOCK();
3137 
3138 	out_error = SYSCTL_OUT(req, buffer, actual_space);
3139 	if (out_error != 0) {
3140 		error = out_error;
3141 	}
3142 
3143 	if (buffer != NULL) {
3144 		sk_free_data(buffer, allocated_space);
3145 	}
3146 
3147 	return error;
3148 }
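
/*
 * Userspace sketch (illustrative; error handling elided): like most
 * list sysctls, this handler supports the usual two-pass protocol of
 * sizing with a NULL old pointer, then fetching into a buffer:
 *
 *	size_t len = 0;
 *	sysctlbyname("kern.skywalk.nexus_provider_list",
 *	    NULL, &len, NULL, 0);
 *	void *buf = malloc(len);
 *	sysctlbyname("kern.skywalk.nexus_provider_list",
 *	    buf, &len, NULL, 0);
 */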
3149 
3150 /*
3151  * Channel list sysctl
3152  */
3153 static uint32_t
3154 channel_ring_count(struct kern_channel *ch, enum txrx which)
3155 {
3156 	return ch->ch_last[which] - ch->ch_first[which];
3157 }
3158 
3159 static void
3160 populate_ring_entries(struct __kern_channel_ring *kring,
3161     ring_id_t first, ring_id_t last, nexus_channel_ring_entry_t entries)
3162 {
3163 	ring_id_t i;
3164 	nexus_channel_ring_entry_t scan;
3165 	struct __kern_channel_ring *ring;
3166 
3167 	scan = entries;
3168 	for (i = first; i < last; i++, scan++) {
3169 		ring = &kring[i];
3170 
3171 		DTRACE_SKYWALK1(populate__ring, struct __kern_channel_ring *,
3172 		    ring);
3173 		if (kr_stat_enable == 0) {
3174 			bzero(&scan->ncre_stats, sizeof(scan->ncre_stats));
3175 			bzero(&scan->ncre_user_stats,
3176 			    sizeof(scan->ncre_user_stats));
3177 		} else {
3178 			scan->ncre_stats = ring->ckr_stats;
3179 			scan->ncre_user_stats = ring->ckr_usr_stats;
3180 		}
3181 		scan->ncre_error_stats = ring->ckr_err_stats;
3182 		scan->ncre_ring_id = i;
3183 	}
3184 }
3185 
3186 /* combine/convert ch_mode/ch_flags into nexus_channel_entry flags */
3187 static uint32_t
3188 nexus_channel_get_flags(uint32_t ch_mode, uint32_t ch_flags)
3189 {
3190 	uint32_t flags = 0;
3191 
3192 	flags |= (ch_mode & CHMODE_MONITOR_TX) ? SCHF_MONITOR_TX : 0;
3193 	flags |= (ch_mode & CHMODE_MONITOR_RX) ? SCHF_MONITOR_RX : 0;
3194 	flags |= (ch_mode & CHMODE_MONITOR_NO_COPY) ? SCHF_MONITOR_NO_COPY : 0;
3195 	flags |= (ch_mode & CHMODE_USER_PACKET_POOL) ? SCHF_USER_PACKET_POOL : 0;
3196 	flags |= (ch_mode & CHMODE_DEFUNCT_OK) ? SCHF_DEFUNCT_OK : 0;
3197 	flags |= (ch_mode & CHMODE_FILTER) ? SCHF_FILTER : 0;
3198 	flags |= (ch_mode & CHMODE_EVENT_RING) ? SCHF_EVENT_RING : 0;
3199 	flags |= (ch_mode & CHMODE_EXCLUSIVE) ? SCHF_EXCLUSIVE : 0;
3200 	flags |= (ch_flags & CHANF_IF_ADV) ? SCHF_IF_ADV : 0;
3201 	flags |= (ch_flags & CHANF_DEFUNCT_SKIP) ? SCHF_DEFUNCT_SKIP : 0;
3202 	flags |= (ch_flags & CHANF_CLOSING) ? SCHF_CLOSING : 0;
3203 	flags |= (ch_flags & CHANF_DEFUNCT) ? SCHF_DEFUNCT : 0;
3204 	flags |= (ch_mode & CHMODE_LOW_LATENCY) ? SCHF_LOW_LATENCY : 0;
3205 
3206 	return flags;
3207 }
3208 
3209 SK_NO_INLINE_ATTRIBUTE
3210 static void
3211 nexus_channel_entry_populate(struct kern_channel *ch,
3212     nexus_channel_entry_t entry)
3213 {
3214 	uint32_t ch_mode = ch->ch_info->cinfo_ch_mode;
3215 	uint32_t ch_flags = ch->ch_flags;
3216 	ring_id_t rx_first = ch->ch_first[NR_RX];
3217 	ring_id_t rx_last = ch->ch_last[NR_RX];
3218 	ring_id_t tx_last = ch->ch_last[NR_TX];
3219 	ring_id_t tx_first = ch->ch_first[NR_TX];
3220 
3221 	uuid_copy(entry->nce_uuid, ch->ch_info->cinfo_ch_id);
3222 	entry->nce_flags = nexus_channel_get_flags(ch_mode, ch_flags);
3223 	entry->nce_port = ch->ch_info->cinfo_nx_port;
3224 	entry->nce_pid = ch->ch_pid;
3225 	entry->nce_fd = ch->ch_fd;
3226 	entry->nce_tx_rings = tx_last - tx_first;
3227 	entry->nce_rx_rings = rx_last - rx_first;
3228 	populate_ring_entries(ch->ch_na->na_tx_rings, tx_first, tx_last,
3229 	    entry->nce_ring_entries);
3230 	populate_ring_entries(ch->ch_na->na_rx_rings, rx_first, rx_last,
3231 	    entry->nce_ring_entries + entry->nce_tx_rings);
3232 }
3233 
3234 SK_NO_INLINE_ATTRIBUTE
3235 static size_t
3236 nexus_channel_info_populate(struct kern_nexus *nx,
3237     nexus_channel_info_t info, size_t buffer_size)
3238 {
3239 	struct kern_channel *ch = NULL;
3240 	size_t info_size;
3241 	caddr_t scan = NULL;
3242 
3243 	SK_LOCK_ASSERT_HELD();
3244 
3245 	info_size = sizeof(*info);
3246 
3247 	/* channel list */
3248 	if (info != NULL) {
3249 		if (buffer_size < info_size) {
3250 			return info_size;
3251 		}
3252 
3253 		/* instance UUID */
3254 		uuid_copy(info->nci_instance_uuid, nx->nx_uuid);
3255 		info->nci_channel_entries_count = nx->nx_ch_count;
3256 		scan = (caddr_t)info->nci_channel_entries;
3257 	}
3258 	STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
3259 		size_t          entry_size;
3260 		uint32_t        ring_count;
3261 
3262 		ring_count = channel_ring_count(ch, NR_TX) +
3263 		    channel_ring_count(ch, NR_RX);
3264 		entry_size = NEXUS_CHANNEL_ENTRY_SIZE(ring_count);
3265 		info_size += entry_size;
3266 		if (scan != NULL) {
3267 			if (buffer_size < info_size) {
3268 				return info_size;
3269 			}
3270 
3271 			nexus_channel_entry_populate(ch, (void *)scan);
3272 			scan += entry_size;
3273 		}
3274 	}
3275 	return info_size;
3276 }
3277 
3278 static int
3279 nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS
3280 {
3281 #pragma unused(arg1, arg2, oidp)
3282 	size_t actual_space;
3283 	caddr_t buffer = NULL;
3284 	size_t buffer_space;
3285 	size_t allocated_space;
3286 	int out_error;
3287 	struct kern_nexus *nx;
3288 	int error = 0;
3289 	caddr_t scan;
3290 
3291 	if (!kauth_cred_issuser(kauth_cred_get())) {
3292 		return EPERM;
3293 	}
3294 
3295 	net_update_uptime();
3296 	buffer_space = req->oldlen;
3297 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
3298 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
3299 			buffer_space = SK_SYSCTL_ALLOC_MAX;
3300 		}
3301 		allocated_space = buffer_space;
3302 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
3303 		if (__improbable(buffer == NULL)) {
3304 			return ENOBUFS;
3305 		}
3306 	} else if (req->oldptr == USER_ADDR_NULL) {
3307 		buffer_space = 0;
3308 	}
3309 	actual_space = 0;
3310 	scan = buffer;
3311 	SK_LOCK();
3312 	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
3313 		size_t info_size;
3314 
3315 		info_size = nexus_channel_info_populate(nx, (void *)scan,
3316 		    buffer_space);
3317 		if (scan != NULL) {
3318 			if (buffer_space < info_size) {
3319 				/* supplied buffer too small, stop copying */
3320 				error = ENOMEM;
3321 				break;
3322 			}
3323 			scan += info_size;
3324 			buffer_space -= info_size;
3325 		}
3326 		actual_space += info_size;
3327 	}
3328 	SK_UNLOCK();
3329 
3330 	if (actual_space != 0) {
3331 		out_error = SYSCTL_OUT(req, buffer, actual_space);
3332 		if (out_error != 0) {
3333 			error = out_error;
3334 		}
3335 	}
3336 	if (buffer != NULL) {
3337 		sk_free_data(buffer, allocated_space);
3338 	}
3339 
3340 	return error;
3341 }
3342 
3343 static int
3344 nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS
3345 {
3346 #pragma unused(arg1, arg2)
3347 	struct proc *p = req->p;
3348 	struct nexus_mib_filter filter;
3349 	int error = 0;
3350 	size_t actual_space;
3351 	caddr_t buffer = NULL;
3352 	size_t buffer_space;
3353 	size_t allocated_space;
3354 	int out_error;
3355 	struct kern_nexus *nx;
3356 	caddr_t scan;
3357 
3358 	/* Restrict protocol stats access to root user only (like netstat). */
3359 	if (oidp->oid_arg2 == NXMIB_USERSTACK_STATS &&
3360 	    !kauth_cred_issuser(kauth_cred_get())) {
3361 		SK_ERR("mib request rejected, EPERM");
3362 		return EPERM;
3363 	}
3364 
3365 	if (req->newptr == USER_ADDR_NULL) {
3366 		/* use subcommand for multiple nodes */
3367 		filter.nmf_type = oidp->oid_arg2;
3368 		filter.nmf_bitmap = 0x0;
3369 	} else if (req->newlen != sizeof(struct nexus_mib_filter)) {
3370 		SK_ERR("mis-matching newlen");
3371 		return EINVAL;
3372 	} else {
3373 		error = SYSCTL_IN(req, &filter, sizeof(struct nexus_mib_filter));
3374 		if (error != 0) {
3375 			SK_ERR("SYSCTL_IN err %d", error);
3376 			return error;
3377 		}
3378 		if (filter.nmf_type != oidp->oid_arg2) {
3379 			SK_ERR("mis-matching nmf_type");
3380 			return EINVAL;
3381 		}
3382 	}
3383 
3384 	net_update_uptime();
3385 	buffer_space = req->oldlen;
3386 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
3387 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
3388 			buffer_space = SK_SYSCTL_ALLOC_MAX;
3389 		}
3390 		allocated_space = buffer_space;
3391 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
3392 		if (__improbable(buffer == NULL)) {
3393 			return ENOBUFS;
3394 		}
3395 	} else if (req->oldptr == USER_ADDR_NULL) {
3396 		buffer_space = 0;
3397 	}
3398 	actual_space = 0;
3399 	scan = buffer;
3400 
3401 	SK_LOCK();
3402 	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
3403 		if (NX_DOM_PROV(nx)->nxdom_prov_nx_mib_get == NULL) {
3404 			continue;
3405 		}
3406 
3407 		size_t size;
3408 		struct kern_nexus_domain_provider *nx_dp = NX_DOM_PROV(nx);
3409 
3410 		size = nx_dp->nxdom_prov_nx_mib_get(nx, &filter, scan,
3411 		    buffer_space, p);
3412 
3413 		if (scan != NULL) {
3414 			if (buffer_space < size) {
3415 				/* supplied buffer too small, stop copying */
3416 				error = ENOMEM;
3417 				break;
3418 			}
3419 			scan += size;
3420 			buffer_space -= size;
3421 		}
3422 		actual_space += size;
3423 	}
3424 	SK_UNLOCK();
3425 
3426 	if (actual_space != 0) {
3427 		out_error = SYSCTL_OUT(req, buffer, actual_space);
3428 		if (out_error != 0) {
3429 			error = out_error;
3430 		}
3431 	}
3432 	if (buffer != NULL) {
3433 		sk_free_data(buffer, allocated_space);
3434 	}
3435 
3436 	return error;
3437 }
3438 
3439 void
3440 kern_nexus_walktree(kern_nexus_walktree_f_t *f, void *arg0,
3441     boolean_t is_sk_locked)
3442 {
3443 	struct kern_nexus *nx = NULL;
3444 
3445 	if (!is_sk_locked) {
3446 		SK_LOCK();
3447 	} else {
3448 		SK_LOCK_ASSERT_HELD();
3449 	}
3450 
3451 	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
3452 		(*f)(nx, arg0);
3453 	}
3454 
3455 	if (!is_sk_locked) {
3456 		SK_UNLOCK();
3457 	}
3458 }
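
/*
 * Usage sketch (hypothetical callback): visit every attached nexus
 * without holding SK_LOCK() at the call site:
 *
 *	static void
 *	count_nexus(struct kern_nexus *nx, void *arg0)
 *	{
 *		(*(uint32_t *)arg0)++;
 *	}
 *
 *	uint32_t count = 0;
 *	kern_nexus_walktree(count_nexus, &count, FALSE);
 */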
3459 
3460 errno_t
3461 kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,
3462     struct kern_pbufpool_memory_info *rx_pool_info,
3463     struct kern_pbufpool_memory_info *tx_pool_info)
3464 {
3465 	struct kern_pbufpool *tpp, *rpp;
3466 	struct kern_nexus *nx;
3467 	errno_t err = 0;
3468 
3469 	nx = nx_find(nx_uuid, FALSE);
3470 	if (nx == NULL) {
3471 		err = ENOENT;
3472 		goto done;
3473 	}
3474 
3475 	if (nx->nx_prov->nxprov_params->nxp_type != NEXUS_TYPE_NET_IF) {
3476 		err = ENOTSUP;
3477 		goto done;
3478 	}
3479 
3480 	err = nx_netif_prov_nx_mem_info(nx, &tpp, &rpp);
3481 	if (err != 0) {
3482 		goto done;
3483 	}
3484 
3485 	if ((tpp == NULL) && (rpp == NULL)) {
3486 		err = ENOENT;
3487 		goto done;
3488 	}
3489 
3490 	if (tx_pool_info != NULL) {
3491 		bzero(tx_pool_info, sizeof(*tx_pool_info));
3492 	}
3493 	if (rx_pool_info != NULL) {
3494 		bzero(rx_pool_info, sizeof(*rx_pool_info));
3495 	}
3496 
3497 	if ((tx_pool_info != NULL) && (tpp != NULL)) {
3498 		err = kern_pbufpool_get_memory_info(tpp, tx_pool_info);
3499 		if (err != 0) {
3500 			goto done;
3501 		}
3502 	}
3503 
3504 	if ((rx_pool_info != NULL) && (rpp != NULL)) {
3505 		err = kern_pbufpool_get_memory_info(rpp, rx_pool_info);
3506 	}
3507 
3508 done:
3509 	if (nx != NULL) {
3510 		(void) nx_release(nx);
3511 		nx = NULL;
3512 	}
3513 	return err;
3514 }
3515 
3516 void
3517 nx_interface_advisory_notify(struct kern_nexus *nx)
3518 {
3519 	struct kern_channel *ch;
3520 	struct netif_stats *nifs;
3521 	struct fsw_stats *fsw_stats;
3522 	nexus_type_t nxdom_type = NX_DOM(nx)->nxdom_type;
3523 
3524 	if (nxdom_type == NEXUS_TYPE_NET_IF) {
3525 		nifs = &NX_NETIF_PRIVATE(nx)->nif_stats;
3526 	} else if (nxdom_type == NEXUS_TYPE_FLOW_SWITCH) {
3527 		fsw_stats = &NX_FSW_PRIVATE(nx)->fsw_stats;
3528 	} else {
3529 		VERIFY(0);
3530 		__builtin_unreachable();
3531 	}
3532 	if (!lck_rw_try_lock_shared(&nx->nx_ch_if_adv_lock)) {
3533 		if (nxdom_type == NEXUS_TYPE_NET_IF) {
3534 			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_DROP);
3535 		} else {
3536 			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_DROP);
3537 		}
3538 		return;
3539 	}
3540 	/*
3541 	 * If the channel is on the "nx_ch_if_adv_head" list, then we can
3542 	 * safely assume that the channel has not been closed yet.
3543 	 * In ch_close_common(), the channel is removed from the
3544 	 * "nx_ch_if_adv_head" list holding the "nx_ch_if_adv_lock" in
3545 	 * exclusive mode, prior to closing the channel.
3546 	 */
3547 	STAILQ_FOREACH(ch, &nx->nx_ch_if_adv_head, ch_link_if_adv) {
3548 		struct nexus_adapter *na = ch->ch_na;
3549 
3550 		ASSERT(na != NULL);
3551 		na_post_event(&na->na_tx_rings[ch->ch_first[NR_TX]],
3552 		    TRUE, FALSE, FALSE, CHAN_FILT_HINT_IF_ADV_UPD);
3553 		if (nxdom_type == NEXUS_TYPE_NET_IF) {
3554 			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_SENT);
3555 		} else {
3556 			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_SENT);
3557 		}
3558 	}
3559 	lck_rw_done(&nx->nx_ch_if_adv_lock);
3560 }
3561