1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/nexus/netif/nx_netif.h>
31 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
32 #include <sys/sdt.h>
33
/*
 * DEV/DEBUG-only knob: when non-zero, relaxes the controller-ownership
 * check in nxctl_nexus_bind() and nxctl_nexus_config().
 */
static uint32_t disable_nxctl_check = 0;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk, OID_AUTO, disable_nxctl_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &disable_nxctl_check, 0, "");
#endif

/* lock groups/attribute shared by nexus, mbuf-queue and packet-queue locks */
LCK_GRP_DECLARE(nexus_lock_group, "sk_nx_lock");
LCK_GRP_DECLARE(nexus_mbq_lock_group, "sk_nx_mbq_lock");
LCK_GRP_DECLARE(nexus_pktq_lock_group, "sk_nx_pktq_lock");
LCK_ATTR_DECLARE(nexus_lock_attr, 0, 0);

/* global list of nexus controller handles (excludes _kernnxctl) */
static STAILQ_HEAD(, nxctl) nxctl_head =
    STAILQ_HEAD_INITIALIZER(nxctl_head);
/* global list of registered nexus providers */
static STAILQ_HEAD(, kern_nexus_provider) nxprov_head =
    STAILQ_HEAD_INITIALIZER(nxprov_head);

/*
 * Red-black tree of all nexus instances; presumably keyed by nx_uuid
 * (see the RB_FIND with only nx_uuid filled in, in
 * nxctl_get_channel_list()) — nx_cmp is defined elsewhere in this file.
 */
static int nx_cmp(const struct kern_nexus *, const struct kern_nexus *);
RB_HEAD(kern_nexus_tree, kern_nexus);
RB_PROTOTYPE_SC(static, kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
RB_GENERATE(kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
static struct kern_nexus_tree nx_head;

/* sockopt handlers and lifecycle helpers (definitions below) */
static int nxctl_get_nexus_prov_list(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_prov_entry(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_list(struct nxctl *, struct sockopt *);
static int nxctl_nexus_bind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_unbind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_config(struct nxctl *, struct sockopt *);
static int nxctl_get_channel_list(struct nxctl *, struct sockopt *);
static void nxctl_retain_locked(struct nxctl *);
static int nxctl_release_locked(struct nxctl *);
static void nxctl_init(struct nxctl *, struct proc *, struct fileproc *);
static struct nxctl *nxctl_alloc(struct proc *, struct fileproc *, zalloc_flags_t);
static void nxctl_free(struct nxctl *);

static struct kern_nexus_provider *nxprov_create_common(struct nxctl *,
    struct kern_nexus_domain_provider *, struct nxprov_reg *,
    const struct kern_nexus_provider_init *init, int *);
static void nxprov_detach(struct kern_nexus_provider *, boolean_t);
static void nxprov_retain_locked(struct kern_nexus_provider *);
static int nxprov_release_locked(struct kern_nexus_provider *);
static struct kern_nexus_provider *nxprov_alloc(
	struct kern_nexus_domain_provider *, zalloc_flags_t);
static void nxprov_free(struct kern_nexus_provider *);

static int nx_init_rings(struct kern_nexus *, struct kern_channel *);
static void nx_fini_rings(struct kern_nexus *, struct kern_channel *);
static int nx_init_slots(struct kern_nexus *, struct __kern_channel_ring *);
static void nx_fini_slots(struct kern_nexus *, struct __kern_channel_ring *);
static struct kern_nexus *nx_alloc(zalloc_flags_t);
static void nx_free(struct kern_nexus *);

/* zalloc zones for the fixed-size nexus objects */
static SKMEM_TYPE_DEFINE(nxctl_zone, struct nxctl);

static SKMEM_TYPE_DEFINE(nxbind_zone, struct nxbind);

static SKMEM_TYPE_DEFINE(nxprov_zone, struct kern_nexus_provider);

static SKMEM_TYPE_DEFINE(nxprov_params_zone, struct nxprov_params);

static SKMEM_TYPE_DEFINE(nx_zone, struct kern_nexus);

/* set once nexus_init() has completed; cleared by nexus_fini() */
static int __nx_inited = 0;

/* allocation tags for variable-sized nexus data (keys, MIB, ports) */
#define SKMEM_TAG_NX_KEY "com.apple.skywalk.nexus.key"
SKMEM_TAG_DEFINE(skmem_tag_nx_key, SKMEM_TAG_NX_KEY);

#define SKMEM_TAG_NX_MIB "com.apple.skywalk.nexus.mib"
static SKMEM_TAG_DEFINE(skmem_tag_nx_mib, SKMEM_TAG_NX_MIB);

#define SKMEM_TAG_NX_PORT "com.apple.skywalk.nexus.port"
SKMEM_TAG_DEFINE(skmem_tag_nx_port, SKMEM_TAG_NX_PORT);

#define SKMEM_TAG_NX_PORT_INFO "com.apple.skywalk.nexus.port.info"
SKMEM_TAG_DEFINE(skmem_tag_nx_port_info, SKMEM_TAG_NX_PORT_INFO);

/*
 * Special nexus controller handle for Skywalk internal use. Unlike all
 * other nexus controller handles that are created by userland or kernel
 * clients, this one never gets closed or freed. It is also not part of
 * the global nxctl_head list.
 */
static struct nxctl _kernnxctl;
struct nexus_controller kernnxctl = { .ncd_nxctl = &_kernnxctl };
118
/*
 * One-time initialization of the nexus subsystem.  Called with the
 * global Skywalk lock held, exactly once (asserted via __nx_inited).
 *
 * Sets up the nexus instance tree, the adapter layer (na_*), the
 * built-in domains/domain providers, and the private kernel-only
 * nexus controller handle.  Always returns 0.
 */
int
nexus_init(void)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(!__nx_inited);

	RB_INIT(&nx_head);

	na_init();

	/* attach system built-in domains and domain providers */
	nxdom_attach_all();

	/*
	 * Initialize private kernel nexus controller handle; this is used
	 * internally for creating nexus providers and nexus instances from
	 * within the Skywalk code (e.g. netif_compat).
	 */
	nxctl_init(&_kernnxctl, kernproc, NULL);
	nxctl_retain_locked(&_kernnxctl);       /* one for us */
	nxctl_traffic_rule_init();

	__nx_inited = 1;

	return 0;
}
145
146 void
nexus_fini(void)147 nexus_fini(void)
148 {
149 SK_LOCK_ASSERT_HELD();
150
151 if (__nx_inited) {
152 nxctl_traffic_rule_fini();
153 nxctl_release_locked(&_kernnxctl);
154
155 /* tell all domains they're going away */
156 nxdom_detach_all();
157
158 ASSERT(RB_EMPTY(&nx_head));
159
160 na_fini();
161
162 __nx_inited = 0;
163 }
164 }
165
/*
 * Create a nexus controller handle on behalf of a userland or kernel
 * client and attach it to the global nxctl_head list.
 *
 * On success the handle carries two references: one for its presence
 * on the list and one returned to the caller.
 *
 * NOTE(review): *err is only ever read here — nothing in this function
 * assigns it — so the caller must pass in a pre-initialized error
 * value.  If it is nonzero, the handle is freed via nxctl_free() even
 * though it is still NEXUSCTLF_ATTACHED with two references held,
 * which would trip the assertions in nxctl_free(); confirm that all
 * callers pass *err == 0 on entry.
 */
struct nxctl *
nxctl_create(struct proc *p, struct fileproc *fp, const uuid_t nxctl_uuid,
    int *err)
{
	struct nxctl *nxctl = NULL;

	ASSERT(!uuid_is_null(nxctl_uuid));

	/* privilege checks would be done when performing nxctl operations */

	SK_LOCK();

	/* Z_WAITOK: the allocation cannot fail (it may block) */
	nxctl = nxctl_alloc(p, fp, Z_WAITOK);

	STAILQ_INSERT_TAIL(&nxctl_head, nxctl, nxctl_link);
	nxctl->nxctl_flags |= NEXUSCTLF_ATTACHED;
	uuid_copy(nxctl->nxctl_uuid, nxctl_uuid);

	nxctl_retain_locked(nxctl);     /* one for being in the list */
	nxctl_retain_locked(nxctl);     /* one for the caller */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl 0x%llx UUID %s", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr));
#endif /* SK_LOG */

	SK_UNLOCK();

	if (*err != 0) {
		nxctl_free(nxctl);
		nxctl = NULL;
	}
	return nxctl;
}
201
/*
 * Close a nexus controller handle: drop its fd association, detach it
 * from the global list (dropping the list reference), and close every
 * nexus provider that was registered through it.  The kernel-only
 * handle (NEXUSCTLF_KERNEL) must never come through here.
 *
 * Caller must hold an extra reference across the call (asserted below
 * for the attached case).
 */
void
nxctl_close(struct nxctl *nxctl)
{
	struct kern_nexus_provider *nxprov = NULL, *tnxprov;

	lck_mtx_lock(&nxctl->nxctl_lock);
	SK_LOCK();

	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL));

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl 0x%llx UUID %s flags 0x%b", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr),
	    nxctl->nxctl_flags, NEXUSCTLF_BITS);
#endif /* SK_LOG */

	/* sever the fileproc linkage, if not already done */
	if (!(nxctl->nxctl_flags & NEXUSCTLF_NOFDREF)) {
		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
		nxctl->nxctl_fp = NULL;
	}

	/* may be called as part of failure cleanup, so check */
	if (nxctl->nxctl_flags & NEXUSCTLF_ATTACHED) {
		/* caller must hold an extra ref */
		ASSERT(nxctl->nxctl_refcnt > 1);
		(void) nxctl_release_locked(nxctl);

		STAILQ_REMOVE(&nxctl_head, nxctl, nxctl, nxctl_link);
		nxctl->nxctl_flags &= ~NEXUSCTLF_ATTACHED;
	}

repeat:
	STAILQ_FOREACH_SAFE(nxprov, &nxprov_head, nxprov_link, tnxprov) {
		/*
		 * Close provider only for those which are owned by
		 * this control instance. Note that if we close the
		 * provider, we need to repeat this search as the
		 * list might have been changed by another thread.
		 * That's possible since SK_UNLOCK() may be called
		 * as a result of calling nxprov_close().
		 */
		if (!(nxprov->nxprov_flags & NXPROVF_CLOSED) &&
		    nxprov->nxprov_ctl == nxctl) {
			/* hold the provider across the (lock-dropping) close */
			nxprov_retain_locked(nxprov);
			(void) nxprov_close(nxprov, TRUE);
			(void) nxprov_release_locked(nxprov);
			goto repeat;
		}
	}

	SK_UNLOCK();
	lck_mtx_unlock(&nxctl->nxctl_lock);
	/* purge traffic rules owned by this controller, outside the locks */
	nxctl_traffic_rule_clean(nxctl);
}
257
258 int
nxctl_set_opt(struct nxctl * nxctl,struct sockopt * sopt)259 nxctl_set_opt(struct nxctl *nxctl, struct sockopt *sopt)
260 {
261 #pragma unused(nxctl)
262 int err = 0;
263
264 NXCTL_LOCK_ASSERT_HELD(nxctl);
265
266 if (sopt->sopt_dir != SOPT_SET) {
267 sopt->sopt_dir = SOPT_SET;
268 }
269
270 switch (sopt->sopt_name) {
271 case NXOPT_NEXUS_BIND:
272 err = nxctl_nexus_bind(nxctl, sopt);
273 break;
274
275 case NXOPT_NEXUS_UNBIND:
276 err = nxctl_nexus_unbind(nxctl, sopt);
277 break;
278
279 case NXOPT_NEXUS_CONFIG:
280 err = nxctl_nexus_config(nxctl, sopt);
281 break;
282
283 default:
284 err = ENOPROTOOPT;
285 break;
286 }
287
288 return err;
289 }
290
291 int
nxctl_get_opt(struct nxctl * nxctl,struct sockopt * sopt)292 nxctl_get_opt(struct nxctl *nxctl, struct sockopt *sopt)
293 {
294 #pragma unused(nxctl)
295 int err = 0;
296
297 NXCTL_LOCK_ASSERT_HELD(nxctl);
298
299 if (sopt->sopt_dir != SOPT_GET) {
300 sopt->sopt_dir = SOPT_GET;
301 }
302
303 switch (sopt->sopt_name) {
304 case NXOPT_NEXUS_PROV_LIST:
305 err = nxctl_get_nexus_prov_list(nxctl, sopt);
306 break;
307
308 case NXOPT_NEXUS_PROV_ENTRY:
309 err = nxctl_get_nexus_prov_entry(nxctl, sopt);
310 break;
311
312 case NXOPT_NEXUS_LIST:
313 err = nxctl_get_nexus_list(nxctl, sopt);
314 break;
315
316 case NXOPT_CHANNEL_LIST:
317 err = nxctl_get_channel_list(nxctl, sopt);
318 break;
319
320 default:
321 err = ENOPROTOOPT;
322 break;
323 }
324
325 return err;
326 }
327
328 /* Upper bound on # of nrl_num_regs that we'd return to user space */
329 #define MAX_NUM_REG_ENTRIES 256
330
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * NXOPT_NEXUS_PROV_LIST handler: copy out to the caller the registered
 * nexus provider entries (UUID + parameters) visible to it.  Callers
 * without PRIV_SKYWALK_OBSERVE_ALL only see providers they own.
 *
 * NOTE(review): ncregs is only incremented when an entry is actually
 * copied into the bounce buffer, so when the caller supplies no buffer
 * (nrl_regs == NULL) this returns ENOENT rather than the total count —
 * confirm whether size-probing with a NULL buffer is intended here
 * (compare with nxctl_get_nexus_list() below, which counts all).
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	struct nxprov_reg_ent *pnre, *nres = NULL;
	struct nxprov_list_req nrlr;
	struct kern_nexus_provider *nxprov = NULL;
	uint32_t nregs = 0, ncregs = 0;
	int err = 0, observeall;
	size_t nres_sz;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nrlr, sizeof(nrlr), sizeof(nrlr));
	if (err != 0) {
		return err;
	}

	/* clamp the request to a sane upper bound */
	if ((size_t)nrlr.nrl_num_regs > MAX_NUM_REG_ENTRIES) {
		nrlr.nrl_num_regs = MAX_NUM_REG_ENTRIES;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus provider
	 * entries to caller gracefully. We only copy out the number of
	 * entries which caller has asked for, but we always tell caller
	 * how big the buffer really needs to be.
	 */
	tmp_ptr = nrlr.nrl_regs;
	if (tmp_ptr != USER_ADDR_NULL && nrlr.nrl_num_regs > 0) {
		/* kernel bounce buffer; filled under SK_LOCK, copied out after */
		nres_sz = (size_t)nrlr.nrl_num_regs * sizeof(*nres);
		nres = sk_alloc_data(nres_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(nres == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	/*
	 * Count number of providers.  If buffer space exists and
	 * remains, copy out provider entries.
	 */
	nregs = nrlr.nrl_num_regs;
	pnre = nres;

	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (nres != NULL && nregs > 0) {
			uuid_copy(pnre->npre_prov_uuid, nxprov->nxprov_uuid);
			bcopy(nxprov->nxprov_params, &pnre->npre_prov_params,
			    sizeof(struct nxprov_params));
			--nregs;
			++pnre;
			++ncregs;
		}
	}
	SK_UNLOCK();

	if (ncregs == 0) {
		err = ENOENT;
	}

	if (nres != NULL) {
		if (err == 0 && tmp_ptr != USER_ADDR_NULL) {
			/* kernel callers pass a kernel VA in nrl_regs */
			if (sopt->sopt_p != kernproc) {
				err = copyout(nres, tmp_ptr,
				    ncregs * sizeof(*nres));
			} else {
				bcopy(nres, CAST_DOWN(caddr_t, tmp_ptr),
				    ncregs * sizeof(*nres));
			}
		}
		sk_free_data(nres, nres_sz);
		nres = NULL;
	}

	if (err == 0) {
		/* report back how many entries were returned */
		nrlr.nrl_num_regs = ncregs;
		err = sooptcopyout(sopt, &nrlr, sizeof(nrlr));
	}

	return err;
}
431
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * NXOPT_NEXUS_PROV_ENTRY handler: look up a single nexus provider by
 * UUID and copy its parameters back to the caller.  Providers owned
 * by other controllers are only visible with PRIV_SKYWALK_OBSERVE_ALL.
 * Returns ENOENT if not found or not visible.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_entry(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nxprov_reg_ent nre;
	struct kern_nexus_provider *nxprov = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nre, sizeof(nre));
	err = sooptcopyin(sopt, &nre, sizeof(nre), sizeof(nre));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nre.npre_prov_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (uuid_compare(nxprov->nxprov_uuid,
		    nre.npre_prov_uuid) == 0) {
			/*
			 * Return only entries that are visible to the caller,
			 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
			 */
			if (nxprov->nxprov_ctl != nxctl) {
				if (skywalk_priv_check_cred(sopt->sopt_p,
				    nxctl->nxctl_cred,
				    PRIV_SKYWALK_OBSERVE_ALL) != 0) {
					/* hide it: report as not found */
					nxprov = NULL;
					break;
				}
			}

			bcopy(nxprov->nxprov_params, &nre.npre_prov_params,
			    sizeof(struct nxprov_params));
			break;
		}
	}
	SK_UNLOCK();

	if (nxprov != NULL) {
		err = sooptcopyout(sopt, &nre, sizeof(nre));
	} else {
		err = ENOENT;
	}

	return err;
}
490
491 /* Upper bound on # of nl_num_nx_uuids that we'd return to user space */
492 #define MAX_NUM_NX_UUIDS 4096
493
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * NXOPT_NEXUS_LIST handler: for a given provider UUID, copy out the
 * UUIDs of all nexus instances created from that provider.  The
 * request always reports the total instance count back, even when the
 * supplied buffer was too small (or absent).  Visibility is limited
 * to the caller's own providers unless it holds
 * PRIV_SKYWALK_OBSERVE_ALL.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct nx_list_req nlr;
	struct kern_nexus_provider *nxprov = NULL;
	struct kern_nexus *nx = NULL;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nlr, sizeof(nlr), sizeof(nlr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nlr.nl_prov_uuid)) {
		return EINVAL;
	} else if ((size_t)nlr.nl_num_nx_uuids > MAX_NUM_NX_UUIDS) {
		/* clamp the request to a sane upper bound */
		nlr.nl_num_nx_uuids = MAX_NUM_NX_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus UUIDs to
	 * caller gracefully. We only copy out the number of UUIDs which
	 * caller has asked for, but we always tell caller how big the
	 * buffer really needs to be.
	 */
	tmp_ptr = nlr.nl_nx_uuids;
	if (tmp_ptr != USER_ADDR_NULL && nlr.nl_num_nx_uuids > 0) {
		/* kernel bounce buffer; filled under SK_LOCK, copied out after */
		uuids_sz = (size_t)nlr.nl_num_nx_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(uuids == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	/* find the (visible) provider with the requested UUID */
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (uuid_compare(nxprov->nxprov_uuid, nlr.nl_prov_uuid) == 0) {
			break;
		}
	}

	if (nxprov != NULL) {
		/*
		 * Count number of Nexus.  If buffer space exists
		 * and remains, copy out the Nexus UUIDs.
		 */
		nuuids = nlr.nl_num_nx_uuids;
		puuid = uuids;

		STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, nx->nx_uuid);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nxprov != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			if (cnt_uuid > 0) {
				/* kernel callers pass a kernel VA */
				if (sopt->sopt_p != kernproc) {
					err = copyout(uuids, tmp_ptr,
					    cnt_uuid * sizeof(uuid_t));
				} else {
					bcopy(uuids,
					    CAST_DOWN(caddr_t, tmp_ptr),
					    cnt_uuid * sizeof(uuid_t));
				}
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	if (err == 0) {
		/* report the total count so the caller can re-size and retry */
		nlr.nl_num_nx_uuids = ncuuids;
		err = sooptcopyout(sopt, &nlr, sizeof(nlr));
	}

	return err;
}
608
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * NXOPT_NEXUS_BIND handler: bind a set of client-matching credentials
 * (PID/unique-ID, executable UUID, and/or a secret key) to a port of
 * the nexus identified by nb_nx_uuid.  On success the chosen port is
 * copied back to the caller in nb_port.
 *
 * The operation is restricted to the controller that owns the nexus
 * (or the kernel-only controller; the DEV/DEBUG sysctl
 * disable_nxctl_check also bypasses the ownership test), and is not
 * applicable to anonymous providers.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_bind(struct nxctl *nxctl, struct sockopt *sopt)
{
	boolean_t m_pid, m_exec_uuid, m_key;
	struct nx_bind_req nbr;
	struct proc *p = PROC_NULL;
	struct nxbind *nxb = NULL;
	uint64_t p_uniqueid = -1;
	pid_t p_pid = -1;
	struct kern_nexus *nx = NULL;
#if SK_LOG
	uuid_string_t exec_uuidstr;
#endif /* SK_LOG */
	uuid_t p_uuid;
	void *key = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	uuid_clear(p_uuid);
	bzero(&nbr, sizeof(nbr));
	err = sooptcopyin(sopt, &nbr, sizeof(nbr), sizeof(nbr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nbr.nb_nx_uuid)) {
		err = EINVAL;
		goto done_unlocked;
	}

	nbr.nb_flags &= NBR_MATCH_MASK;
	if (nbr.nb_flags == 0) {
		/* must choose one of the match criteria */
		err = EINVAL;
		goto done_unlocked;
	}
	m_pid = !!(nbr.nb_flags & NBR_MATCH_PID);
	m_exec_uuid = !!(nbr.nb_flags & NBR_MATCH_EXEC_UUID);
	m_key = !!(nbr.nb_flags & NBR_MATCH_KEY);

	if (m_pid || m_exec_uuid) {
		/*
		 * Validate process ID. A valid PID is needed when we're
		 * asked to match by PID, or if asked to match by executable
		 * UUID with a NULL nb_exec_uuid supplied. The latter is
		 * to support the case when a userland Nexus provider isn't
		 * able to acquire its client's executable UUID, but is
		 * able to identify it via PID.
		 */
		if ((m_pid || uuid_is_null(nbr.nb_exec_uuid)) &&
		    (p = proc_find(nbr.nb_pid)) == PROC_NULL) {
			err = ESRCH;
			goto done_unlocked;
		}
		/* exclude kernel from the match criteria */
		if (p == kernproc) {
			err = EACCES;
			goto done_unlocked;
		} else if (p != PROC_NULL) {
			/* derive the match identity from the found process */
			proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
			p_uniqueid = proc_uniqueid(p);
			p_pid = proc_pid(p);
		} else {
			/* no PID lookup needed; trust the supplied exec UUID */
			uuid_copy(p_uuid, nbr.nb_exec_uuid);
		}
	}

	if (m_key) {
		/* key length must be sane and the user pointer present */
		if (nbr.nb_key_len == 0 || nbr.nb_key_len > NEXUS_MAX_KEY_LEN ||
		    nbr.nb_key == USER_ADDR_NULL) {
			err = EINVAL;
			goto done_unlocked;
		}

		key = sk_alloc_data(nbr.nb_key_len, Z_WAITOK, skmem_tag_nx_key);
		if (__improbable(key == NULL)) {
			err = ENOMEM;
			goto done_unlocked;
		}

		/* kernel callers pass a kernel VA in nb_key */
		if (sopt->sopt_p != kernproc) {
			err = copyin(nbr.nb_key, key, nbr.nb_key_len);
			if (err != 0) {
				goto done_unlocked;
			}
		} else {
			bcopy((void *)nbr.nb_key, key, nbr.nb_key_len);
		}
	}

	SK_LOCK();
	/* nx_find() returns the nexus with a reference held */
	nx = nx_find(nbr.nb_nx_uuid, TRUE);
	if (nx == NULL || (disable_nxctl_check == 0 &&
	    nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* bind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	/* port must be within the domain's range */
	if (nbr.nb_port != NEXUS_PORT_ANY &&
	    nbr.nb_port >= NXDOM_MAX(NX_DOM(nx), ports)) {
		err = EDOM;
		goto done;
	} else if (nbr.nb_port == NEXUS_PORT_ANY) {
		/* for now, this is allowed only for kernel clients */
		if (sopt->sopt_p != kernproc) {
			err = EPERM;
			goto done;
		}
	}

	/* build the nxbind from the validated match criteria */
	nxb = nxb_alloc(Z_WAITOK);

	if (m_pid) {
		nxb->nxb_flags |= NXBF_MATCH_UNIQUEID;
		nxb->nxb_uniqueid = p_uniqueid;
		nxb->nxb_pid = p_pid;
	}
	if (m_exec_uuid) {
		nxb->nxb_flags |= NXBF_MATCH_EXEC_UUID;
		ASSERT(!uuid_is_null(p_uuid));
		uuid_copy(nxb->nxb_exec_uuid, p_uuid);
	}
	if (m_key) {
		nxb->nxb_flags |= NXBF_MATCH_KEY;
		ASSERT(key != NULL);
		nxb->nxb_key = key;
		key = NULL;             /* let nxb_free() free it */
		ASSERT(nbr.nb_key_len != 0 &&
		    nbr.nb_key_len <= NEXUS_MAX_KEY_LEN);
		nxb->nxb_key_len = nbr.nb_key_len;
	}

	/*
	 * Bind the creds to the nexus port. If client doesn't have a port,
	 * find one, claim it, and associate the creds to it. Upon success,
	 * the nexus may move the nxbind contents (including the key) to
	 * its own nxbind instance; in that case, nxb_free() below will not
	 * be freeing the key within.
	 */
	err = NX_DOM(nx)->nxdom_bind_port(nx, &nbr.nb_port, nxb, NULL);
	if (err != 0) {
		goto done;
	}

	ASSERT(nbr.nb_port != NEXUS_PORT_ANY);
	/* best-effort: report the assigned port back to the caller */
	(void) sooptcopyout(sopt, &nbr, sizeof(nbr));

	SK_D("nexus 0x%llx nxb 0x%llx port %u flags 0x%b pid %d "
	    "(uniqueid %llu) exec_uuid %s key 0x%llx key_len %u",
	    SK_KVA(nx), SK_KVA(nxb), nbr.nb_port, nxb->nxb_flags,
	    NXBF_BITS, nxb->nxb_pid, nxb->nxb_uniqueid,
	    sk_uuid_unparse(nxb->nxb_exec_uuid, exec_uuidstr),
	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0,
	    nxb->nxb_key_len);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

done_unlocked:
	ASSERT(nx == NULL);

	/* common cleanup: local nxbind, unconsumed key, process ref */
	if (nxb != NULL) {
		nxb_free(nxb);
		nxb = NULL;
	}
	if (key != NULL) {
		sk_free_data(key, nbr.nb_key_len);
		key = NULL;
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}

	return err;
}
803
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * NXOPT_NEXUS_UNBIND handler: remove a previously established binding
 * from the given port of the nexus identified by nu_nx_uuid.  As with
 * bind, restricted to the owning controller (or the kernel-only one)
 * and not applicable to anonymous providers.
 *
 * NOTE(review): unlike nxctl_nexus_bind()/nxctl_nexus_config(), this
 * path does not consult the DEV/DEBUG disable_nxctl_check sysctl —
 * confirm whether that asymmetry is intentional.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_unbind(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nx_unbind_req nur;
	struct kern_nexus *nx = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nur, sizeof(nur));
	err = sooptcopyin(sopt, &nur, sizeof(nur), sizeof(nur));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nur.nu_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	/* nx_find() returns the nexus with a reference held */
	nx = nx_find(nur.nu_nx_uuid, TRUE);
	if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* unbind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	/* a concrete port is required; there is no "unbind all" */
	if (nur.nu_port == NEXUS_PORT_ANY) {
		err = EINVAL;
		goto done;
	}

	err = NX_DOM(nx)->nxdom_unbind_port(nx, nur.nu_port);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}
859
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * NXOPT_NEXUS_CONFIG handler: forward a domain-specific configuration
 * request to the nexus's domain provider.  Works for both get and set
 * directions (sopt_dir is passed through).  Restricted to the owning
 * controller (or the kernel-only one; bypassed by the DEV/DEBUG
 * disable_nxctl_check sysctl).  Returns EPERM when the domain
 * provider has no config handler.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_config(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct kern_nexus *nx = NULL;
	struct nx_cfg_req ncr;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&ncr, sizeof(ncr));
	err = sooptcopyin(sopt, &ncr, sizeof(ncr), sizeof(ncr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(ncr.nc_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	/* nx_find() returns the nexus with a reference held */
	nx = nx_find(ncr.nc_nx_uuid, TRUE);
	if (nx == NULL || (disable_nxctl_check == 0 &&
	    nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	if (NX_DOM_PROV(nx)->nxdom_prov_config != NULL) {
		err = NX_DOM_PROV(nx)->nxdom_prov_config(NX_DOM_PROV(nx),
		    nx, &ncr, sopt->sopt_dir, sopt->sopt_p, nxctl->nxctl_cred);
	} else {
		err = EPERM;
	}

	if (err == 0) {
		/* best-effort: return the (possibly updated) request */
		(void) sooptcopyout(sopt, &ncr, sizeof(ncr));
	}
done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}
913
914 struct nxbind *
nxb_alloc(zalloc_flags_t how)915 nxb_alloc(zalloc_flags_t how)
916 {
917 struct nxbind *nxb = zalloc_flags(nxbind_zone, how | Z_ZERO);
918
919 if (nxb) {
920 SK_DF(SK_VERB_MEM, "nxb 0x%llx ALLOC", SK_KVA(nxb));
921 }
922 return nxb;
923 }
924
925 void
nxb_free(struct nxbind * nxb)926 nxb_free(struct nxbind *nxb)
927 {
928 SK_DF(SK_VERB_MEM, "nxb 0x%llx key 0x%llx FREE", SK_KVA(nxb),
929 (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0);
930
931 if (nxb->nxb_key != NULL) {
932 sk_free_data(nxb->nxb_key, nxb->nxb_key_len);
933 nxb->nxb_key = NULL;
934 }
935 zfree(nxbind_zone, nxb);
936 }
937
938 /*
939 * nxb0 is assumed to possess the truth, compare nxb1 against it.
940 */
941 boolean_t
nxb_is_equal(struct nxbind * nxb0,struct nxbind * nxb1)942 nxb_is_equal(struct nxbind *nxb0, struct nxbind *nxb1)
943 {
944 ASSERT(nxb0 != NULL && nxb1 != NULL);
945 ASSERT(nxb0 != nxb1);
946
947 /* we always compare using uniqueid and not pid */
948 if ((nxb0->nxb_flags & NXBF_MATCH_UNIQUEID) &&
949 nxb1->nxb_uniqueid != nxb0->nxb_uniqueid) {
950 return FALSE;
951 }
952
953 if ((nxb0->nxb_flags & NXBF_MATCH_EXEC_UUID) &&
954 uuid_compare(nxb1->nxb_exec_uuid, nxb0->nxb_exec_uuid) != 0) {
955 return FALSE;
956 }
957
958 ASSERT(!(nxb0->nxb_flags & NXBF_MATCH_KEY) ||
959 (nxb0->nxb_key_len != 0 && nxb0->nxb_key != NULL));
960
961 if ((nxb0->nxb_flags & NXBF_MATCH_KEY) &&
962 (nxb0->nxb_key_len != nxb1->nxb_key_len ||
963 nxb1->nxb_key == NULL || timingsafe_bcmp(nxb1->nxb_key, nxb0->nxb_key,
964 nxb1->nxb_key_len) != 0)) {
965 return FALSE;
966 }
967
968 return TRUE;
969 }
970
971 void
nxb_move(struct nxbind * snxb,struct nxbind * dnxb)972 nxb_move(struct nxbind *snxb, struct nxbind *dnxb)
973 {
974 ASSERT(!(snxb->nxb_flags & NXBF_MATCH_KEY) ||
975 (snxb->nxb_key_len != 0 && snxb->nxb_key != NULL));
976
977 /* in case the destination has a key attached, free it first */
978 if (dnxb->nxb_key != NULL) {
979 sk_free_data(dnxb->nxb_key, dnxb->nxb_key_len);
980 dnxb->nxb_key = NULL;
981 }
982
983 /* move everything from src to dst, and then wipe out src */
984 bcopy(snxb, dnxb, sizeof(*dnxb));
985 bzero(snxb, sizeof(*snxb));
986 }
987
988 /* Upper bound on # of cl_num_ch_uuids that we'd return to user space */
989 #define MAX_NUM_CH_UUIDS 4096
990
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * NXOPT_CHANNEL_LIST handler: copy out the UUIDs of all channels
 * opened on the nexus identified by cl_nx_uuid, reporting the total
 * channel count back to the caller.  Nexuses owned by other
 * controllers are visible only with PRIV_SKYWALK_OBSERVE_ALL.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_channel_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct ch_list_req clr;
	struct kern_channel *ch = NULL;
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &clr, sizeof(clr), sizeof(clr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(clr.cl_nx_uuid)) {
		return EINVAL;
	} else if ((size_t)clr.cl_num_ch_uuids > MAX_NUM_CH_UUIDS) {
		/* clamp the request to a sane upper bound */
		clr.cl_num_ch_uuids = MAX_NUM_CH_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Channel UUIDs to
	 * caller gracefully. We only copy out the number of UUIDs which
	 * caller has asked for, but we always tell caller how big the
	 * buffer really needs to be.
	 */
	tmp_ptr = clr.cl_ch_uuids;
	if (tmp_ptr != USER_ADDR_NULL && clr.cl_num_ch_uuids > 0) {
		/* kernel bounce buffer; filled under SK_LOCK, copied out after */
		uuids_sz = (size_t)clr.cl_num_ch_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (uuids == NULL) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	/*
	 * Stack-local lookup key: only nx_uuid is initialized, so this
	 * presumably relies on nx_cmp() comparing solely by UUID — nx_cmp
	 * is defined elsewhere; verify if touching this.
	 */
	uuid_copy(find.nx_uuid, clr.cl_nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && NX_PROV(nx)->nxprov_ctl != nxctl && !observeall) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		nx = NULL;
	}
	if (nx != NULL) {
		/*
		 * Count number of Channels.  If buffer space exists
		 * and remains, copy out the Channel UUIDs.
		 */
		nuuids = clr.cl_num_ch_uuids;
		puuid = uuids;

		STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, ch->ch_info->cinfo_ch_id);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nx != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			/*
			 * NOTE(review): unlike nxctl_get_nexus_list(), an
			 * empty result here asserts rather than skipping the
			 * copyout — this assumes a found nexus always has at
			 * least one channel; confirm that invariant.
			 */
			ASSERT(cnt_uuid > 0);

			if (sopt->sopt_p != kernproc) {
				err = copyout(uuids, tmp_ptr,
				    cnt_uuid * sizeof(uuid_t));
			} else {
				bcopy(uuids, CAST_DOWN(caddr_t, tmp_ptr),
				    cnt_uuid * sizeof(uuid_t));
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	if (err == 0) {
		/* report the total count so the caller can re-size and retry */
		clr.cl_num_ch_uuids = ncuuids;
		err = sooptcopyout(sopt, &clr, sizeof(clr));
	}

	return err;
}
1100
/*
 * Initialize an nxctl instance for the given owning process: zero the
 * structure, record the process's executable UUID and unique ID, and
 * take a reference on its credentials (released in nxctl_free()).
 * The static kernel-only handle (_kernnxctl) is tagged
 * NEXUSCTLF_KERNEL; a NULL fp marks the handle as holding no file
 * descriptor reference.
 */
static void
nxctl_init(struct nxctl *nxctl, struct proc *p, struct fileproc *fp)
{
	uuid_t p_uuid;

	bzero(nxctl, sizeof(*nxctl));

	proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));

	lck_mtx_init(&nxctl->nxctl_lock, &nexus_lock_group, &nexus_lock_attr);
	uuid_copy(nxctl->nxctl_proc_uuid, p_uuid);
	nxctl->nxctl_proc_uniqueid = proc_uniqueid(p);
	/* cred reference is paired with kauth_cred_unref() in nxctl_free() */
	nxctl->nxctl_cred = kauth_cred_proc_ref(p);
	nxctl->nxctl_fp = fp;
	if (nxctl == &_kernnxctl) {
		ASSERT(p == kernproc);
		nxctl->nxctl_flags |= NEXUSCTLF_KERNEL;
	}
	if (fp == NULL) {
		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
	}
}
1123
1124 static struct nxctl *
nxctl_alloc(struct proc * p,struct fileproc * fp,zalloc_flags_t how)1125 nxctl_alloc(struct proc *p, struct fileproc *fp, zalloc_flags_t how)
1126 {
1127 struct nxctl *nxctl = zalloc_flags(nxctl_zone, how);
1128
1129 if (nxctl != NULL) {
1130 nxctl_init(nxctl, p, fp);
1131 }
1132 return nxctl;
1133 }
1134
/*
 * Final teardown of a controller once the last reference is gone:
 * release the process credential, destroy the lock, and return the
 * memory to its zone unless this is the static kernel controller.
 */
static void
nxctl_free(struct nxctl *nxctl)
{
	ASSERT(nxctl->nxctl_refcnt == 0);
	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_ATTACHED));
	/* drop the credential reference taken in nxctl_init() */
	kauth_cred_unref(&nxctl->nxctl_cred);
	lck_mtx_destroy(&nxctl->nxctl_lock, &nexus_lock_group);
	SK_D("nxctl 0x%llx FREE", SK_KVA(nxctl));
	if (!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL)) {
		/* the kernel controller is static storage, never zfree'd */
		zfree(nxctl_zone, nxctl);
	}
}
1147
1148 static void
nxctl_retain_locked(struct nxctl * nxctl)1149 nxctl_retain_locked(struct nxctl *nxctl)
1150 {
1151 SK_LOCK_ASSERT_HELD();
1152
1153 nxctl->nxctl_refcnt++;
1154 ASSERT(nxctl->nxctl_refcnt != 0);
1155 }
1156
/*
 * Unlocked wrapper: take SK_LOCK around the locked retain variant.
 */
void
nxctl_retain(struct nxctl *nxctl)
{
	SK_LOCK();
	nxctl_retain_locked(nxctl);
	SK_UNLOCK();
}
1164
1165 static int
nxctl_release_locked(struct nxctl * nxctl)1166 nxctl_release_locked(struct nxctl *nxctl)
1167 {
1168 int oldref = nxctl->nxctl_refcnt;
1169
1170 SK_LOCK_ASSERT_HELD();
1171
1172 ASSERT(nxctl->nxctl_refcnt != 0);
1173 if (--nxctl->nxctl_refcnt == 0) {
1174 nxctl_free(nxctl);
1175 }
1176
1177 return oldref == 1;
1178 }
1179
/*
 * Unlocked wrapper around nxctl_release_locked(); returns non-zero
 * when the last reference was dropped.
 */
int
nxctl_release(struct nxctl *nxctl)
{
	int last;

	SK_LOCK();
	last = nxctl_release_locked(nxctl);
	SK_UNLOCK();

	return last;
}
1191
/*
 * Destructor callback for a controller: close it, then drop the
 * reference that kept it alive.  The argument is an opaque pointer
 * to the nxctl.
 */
void
nxctl_dtor(void *arg)
{
	struct nxctl *nxctl = arg;

	/* close first, with no locks held */
	nxctl_close(nxctl);
	SK_LOCK();
	(void) nxctl_release_locked(nxctl);
	SK_UNLOCK();
}
1202
/*
 * Notify an external nexus provider that a channel is connecting.
 * Invokes the provider's pre_connect callback, initializes the rings
 * (or, for logical-link providers, the default queues), and then the
 * connected callback.
 *
 * Entered with SK_LOCK and the channel lock held; both are dropped
 * around the external callouts and re-acquired before returning.
 * On error, any partial state (CHANF_EXT_PRECONNECT/CONNECTED) is
 * undone via nxprov_advise_disconnect().  Returns 0 or the callout/
 * ring-setup error.
 */
int
nxprov_advise_connect(struct kern_nexus *nx, struct kern_channel *ch,
    struct proc *p)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	int err = 0;

	ASSERT(!(ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)));
	ASSERT(ch->ch_ctx == NULL);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* monitor channels aren't externally visible/usable, so ignore */
	if ((ch->ch_info->cinfo_ch_mode & CHMODE_MONITOR) ||
	    (ch->ch_flags & CHANF_EXT_SKIP) ||
	    (nxprov->nxprov_ext.nxpi_pre_connect == NULL ||
	    nxprov->nxprov_ext.nxpi_connected == NULL)) {
		return 0;
	}

	/*
	 * Keep the channel alive, then drop SK_LOCK across the external
	 * callouts; only the channel lock is held while calling out.
	 */
	ch_retain_locked(ch);
	lck_mtx_unlock(&ch->ch_lock);
	SK_UNLOCK();
	lck_mtx_lock(&ch->ch_lock);

	err = nxprov->nxprov_ext.nxpi_pre_connect(nxprov, p, nx,
	    ch->ch_info->cinfo_nx_port, ch, &ch->ch_ctx);
	if (err != 0) {
		SK_D("ch 0x%llx flags %b nx 0x%llx pre_connect "
		    "error %d", SK_KVA(ch), ch->ch_flags,
		    CHANF_BITS, SK_KVA(nx), err);
		/* don't keep a context from a failed pre_connect */
		ch->ch_ctx = NULL;
		goto done;
	}
	/*
	 * Upon ring/slot init failure, this is cleared
	 * by nxprov_advise_disconnect() below.
	 */
	atomic_bitset_32(&ch->ch_flags, CHANF_EXT_PRECONNECT);
	if (NXPROV_LLINK(nxprov)) {
		err = nx_netif_llink_ext_init_default_queues(nx);
	} else {
		err = nx_init_rings(nx, ch);
	}
	if (err != 0) {
		goto done;
	}
	ASSERT(err == 0);
	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT |
	    CHANF_EXT_CONNECTED)) == CHANF_EXT_PRECONNECT);

	err = nxprov->nxprov_ext.nxpi_connected(nxprov, nx, ch);
	if (err != 0) {
		SK_D("ch 0x%llx flags %b nx 0x%llx connected error %d",
		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx), err);
		goto done;
	}
	atomic_bitset_32(&ch->ch_flags, CHANF_EXT_CONNECTED);
	SK_D("ch 0x%llx flags %b nx 0x%llx connected",
	    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));


done:
	/* re-acquire SK_LOCK before the channel lock (lock ordering) */
	lck_mtx_unlock(&ch->ch_lock);
	SK_LOCK();
	lck_mtx_lock(&ch->ch_lock);
	if ((err != 0) &&
	    (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT))) {
		/* undo pre_connect/ring setup on failure */
		nxprov_advise_disconnect(nx, ch);
	}
	/* caller is expected to hold one, in addition to ourselves */
	VERIFY(ch->ch_refcnt >= 2);
	ch_release_locked(ch);

	return err;
}
1280
/*
 * Notify an external nexus provider that a channel is disconnecting,
 * undoing whatever nxprov_advise_connect() established: calls
 * pre_disconnect (if connected), tears down the rings or default
 * llink queues, then calls disconnected and clears the channel's
 * external-connection flags and context.
 *
 * Entered with SK_LOCK and the channel lock held; as in the connect
 * path, SK_LOCK is dropped around the external callouts and
 * re-acquired before returning.  Safe to call from the connect error
 * path: it is a no-op unless CHANF_EXT_PRECONNECT/CONNECTED is set.
 */
void
nxprov_advise_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* check as we might be called in the error handling path */
	if (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)) {
		/* keep the channel alive across the callouts */
		ch_retain_locked(ch);
		lck_mtx_unlock(&ch->ch_lock);
		SK_UNLOCK();
		lck_mtx_lock(&ch->ch_lock);

		ASSERT(!(ch->ch_flags & CHANF_EXT_SKIP));
		if (ch->ch_flags & CHANF_EXT_CONNECTED) {
			nxprov->nxprov_ext.nxpi_pre_disconnect(nxprov, nx, ch);
			atomic_bitclear_32(&ch->ch_flags, CHANF_EXT_CONNECTED);
		}

		/*
		 * Inform the external domain provider that the rings
		 * and slots for this channel are no longer valid.
		 */
		if (NXPROV_LLINK(nxprov)) {
			nx_netif_llink_ext_fini_default_queues(nx);
		} else {
			nx_fini_rings(nx, ch);
		}

		ASSERT(ch->ch_flags & CHANF_EXT_PRECONNECT);
		nxprov->nxprov_ext.nxpi_disconnected(nxprov, nx, ch);
		atomic_bitclear_32(&ch->ch_flags, CHANF_EXT_PRECONNECT);

		SK_D("ch 0x%llx flags %b nx 0x%llx disconnected",
		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));

		/* We're done with this channel */
		ch->ch_ctx = NULL;

		/* re-acquire SK_LOCK before the channel lock (lock ordering) */
		lck_mtx_unlock(&ch->ch_lock);
		SK_LOCK();
		lck_mtx_lock(&ch->ch_lock);
		/* caller is expected to hold one, in addition to ourselves */
		VERIFY(ch->ch_refcnt >= 2);
		ch_release_locked(ch);
	}
	ASSERT(!(ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)));
	ASSERT(ch->ch_ctx == NULL);
}
1332
1333 static struct kern_nexus_provider *
nxprov_create_common(struct nxctl * nxctl,struct kern_nexus_domain_provider * nxdom_prov,struct nxprov_reg * reg,const struct kern_nexus_provider_init * init,int * err)1334 nxprov_create_common(struct nxctl *nxctl,
1335 struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
1336 const struct kern_nexus_provider_init *init, int *err)
1337 {
1338 struct skmem_region_params srp[SKMEM_REGIONS];
1339 struct kern_nexus_provider *nxprov = NULL;
1340 struct nxprov_params nxp;
1341 uint32_t override = 0;
1342 uint32_t pp_region_config_flags;
1343 int i;
1344
1345 _CASSERT(sizeof(*init) == sizeof(nxprov->nxprov_ext));
1346 _CASSERT(sizeof(*init) >=
1347 sizeof(struct kern_nexus_netif_provider_init));
1348
1349 SK_LOCK_ASSERT_HELD();
1350 ASSERT(nxctl != NULL && reg != NULL && nxdom_prov != NULL);
1351
1352 pp_region_config_flags = PP_REGION_CONFIG_MD_MAGAZINE_ENABLE |
1353 PP_REGION_CONFIG_BUF_IODIR_BIDIR;
1354 /*
1355 * Special handling for external nexus providers; similar
1356 * logic to what's done in kern_pbufpool_create().
1357 */
1358 if (init != NULL) {
1359 if (init->nxpi_flags & NXPIF_MONOLITHIC) {
1360 pp_region_config_flags |=
1361 PP_REGION_CONFIG_BUF_MONOLITHIC;
1362 }
1363
1364 if (init->nxpi_flags & NXPIF_INHIBIT_CACHE) {
1365 pp_region_config_flags |=
1366 PP_REGION_CONFIG_BUF_NOCACHE;
1367 }
1368 }
1369
1370 /*
1371 * For network devices, set the packet metadata memory as persistent
1372 * so that it is wired at segment creation. This allows us to access
1373 * it with preemption disabled, as well as for rdar://problem/46511741.
1374 */
1375 if (nxdom_prov->nxdom_prov_dom->nxdom_type == NEXUS_TYPE_NET_IF) {
1376 pp_region_config_flags |= PP_REGION_CONFIG_MD_PERSISTENT;
1377 }
1378
1379 /* process and validate provider parameters */
1380 if ((*err = nxdom_prov_validate_params(nxdom_prov, reg,
1381 &nxp, srp, override, pp_region_config_flags)) != 0) {
1382 goto done;
1383 }
1384
1385 nxprov = nxprov_alloc(nxdom_prov, Z_WAITOK);
1386 ASSERT(nxprov->nxprov_dom_prov == nxdom_prov);
1387
1388 STAILQ_INIT(&nxprov->nxprov_nx_head);
1389 STAILQ_INSERT_TAIL(&nxprov_head, nxprov, nxprov_link);
1390 nxprov->nxprov_flags |= NXPROVF_ATTACHED;
1391 nxprov->nxprov_ctl = nxctl;
1392 uuid_generate_random(nxprov->nxprov_uuid);
1393 bcopy(&nxp, nxprov->nxprov_params, sizeof(struct nxprov_params));
1394
1395 if (init != NULL) {
1396 if (init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF) {
1397 ASSERT(NXPROV_LLINK(nxprov));
1398 bcopy(init, &nxprov->nxprov_netif_ext,
1399 sizeof(nxprov->nxprov_netif_ext));
1400 } else {
1401 ASSERT(!NXPROV_LLINK(nxprov));
1402 ASSERT(init->nxpi_version ==
1403 KERN_NEXUS_PROVIDER_CURRENT_VERSION);
1404 bcopy(init, &nxprov->nxprov_ext, sizeof(*init));
1405 }
1406 nxprov->nxprov_flags |= NXPROVF_EXTERNAL;
1407 }
1408
1409 /* store validated region parameters to the provider */
1410 for (i = 0; i < SKMEM_REGIONS; i++) {
1411 nxprov->nxprov_region_params[i] = srp[i];
1412 }
1413
1414 if (nxprov->nxprov_flags & NXPROVF_EXTERNAL) {
1415 uint32_t nxpi_flags = nxprov->nxprov_ext.nxpi_flags;
1416
1417 if (nxpi_flags & NXPIF_VIRTUAL_DEVICE) {
1418 nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
1419 }
1420 } else if (nxdom_prov->nxdom_prov_dom->nxdom_type !=
1421 NEXUS_TYPE_NET_IF) {
1422 /*
1423 * Treat non-netif built-in nexus providers as those
1424 * meant for inter-process communications, i.e. there
1425 * is no actual networking hardware involved.
1426 */
1427 nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
1428 }
1429
1430 nxprov_retain_locked(nxprov); /* one for being in the list */
1431 nxprov_retain_locked(nxprov); /* one for the caller */
1432
1433 #if SK_LOG
1434 uuid_string_t uuidstr;
1435 SK_D("nxprov 0x%llx UUID %s", SK_KVA(nxprov),
1436 sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr));
1437 #endif /* SK_LOG */
1438
1439 done:
1440 return nxprov;
1441 }
1442
/*
 * Create a nexus provider on behalf of a userland controller.  Checks
 * the per-type privilege for the requesting process, resolves the
 * default domain provider for the nexus type, and delegates to
 * nxprov_create_common().
 *
 * Only user-pipe, flowswitch and netif types are permitted here;
 * kernel-pipe creation must go through nxprov_create_kern().
 * Returns the provider with a reference held, or NULL with *err set.
 */
struct kern_nexus_provider *
nxprov_create(struct proc *p, struct nxctl *nxctl, struct nxprov_reg *reg,
    int *err)
{
	struct nxprov_params *nxp = &reg->nxpreg_params;
	struct kern_nexus_domain_provider *nxdom_prov = NULL;
	struct kern_nexus_provider *nxprov = NULL;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	/* this path is for userland controllers only */
	ASSERT(nxctl->nxctl_cred != proc_ucred(kernproc));
	*err = 0;

	switch (nxp->nxp_type) {
	case NEXUS_TYPE_USER_PIPE:      /* only for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_USER_PIPE);
		break;

	case NEXUS_TYPE_FLOW_SWITCH:    /* allowed for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH);
		break;

	case NEXUS_TYPE_NET_IF:         /* allowed for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_NET_IF);
		break;

	case NEXUS_TYPE_KERNEL_PIPE:    /* only for kernel */
	case NEXUS_TYPE_MONITOR:        /* invalid */
	default:
		*err = EINVAL;
		goto done;
	}

	/* privilege check failed? */
	if (*err != 0) {
		goto done;
	}

	ASSERT(nxp->nxp_type < NEXUS_TYPE_MAX);
	if ((nxdom_prov = nxdom_prov_default[nxp->nxp_type]) == NULL) {
		*err = ENXIO;
		goto done;
	}

#if CONFIG_NEXUS_NETIF
	/* make sure netif_compat is the default here */
	ASSERT(nxp->nxp_type != NEXUS_TYPE_NET_IF ||
	    strcmp(nxdom_prov->nxdom_prov_name,
	    NEXUS_PROVIDER_NET_IF_COMPAT) == 0);
#endif /* CONFIG_NEXUS_NETIF */

	SK_LOCK();
	/* callee holds a reference for our caller upon success */
	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, NULL, err);
	SK_UNLOCK();
done:
	return nxprov;
}
1503
/*
 * Create a nexus provider on behalf of an in-kernel client.  Validates
 * the type/init-struct combination (kernel pipes require an init;
 * flowswitches must not supply one; user pipes and monitors are
 * rejected), then delegates to nxprov_create_common().
 *
 * Called with both the controller lock and SK_LOCK held.  Returns the
 * provider with a reference held, or NULL with *err set.
 */
struct kern_nexus_provider *
nxprov_create_kern(struct nxctl *nxctl,
    struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
    const struct kern_nexus_provider_init *init, int *err)
{
	struct nxprov_params *nxp = &reg->nxpreg_params;
	struct kern_nexus_provider *nxprov = NULL;

	NXCTL_LOCK_ASSERT_HELD(nxctl);
	SK_LOCK_ASSERT_HELD();

	/* this path is for the kernel controller only */
	ASSERT(nxctl->nxctl_cred == proc_ucred(kernproc));
	ASSERT(nxp->nxp_type == nxdom_prov->nxdom_prov_dom->nxdom_type);
	ASSERT(init == NULL ||
	    init->nxpi_version == KERN_NEXUS_PROVIDER_CURRENT_VERSION ||
	    init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF);

	*err = 0;

	switch (nxp->nxp_type) {
	case NEXUS_TYPE_NET_IF:
		break;
	case NEXUS_TYPE_KERNEL_PIPE:
		/* kernel pipes require external callbacks */
		if (init == NULL) {
			*err = EINVAL;
			goto done;
		}
		break;
	case NEXUS_TYPE_FLOW_SWITCH:
		/* flowswitches are built-in; no external callbacks */
		if (init != NULL) {
			*err = EINVAL;
			goto done;
		}
		break;

	case NEXUS_TYPE_USER_PIPE:      /* only for userland */
	case NEXUS_TYPE_MONITOR:        /* invalid */
	default:
		*err = EINVAL;
		goto done;
	}

	/* callee holds a reference for our caller upon success */
	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, init, err);

done:
	return nxprov;
}
1552
1553 int
nxprov_destroy(struct nxctl * nxctl,const uuid_t nxprov_uuid)1554 nxprov_destroy(struct nxctl *nxctl, const uuid_t nxprov_uuid)
1555 {
1556 struct kern_nexus_provider *nxprov = NULL;
1557 int err = 0;
1558
1559 NXCTL_LOCK_ASSERT_HELD(nxctl);
1560
1561 SK_LOCK();
1562
1563 STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
1564 if (nxctl == nxprov->nxprov_ctl &&
1565 uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
1566 nxprov_retain_locked(nxprov);
1567 break;
1568 }
1569 }
1570
1571 if (nxprov == NULL) {
1572 err = ENOENT;
1573 } else {
1574 err = nxprov_close(nxprov, TRUE);
1575 }
1576
1577 if (nxprov != NULL) {
1578 (void) nxprov_release_locked(nxprov);
1579 }
1580
1581 SK_UNLOCK();
1582
1583 return err;
1584 }
1585
/*
 * Close a nexus provider: detach its controller, close every nexus
 * created on it, and either detach it immediately (no nexus instances
 * remain) or mark it NXPROVF_CLOSED so the last nexus teardown detaches
 * it later (see nx_detach()).
 *
 * locked: TRUE if the caller already holds SK_LOCK.
 * Returns EALREADY if the provider was already closed.
 */
int
nxprov_close(struct kern_nexus_provider *nxprov, boolean_t locked)
{
	int err = 0;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
	    nxprov->nxprov_flags, NXPROVF_BITS);
#endif /* SK_LOG */

	if (nxprov->nxprov_flags & NXPROVF_CLOSED) {
		err = EALREADY;
	} else {
		struct kern_nexus *nx, *tnx;

		/* the controller no longer owns this provider */
		nxprov->nxprov_ctl = NULL;

		/* close each nexus; hold a ref across nx_close() */
		STAILQ_FOREACH_SAFE(nx, &nxprov->nxprov_nx_head,
		    nx_prov_link, tnx) {
			nx_retain_locked(nx);
			(void) nx_close(nx, TRUE);
			(void) nx_release_locked(nx);
		}

		if (STAILQ_EMPTY(&nxprov->nxprov_nx_head)) {
			/* no nexus created on this, so detach now */
			nxprov_detach(nxprov, TRUE);
		} else {
			/* detach when last nexus is destroyed */
			ASSERT(nxprov->nxprov_refcnt > 1);
			nxprov->nxprov_flags |= NXPROVF_CLOSED;
		}
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return err;
}
1634
/*
 * Remove a provider from the global provider list and drop the list's
 * reference.  The caller must hold an extra reference of its own.
 *
 * locked: TRUE if the caller already holds SK_LOCK.
 */
static void
nxprov_detach(struct kern_nexus_provider *nxprov, boolean_t locked)
{
	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
	    nxprov->nxprov_flags, NXPROVF_BITS);
#endif /* SK_LOG */

	ASSERT(nxprov->nxprov_flags & NXPROVF_ATTACHED);
	STAILQ_REMOVE(&nxprov_head, nxprov, kern_nexus_provider, nxprov_link);
	nxprov->nxprov_flags &= ~NXPROVF_ATTACHED;

	/* caller must hold an extra ref */
	ASSERT(nxprov->nxprov_refcnt > 1);
	(void) nxprov_release_locked(nxprov); /* the list's reference */

	if (!locked) {
		SK_UNLOCK();
	}
}
1663
/*
 * Allocate a zeroed provider plus its parameter structure, and take a
 * reference on the domain provider on its behalf.  Returns NULL if
 * either allocation fails (possible with a non-blocking `how`), with
 * no partial state left behind.
 */
static struct kern_nexus_provider *
nxprov_alloc(struct kern_nexus_domain_provider *nxdom_prov, zalloc_flags_t how)
{
	struct kern_nexus_provider *nxprov;
	struct nxprov_params *nxp;

	ASSERT(nxdom_prov != NULL);

	/* allocate the parameter struct first; freed below on failure */
	nxp = nxprov_params_alloc(how);
	if (nxp == NULL) {
		SK_ERR("Failed to allocate nxprov_params");
		return NULL;
	}

	nxprov = zalloc_flags(nxprov_zone, how | Z_ZERO);
	if (nxprov == NULL) {
		SK_ERR("Failed to allocate nxprov");
		nxprov_params_free(nxp);
		return NULL;
	}

	nxprov->nxprov_dom_prov = nxdom_prov;
	nxprov->nxprov_params = nxp;
	/* hold a reference for nxprov; dropped in nxprov_free() */
	nxdom_prov_retain_locked(nxdom_prov);

	return nxprov;
}
1692
/*
 * Final teardown of a provider: drop the domain-provider reference
 * taken in nxprov_alloc(), free the parameter struct, and return the
 * provider to its zone.  Called from nxprov_release_locked() when the
 * last reference is dropped.
 */
static void
nxprov_free(struct kern_nexus_provider *nxprov)
{
	struct kern_nexus_domain_provider *nxdom_prov =
	    nxprov->nxprov_dom_prov;

	SK_LOCK_ASSERT_HELD();

	ASSERT(nxdom_prov != NULL);
	(void) nxdom_prov_release_locked(nxdom_prov);
	nxprov->nxprov_dom_prov = NULL;
	ASSERT(nxprov->nxprov_params != NULL);
	nxprov_params_free(nxprov->nxprov_params);
	nxprov->nxprov_params = NULL;
	/* must have been detached from the global list by now */
	ASSERT(!(nxprov->nxprov_flags & NXPROVF_ATTACHED));
	SK_DF(SK_VERB_MEM, "nxprov 0x%llx FREE", SK_KVA(nxprov));
	zfree(nxprov_zone, nxprov);
}
1711
1712 static void
nxprov_retain_locked(struct kern_nexus_provider * nxprov)1713 nxprov_retain_locked(struct kern_nexus_provider *nxprov)
1714 {
1715 SK_LOCK_ASSERT_HELD();
1716
1717 nxprov->nxprov_refcnt++;
1718 ASSERT(nxprov->nxprov_refcnt != 0);
1719 }
1720
/*
 * Unlocked wrapper: take SK_LOCK around the locked retain variant.
 */
void
nxprov_retain(struct kern_nexus_provider *nxprov)
{
	SK_LOCK();
	nxprov_retain_locked(nxprov);
	SK_UNLOCK();
}
1728
1729 static int
nxprov_release_locked(struct kern_nexus_provider * nxprov)1730 nxprov_release_locked(struct kern_nexus_provider *nxprov)
1731 {
1732 int oldref = nxprov->nxprov_refcnt;
1733
1734 SK_LOCK_ASSERT_HELD();
1735
1736 ASSERT(nxprov->nxprov_refcnt != 0);
1737 if (--nxprov->nxprov_refcnt == 0) {
1738 nxprov_free(nxprov);
1739 }
1740
1741 return oldref == 1;
1742 }
1743
/*
 * Unlocked wrapper around nxprov_release_locked(); returns non-zero
 * when the last reference was dropped.
 */
int
nxprov_release(struct kern_nexus_provider *nxprov)
{
	int last;

	SK_LOCK();
	last = nxprov_release_locked(nxprov);
	SK_UNLOCK();

	return last;
}
1755
1756 struct nxprov_params *
nxprov_params_alloc(zalloc_flags_t how)1757 nxprov_params_alloc(zalloc_flags_t how)
1758 {
1759 return zalloc_flags(nxprov_params_zone, how | Z_ZERO);
1760 }
1761
/*
 * Return a provider parameter struct to its zone.
 */
void
nxprov_params_free(struct nxprov_params *nxp)
{
	SK_DF(SK_VERB_MEM, "nxp 0x%llx FREE", SK_KVA(nxp));
	zfree(nxprov_params_zone, nxp);
}
1768
1769 static int
nx_check_pp(struct kern_nexus_provider * nxprov,struct kern_pbufpool * pp)1770 nx_check_pp(struct kern_nexus_provider *nxprov, struct kern_pbufpool *pp)
1771 {
1772 struct kern_nexus_domain_provider *nxdom_prov = nxprov->nxprov_dom_prov;
1773
1774 if ((pp->pp_flags & (PPF_EXTERNAL | PPF_CLOSED)) != PPF_EXTERNAL) {
1775 SK_ERR("Rejecting \"%s\" built-in pp", pp->pp_name);
1776 return ENOTSUP;
1777 }
1778
1779 /*
1780 * Require that the nexus domain metadata type and the
1781 * metadata type of the caller-provided pbufpool match.
1782 */
1783 if (nxdom_prov->nxdom_prov_dom->nxdom_md_type !=
1784 pp->pp_md_type ||
1785 nxdom_prov->nxdom_prov_dom->nxdom_md_subtype !=
1786 pp->pp_md_subtype) {
1787 SK_ERR("Mismatch in metadata type/subtype "
1788 "(%u/%u != %u/%u)", pp->pp_md_type,
1789 nxdom_prov->nxdom_prov_dom->nxdom_md_type,
1790 pp->pp_md_subtype,
1791 nxdom_prov->nxdom_prov_dom->nxdom_md_subtype);
1792 return EINVAL;
1793 }
1794
1795 /*
1796 * Require that the nexus provider memory configuration
1797 * has the same impedance as the caller-provided one.
1798 * Both need to be lacking or present; if one of them
1799 * is set and the other isn't, then we bail.
1800 */
1801 if (!!(PP_BUF_REGION_DEF(pp)->skr_mode & SKR_MODE_MONOLITHIC) ^
1802 !!(nxprov->nxprov_ext.nxpi_flags & NXPIF_MONOLITHIC)) {
1803 SK_ERR("Memory config mismatch: monolithic mode");
1804 return EINVAL;
1805 }
1806
1807 return 0;
1808 }
1809
/*
 * Create a nexus instance on the provider with the given UUID (which
 * must be owned by nxctl and not closed).  Optionally validates and
 * adopts caller-supplied TX/RX packet pools, runs the domain
 * provider's nexus constructor, and links the nexus onto both the
 * provider's list and the global RB tree.
 *
 * dom_type: expected domain type, or NEXUS_TYPE_UNDEFINED to skip the
 *           check.  Netif logical-link providers require both pools.
 * Returns the nexus with a reference held for the caller, or NULL
 * with *err set (ENOENT, ENODEV, EINVAL, or the ctor's error).
 */
struct kern_nexus *
nx_create(struct nxctl *nxctl, const uuid_t nxprov_uuid,
    const nexus_type_t dom_type, const void *nx_ctx,
    nexus_ctx_release_fn_t nx_ctx_release, struct kern_pbufpool *tx_pp,
    struct kern_pbufpool *rx_pp, int *err)
{
	struct kern_nexus_domain_provider *nxdom_prov;
	struct kern_nexus_provider *nxprov = NULL;
	struct kern_nexus *nx = NULL;
#if SK_LOG
	uuid_string_t uuidstr;
#endif /* SK_LOG */

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(dom_type < NEXUS_TYPE_MAX);
	ASSERT(!uuid_is_null(nxprov_uuid));
	*err = 0;

	SK_LOCK();

	/* find the provider owned by this controller with that UUID */
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (nxctl == nxprov->nxprov_ctl &&
		    uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
			break;
		}
	}

	if (nxprov == NULL || (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		SK_ERR("Provider not found or has been closed");
		*err = ENOENT;
		goto done;
	}

	nxdom_prov = nxprov->nxprov_dom_prov;
	if (dom_type != NEXUS_TYPE_UNDEFINED &&
	    (nxdom_prov->nxdom_prov_dom->nxdom_type != dom_type)) {
		SK_ERR("Mismatch in domain type (0x%u != 0x%u)",
		    dom_type, nxdom_prov->nxdom_prov_dom->nxdom_type);
		nxdom_prov = NULL;
		nxprov = NULL;
		*err = ENODEV;
		goto done;
	}

	/* llink netif providers need both packet pools up front */
	if ((dom_type == NEXUS_TYPE_NET_IF) && NXPROV_LLINK(nxprov) &&
	    (!tx_pp || !rx_pp)) {
#if SK_LOG
		SK_ERR("TX/RX packet pool is required for netif logical link "
		    "nexus provider UUID: %s",
		    sk_uuid_unparse(nxprov_uuid, uuidstr));
#endif /* SK_LOG */
		nxdom_prov = NULL;
		nxprov = NULL;
		*err = EINVAL;
		goto done;
	}

	/* validate caller-supplied pools against the provider */
	if ((tx_pp != NULL && (*err = nx_check_pp(nxprov, tx_pp)) != 0) ||
	    (rx_pp != NULL && (*err = nx_check_pp(nxprov, rx_pp)) != 0)) {
		goto done;
	}

	nx = nx_alloc(Z_WAITOK);

	STAILQ_INIT(&nx->nx_ch_head);
	STAILQ_INIT(&nx->nx_ch_nonxref_head);
	lck_rw_init(&nx->nx_ch_if_adv_lock, &nexus_lock_group,
	    &nexus_lock_attr);
	STAILQ_INIT(&nx->nx_ch_if_adv_head);
	uuid_generate_random(nx->nx_uuid);
	nx->nx_prov = nxprov;
	nx->nx_ctx = (void *)(uintptr_t)nx_ctx;
	nx->nx_ctx_release = nx_ctx_release;
	nx->nx_id = nxdom_prov->nxdom_prov_gencnt++;

	if (tx_pp != NULL) {
		nx->nx_tx_pp = tx_pp;
		pp_retain(tx_pp);       /* released by nx_free */
	}

	if (rx_pp != NULL) {
		nx->nx_rx_pp = rx_pp;
		pp_retain(rx_pp);       /* released by nx_free */
	}

	/* this nexus is alive; tell the nexus constructor to set it up */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor != NULL) {
		*err = nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor(nx);
		if (*err != 0) {
			/* clear nx_prov so nx_free()'s assertions hold */
			nx->nx_prov = NULL;
			goto done;
		}
	}

	nxprov_retain_locked(nxprov);   /* hold a ref on the nexus reg */

	STAILQ_INSERT_TAIL(&nxprov->nxprov_nx_head, nx, nx_prov_link);
	nxprov->nxprov_nx_count++;
	RB_INSERT(kern_nexus_tree, &nx_head, nx);
	atomic_bitset_32(&nx->nx_flags, NXF_ATTACHED);

	nx_retain_locked(nx);   /* one for the provider list */
	nx_retain_locked(nx);   /* one for the global list */
	nx_retain_locked(nx);   /* one for the caller */

#if SK_LOG
	SK_D("nexus 0x%llx (%s:%s) UUID %s", SK_KVA(nx),
	    nxdom_prov->nxdom_prov_dom->nxdom_name,
	    nxdom_prov->nxdom_prov_name, sk_uuid_unparse(nx->nx_uuid, uuidstr));
#endif /* SK_LOG */
done:
	SK_UNLOCK();

	/* on any failure, discard the partially-initialized nexus */
	if (*err != 0) {
		if (nx != NULL) {
			nx_free(nx);
			nx = NULL;
		}
	}
	return nx;
}
1932
1933 int
nx_destroy(struct nxctl * nxctl,const uuid_t nx_uuid)1934 nx_destroy(struct nxctl *nxctl, const uuid_t nx_uuid)
1935 {
1936 struct kern_nexus *nx = NULL;
1937 struct kern_nexus find;
1938 int err = 0;
1939
1940 NXCTL_LOCK_ASSERT_HELD(nxctl);
1941
1942 SK_LOCK();
1943
1944 uuid_copy(find.nx_uuid, nx_uuid);
1945 nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
1946 if (nx != NULL && nxctl != NX_PROV(nx)->nxprov_ctl) {
1947 nx = NULL;
1948 }
1949
1950 if (nx != NULL) {
1951 nx_retain_locked(nx);
1952 }
1953
1954 if (nx == NULL) {
1955 err = ENOENT;
1956 } else {
1957 err = nx_close(nx, TRUE);
1958 (void) nx_release_locked(nx);
1959 }
1960
1961 SK_UNLOCK();
1962
1963 return err;
1964 }
1965
1966 static inline int
nx_cmp(const struct kern_nexus * a,const struct kern_nexus * b)1967 nx_cmp(const struct kern_nexus *a, const struct kern_nexus *b)
1968 {
1969 return uuid_compare(a->nx_uuid, b->nx_uuid);
1970 }
1971
1972 struct kern_nexus *
nx_find(const uuid_t nx_uuid,boolean_t locked)1973 nx_find(const uuid_t nx_uuid, boolean_t locked)
1974 {
1975 struct kern_nexus *nx = NULL;
1976 struct kern_nexus find;
1977
1978 if (!locked) {
1979 SK_LOCK();
1980 }
1981
1982 SK_LOCK_ASSERT_HELD();
1983
1984 uuid_copy(find.nx_uuid, nx_uuid);
1985 nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
1986 if (nx != NULL && (nx->nx_flags & NXF_CLOSED)) {
1987 nx = NULL;
1988 }
1989
1990 /* return reference to caller */
1991 if (nx != NULL) {
1992 nx_retain_locked(nx);
1993 }
1994
1995 if (!locked) {
1996 SK_UNLOCK();
1997 }
1998
1999 return nx;
2000 }
2001
/*
 * Close a nexus: either detach it immediately (no channels remain
 * open to it) or mark it NXF_CLOSED so the last channel close
 * detaches it later.
 *
 * locked: TRUE if the caller already holds SK_LOCK.
 * Returns EALREADY if the nexus was already closed.
 */
int
nx_close(struct kern_nexus *nx, boolean_t locked)
{
	int err = 0;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();


	if (nx->nx_flags & NXF_CLOSED) {
		err = EALREADY;
	} else {
#if SK_LOG
		uuid_string_t uuidstr;
		SK_D("nexus 0x%llx (%s:%s) UUID %s flags 0x%b", SK_KVA(nx),
		    NX_DOM(nx)->nxdom_name, NX_DOM_PROV(nx)->nxdom_prov_name,
		    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags,
		    NXF_BITS);
#endif /* SK_LOG */

		if (STAILQ_EMPTY(&nx->nx_ch_head)) {
			/* no regular channels open to it, so detach now */
			nx_detach(nx);
		} else {
			/* detach when the last channel closes */
			ASSERT(nx->nx_refcnt > 3);
			atomic_bitset_32(&nx->nx_flags, NXF_CLOSED);
		}
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return err;
}
2041
2042 void
nx_stop(struct kern_nexus * nx)2043 nx_stop(struct kern_nexus *nx)
2044 {
2045 struct kern_nexus_provider *nxprov = nx->nx_prov;
2046
2047 SK_LOCK_ASSERT_HELD();
2048
2049 /* send a stop message */
2050 if (nxprov->nxprov_dom_prov->nxdom_prov_nx_stop != NULL) {
2051 nxprov->nxprov_dom_prov->nxdom_prov_nx_stop(nx);
2052 }
2053 }
2054
/*
 * Detach a nexus from its provider and the global tree: run the
 * domain provider's destructor, unlink from both lists, release the
 * nexus context, and drop the two list references.  If this was the
 * provider's last nexus and the provider is marked closed, finish the
 * postponed provider detach as well.
 *
 * SK_LOCK must be held; the caller must hold its own reference on nx.
 */
void
nx_detach(struct kern_nexus *nx)
{
	struct kern_nexus_provider *nxprov = nx->nx_prov;

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nexus 0x%llx UUID %s flags 0x%b", SK_KVA(nx),
	    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags, NXF_BITS);
#endif /* SK_LOG */

	/* Caller must hold extra refs, on top of the two in reg/global lists */
	ASSERT(nx->nx_refcnt >= 3);
	ASSERT(nx->nx_flags & NXF_ATTACHED);

	/* this nexus is done; let the nexus destructor do final cleanups */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor != NULL) {
		nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor(nx);
	}

	/* no channels may remain at this point */
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	STAILQ_REMOVE(&nxprov->nxprov_nx_head, nx, kern_nexus, nx_prov_link);
	nxprov->nxprov_nx_count--;
	RB_REMOVE(kern_nexus_tree, &nx_head, nx);
	atomic_bitclear_32(&nx->nx_flags, NXF_ATTACHED);
	nx->nx_prov = NULL;
	/* give the owner a chance to release its nexus context */
	if (nx->nx_ctx_release != NULL) {
		nx->nx_ctx_release(nx->nx_ctx);
	}
	nx->nx_ctx = NULL;

	(void) nx_release_locked(nx);   /* one for the reg list */
	(void) nx_release_locked(nx);   /* one for the global list */

	/*
	 * If this was the last nexus and the provider has been closed,
	 * detach the provider and and finish up the postponed job.
	 */
	if (STAILQ_EMPTY(&nxprov->nxprov_nx_head) &&
	    (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		nxprov_detach(nxprov, TRUE);
	}
	/* drop the provider reference taken at nx_create() time */
	(void) nxprov_release_locked(nxprov);
}
2103
/*
 * Create the shared advisory memory region for a nexus and initialize
 * its metadata header plus the type-specific (flowswitch or netif)
 * advisory structure that immediately follows it.
 *
 * name:          region name for skmem_region_create().
 * srp_nexusadv:  region parameters for the advisory region.
 * type:          NEXUS_ADVISORY_TYPE_FLOWSWITCH or _NETIF.
 * Returns ENOMEM if the region cannot be created, else 0.
 */
int
nx_advisory_alloc(struct kern_nexus *nx, const char *name,
    struct skmem_region_params *srp_nexusadv, nexus_advisory_type_t type)
{
	struct __kern_nexus_adv_metadata *adv_md;

	/* both advisory layouts must fit behind the metadata header */
	_CASSERT(sizeof(struct __kern_nexus_adv_metadata) == sizeof(uint64_t));
	_CASSERT((sizeof(struct sk_nexusadv) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	_CASSERT((sizeof(struct netif_nexus_advisory) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(type == NEXUS_ADVISORY_TYPE_FLOWSWITCH ||
	    type == NEXUS_ADVISORY_TYPE_NETIF);

	if ((nx->nx_adv.nxv_reg = skmem_region_create(name, srp_nexusadv,
	    NULL, NULL, NULL)) == NULL) {
		return ENOMEM;
	}

	/* SKMEM_PANIC: allocation failure panics, so no NULL check here */
	nx->nx_adv.nxv_adv = skmem_region_alloc(nx->nx_adv.nxv_reg, NULL,
	    NULL, NULL, (SKMEM_NOSLEEP | SKMEM_PANIC));
	adv_md = nx->nx_adv.nxv_adv;
	adv_md->knam_version = NX_ADVISORY_MD_CURRENT_VERSION;
	adv_md->knam_type = type;
	adv_md->__reserved = 0;
	nx->nx_adv.nxv_adv_type = type;
	/* the typed advisory struct lives right after the metadata header */
	nx->nx_adv.flowswitch_nxv_adv = (void *)(adv_md + 1);
	if (type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
		nx->nx_adv.flowswitch_nxv_adv->nxadv_ver =
		    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION;
	} else {
		nx->nx_adv.netif_nxv_adv->nna_version =
		    NX_NETIF_ADVISORY_CURRENT_VERSION;
	}
	return 0;
}
2142
/*
 * Tear down the nexus advisory region created by nx_advisory_alloc(),
 * if any, and reset the advisory state to its empty/invalid defaults.
 * Safe to call when no advisory region was ever created.
 */
void
nx_advisory_free(struct kern_nexus *nx)
{
	if (nx->nx_adv.nxv_reg != NULL) {
		ASSERT(nx->nx_adv.nxv_adv != NULL);
		/* free the advisory object, then drop the region itself */
		skmem_region_free(nx->nx_adv.nxv_reg,
		    nx->nx_adv.nxv_adv, NULL);
		nx->nx_adv.nxv_adv = NULL;
		nx->nx_adv.nxv_adv_type = NEXUS_ADVISORY_TYPE_INVALID;
		nx->nx_adv.flowswitch_nxv_adv = NULL;
		skmem_region_release(nx->nx_adv.nxv_reg);
		nx->nx_adv.nxv_reg = NULL;
	}

	/* post-conditions: advisory state fully cleared */
	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(nx->nx_adv.nxv_adv_type == NEXUS_ADVISORY_TYPE_INVALID);
	ASSERT(nx->nx_adv.flowswitch_nxv_adv == NULL);
}
2162
2163 static struct kern_nexus *
nx_alloc(zalloc_flags_t how)2164 nx_alloc(zalloc_flags_t how)
2165 {
2166 SK_LOCK_ASSERT_HELD();
2167
2168 return zalloc_flags(nx_zone, how | Z_ZERO);
2169 }
2170
/*
 * Final teardown of a nexus: release ports, drop the TX/RX packet
 * pool references taken in nx_create(), destroy the interface-advisory
 * lock, and return the memory to its zone.  The nexus must already be
 * detached (no provider, no channels).
 */
static void
nx_free(struct kern_nexus *nx)
{
	ASSERT(!(nx->nx_flags & NXF_ATTACHED) && nx->nx_prov == NULL);
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	nx_port_free_all(nx);

	/* drop pool refs taken by nx_create() */
	if (nx->nx_tx_pp != NULL) {
		pp_release(nx->nx_tx_pp);
		nx->nx_tx_pp = NULL;
	}
	if (nx->nx_rx_pp != NULL) {
		pp_release(nx->nx_rx_pp);
		nx->nx_rx_pp = NULL;
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_if_adv_head));
	lck_rw_destroy(&nx->nx_ch_if_adv_lock, &nexus_lock_group);

	SK_DF(SK_VERB_MEM, "nexus 0x%llx FREE", SK_KVA(nx));
	zfree(nx_zone, nx);
}
2195
void
nx_retain_locked(struct kern_nexus *nx)
{
	/* Take a reference on the nexus; caller must hold SK_LOCK. */
	SK_LOCK_ASSERT_HELD();

	nx->nx_refcnt++;
	VERIFY(nx->nx_refcnt > 0);      /* guard against wraparound */
}
2204
void
nx_retain(struct kern_nexus *nx)
{
	/* Convenience wrapper: acquire SK_LOCK around the locked variant. */
	SK_LOCK();
	nx_retain_locked(nx);
	SK_UNLOCK();
}
2212
2213 int
nx_release_locked(struct kern_nexus * nx)2214 nx_release_locked(struct kern_nexus *nx)
2215 {
2216 int oldref = nx->nx_refcnt;
2217
2218 SK_LOCK_ASSERT_HELD();
2219
2220 VERIFY(nx->nx_refcnt > 0);
2221 if (--nx->nx_refcnt == 0) {
2222 nx_free(nx);
2223 }
2224
2225 return oldref == 1;
2226 }
2227
/*
 * Unlocked variant of nx_release_locked(); acquires SK_LOCK for
 * the duration of the release.  Returns nonzero iff the last
 * reference was dropped.
 */
int
nx_release(struct kern_nexus *nx)
{
	int last;

	SK_LOCK_ASSERT_NOTHELD();

	SK_LOCK();
	last = nx_release_locked(nx);
	SK_UNLOCK();

	return last;
}
2241
/*
 * Invoke the external provider's ring init callback (and, via
 * nx_init_slots, its slot init callback) for every non-host ring
 * of the channel's adapter.  On failure, all rings initialized so
 * far are torn down through nx_fini_rings() before returning.
 */
static int
nx_init_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	boolean_t undo = FALSE;
	int ksd_retains = 0;    /* count of rings whose slots got inited */
	enum txrx t;
	int err = 0;

	/* channel must be in pre-connect state, not yet connected */
	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)) ==
	    CHANF_EXT_PRECONNECT);

	/* nothing to do if the provider registered no ring init callback */
	if (nxprov->nxprov_ext.nxpi_ring_init == NULL) {
		return 0;
	}

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			/* skip host rings */
			if (kring->ckr_flags & CKRF_HOST) {
				continue;
			}

			if ((err = nxprov->nxprov_ext.nxpi_ring_init(
			    nxprov, nx, ch, kring, (kring->ckr_tx == NR_TX),
			    &kring->ckr_ctx)) != 0) {
				SK_D("ch 0x%llx flags %b nx 0x%llx kr \"%s\" "
				    "(0x%llx) krflags %b ring_init error %d",
				    SK_KVA(ch), ch->ch_flags, CHANF_BITS,
				    SK_KVA(nx), kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS, err);
				kring->ckr_ctx = NULL;
				undo = TRUE;
				break;
			}
			kring->ckr_flags |= CKRF_EXT_RING_INITED;

			if ((err = nx_init_slots(nx, kring)) != 0) {
				undo = TRUE;
				break;
			}

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_retains;
			}
		}
		if (undo) {
			break;
		}
	}

	/*
	 * Note: retain KSD even in case of error, as we have set
	 * CKRF_EXT_SLOTS_INITED flag for some of the rings
	 * nx_fini_rings would take care of release based on it.
	 */
	if (ksd_retains != 0) {
		/*
		 * Mark the kernel slot descriptor region as busy; this
		 * prevents it from being torn-down at channel defunct
		 * time, as we need to invoke the slot_fini() callback
		 * for each slot and we need the descriptors until then.
		 */
		skmem_arena_nexus_sd_set_noidle(skmem_arena_nexus(na->na_arena),
		    ksd_retains);
	}

	if (err != 0) {
		ASSERT(undo);
		nx_fini_rings(nx, ch);
	}

	return err;
}
2321
/*
 * Undo nx_init_rings(): invoke the provider's ring fini callback
 * for every ring that was successfully initialized, tear down its
 * slots, and drop the busy retain counts taken on the kernel slot
 * descriptor region.
 */
static void
nx_fini_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	int ksd_releases = 0;   /* busy counts to drop at the end */
	enum txrx t;

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			/* only rings that completed nxpi_ring_init() */
			if (!(kring->ckr_flags & CKRF_EXT_RING_INITED)) {
				continue;
			}

			ASSERT(!(kring->ckr_flags & CKRF_HOST));
			ASSERT(nxprov->nxprov_ext.nxpi_ring_fini != NULL);
			nxprov->nxprov_ext.nxpi_ring_fini(nxprov, nx, kring);
			kring->ckr_flags &= ~CKRF_EXT_RING_INITED;

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_releases;
			}

			/*
			 * Undo the work done in nx_init_slots() and inform
			 * the external domain provider, if applicable, that
			 * the slots for this ring are no longer valid.
			 */
			nx_fini_slots(nx, kring);
			kring->ckr_ctx = NULL;
		}
	}

	if (ksd_releases != 0) {
		/*
		 * Now that we've finished invoking the slot_fini()
		 * callbacks, release the busy retain counts held
		 * earlier in nx_init_rings(). This will allow the
		 * kernel slot descriptor region to be torn down.
		 */
		skmem_arena_nexus_sd_set_noidle(
			skmem_arena_nexus(na->na_arena), -ksd_releases);
	}
}
2370
/*
 * Invoke the provider's slot init callback on every slot of the
 * kring, recording the returned per-slot context argument.  On
 * failure, previously initialized slots are unwound via
 * nx_fini_slots().  Returns 0 on success or the callback's error.
 */
static int
nx_init_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	int err = 0;
	uint32_t i;

	/*
	 * If the slot init callback was not provided, or if the
	 * kring was not created to hold any slot contexts, don't
	 * go any further.
	 */
	if (nxprov->nxprov_ext.nxpi_slot_init == NULL ||
	    kring->ckr_slot_ctxs == NULL) {
		return 0;
	}

	ASSERT(kring->ckr_slot_ctxs_set == 0);
	ASSERT(slot != NULL);

	for (i = 0; i < kring->ckr_num_slots; i++) {
		struct kern_slot_prop *slot_ctx_prop = NULL;
		void *slot_ctx_arg = NULL;

		ASSERT(&slot[i] <= kring->ckr_ksds_last);
		if ((err = nxprov->nxprov_ext.nxpi_slot_init(nxprov, nx, kring,
		    &slot[i], i, &slot_ctx_prop, &slot_ctx_arg)) != 0) {
			SK_D("nx 0x%llx kr \"%s\" (0x%llx) krflags %b slot %u "
			    "slot_init error %d", SK_KVA(nx), kring->ckr_name,
			    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, i, err);
			break;
		}
		/* we don't want this to be used by client, so verify here */
		ASSERT(slot_ctx_prop == NULL);
		kring->ckr_slot_ctxs[i].slot_ctx_arg =
		    (mach_vm_address_t)slot_ctx_arg;
		/* ckr_slot_ctxs_set tracks how many slots need unwinding */
		kring->ckr_slot_ctxs_set++;
	}

	if (err != 0) {
		nx_fini_slots(nx, kring);
	} else {
		kring->ckr_flags |= CKRF_EXT_SLOTS_INITED;
	}

	return err;
}
2419
/*
 * Undo nx_init_slots(): invoke the provider's slot fini callback
 * for each slot that was initialized (ckr_slot_ctxs_set of them)
 * and clear the stored per-slot context arguments.  Also called
 * on the partial-failure path of nx_init_slots().
 */
static void
nx_fini_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	uint32_t i;

	/* a fully-inited kring implies a fini callback and valid slots */
	ASSERT(!(kring->ckr_flags & CKRF_EXT_SLOTS_INITED) ||
	    nxprov->nxprov_ext.nxpi_slot_fini != NULL);
	ASSERT(slot != NULL || !(kring->ckr_flags & CKRF_EXT_SLOTS_INITED));

	for (i = 0; i < kring->ckr_slot_ctxs_set; i++) {
		ASSERT(slot != NULL && &slot[i] <= kring->ckr_ksds_last);
		if (nxprov->nxprov_ext.nxpi_slot_fini != NULL) {
			nxprov->nxprov_ext.nxpi_slot_fini(nxprov, nx,
			    kring, &slot[i], i);
		}
		if (kring->ckr_slot_ctxs != NULL) {
			kring->ckr_slot_ctxs[i].slot_ctx_arg = 0;
		}
	}
	kring->ckr_slot_ctxs_set = 0;

	/* We're done with this kring */
	kring->ckr_flags &= ~CKRF_EXT_SLOTS_INITED;
}
2446
2447
/* 64-bit mask with range */
#define BMASK64(_beg, _end)     \
	((NX_PORT_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))

/*
 * Find a free nexus port in the half-open range [first, last).
 * The port bitmap is an array of 64-bit chunks where a set bit
 * means "free".  On success *nx_port holds either a free port
 * within the current map, or an index past the map's end so that
 * nx_port_alloc() grows the map later.  Returns EBUSY when the
 * whole requested range lies inside the map and is occupied.
 */
int
nx_port_find(struct kern_nexus *nx, nexus_port_t first,
    nexus_port_t last, nexus_port_t *nx_port)
{
	int err = 0;

	ASSERT(first < last);
	*nx_port = NEXUS_PORT_ANY;

	/*
	 * NOTE(review): the left-edge test uses (first + 1) >=
	 * nx_num_ports, which also routes first == nx_num_ports - 1
	 * (the last in-map port) to the grow path — confirm intended.
	 */
	if (nx->nx_num_ports == 0 || (first + 1) >= nx->nx_num_ports) {
		/*
		 * Left edge of the range is beyond the current map;
		 * let nx_port_alloc() handle the growing later.
		 */
		*nx_port = first;
	} else {
		nexus_port_size_t fc = (first / NX_PORT_CHUNK);
		nexus_port_size_t lc = (MIN(last, nx->nx_num_ports) / NX_PORT_CHUNK);
		nexus_port_size_t lim = (nx->nx_num_ports / NX_PORT_CHUNK);
		nexus_port_size_t i, j;
		bitmap_t *bmap;

		/*
		 * The right edge of the range is either within or
		 * beyond the current map; scan thru the current
		 * map and find the first available port.
		 */
		for (i = fc; i <= lc; i++) {
			bitmap_t mask;
			nexus_port_size_t beg = 0, end = 63;

			/* clip the mask at the range's edges */
			if (i == fc) {
				beg = (first % NX_PORT_CHUNK);
			}
			if (i == (last / NX_PORT_CHUNK)) {
				end = (last % NX_PORT_CHUNK);
			}

			if (i < lim) {
				bmap = &nx->nx_ports_bmap[i];
				mask = BMASK64(beg, end);

				/* ffsll() is 1-based; 0 means no free bit */
				j = (nexus_port_size_t)ffsll((*bmap) & mask);
				if (j == 0) {
					continue;
				}

				--j;
				*nx_port = (i * NX_PORT_CHUNK) + j;
			}
			break;
		}

		/*
		 * If the requested range is within the current map and we
		 * couldn't find a port, return an err.  Otherwise, return
		 * the next port index to trigger growing later.
		 */
		if (*nx_port == NEXUS_PORT_ANY) {
			if (lc == (last / NX_PORT_CHUNK)) {
				err = EBUSY;
				SK_ERR("port unavail in [%u, %u)", first, last);
			} else {
				*nx_port = nx->nx_num_ports;
			}
		}
	}

	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d (err %d)", SK_KVA(nx),
	    (int)*nx_port, err);

	return err;
}
2525
/*
 * Grow the nexus port map by `grow' ports (a multiple of
 * NX_PORT_CHUNK): reallocate the free bitmap and the nx_port_info
 * array, then mark all new chunks fully free.  Returns EDOM when
 * the result would exceed the domain's port limit, ENOMEM on
 * allocation failure.
 */
static int
nx_port_grow(struct kern_nexus *nx, nexus_port_size_t grow)
{
	ASSERT(NXDOM_MAX(NX_DOM(nx), ports) <= NEXUS_PORT_MAX);
	nexus_port_t dom_port_max = (nexus_port_size_t)NXDOM_MAX(NX_DOM(nx), ports);
	struct nx_port_info *ports;
	size_t limit;
	nexus_port_size_t i, num_ports, old_num_ports;
	bitmap_t *bmap;

	ASSERT(grow > 0 && (grow % NX_PORT_CHUNK) == 0);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	/* one bitmap word covers exactly one chunk of ports */
	_CASSERT((sizeof(*bmap) * 8) == NX_PORT_CHUNK);
	ASSERT(powerof2(dom_port_max));
	ASSERT(dom_port_max % NX_PORT_CHUNK == 0);

	old_num_ports = nx->nx_num_ports;
	num_ports = nx->nx_num_ports + grow;
	limit = P2ROUNDUP(dom_port_max, NX_PORT_CHUNK);
	if (num_ports > limit) {
		SK_ERR("can't grow, total %u grow %u (new %u > dom_max %u)",
		    nx->nx_num_ports, grow, num_ports, limit);
		return EDOM;
	}

	if ((bmap = sk_realloc_data(nx->nx_ports_bmap,
	    (old_num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    (num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		SK_ERR("bmap alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}
	nx->nx_ports_bmap = bmap;

	if ((ports = sk_realloc_type_array(struct nx_port_info, old_num_ports,
	    num_ports, nx->nx_ports, Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		/* can't free bmap here, otherwise nexus won't work */
		SK_ERR("nx_ports alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}

	/* initialize the additional new ports */
	bzero(&ports[nx->nx_num_ports], (grow * sizeof(*ports)));
	nx->nx_ports = ports;

	/* initialize new bitmaps (set all bits) */
	for (i = (nx->nx_num_ports / NX_PORT_CHUNK);
	    i < (num_ports / NX_PORT_CHUNK); i++) {
		bmap[i] = NX_PORT_CHUNK_FREE;
	}

	nx->nx_num_ports = num_ports;

	SK_DF(SK_VERB_NXPORT, "!!! nx 0x%llx ports %u/%u, %u ports added",
	    SK_KVA(nx), nx->nx_active_ports, nx->nx_num_ports, grow);

	return 0;
}
2584
2585 int
nx_port_alloc(struct kern_nexus * nx,nexus_port_t nx_port,struct nxbind * nxb,struct nexus_adapter ** na,struct proc * p)2586 nx_port_alloc(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb,
2587 struct nexus_adapter **na, struct proc *p)
2588 {
2589 struct nx_port_info *npi = NULL;
2590 struct nxbind *nxb0;
2591 size_t g;
2592 uint32_t i, j;
2593 bitmap_t *bmap;
2594 bool refonly = false;
2595 int err = 0;
2596
2597 ASSERT(nx_port != NEXUS_PORT_ANY);
2598 ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2599
2600 /* port is zero-based, so adjust here */
2601 if ((nx_port + 1) > nx->nx_num_ports) {
2602 g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
2603 VERIFY(g <= NEXUS_PORT_MAX);
2604 if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
2605 goto done;
2606 }
2607 }
2608 ASSERT(err == 0);
2609 ASSERT(nx_port < nx->nx_num_ports);
2610 npi = &nx->nx_ports[nx_port];
2611 nxb0 = npi->npi_nxb;
2612 i = nx_port / NX_PORT_CHUNK;
2613 j = nx_port % NX_PORT_CHUNK;
2614 bmap = &nx->nx_ports_bmap[i];
2615
2616 if (bit_test(*bmap, j)) {
2617 /* port is not (yet) bound or allocated */
2618 ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
2619 if (p != kernproc && !NX_ANONYMOUS_PROV(nx)) {
2620 /*
2621 * If the port allocation is requested by userland
2622 * and the nexus is non-anonymous, then fail the
2623 * request.
2624 */
2625 err = EACCES;
2626 SK_ERR("user proc alloc on named nexus needs binding");
2627 } else if (na != NULL && *na != NULL) {
2628 /*
2629 * Otherwise claim it (clear bit) if the caller
2630 * supplied an adapter for this port; else, it
2631 * is just an existential check and so there's
2632 * no action needed at this point (we'll skip
2633 * the init below since vpna is NULL).
2634 */
2635 bit_clear(*bmap, j);
2636 }
2637 } else {
2638 /* if port is bound, check if credentials match */
2639 if (nxb0 != NULL && p != kernproc && !NX_ANONYMOUS_PROV(nx) &&
2640 (nxb == NULL || !nxb_is_equal(nxb0, nxb))) {
2641 SK_ERR("nexus binding mismatch");
2642 err = EACCES;
2643 } else {
2644 /*
2645 * If port is already occupied by an adapter,
2646 * see if the client is requesting a reference
2647 * to it; if so, return the adapter. Otherwise,
2648 * if unoccupied and vpna is non-NULL, associate
2649 * it with this nexus port via the below init.
2650 */
2651 if (NPI_NA(npi) != NULL) {
2652 if (na != NULL && *na == NULL) {
2653 *na = NPI_NA(npi);
2654 na_retain_locked(*na);
2655 /* skip the init below */
2656 refonly = true;
2657 } else {
2658 /*
2659 * If the client supplied an adapter
2660 * (regardless of its value) for a
2661 * nexus port that's already occupied,
2662 * then we fail the request.
2663 */
2664 SK_ERR("nexus adapted exits");
2665 err = EEXIST;
2666 }
2667 }
2668 }
2669 }
2670
2671 done:
2672 /* initialize the nexus port and the adapter occupying it */
2673 if (err == 0 && na != NULL && *na != NULL && !refonly) {
2674 ASSERT(nx_port < nx->nx_num_ports);
2675 ASSERT(npi->npi_nah == 0);
2676 ASSERT(nx->nx_active_ports < nx->nx_num_ports);
2677 ASSERT(!bit_test(nx->nx_ports_bmap[nx_port / NX_PORT_CHUNK],
2678 (nx_port % NX_PORT_CHUNK)));
2679
2680 nx->nx_active_ports++;
2681 npi->npi_nah = NPI_NA_ENCODE(*na, NEXUS_PORT_STATE_WORKING);
2682 (*na)->na_nx_port = nx_port;
2683 }
2684
2685 SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d, ports %u/%u (err %d)",
2686 SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports,
2687 err);
2688
2689 return err;
2690 }
2691
void
nx_port_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	/*
	 * Flip the port's encoded state to DEFUNCT.  Note the first
	 * argument passed to NPI_NA_ENCODE is the existing npi_nah
	 * value (not a fresh adapter pointer), so the encoded adapter
	 * bits are carried over and only the state portion changes.
	 */
	struct nx_port_info *npi = &nx->nx_ports[nx_port];

	npi->npi_nah = NPI_NA_ENCODE(npi->npi_nah,
	    NEXUS_PORT_STATE_DEFUNCT);
}
2700
/*
 * Release a previously-allocated nexus port: clear its adapter
 * encoding and, when no binding credentials remain attached, mark
 * the port free again in the bitmap.  A still-bound port stays
 * claimed (bit remains clear) for its bound owner.
 */
void
nx_port_free(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	bitmap_t *bmap;
	uint32_t i, j;

	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nx_port != NEXUS_PORT_ANY && nx_port < nx->nx_num_ports);
	ASSERT(nx->nx_active_ports != 0);

	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	/* an allocated port must currently be claimed (bit clear) */
	ASSERT(!bit_test(*bmap, j));

	npi = &nx->nx_ports[nx_port];
	npi->npi_nah = 0;
	if (npi->npi_nxb == NULL) {
		/* it's vacant, release it (set bit) */
		bit_set(*bmap, j);
	}

	nx->nx_active_ports--;

	//XXX [email protected] --- try to shrink bitmap & nx_ports ???

	SK_DF(SK_VERB_NXPORT, "--- nx 0x%llx nx_port %d, ports %u/%u",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports);
}
2731
2732 int
nx_port_bind_info(struct kern_nexus * nx,nexus_port_t nx_port,struct nxbind * nxb0,void * info)2733 nx_port_bind_info(struct kern_nexus *nx, nexus_port_t nx_port,
2734 struct nxbind *nxb0, void *info)
2735 {
2736 struct nx_port_info *npi = NULL;
2737 size_t g;
2738 uint32_t i, j;
2739 bitmap_t *bmap;
2740 int err = 0;
2741
2742 ASSERT(nx_port != NEXUS_PORT_ANY);
2743 ASSERT(nx_port < NXDOM_MAX(NX_DOM(nx), ports));
2744 ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2745 ASSERT(nxb0 != NULL);
2746
2747 if ((nx_port) + 1 > nx->nx_num_ports) {
2748 g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
2749 VERIFY(g <= NEXUS_PORT_MAX);
2750 if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
2751 goto done;
2752 }
2753 }
2754 ASSERT(err == 0);
2755
2756 npi = &nx->nx_ports[nx_port];
2757 i = nx_port / NX_PORT_CHUNK;
2758 j = nx_port % NX_PORT_CHUNK;
2759 bmap = &nx->nx_ports_bmap[i];
2760 if (bit_test(*bmap, j)) {
2761 /* port is not (yet) bound or allocated */
2762 ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
2763
2764 bit_clear(*bmap, j);
2765 struct nxbind *nxb = nxb_alloc(Z_WAITOK);
2766 nxb_move(nxb0, nxb);
2767 npi->npi_nxb = nxb;
2768 npi->npi_info = info;
2769 /* claim it (clear bit) */
2770 bit_clear(*bmap, j);
2771 ASSERT(err == 0);
2772 } else {
2773 /* port is already taken */
2774 ASSERT(NPI_NA(npi) != NULL || npi->npi_nxb != NULL);
2775 err = EEXIST;
2776 }
2777 done:
2778
2779 SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
2780 "+++ nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
2781 (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);
2782
2783 return err;
2784 }
2785
int
nx_port_bind(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb0)
{
	/* bind credentials with no port-specific info attached */
	return nx_port_bind_info(nx, nx_port, nxb0, NULL);
}
2791
2792 static int
nx_port_info_size(void * info,size_t * sz)2793 nx_port_info_size(void *info, size_t *sz)
2794 {
2795 struct nx_port_info_header *hdr = info;
2796
2797 switch (hdr->ih_type) {
2798 case NX_PORT_INFO_TYPE_NETIF:
2799 break;
2800 default:
2801 return EINVAL;
2802 }
2803 *sz = hdr->ih_size;
2804 return 0;
2805 }
2806
/*
 * Remove the binding credentials (and any attached port info)
 * from a nexus port.  If no adapter still occupies the port, it
 * is returned to the free bitmap.  Returns EDOM for an out-of-map
 * port, ENOENT when the port was not bound.
 */
int
nx_port_unbind(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (nx_port >= nx->nx_num_ports) {
		err = EDOM;
		goto done;
	}

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if ((nxb = npi->npi_nxb) == NULL) {
		/* must be either free or allocated */
		ASSERT(NPI_NA(npi) == NULL ||
		    (!bit_test(*bmap, j) && nx->nx_active_ports > 0));
		err = ENOENT;
	} else {
		nxb_free(nxb);
		npi->npi_nxb = NULL;
		if (npi->npi_info != NULL) {
			size_t sz;

			/* size comes from the info header itself */
			VERIFY(nx_port_info_size(npi->npi_info, &sz) == 0);
			sk_free_data(npi->npi_info, sz);
			npi->npi_info = NULL;
		}
		/* a bound port must be claimed in the bitmap */
		ASSERT(!bit_test(*bmap, j));
		if (NPI_NA(npi) == NULL) {
			/* it's vacant, release it (set bit) */
			bit_set(*bmap, j);
		}
	}

done:
	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "--- nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}
2857
2858 struct nexus_adapter *
nx_port_get_na(struct kern_nexus * nx,nexus_port_t nx_port)2859 nx_port_get_na(struct kern_nexus *nx, nexus_port_t nx_port)
2860 {
2861 if (nx->nx_ports != NULL && nx->nx_num_ports > nx_port) {
2862 return NPI_NA(&nx->nx_ports[nx_port]);
2863 } else {
2864 return NULL;
2865 }
2866 }
2867
/*
 * Copy the port-specific info of the given type out to the
 * caller-provided buffer.  Returns ENXIO for an invalid port,
 * ENOENT when no info is attached, EINVAL on a type mismatch.
 */
int
nx_port_get_info(struct kern_nexus *nx, nexus_port_t port,
    nx_port_info_type_t type, void *info, uint32_t len)
{
	struct nx_port_info *npi;
	struct nx_port_info_header *hdr;

	if (nx->nx_ports == NULL || port >= nx->nx_num_ports) {
		return ENXIO;
	}
	npi = &nx->nx_ports[port];
	hdr = npi->npi_info;
	if (hdr == NULL) {
		return ENOENT;
	}

	if (hdr->ih_type != type) {
		return EINVAL;
	}

	/*
	 * NOTE(review): len is not validated against hdr->ih_size, so
	 * a len larger than the stored info would over-read npi_info —
	 * presumably all callers pass the exact struct size; confirm.
	 */
	bcopy(npi->npi_info, info, len);
	return 0;
}
2891
bool
nx_port_is_valid(struct kern_nexus *nx, nexus_port_t nx_port)
{
	/* a port is valid iff it falls within the current port map */
	return nx_port < nx->nx_num_ports;
}
2897
bool
nx_port_is_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	/* caller must pass a port within the current map */
	ASSERT(nx_port_is_valid(nx, nx_port));

	return NPI_IS_DEFUNCT(&nx->nx_ports[nx_port]);
}
2905
/*
 * Release every port's binding credentials and attached info,
 * then free the port bitmap and the port array themselves.
 * Called from the nexus destructor (nx_free).
 */
void
nx_port_free_all(struct kern_nexus *nx)
{
	uint32_t num_ports;

	/* uncrustify doesn't handle C blocks properly */
	/* BEGIN IGNORE CODESTYLE */
	nx_port_foreach(nx, ^(nexus_port_t p) {
		struct nxbind *nxb;
		void *info;
		nxb = nx->nx_ports[p].npi_nxb;
		info = nx->nx_ports[p].npi_info;
		if (nxb != NULL) {
			nxb_free(nxb);
			nx->nx_ports[p].npi_nxb = NULL;
		}
		if (info != NULL) {
			size_t sz;

			/* size is recorded in the info header */
			VERIFY(nx_port_info_size(info, &sz) == 0);
			skn_free_data(info, info, sz);
			nx->nx_ports[p].npi_info = NULL;
		}
	});
	/* END IGNORE CODESTYLE */

	/* tear down the map itself; nexus has no usable ports after this */
	num_ports = nx->nx_num_ports;
	nx->nx_num_ports = 0;
	nx->nx_active_ports = 0;
	skn_free_data(ports_bmap,
	    nx->nx_ports_bmap, (num_ports / NX_PORT_CHUNK) * sizeof(bitmap_t));
	nx->nx_ports_bmap = NULL;
	sk_free_type_array(struct nx_port_info, num_ports, nx->nx_ports);
	nx->nx_ports = NULL;
}
2941
/*
 * Invoke the given block on every claimed (bit clear) nexus port.
 * Chunks whose bitmap word is fully free are skipped wholesale.
 */
void
nx_port_foreach(struct kern_nexus *nx,
    void (^port_handle)(nexus_port_t nx_port))
{
	for (nexus_port_size_t i = 0; i < (nx->nx_num_ports / NX_PORT_CHUNK); i++) {
		bitmap_t bmap = nx->nx_ports_bmap[i];

		/* all bits set means every port in this chunk is free */
		if (bmap == NX_PORT_CHUNK_FREE) {
			continue;
		}

		for (nexus_port_size_t j = 0; j < NX_PORT_CHUNK; j++) {
			/* set bit == free port; skip it */
			if (bit_test(bmap, j)) {
				continue;
			}
			port_handle((i * NX_PORT_CHUNK) + j);
		}
	}
}
2961
/*
 * sysctl interfaces
 */
static int nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS;

/* enumeration of nexus providers and their channels */
SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_provider_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_provider_list_sysctl, "S,nexus_provider_info_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_channel_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_channel_list_sysctl, "S,nexus_channel_entry_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, llink_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_LLINK_LIST, nexus_mib_get_sysctl, "S,nx_llink_info",
    "A list of logical links");

/*
 * MIB statistics exports; all are dispatched through
 * nexus_mib_get_sysctl keyed by the NXMIB_* argument.
 */
SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
    0, NXMIB_FLOW, nexus_mib_get_sysctl, "S,sk_stats_flow",
    "Nexus inet flows with stats collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_owner,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_OWNER, nexus_mib_get_sysctl, "S,sk_stats_flow_owner",
    "Nexus flow owners");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_route,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ROUTE, nexus_mib_get_sysctl, "S,sk_stats_flow_route",
    "Nexus flow routes");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, net_if,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_STATS, nexus_mib_get_sysctl, "S,sk_stats_net_if",
    "Nexus netif statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_switch,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FSW_STATS, nexus_mib_get_sysctl, "S,sk_stats_flow_switch",
    "Nexus flowswitch statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, userstack,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_USERSTACK_STATS, nexus_mib_get_sysctl, "S,sk_stats_userstack",
    "Nexus userstack statistics counter");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_adv,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ADV, nexus_mib_get_sysctl, "S,sk_stats_flow_adv",
    "Nexus flow advisory dump");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netif_queue,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_QUEUE_STATS, nexus_mib_get_sysctl, "S,netif_qstats_info",
    "A list of netif queue stats entries");
3021
/*
 * Provider list sysctl
 */
/*
 * Fill one nexus_provider_info record: provider UUID, provider
 * parameters, and the UUID of every nexus instance belonging to
 * the provider.  The caller must size the record for
 * nxprov_nx_count instances (see NEXUS_PROVIDER_INFO_SIZE).
 */
static void
nexus_provider_info_populate(struct kern_nexus_provider *nxprov,
    nexus_provider_info_t info)
{
	struct kern_nexus *nx;
	uuid_t *uuids;

	SK_LOCK_ASSERT_HELD();

	/* provider UUID + params */
	uuid_copy(info->npi_prov_uuid, nxprov->nxprov_uuid);
	bcopy(nxprov->nxprov_params, &info->npi_prov_params,
	    sizeof(struct nxprov_params));
	info->npi_instance_uuids_count = nxprov->nxprov_nx_count;

	/* instance UUID list */
	uuids = info->npi_instance_uuids;
	STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
		uuid_copy(*uuids, nx->nx_uuid);
		uuids++;
	}
}
3047
/*
 * sysctl handler for kern.skywalk.nexus_provider_list: copy one
 * variable-sized nexus_provider_info record per registered
 * provider out to userland.  A NULL oldptr request returns only
 * the required buffer size (standard sysctl sizing probe).
 * Root-only.
 */
static int
nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	int error = 0;
	struct kern_nexus_provider *nxprov;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		/* clamp the user-supplied size to a sane maximum */
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		/* sizing probe: report the needed length only */
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		size_t info_size;

		/* record size scales with the provider's instance count */
		info_size
			= NEXUS_PROVIDER_INFO_SIZE(nxprov->nxprov_nx_count);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			nexus_provider_info_populate(nxprov, (void *)scan);
			scan += info_size;
			buffer_space -= info_size;
		}
		/* always accumulate the total needed size */
		actual_space += info_size;
	}
	SK_UNLOCK();

	out_error = SYSCTL_OUT(req, buffer, actual_space);
	if (out_error != 0) {
		error = out_error;
	}

	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}
3112
3113 /*
3114 * Channel list sysctl
3115 */
3116 static uint32_t
channel_ring_count(struct kern_channel * ch,enum txrx which)3117 channel_ring_count(struct kern_channel *ch, enum txrx which)
3118 {
3119 return ch->ch_last[which] - ch->ch_first[which];
3120 }
3121
/*
 * Fill one nexus_channel_ring_entry per ring in [first, last):
 * ring/user statistics (zeroed when kr_stat_enable is off), error
 * statistics, and the ring id.  `entries' must have room for
 * (last - first) records.
 */
static void
populate_ring_entries(struct __kern_channel_ring *kring,
    ring_id_t first, ring_id_t last, nexus_channel_ring_entry_t entries)
{
	ring_id_t i;
	nexus_channel_ring_entry_t scan;
	struct __kern_channel_ring *ring;

	scan = entries;
	for (i = first; i < last; i++, scan++) {
		ring = &kring[i];

		DTRACE_SKYWALK1(populate__ring, struct __kern_channel_ring *,
		    ring);
		if (kr_stat_enable == 0) {
			/* stats collection disabled: report zeroes */
			bzero(&scan->ncre_stats, sizeof(scan->ncre_stats));
			bzero(&scan->ncre_user_stats,
			    sizeof(scan->ncre_user_stats));
		} else {
			scan->ncre_stats = ring->ckr_stats;
			scan->ncre_user_stats = ring->ckr_usr_stats;
		}
		scan->ncre_error_stats = ring->ckr_err_stats;
		scan->ncre_ring_id = i;
	}
}
3148
3149 /* combine/convert ch_mode/ch_flags into nexus_channel_entry flags */
3150 static uint32_t
nexus_channel_get_flags(uint32_t ch_mode,uint32_t ch_flags)3151 nexus_channel_get_flags(uint32_t ch_mode, uint32_t ch_flags)
3152 {
3153 uint32_t flags = 0;
3154
3155 flags |= (ch_mode & CHMODE_MONITOR_TX) ? SCHF_MONITOR_TX : 0;
3156 flags |= (ch_mode & CHMODE_MONITOR_RX) ? SCHF_MONITOR_RX : 0;
3157 flags |= (ch_mode & CHMODE_MONITOR_NO_COPY) ? SCHF_MONITOR_NO_COPY : 0;
3158 flags |= (ch_mode & CHMODE_USER_PACKET_POOL) ? SCHF_USER_PACKET_POOL : 0;
3159 flags |= (ch_mode & CHMODE_DEFUNCT_OK) ? SCHF_DEFUNCT_OK : 0;
3160 flags |= (ch_mode & CHMODE_FILTER) ? SCHF_FILTER : 0;
3161 flags |= (ch_mode & CHMODE_EVENT_RING) ? SCHF_EVENT_RING : 0;
3162 flags |= (ch_mode & CHMODE_EXCLUSIVE) ? SCHF_EXCLUSIVE : 0;
3163 flags |= (ch_flags & CHANF_IF_ADV) ? SCHF_IF_ADV : 0;
3164 flags |= (ch_flags & CHANF_DEFUNCT_SKIP) ? SCHF_DEFUNCT_SKIP : 0;
3165 flags |= (ch_flags & CHANF_CLOSING) ? SCHF_CLOSING : 0;
3166 flags |= (ch_flags & CHANF_DEFUNCT) ? SCHF_DEFUNCT : 0;
3167 flags |= (ch_mode & CHMODE_LOW_LATENCY) ? SCHF_LOW_LATENCY : 0;
3168
3169 return flags;
3170 }
3171
SK_NO_INLINE_ATTRIBUTE
static void
nexus_channel_entry_populate(struct kern_channel *ch,
    nexus_channel_entry_t entry)
{
	/*
	 * Fill one nexus_channel_entry from the channel: identity
	 * (UUID/port/pid/fd), converted flags, and per-ring stats for
	 * its TX rings followed immediately by its RX rings.  The
	 * entry must be sized for the channel's total ring count.
	 */
	uint32_t ch_mode = ch->ch_info->cinfo_ch_mode;
	uint32_t ch_flags = ch->ch_flags;
	ring_id_t rx_first = ch->ch_first[NR_RX];
	ring_id_t rx_last = ch->ch_last[NR_RX];
	ring_id_t tx_last = ch->ch_last[NR_TX];
	ring_id_t tx_first = ch->ch_first[NR_TX];

	uuid_copy(entry->nce_uuid, ch->ch_info->cinfo_ch_id);
	entry->nce_flags = nexus_channel_get_flags(ch_mode, ch_flags);
	entry->nce_port = ch->ch_info->cinfo_nx_port;
	entry->nce_pid = ch->ch_pid;
	entry->nce_fd = ch->ch_fd;
	entry->nce_tx_rings = tx_last - tx_first;
	entry->nce_rx_rings = rx_last - rx_first;
	/* TX ring entries first, then RX entries right after them */
	populate_ring_entries(ch->ch_na->na_tx_rings, tx_first, tx_last,
	    entry->nce_ring_entries);
	populate_ring_entries(ch->ch_na->na_rx_rings, rx_first, rx_last,
	    entry->nce_ring_entries + entry->nce_tx_rings);
}
3196
SK_NO_INLINE_ATTRIBUTE
static size_t
nexus_channel_info_populate(struct kern_nexus *nx,
    nexus_channel_info_t info, size_t buffer_size)
{
	/*
	 * Fill a nexus_channel_info (header plus one variable-sized
	 * entry per channel) for this nexus, and return the total
	 * number of bytes required.  With info == NULL this is a pure
	 * sizing pass; with a buffer too small, the size needed so far
	 * is returned and the copy stops (the caller compares the
	 * return value against buffer_size).
	 */
	struct kern_channel *ch = NULL;
	size_t info_size;
	caddr_t scan = NULL;

	SK_LOCK_ASSERT_HELD();

	info_size = sizeof(*info);

	/* channel list */
	if (info != NULL) {
		if (buffer_size < info_size) {
			return info_size;
		}

		/* instance UUID */
		uuid_copy(info->nci_instance_uuid, nx->nx_uuid);
		info->nci_channel_entries_count = nx->nx_ch_count;
		scan = (caddr_t)info->nci_channel_entries;
	}
	STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
		size_t entry_size;
		uint32_t ring_count;

		/* entry size scales with the channel's TX+RX ring count */
		ring_count = channel_ring_count(ch, NR_TX) +
		    channel_ring_count(ch, NR_RX);
		entry_size = NEXUS_CHANNEL_ENTRY_SIZE(ring_count);
		info_size += entry_size;
		if (scan != NULL) {
			if (buffer_size < info_size) {
				return info_size;
			}

			nexus_channel_entry_populate(ch, (void *)scan);
			scan += entry_size;
		}
	}
	return info_size;
}
3240
/*
 * Sysctl handler that reports, for every nexus in the system, the list of
 * open channels and their per-ring details.  Follows the usual two-pass
 * sysctl protocol: a request with a NULL old pointer returns only the
 * required size; otherwise the data is copied out, bounded by
 * SK_SYSCTL_ALLOC_MAX.  Root-only.
 */
static int
nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	int error = 0;
	caddr_t scan;

	/* channel list exposes pids/fds; restrict to root */
	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		/* clamp the kernel-side staging buffer to a sane maximum */
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		/* size-probe request: compute required space only */
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		size_t info_size;

		/* populate (or just size, when scan is NULL) this nexus */
		info_size = nexus_channel_info_populate(nx, (void *)scan,
		    buffer_space);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += info_size;
			buffer_space -= info_size;
		}
		/* running total; also the required size on a probe */
		actual_space += info_size;
	}
	SK_UNLOCK();

	/* copy out whatever was fully populated, even on ENOMEM */
	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}
3305
/*
 * Sysctl handler for nexus MIB statistics.  The subcommand in oid_arg2
 * selects the record type; an optional struct nexus_mib_filter may be
 * passed in via newptr to narrow the result.  Iterates all nexus
 * instances and asks each domain provider for its MIB payload, using the
 * standard two-pass (size-probe, then copy-out) sysctl protocol.
 */
static int
nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	struct proc *p = req->p;
	struct nexus_mib_filter filter;
	int error = 0;
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	caddr_t scan;

	/* Restrict protocol stats access to root user only (like netstat). */
	if (oidp->oid_arg2 == NXMIB_USERSTACK_STATS &&
	    !kauth_cred_issuser(kauth_cred_get())) {
		SK_ERR("mib request rejected, EPERM");
		return EPERM;
	}

	if (req->newptr == USER_ADDR_NULL) {
		/*
		 * For flow stats requests, non-root users need to provide a
		 * 5-tuple. Otherwise, we do not grant access.
		 */
		if (oidp->oid_arg2 == NXMIB_FLOW &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple not provided");
			return EPERM;
		}
		/* use subcommand for multiple nodes */
		filter.nmf_type = oidp->oid_arg2;
		filter.nmf_bitmap = 0x0;
	} else if (req->newlen != sizeof(struct nexus_mib_filter)) {
		/* caller-supplied filter must be exactly one struct */
		SK_ERR("mis-matching newlen");
		return EINVAL;
	} else {
		error = SYSCTL_IN(req, &filter, sizeof(struct nexus_mib_filter));
		if (error != 0) {
			SK_ERR("SYSCTL_IN err %d", error);
			return error;
		}
		/* filter type must agree with the node being queried */
		if (filter.nmf_type != oidp->oid_arg2) {
			SK_ERR("mis-matching nmf_type");
			return EINVAL;
		}
		/*
		 * For flow stats requests, non-root users need to set the nexus
		 * mib filter to NXMIB_FILTER_INFO_TUPLE. Otherwise, we do not
		 * grant access. This ensures that fsw_mib_get_flow looks for a
		 * flow entry that matches the given tuple of the non-root user.
		 */
		if (filter.nmf_type == NXMIB_FLOW &&
		    (filter.nmf_bitmap & NXMIB_FILTER_INFO_TUPLE) == 0 &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple filter not set");
			return EPERM;
		}
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		/* clamp the kernel-side staging buffer to a sane maximum */
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		/* size-probe request: compute required space only */
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;

	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		/* not every domain provider exports a MIB */
		if (NX_DOM_PROV(nx)->nxdom_prov_nx_mib_get == NULL) {
			continue;
		}

		size_t size;
		struct kern_nexus_domain_provider *nx_dp = NX_DOM_PROV(nx);

		/* provider returns bytes needed, filling scan if non-NULL */
		size = nx_dp->nxdom_prov_nx_mib_get(nx, &filter, scan,
		    buffer_space, p);

		if (scan != NULL) {
			if (buffer_space < size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += size;
			buffer_space -= size;
		}
		/* running total; also the required size on a probe */
		actual_space += size;
	}
	SK_UNLOCK();

	/* copy out whatever was fully populated, even on ENOMEM */
	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}
3422
3423 void
kern_nexus_walktree(kern_nexus_walktree_f_t * f,void * arg0,boolean_t is_sk_locked)3424 kern_nexus_walktree(kern_nexus_walktree_f_t *f, void *arg0,
3425 boolean_t is_sk_locked)
3426 {
3427 struct kern_nexus *nx = NULL;
3428
3429 if (!is_sk_locked) {
3430 SK_LOCK();
3431 } else {
3432 SK_LOCK_ASSERT_HELD();
3433 }
3434
3435 RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
3436 (*f)(nx, arg0);
3437 }
3438
3439 if (!is_sk_locked) {
3440 SK_UNLOCK();
3441 }
3442 }
3443
3444 errno_t
kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,struct kern_pbufpool_memory_info * rx_pool_info,struct kern_pbufpool_memory_info * tx_pool_info)3445 kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,
3446 struct kern_pbufpool_memory_info *rx_pool_info,
3447 struct kern_pbufpool_memory_info *tx_pool_info)
3448 {
3449 struct kern_pbufpool *tpp, *rpp;
3450 struct kern_nexus *nx;
3451 errno_t err = 0;
3452
3453 nx = nx_find(nx_uuid, FALSE);
3454 if (nx == NULL) {
3455 err = ENOENT;
3456 goto done;
3457 }
3458
3459 if (nx->nx_prov->nxprov_params->nxp_type != NEXUS_TYPE_NET_IF) {
3460 err = ENOTSUP;
3461 goto done;
3462 }
3463
3464 err = nx_netif_prov_nx_mem_info(nx, &tpp, &rpp);
3465 if (err != 0) {
3466 goto done;
3467 }
3468
3469 if ((tpp == NULL) && (rpp == NULL)) {
3470 err = ENOENT;
3471 goto done;
3472 }
3473
3474 if (tx_pool_info != NULL) {
3475 bzero(tx_pool_info, sizeof(*tx_pool_info));
3476 }
3477 if (rx_pool_info != NULL) {
3478 bzero(rx_pool_info, sizeof(*rx_pool_info));
3479 }
3480
3481 if ((tx_pool_info != NULL) && (tpp != NULL)) {
3482 err = kern_pbufpool_get_memory_info(tpp, tx_pool_info);
3483 if (err != 0) {
3484 goto done;
3485 }
3486 }
3487
3488 if ((rx_pool_info != NULL) && (rpp != NULL)) {
3489 err = kern_pbufpool_get_memory_info(rpp, rx_pool_info);
3490 }
3491
3492 done:
3493 if (nx != NULL) {
3494 (void) nx_release(nx);
3495 nx = NULL;
3496 }
3497 return err;
3498 }
3499
/*
 * Post an interface-advisory update event (CHAN_FILT_HINT_IF_ADV_UPD) to
 * every channel that registered for advisories on this nexus.  Supported
 * only for netif and flowswitch nexus types; any other type panics.
 * Best-effort: if the advisory rwlock cannot be taken immediately the
 * update is dropped (and counted) rather than blocking the caller.
 */
void
nx_interface_advisory_notify(struct kern_nexus *nx)
{
	struct kern_channel *ch;
	struct netif_stats *nifs;
	struct fsw_stats *fsw_stats;
	nexus_type_t nxdom_type = NX_DOM(nx)->nxdom_type;

	/* pick the stats block matching the nexus type; others are invalid */
	if (nxdom_type == NEXUS_TYPE_NET_IF) {
		nifs = &NX_NETIF_PRIVATE(nx)->nif_stats;
	} else if (nxdom_type == NEXUS_TYPE_FLOW_SWITCH) {
		fsw_stats = &NX_FSW_PRIVATE(nx)->fsw_stats;
	} else {
		VERIFY(0);
		__builtin_unreachable();
	}
	/* non-blocking: a contended lock means drop this advisory */
	if (!lck_rw_try_lock_shared(&nx->nx_ch_if_adv_lock)) {
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_DROP);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_DROP);
		}
		return;
	}
	/*
	 * if the channel is in "nx_ch_if_adv_head" list, then we can
	 * safely assume that the channel is not closed yet.
	 * In ch_close_common(), the channel is removed from the
	 * "nx_ch_if_adv_head" list holding the "nx_ch_if_adv_lock" in
	 * exclusive mode, prior to closing the channel.
	 */
	STAILQ_FOREACH(ch, &nx->nx_ch_if_adv_head, ch_link_if_adv) {
		struct nexus_adapter *na = ch->ch_na;

		ASSERT(na != NULL);
		/* wake the channel's first TX ring with the advisory hint */
		na_post_event(&na->na_tx_rings[ch->ch_first[NR_TX]],
		    TRUE, FALSE, FALSE, CHAN_FILT_HINT_IF_ADV_UPD);
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_SENT);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_SENT);
		}
	}
	lck_rw_done(&nx->nx_ch_if_adv_lock);
}
3545