/*
 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <sys/sdt.h>

static uint32_t disable_nxctl_check = 0;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk, OID_AUTO, disable_nxctl_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &disable_nxctl_check, 0, "");
#endif

LCK_GRP_DECLARE(nexus_lock_group, "sk_nx_lock");
LCK_GRP_DECLARE(nexus_mbq_lock_group, "sk_nx_mbq_lock");
LCK_GRP_DECLARE(nexus_pktq_lock_group, "sk_nx_pktq_lock");
LCK_ATTR_DECLARE(nexus_lock_attr, 0, 0);

static STAILQ_HEAD(, nxctl) nxctl_head =
    STAILQ_HEAD_INITIALIZER(nxctl_head);
static STAILQ_HEAD(, kern_nexus_provider) nxprov_head =
    STAILQ_HEAD_INITIALIZER(nxprov_head);

static int nx_cmp(const struct kern_nexus *, const struct kern_nexus *);
RB_HEAD(kern_nexus_tree, kern_nexus);
RB_PROTOTYPE_SC(static, kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
RB_GENERATE(kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
static struct kern_nexus_tree nx_head;

static int nxctl_get_nexus_prov_list(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_prov_entry(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_list(struct nxctl *, struct sockopt *);
static int nxctl_nexus_bind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_unbind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_config(struct nxctl *, struct sockopt *);
static int nxctl_get_channel_list(struct nxctl *, struct sockopt *);
static void nxctl_retain_locked(struct nxctl *);
static int nxctl_release_locked(struct nxctl *);
static void nxctl_init(struct nxctl *, struct proc *, struct fileproc *);
static struct nxctl *nxctl_alloc(struct proc *, struct fileproc *, zalloc_flags_t);
static void nxctl_free(struct nxctl *);

static struct kern_nexus_provider *nxprov_create_common(struct nxctl *,
    struct kern_nexus_domain_provider *, struct nxprov_reg *,
    const struct kern_nexus_provider_init *init, int *);
static void nxprov_detach(struct kern_nexus_provider *, boolean_t);
static void nxprov_retain_locked(struct kern_nexus_provider *);
static int nxprov_release_locked(struct kern_nexus_provider *);
static struct kern_nexus_provider *nxprov_alloc(
    struct kern_nexus_domain_provider *, zalloc_flags_t);
static void nxprov_free(struct kern_nexus_provider *);

static int nx_init_rings(struct kern_nexus *, struct kern_channel *);
static void nx_fini_rings(struct kern_nexus *, struct kern_channel *);
static int nx_init_slots(struct kern_nexus *, struct __kern_channel_ring *);
static void nx_fini_slots(struct kern_nexus *, struct __kern_channel_ring *);
static struct kern_nexus *nx_alloc(zalloc_flags_t);
static void nx_free(struct kern_nexus *);

static ZONE_DECLARE(nxctl_zone, SKMEM_ZONE_PREFIX ".nx.ctl",
    sizeof(struct nxctl), ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(nxbind_zone, SKMEM_ZONE_PREFIX ".nx.bind",
    sizeof(struct nxbind), ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(nxprov_zone, SKMEM_ZONE_PREFIX ".nx.kern.prov",
    sizeof(struct kern_nexus_provider), ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(nxprov_params_zone, SKMEM_ZONE_PREFIX ".nx.kern.prov.params",
    sizeof(struct nxprov_params), ZC_ZFREE_CLEARMEM);

static ZONE_DECLARE(nx_zone, SKMEM_ZONE_PREFIX ".nx",
    sizeof(struct kern_nexus), ZC_ZFREE_CLEARMEM);

static int __nx_inited = 0;

#define SKMEM_TAG_NX_KEY "com.apple.skywalk.nexus.key"
kern_allocation_name_t skmem_tag_nx_key;

#define SKMEM_TAG_NX_MIB "com.apple.skywalk.nexus.mib"
static kern_allocation_name_t skmem_tag_nx_mib;

#define SKMEM_TAG_NX_PORT "com.apple.skywalk.nexus.port"
kern_allocation_name_t skmem_tag_nx_port;

#define SKMEM_TAG_NX_PORT_INFO "com.apple.skywalk.nexus.port.info"
kern_allocation_name_t skmem_tag_nx_port_info;

/*
 * Special nexus controller handle for Skywalk internal use. Unlike all
 * other nexus controller handles that are created by userland or kernel
 * clients, this one never gets closed or freed. It is also not part of
 * the global nxctl_head list.
 */
static struct nxctl _kernnxctl;
struct nexus_controller kernnxctl = { .ncd_nxctl = &_kernnxctl };

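/*
 * Illustrative lifecycle (a sketch only; exact sequencing is driven by
 * callers elsewhere in Skywalk): a controller handle is obtained via
 * nxctl_create(), providers are registered through nxprov_create() or
 * nxprov_create_kern(), instances are spun up with nx_create(), and
 * teardown proceeds through nx_close(), nxprov_close() and nxctl_close().
 */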
int
nexus_init(void)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(!__nx_inited);

	RB_INIT(&nx_head);

	na_init();

	/* attach system built-in domains and domain providers */
	nxdom_attach_all();

	/*
	 * Initialize private kernel nexus controller handle; this is used
	 * internally for creating nexus providers and nexus instances from
	 * within the Skywalk code (e.g. netif_compat).
	 */
	nxctl_init(&_kernnxctl, kernproc, NULL);
	nxctl_retain_locked(&_kernnxctl);	/* one for us */

	ASSERT(skmem_tag_nx_key == NULL);
	skmem_tag_nx_key = kern_allocation_name_allocate(SKMEM_TAG_NX_KEY, 0);
	ASSERT(skmem_tag_nx_key != NULL);

	ASSERT(skmem_tag_nx_mib == NULL);
	skmem_tag_nx_mib = kern_allocation_name_allocate(SKMEM_TAG_NX_MIB, 0);
	ASSERT(skmem_tag_nx_mib != NULL);

	ASSERT(skmem_tag_nx_port == NULL);
	skmem_tag_nx_port = kern_allocation_name_allocate(SKMEM_TAG_NX_PORT, 0);
	ASSERT(skmem_tag_nx_port != NULL);

	ASSERT(skmem_tag_nx_port_info == NULL);
	skmem_tag_nx_port_info = kern_allocation_name_allocate(
	    SKMEM_TAG_NX_PORT_INFO, 0);
	ASSERT(skmem_tag_nx_port_info != NULL);

	__nx_inited = 1;

	return 0;
}

void
nexus_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__nx_inited) {
		nxctl_release_locked(&_kernnxctl);

		/* tell all domains they're going away */
		nxdom_detach_all();

		ASSERT(RB_EMPTY(&nx_head));

		if (skmem_tag_nx_key != NULL) {
			kern_allocation_name_release(skmem_tag_nx_key);
			skmem_tag_nx_key = NULL;
		}
		if (skmem_tag_nx_mib != NULL) {
			kern_allocation_name_release(skmem_tag_nx_mib);
			skmem_tag_nx_mib = NULL;
		}
		if (skmem_tag_nx_port != NULL) {
			kern_allocation_name_release(skmem_tag_nx_port);
			skmem_tag_nx_port = NULL;
		}
		if (skmem_tag_nx_port_info != NULL) {
			kern_allocation_name_release(skmem_tag_nx_port_info);
			skmem_tag_nx_port_info = NULL;
		}
		na_fini();

		__nx_inited = 0;
	}
}

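/*
 * Allocate a nexus controller handle, attach it to the global nxctl_head
 * list and hand it back with two references held: one for the list
 * membership and one for the caller.
 */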
struct nxctl *
nxctl_create(struct proc *p, struct fileproc *fp, const uuid_t nxctl_uuid,
    int *err)
{
	struct nxctl *nxctl = NULL;

	ASSERT(!uuid_is_null(nxctl_uuid));

	/*
	 * Callers may pass in an uninitialized error value; clear it up
	 * front so the check after SK_UNLOCK() below is well defined.
	 */
	*err = 0;

	/* privilege checks would be done when performing nxctl operations */

	SK_LOCK();

	nxctl = nxctl_alloc(p, fp, Z_WAITOK);

	STAILQ_INSERT_TAIL(&nxctl_head, nxctl, nxctl_link);
	nxctl->nxctl_flags |= NEXUSCTLF_ATTACHED;
	uuid_copy(nxctl->nxctl_uuid, nxctl_uuid);

	nxctl_retain_locked(nxctl);	/* one for being in the list */
	nxctl_retain_locked(nxctl);	/* one for the caller */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl 0x%llx UUID %s", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr));
#endif /* SK_LOG */

	SK_UNLOCK();

	if (*err != 0) {
		nxctl_free(nxctl);
		nxctl = NULL;
	}
	return nxctl;
}

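/*
 * Close a nexus controller handle: detach it from the global list
 * (dropping the list reference) and close every nexus provider owned by
 * this control instance.  The caller's own reference is not dropped here.
 */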
void
nxctl_close(struct nxctl *nxctl)
{
	struct kern_nexus_provider *nxprov = NULL, *tnxprov;

	lck_mtx_lock(&nxctl->nxctl_lock);
	SK_LOCK();

	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL));

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl 0x%llx UUID %s flags 0x%b", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr),
	    nxctl->nxctl_flags, NEXUSCTLF_BITS);
#endif /* SK_LOG */

	if (!(nxctl->nxctl_flags & NEXUSCTLF_NOFDREF)) {
		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
		nxctl->nxctl_fp = NULL;
	}

	/* may be called as part of failure cleanup, so check */
	if (nxctl->nxctl_flags & NEXUSCTLF_ATTACHED) {
		/* caller must hold an extra ref */
		ASSERT(nxctl->nxctl_refcnt > 1);
		(void) nxctl_release_locked(nxctl);

		STAILQ_REMOVE(&nxctl_head, nxctl, nxctl, nxctl_link);
		nxctl->nxctl_flags &= ~NEXUSCTLF_ATTACHED;
	}

repeat:
	STAILQ_FOREACH_SAFE(nxprov, &nxprov_head, nxprov_link, tnxprov) {
		/*
		 * Close provider only for those which are owned by
		 * this control instance. Note that if we close the
		 * provider, we need to repeat this search as the
		 * list might have been changed by another thread.
		 * That's possible since SK_UNLOCK() may be called
		 * as a result of calling nxprov_close().
		 */
		if (!(nxprov->nxprov_flags & NXPROVF_CLOSED) &&
		    nxprov->nxprov_ctl == nxctl) {
			nxprov_retain_locked(nxprov);
			(void) nxprov_close(nxprov, TRUE);
			(void) nxprov_release_locked(nxprov);
			goto repeat;
		}
	}

	SK_UNLOCK();
	lck_mtx_unlock(&nxctl->nxctl_lock);
}

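/*
 * Set/get operations arrive in a socket-option style descriptor
 * (struct sockopt) and are dispatched on sopt_name below; a hypothetical
 * caller would fill in sopt_name/sopt_val/sopt_valsize and invoke
 * nxctl_set_opt() or nxctl_get_opt() with the nxctl lock held.
 */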
int
nxctl_set_opt(struct nxctl *nxctl, struct sockopt *sopt)
{
#pragma unused(nxctl)
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	switch (sopt->sopt_name) {
	case NXOPT_NEXUS_BIND:
		err = nxctl_nexus_bind(nxctl, sopt);
		break;

	case NXOPT_NEXUS_UNBIND:
		err = nxctl_nexus_unbind(nxctl, sopt);
		break;

	case NXOPT_NEXUS_CONFIG:
		err = nxctl_nexus_config(nxctl, sopt);
		break;

	default:
		err = ENOPROTOOPT;
		break;
	}

	return err;
}

int
nxctl_get_opt(struct nxctl *nxctl, struct sockopt *sopt)
{
#pragma unused(nxctl)
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	switch (sopt->sopt_name) {
	case NXOPT_NEXUS_PROV_LIST:
		err = nxctl_get_nexus_prov_list(nxctl, sopt);
		break;

	case NXOPT_NEXUS_PROV_ENTRY:
		err = nxctl_get_nexus_prov_entry(nxctl, sopt);
		break;

	case NXOPT_NEXUS_LIST:
		err = nxctl_get_nexus_list(nxctl, sopt);
		break;

	case NXOPT_CHANNEL_LIST:
		err = nxctl_get_channel_list(nxctl, sopt);
		break;

	default:
		err = ENOPROTOOPT;
		break;
	}

	return err;
}

/* Upper bound on # of nrl_num_regs that we'd return to user space */
#define MAX_NUM_REG_ENTRIES 256

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	struct nxprov_reg_ent *pnre, *nres = NULL;
	struct nxprov_list_req nrlr;
	struct kern_nexus_provider *nxprov = NULL;
	uint32_t nregs = 0, ncregs = 0;
	int err = 0, observeall;
	size_t nres_sz;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nrlr, sizeof(nrlr), sizeof(nrlr));
	if (err != 0) {
		return err;
	}

	if ((size_t)nrlr.nrl_num_regs > MAX_NUM_REG_ENTRIES) {
		nrlr.nrl_num_regs = MAX_NUM_REG_ENTRIES;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus provider
	 * entries to caller gracefully. We only copy out the number of
	 * entries which caller has asked for, but we always tell caller
	 * how big the buffer really needs to be.
	 */
	tmp_ptr = nrlr.nrl_regs;
	if (tmp_ptr != USER_ADDR_NULL && nrlr.nrl_num_regs > 0) {
		nres_sz = (size_t)nrlr.nrl_num_regs * sizeof(*nres);
		nres = sk_alloc_data(nres_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(nres == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	/*
	 * Count number of providers. If buffer space exists and
	 * remains, copy out provider entries.
	 */
	nregs = nrlr.nrl_num_regs;
	pnre = nres;

	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (nres != NULL && nregs > 0) {
			uuid_copy(pnre->npre_prov_uuid, nxprov->nxprov_uuid);
			bcopy(nxprov->nxprov_params, &pnre->npre_prov_params,
			    sizeof(struct nxprov_params));
			--nregs;
			++pnre;
			++ncregs;
		}
	}
	SK_UNLOCK();

	if (ncregs == 0) {
		err = ENOENT;
	}

	if (nres != NULL) {
		if (err == 0 && tmp_ptr != USER_ADDR_NULL) {
			if (sopt->sopt_p != kernproc) {
				err = copyout(nres, tmp_ptr,
				    ncregs * sizeof(*nres));
			} else {
				bcopy(nres, CAST_DOWN(caddr_t, tmp_ptr),
				    ncregs * sizeof(*nres));
			}
		}
		sk_free_data(nres, nres_sz);
		nres = NULL;
	}

	if (err == 0) {
		nrlr.nrl_num_regs = ncregs;
		err = sooptcopyout(sopt, &nrlr, sizeof(nrlr));
	}

	return err;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_entry(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nxprov_reg_ent nre;
	struct kern_nexus_provider *nxprov = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nre, sizeof(nre));
	err = sooptcopyin(sopt, &nre, sizeof(nre), sizeof(nre));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nre.npre_prov_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (uuid_compare(nxprov->nxprov_uuid,
		    nre.npre_prov_uuid) == 0) {
			/*
			 * Return only entries that are visible to the caller,
			 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
			 */
			if (nxprov->nxprov_ctl != nxctl) {
				if (skywalk_priv_check_cred(sopt->sopt_p,
				    nxctl->nxctl_cred,
				    PRIV_SKYWALK_OBSERVE_ALL) != 0) {
					nxprov = NULL;
					break;
				}
			}

			bcopy(nxprov->nxprov_params, &nre.npre_prov_params,
			    sizeof(struct nxprov_params));
			break;
		}
	}
	SK_UNLOCK();

	if (nxprov != NULL) {
		err = sooptcopyout(sopt, &nre, sizeof(nre));
	} else {
		err = ENOENT;
	}

	return err;
}

/* Upper bound on # of nl_num_nx_uuids that we'd return to user space */
#define MAX_NUM_NX_UUIDS 4096

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct nx_list_req nlr;
	struct kern_nexus_provider *nxprov = NULL;
	struct kern_nexus *nx = NULL;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nlr, sizeof(nlr), sizeof(nlr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nlr.nl_prov_uuid)) {
		return EINVAL;
	} else if ((size_t)nlr.nl_num_nx_uuids > MAX_NUM_NX_UUIDS) {
		nlr.nl_num_nx_uuids = MAX_NUM_NX_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus UUIDs to
	 * caller gracefully. We only copy out the number of UUIDs which
	 * caller has asked for, but we always tell caller how big the
	 * buffer really needs to be.
	 */
	tmp_ptr = nlr.nl_nx_uuids;
	if (tmp_ptr != USER_ADDR_NULL && nlr.nl_num_nx_uuids > 0) {
		uuids_sz = (size_t)nlr.nl_num_nx_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(uuids == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (uuid_compare(nxprov->nxprov_uuid, nlr.nl_prov_uuid) == 0) {
			break;
		}
	}

	if (nxprov != NULL) {
		/*
		 * Count number of Nexus. If buffer space exists
		 * and remains, copy out the Nexus UUIDs.
		 */
		nuuids = nlr.nl_num_nx_uuids;
		puuid = uuids;

		STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, nx->nx_uuid);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nxprov != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			if (cnt_uuid > 0) {
				if (sopt->sopt_p != kernproc) {
					err = copyout(uuids, tmp_ptr,
					    cnt_uuid * sizeof(uuid_t));
				} else {
					bcopy(uuids,
					    CAST_DOWN(caddr_t, tmp_ptr),
					    cnt_uuid * sizeof(uuid_t));
				}
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	if (err == 0) {
		nlr.nl_num_nx_uuids = ncuuids;
		err = sooptcopyout(sopt, &nlr, sizeof(nlr));
	}

	return err;
}

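/*
 * Bind credentials to a nexus port.  The caller must pick at least one
 * match criterion via nb_flags: NBR_MATCH_PID, NBR_MATCH_EXEC_UUID and/or
 * NBR_MATCH_KEY.  The assembled nxbind is handed to the domain's
 * nxdom_bind_port() callback, which may also claim a free port when
 * NEXUS_PORT_ANY is passed (currently kernel clients only).
 */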
/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_bind(struct nxctl *nxctl, struct sockopt *sopt)
{
	boolean_t m_pid, m_exec_uuid, m_key;
	struct nx_bind_req nbr;
	struct proc *p = PROC_NULL;
	struct nxbind *nxb = NULL;
	uint64_t p_uniqueid = -1;
	pid_t p_pid = -1;
	struct kern_nexus *nx = NULL;
#if SK_LOG
	uuid_string_t exec_uuidstr;
#endif /* SK_LOG */
	uuid_t p_uuid;
	void *key = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	uuid_clear(p_uuid);
	bzero(&nbr, sizeof(nbr));
	err = sooptcopyin(sopt, &nbr, sizeof(nbr), sizeof(nbr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nbr.nb_nx_uuid)) {
		err = EINVAL;
		goto done_unlocked;
	}

	nbr.nb_flags &= NBR_MATCH_MASK;
	if (nbr.nb_flags == 0) {
		/* must choose one of the match criteria */
		err = EINVAL;
		goto done_unlocked;
	}
	m_pid = !!(nbr.nb_flags & NBR_MATCH_PID);
	m_exec_uuid = !!(nbr.nb_flags & NBR_MATCH_EXEC_UUID);
	m_key = !!(nbr.nb_flags & NBR_MATCH_KEY);

	if (m_pid || m_exec_uuid) {
		/*
		 * Validate process ID. A valid PID is needed when we're
		 * asked to match by PID, or if asked to match by executable
		 * UUID with a NULL nb_exec_uuid supplied. The latter is
		 * to support the case when a userland Nexus provider isn't
		 * able to acquire its client's executable UUID, but is
		 * able to identify it via PID.
		 */
		if ((m_pid || uuid_is_null(nbr.nb_exec_uuid)) &&
		    (p = proc_find(nbr.nb_pid)) == PROC_NULL) {
			err = ESRCH;
			goto done_unlocked;
		}
		/* exclude kernel from the match criteria */
		if (p == kernproc) {
			err = EACCES;
			goto done_unlocked;
		} else if (p != PROC_NULL) {
			proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
			p_uniqueid = proc_uniqueid(p);
			p_pid = proc_pid(p);
		} else {
			uuid_copy(p_uuid, nbr.nb_exec_uuid);
		}
	}

	if (m_key) {
		if (nbr.nb_key_len == 0 || nbr.nb_key_len > NEXUS_MAX_KEY_LEN ||
		    nbr.nb_key == USER_ADDR_NULL) {
			err = EINVAL;
			goto done_unlocked;
		}

		key = sk_alloc_data(nbr.nb_key_len, Z_WAITOK, skmem_tag_nx_key);
		if (__improbable(key == NULL)) {
			err = ENOMEM;
			goto done_unlocked;
		}

		if (sopt->sopt_p != kernproc) {
			err = copyin(nbr.nb_key, key, nbr.nb_key_len);
			if (err != 0) {
				goto done_unlocked;
			}
		} else {
			bcopy((void *)nbr.nb_key, key, nbr.nb_key_len);
		}
	}

	SK_LOCK();
	nx = nx_find(nbr.nb_nx_uuid, TRUE);
	if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {	/* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* bind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	/* port must be within the domain's range */
	if (nbr.nb_port != NEXUS_PORT_ANY &&
	    nbr.nb_port >= NXDOM_MAX(NX_DOM(nx), ports)) {
		err = EDOM;
		goto done;
	} else if (nbr.nb_port == NEXUS_PORT_ANY) {
		/* for now, this is allowed only for kernel clients */
		if (sopt->sopt_p != kernproc) {
			err = EPERM;
			goto done;
		}
	}

	nxb = nxb_alloc(Z_WAITOK);

	if (m_pid) {
		nxb->nxb_flags |= NXBF_MATCH_UNIQUEID;
		nxb->nxb_uniqueid = p_uniqueid;
		nxb->nxb_pid = p_pid;
	}
	if (m_exec_uuid) {
		nxb->nxb_flags |= NXBF_MATCH_EXEC_UUID;
		ASSERT(!uuid_is_null(p_uuid));
		uuid_copy(nxb->nxb_exec_uuid, p_uuid);
	}
	if (m_key) {
		nxb->nxb_flags |= NXBF_MATCH_KEY;
		ASSERT(key != NULL);
		nxb->nxb_key = key;
		key = NULL;	/* let nxb_free() free it */
		ASSERT(nbr.nb_key_len != 0 &&
		    nbr.nb_key_len <= NEXUS_MAX_KEY_LEN);
		nxb->nxb_key_len = nbr.nb_key_len;
	}

	/*
	 * Bind the creds to the nexus port. If client doesn't have a port,
	 * find one, claim it, and associate the creds to it. Upon success,
	 * the nexus may move the nxbind contents (including the key) to
	 * its own nxbind instance; in that case, nxb_free() below will not
	 * be freeing the key within.
	 */
	err = NX_DOM(nx)->nxdom_bind_port(nx, &nbr.nb_port, nxb, NULL);
	if (err != 0) {
		goto done;
	}

	ASSERT(nbr.nb_port != NEXUS_PORT_ANY);
	(void) sooptcopyout(sopt, &nbr, sizeof(nbr));

	SK_D("nexus 0x%llx nxb 0x%llx port %u flags 0x%b pid %d "
	    "(uniqueid %llu) exec_uuid %s key 0x%llx key_len %u",
	    SK_KVA(nx), SK_KVA(nxb), nbr.nb_port, nxb->nxb_flags,
	    NXBF_BITS, nxb->nxb_pid, nxb->nxb_uniqueid,
	    sk_uuid_unparse(nxb->nxb_exec_uuid, exec_uuidstr),
	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0,
	    nxb->nxb_key_len);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

done_unlocked:
	ASSERT(nx == NULL);

	if (nxb != NULL) {
		nxb_free(nxb);
		nxb = NULL;
	}
	if (key != NULL) {
		sk_free_data(key, nbr.nb_key_len);
		key = NULL;
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}

	return err;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_unbind(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nx_unbind_req nur;
	struct kern_nexus *nx = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nur, sizeof(nur));
	err = sooptcopyin(sopt, &nur, sizeof(nur), sizeof(nur));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nur.nu_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	nx = nx_find(nur.nu_nx_uuid, TRUE);
	if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {	/* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* unbind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	if (nur.nu_port == NEXUS_PORT_ANY) {
		err = EINVAL;
		goto done;
	}

	err = NX_DOM(nx)->nxdom_unbind_port(nx, nur.nu_port);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_config(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct kern_nexus *nx = NULL;
	struct nx_cfg_req ncr;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&ncr, sizeof(ncr));
	err = sooptcopyin(sopt, &ncr, sizeof(ncr), sizeof(ncr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(ncr.nc_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	nx = nx_find(ncr.nc_nx_uuid, TRUE);
	if (nx == NULL || (disable_nxctl_check == 0 &&
	    nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {	/* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	if (NX_DOM_PROV(nx)->nxdom_prov_config != NULL) {
		err = NX_DOM_PROV(nx)->nxdom_prov_config(NX_DOM_PROV(nx),
		    nx, &ncr, sopt->sopt_dir, sopt->sopt_p, nxctl->nxctl_cred);
	} else {
		err = EPERM;
	}

	if (err == 0) {
		(void) sooptcopyout(sopt, &ncr, sizeof(ncr));
	}
done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}

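/*
 * nxbind objects carry the match criteria assembled above.  Ownership of
 * the key buffer follows the nxbind: nxb_free() releases it, and
 * nxb_move() transfers it (along with everything else) to the destination.
 */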
struct nxbind *
nxb_alloc(zalloc_flags_t how)
{
	struct nxbind *nxb = zalloc_flags(nxbind_zone, how | Z_ZERO);

	if (nxb) {
		SK_DF(SK_VERB_MEM, "nxb 0x%llx ALLOC", SK_KVA(nxb));
	}
	return nxb;
}

void
nxb_free(struct nxbind *nxb)
{
	SK_DF(SK_VERB_MEM, "nxb 0x%llx key 0x%llx FREE", SK_KVA(nxb),
	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0);

	if (nxb->nxb_key != NULL) {
		sk_free_data(nxb->nxb_key, nxb->nxb_key_len);
		nxb->nxb_key = NULL;
	}
	zfree(nxbind_zone, nxb);
}

/*
 * nxb0 is assumed to possess the truth, compare nxb1 against it.
 */
boolean_t
nxb_is_equal(struct nxbind *nxb0, struct nxbind *nxb1)
{
	ASSERT(nxb0 != NULL && nxb1 != NULL);
	ASSERT(nxb0 != nxb1);

	/* we always compare using uniqueid and not pid */
	if ((nxb0->nxb_flags & NXBF_MATCH_UNIQUEID) &&
	    nxb1->nxb_uniqueid != nxb0->nxb_uniqueid) {
		return FALSE;
	}

	if ((nxb0->nxb_flags & NXBF_MATCH_EXEC_UUID) &&
	    uuid_compare(nxb1->nxb_exec_uuid, nxb0->nxb_exec_uuid) != 0) {
		return FALSE;
	}

	ASSERT(!(nxb0->nxb_flags & NXBF_MATCH_KEY) ||
	    (nxb0->nxb_key_len != 0 && nxb0->nxb_key != NULL));

	if ((nxb0->nxb_flags & NXBF_MATCH_KEY) &&
	    (nxb0->nxb_key_len != nxb1->nxb_key_len ||
	    nxb1->nxb_key == NULL || timingsafe_bcmp(nxb1->nxb_key, nxb0->nxb_key,
	    nxb1->nxb_key_len) != 0)) {
		return FALSE;
	}

	return TRUE;
}

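/*
 * Transfer the contents of snxb to dnxb; the source is wiped afterwards,
 * so the destination becomes the sole owner of the key, if any.
 */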
void
nxb_move(struct nxbind *snxb, struct nxbind *dnxb)
{
	ASSERT(!(snxb->nxb_flags & NXBF_MATCH_KEY) ||
	    (snxb->nxb_key_len != 0 && snxb->nxb_key != NULL));

	/* in case the destination has a key attached, free it first */
	if (dnxb->nxb_key != NULL) {
		sk_free_data(dnxb->nxb_key, dnxb->nxb_key_len);
		dnxb->nxb_key = NULL;
	}

	/* move everything from src to dst, and then wipe out src */
	bcopy(snxb, dnxb, sizeof(*dnxb));
	bzero(snxb, sizeof(*snxb));
}

/* Upper bound on # of cl_num_ch_uuids that we'd return to user space */
#define MAX_NUM_CH_UUIDS 4096

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_channel_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct ch_list_req clr;
	struct kern_channel *ch = NULL;
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &clr, sizeof(clr), sizeof(clr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(clr.cl_nx_uuid)) {
		return EINVAL;
	} else if ((size_t)clr.cl_num_ch_uuids > MAX_NUM_CH_UUIDS) {
		clr.cl_num_ch_uuids = MAX_NUM_CH_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Channel UUIDs to
	 * caller gracefully. We only copy out the number of UUIDs which
	 * caller has asked for, but we always tell caller how big the
	 * buffer really needs to be.
	 */
	tmp_ptr = clr.cl_ch_uuids;
	if (tmp_ptr != USER_ADDR_NULL && clr.cl_num_ch_uuids > 0) {
		uuids_sz = (size_t)clr.cl_num_ch_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (uuids == NULL) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	uuid_copy(find.nx_uuid, clr.cl_nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && NX_PROV(nx)->nxprov_ctl != nxctl && !observeall) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		nx = NULL;
	}
	if (nx != NULL) {
		/*
		 * Count number of Channels. If buffer space exists
		 * and remains, copy out the Channel UUIDs.
		 */
		nuuids = clr.cl_num_ch_uuids;
		puuid = uuids;

		STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, ch->ch_info->cinfo_ch_id);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nx != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			ASSERT(cnt_uuid > 0);

			if (sopt->sopt_p != kernproc) {
				err = copyout(uuids, tmp_ptr,
				    cnt_uuid * sizeof(uuid_t));
			} else {
				bcopy(uuids, CAST_DOWN(caddr_t, tmp_ptr),
				    cnt_uuid * sizeof(uuid_t));
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	if (err == 0) {
		clr.cl_num_ch_uuids = ncuuids;
		err = sooptcopyout(sopt, &clr, sizeof(clr));
	}

	return err;
}

static void
nxctl_init(struct nxctl *nxctl, struct proc *p, struct fileproc *fp)
{
	uuid_t p_uuid;

	bzero(nxctl, sizeof(*nxctl));

	proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));

	lck_mtx_init(&nxctl->nxctl_lock, &nexus_lock_group, &nexus_lock_attr);
	uuid_copy(nxctl->nxctl_proc_uuid, p_uuid);
	nxctl->nxctl_proc_uniqueid = proc_uniqueid(p);
	nxctl->nxctl_cred = kauth_cred_proc_ref(p);
	nxctl->nxctl_fp = fp;
	if (nxctl == &_kernnxctl) {
		ASSERT(p == kernproc);
		nxctl->nxctl_flags |= NEXUSCTLF_KERNEL;
	}
	if (fp == NULL) {
		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
	}
}

static struct nxctl *
nxctl_alloc(struct proc *p, struct fileproc *fp, zalloc_flags_t how)
{
	struct nxctl *nxctl = zalloc_flags(nxctl_zone, how);

	if (nxctl != NULL) {
		nxctl_init(nxctl, p, fp);
	}
	return nxctl;
}

static void
nxctl_free(struct nxctl *nxctl)
{
	ASSERT(nxctl->nxctl_refcnt == 0);
	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_ATTACHED));
	kauth_cred_unref(&nxctl->nxctl_cred);
	lck_mtx_destroy(&nxctl->nxctl_lock, &nexus_lock_group);
	SK_D("nxctl 0x%llx FREE", SK_KVA(nxctl));
	if (!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL)) {
		zfree(nxctl_zone, nxctl);
	}
}

static void
nxctl_retain_locked(struct nxctl *nxctl)
{
	SK_LOCK_ASSERT_HELD();

	nxctl->nxctl_refcnt++;
	ASSERT(nxctl->nxctl_refcnt != 0);
}

void
nxctl_retain(struct nxctl *nxctl)
{
	SK_LOCK();
	nxctl_retain_locked(nxctl);
	SK_UNLOCK();
}

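/*
 * Drop a reference; the nxctl is freed when the last one goes away.
 * Returns non-zero only if this call released the final reference.
 */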
static int
nxctl_release_locked(struct nxctl *nxctl)
{
	int oldref = nxctl->nxctl_refcnt;

	SK_LOCK_ASSERT_HELD();

	ASSERT(nxctl->nxctl_refcnt != 0);
	if (--nxctl->nxctl_refcnt == 0) {
		nxctl_free(nxctl);
	}

	return oldref == 1;
}

int
nxctl_release(struct nxctl *nxctl)
{
	int lastref;

	SK_LOCK();
	lastref = nxctl_release_locked(nxctl);
	SK_UNLOCK();

	return lastref;
}

void
nxctl_dtor(void *arg)
{
	struct nxctl *nxctl = arg;

	nxctl_close(nxctl);
	SK_LOCK();
	(void) nxctl_release_locked(nxctl);
	SK_UNLOCK();
}

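/*
 * Notify an external nexus provider that a channel is connecting: invoke
 * nxpi_pre_connect(), set up the rings/slots (or the default logical-link
 * queues), then invoke nxpi_connected().  Both SK_LOCK and the channel
 * lock are dropped across the external callbacks, hence the extra channel
 * retain/release bracketing the sequence.
 */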
int
nxprov_advise_connect(struct kern_nexus *nx, struct kern_channel *ch,
    struct proc *p)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	int err = 0;

	ASSERT(!(ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)));
	ASSERT(ch->ch_ctx == NULL);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* monitor channels aren't externally visible/usable, so ignore */
	if ((ch->ch_info->cinfo_ch_mode & CHMODE_MONITOR) ||
	    (ch->ch_flags & CHANF_EXT_SKIP) ||
	    (nxprov->nxprov_ext.nxpi_pre_connect == NULL &&
	    nxprov->nxprov_ext.nxpi_connected == NULL)) {
		return 0;
	}

	ch_retain_locked(ch);
	lck_mtx_unlock(&ch->ch_lock);
	SK_UNLOCK();
	lck_mtx_lock(&ch->ch_lock);

	err = nxprov->nxprov_ext.nxpi_pre_connect(nxprov, p, nx,
	    ch->ch_info->cinfo_nx_port, ch, &ch->ch_ctx);
	if (err != 0) {
		SK_D("ch 0x%llx flags %b nx 0x%llx pre_connect "
		    "error %d", SK_KVA(ch), ch->ch_flags,
		    CHANF_BITS, SK_KVA(nx), err);
		ch->ch_ctx = NULL;
		goto done;
	}
	/*
	 * Upon ring/slot init failure, this is cleared
	 * by nxprov_advise_disconnect() below.
	 */
	atomic_bitset_32(&ch->ch_flags, CHANF_EXT_PRECONNECT);
	if (NXPROV_LLINK(nxprov)) {
		err = nx_netif_llink_ext_init_default_queues(nx);
	} else {
		err = nx_init_rings(nx, ch);
	}
	if (err != 0) {
		goto done;
	}
	ASSERT(err == 0);
	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT |
	    CHANF_EXT_CONNECTED)) == CHANF_EXT_PRECONNECT);

	err = nxprov->nxprov_ext.nxpi_connected(nxprov, nx, ch);
	if (err != 0) {
		SK_D("ch 0x%llx flags %b nx 0x%llx connected error %d",
		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx), err);
		goto done;
	}
	atomic_bitset_32(&ch->ch_flags, CHANF_EXT_CONNECTED);
	SK_D("ch 0x%llx flags %b nx 0x%llx connected",
	    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));

done:
	lck_mtx_unlock(&ch->ch_lock);
	SK_LOCK();
	lck_mtx_lock(&ch->ch_lock);
	if ((err != 0) &&
	    (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT))) {
		nxprov_advise_disconnect(nx, ch);
	}
	/* caller is expected to hold one, in addition to ourselves */
	VERIFY(ch->ch_refcnt >= 2);
	ch_release_locked(ch);

	return err;
}

void
nxprov_advise_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* check as we might be called in the error handling path */
	if (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)) {
		ch_retain_locked(ch);
		lck_mtx_unlock(&ch->ch_lock);
		SK_UNLOCK();
		lck_mtx_lock(&ch->ch_lock);

		ASSERT(!(ch->ch_flags & CHANF_EXT_SKIP));
		if (ch->ch_flags & CHANF_EXT_CONNECTED) {
			nxprov->nxprov_ext.nxpi_pre_disconnect(nxprov, nx, ch);
			atomic_bitclear_32(&ch->ch_flags, CHANF_EXT_CONNECTED);
		}

		/*
		 * Inform the external domain provider that the rings
		 * and slots for this channel are no longer valid.
		 */
		if (NXPROV_LLINK(nxprov)) {
			nx_netif_llink_ext_fini_default_queues(nx);
		} else {
			nx_fini_rings(nx, ch);
		}

		ASSERT(ch->ch_flags & CHANF_EXT_PRECONNECT);
		nxprov->nxprov_ext.nxpi_disconnected(nxprov, nx, ch);
		atomic_bitclear_32(&ch->ch_flags, CHANF_EXT_PRECONNECT);

		SK_D("ch 0x%llx flags %b nx 0x%llx disconnected",
		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));

		/* We're done with this channel */
		ch->ch_ctx = NULL;

		lck_mtx_unlock(&ch->ch_lock);
		SK_LOCK();
		lck_mtx_lock(&ch->ch_lock);
		/* caller is expected to hold one, in addition to ourselves */
		VERIFY(ch->ch_refcnt >= 2);
		ch_release_locked(ch);
	}
	ASSERT(!(ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)));
	ASSERT(ch->ch_ctx == NULL);
}

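/*
 * Common provider-creation path shared by nxprov_create() (userland) and
 * nxprov_create_kern() (kernel): validate the registration parameters,
 * allocate the provider, record the optional external init callbacks and
 * the validated per-region parameters, then hand back a retained provider
 * attached to the global list.
 */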
static struct kern_nexus_provider *
nxprov_create_common(struct nxctl *nxctl,
    struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
    const struct kern_nexus_provider_init *init, int *err)
{
	struct skmem_region_params srp[SKMEM_REGIONS];
	struct kern_nexus_provider *nxprov = NULL;
	struct skmem_region_params *bsrp;
	struct nxprov_params nxp;
	uint32_t override = 0;
	int i;

	_CASSERT(sizeof(*init) == sizeof(nxprov->nxprov_ext));
	_CASSERT(sizeof(*init) >=
	    sizeof(struct kern_nexus_netif_provider_init));

	SK_LOCK_ASSERT_HELD();
	ASSERT(nxctl != NULL && reg != NULL && nxdom_prov != NULL);

	/* process and validate provider parameters */
	if ((*err = nxdom_prov_validate_params(nxdom_prov, reg,
	    &nxp, srp, override)) != 0) {
		goto done;
	}

	nxprov = nxprov_alloc(nxdom_prov, Z_WAITOK);
	ASSERT(nxprov->nxprov_dom_prov == nxdom_prov);

	STAILQ_INIT(&nxprov->nxprov_nx_head);
	STAILQ_INSERT_TAIL(&nxprov_head, nxprov, nxprov_link);
	nxprov->nxprov_flags |= NXPROVF_ATTACHED;
	nxprov->nxprov_ctl = nxctl;
	uuid_generate_random(nxprov->nxprov_uuid);
	bcopy(&nxp, nxprov->nxprov_params, sizeof(struct nxprov_params));

	if (init != NULL) {
		if (init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF) {
			ASSERT(NXPROV_LLINK(nxprov));
			bcopy(init, &nxprov->nxprov_netif_ext,
			    sizeof(nxprov->nxprov_netif_ext));
		} else {
			ASSERT(!NXPROV_LLINK(nxprov));
			ASSERT(init->nxpi_version ==
			    KERN_NEXUS_PROVIDER_CURRENT_VERSION);
			bcopy(init, &nxprov->nxprov_ext, sizeof(*init));
		}
		nxprov->nxprov_flags |= NXPROVF_EXTERNAL;
	}

	/* store validated region parameters to the provider */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		nxprov->nxprov_region_params[i] = srp[i];
	}

	bsrp = &nxprov->nxprov_region_params[SKMEM_REGION_BUF];
	/*
	 * Special handling for external nexus providers; similar
	 * logic to what's done in kern_pbufpool_create().
	 */
	if (nxprov->nxprov_flags & NXPROVF_EXTERNAL) {
		uint32_t nxpi_flags = nxprov->nxprov_ext.nxpi_flags;
		/*
		 * Set SKMEM_REGION_CR_MONOLITHIC if the provider does
		 * not want more than a single segment for entire region.
		 */
		if (nxpi_flags & NXPIF_MONOLITHIC) {
			bsrp->srp_cflags |= SKMEM_REGION_CR_MONOLITHIC;
		} else {
			bsrp->srp_cflags &= ~SKMEM_REGION_CR_MONOLITHIC;
		}

		if (nxpi_flags & NXPIF_INHIBIT_CACHE) {
			bsrp->srp_cflags |= SKMEM_REGION_CR_NOCACHE;
		} else {
			bsrp->srp_cflags &= ~SKMEM_REGION_CR_NOCACHE;
		}

		/* recalculate what's done by nxprov_params_adjust() earlier */
		skmem_region_params_config(bsrp);

		if (nxpi_flags & NXPIF_VIRTUAL_DEVICE) {
			nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
		}
	} else if (nxdom_prov->nxdom_prov_dom->nxdom_type !=
	    NEXUS_TYPE_NET_IF) {
		/*
		 * Treat non-netif built-in nexus providers as those
		 * meant for inter-process communications, i.e. there
		 * is no actual networking hardware involved.
		 */
		nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
	}

	if (nxdom_prov->nxdom_prov_dom->nxdom_type == NEXUS_TYPE_NET_IF) {
		struct skmem_region_params *kmd_srp =
		    &nxprov->nxprov_region_params[SKMEM_REGION_KMD];
		struct skmem_region_params *umd_srp =
		    &nxprov->nxprov_region_params[SKMEM_REGION_UMD];

		kmd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
		umd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
		skmem_region_params_config(kmd_srp);
		skmem_region_params_config(umd_srp);
	}

	nxprov_retain_locked(nxprov);	/* one for being in the list */
	nxprov_retain_locked(nxprov);	/* one for the caller */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr));
#endif /* SK_LOG */

done:
	return nxprov;
}

struct kern_nexus_provider *
nxprov_create(struct proc *p, struct nxctl *nxctl, struct nxprov_reg *reg,
    int *err)
{
	struct nxprov_params *nxp = &reg->nxpreg_params;
	struct kern_nexus_domain_provider *nxdom_prov = NULL;
	struct kern_nexus_provider *nxprov = NULL;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(nxctl->nxctl_cred != proc_ucred(kernproc));
	*err = 0;

	switch (nxp->nxp_type) {
	case NEXUS_TYPE_USER_PIPE:	/* only for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_USER_PIPE);
		break;

	case NEXUS_TYPE_FLOW_SWITCH:	/* allowed for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH);
		break;

	case NEXUS_TYPE_NET_IF:	/* allowed for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_NET_IF);
		break;

	case NEXUS_TYPE_KERNEL_PIPE:	/* only for kernel */
	case NEXUS_TYPE_MONITOR:	/* invalid */
	default:
		*err = EINVAL;
		goto done;
	}

	if (*err != 0) {
		goto done;
	}

	ASSERT(nxp->nxp_type < NEXUS_TYPE_MAX);
	if ((nxdom_prov = nxdom_prov_default[nxp->nxp_type]) == NULL) {
		*err = ENXIO;
		goto done;
	}

#if CONFIG_NEXUS_NETIF
	/* make sure netif_compat is the default here */
	ASSERT(nxp->nxp_type != NEXUS_TYPE_NET_IF ||
	    strcmp(nxdom_prov->nxdom_prov_name,
	    NEXUS_PROVIDER_NET_IF_COMPAT) == 0);
#endif /* CONFIG_NEXUS_NETIF */

	SK_LOCK();
	/* callee holds a reference for our caller upon success */
	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, NULL, err);
	SK_UNLOCK();
done:
	return nxprov;
}

struct kern_nexus_provider *
nxprov_create_kern(struct nxctl *nxctl,
    struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
    const struct kern_nexus_provider_init *init, int *err)
{
	struct nxprov_params *nxp = &reg->nxpreg_params;
	struct kern_nexus_provider *nxprov = NULL;

	NXCTL_LOCK_ASSERT_HELD(nxctl);
	SK_LOCK_ASSERT_HELD();

	ASSERT(nxctl->nxctl_cred == proc_ucred(kernproc));
	ASSERT(nxp->nxp_type == nxdom_prov->nxdom_prov_dom->nxdom_type);
	ASSERT(init == NULL ||
	    init->nxpi_version == KERN_NEXUS_PROVIDER_CURRENT_VERSION ||
	    init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF);

	*err = 0;

	switch (nxp->nxp_type) {
	case NEXUS_TYPE_NET_IF:
		break;
	case NEXUS_TYPE_KERNEL_PIPE:
		if (init == NULL) {
			*err = EINVAL;
			goto done;
		}
		break;
	case NEXUS_TYPE_FLOW_SWITCH:
		if (init != NULL) {
			*err = EINVAL;
			goto done;
		}
		break;

	case NEXUS_TYPE_USER_PIPE:	/* only for userland */
	case NEXUS_TYPE_MONITOR:	/* invalid */
	default:
		*err = EINVAL;
		goto done;
	}

	/* callee holds a reference for our caller upon success */
	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, init, err);

done:
	return nxprov;
}

int
nxprov_destroy(struct nxctl *nxctl, const uuid_t nxprov_uuid)
{
	struct kern_nexus_provider *nxprov = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	SK_LOCK();

	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (nxctl == nxprov->nxprov_ctl &&
		    uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
			nxprov_retain_locked(nxprov);
			break;
		}
	}

	if (nxprov == NULL) {
		err = ENOENT;
	} else {
		err = nxprov_close(nxprov, TRUE);
	}

	if (nxprov != NULL) {
		(void) nxprov_release_locked(nxprov);
	}

	SK_UNLOCK();

	return err;
}

int
nxprov_close(struct kern_nexus_provider *nxprov, boolean_t locked)
{
	int err = 0;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
	    nxprov->nxprov_flags, NXPROVF_BITS);
#endif /* SK_LOG */

	if (nxprov->nxprov_flags & NXPROVF_CLOSED) {
		err = EALREADY;
	} else {
		struct kern_nexus *nx, *tnx;

		nxprov->nxprov_ctl = NULL;

		STAILQ_FOREACH_SAFE(nx, &nxprov->nxprov_nx_head,
		    nx_prov_link, tnx) {
			nx_retain_locked(nx);
			(void) nx_close(nx, TRUE);
			(void) nx_release_locked(nx);
		}

		if (STAILQ_EMPTY(&nxprov->nxprov_nx_head)) {
			/* no nexus created on this, so detach now */
			nxprov_detach(nxprov, TRUE);
		} else {
			/* detach when last nexus is destroyed */
			ASSERT(nxprov->nxprov_refcnt > 1);
			nxprov->nxprov_flags |= NXPROVF_CLOSED;
		}
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return err;
}

static void
nxprov_detach(struct kern_nexus_provider *nxprov, boolean_t locked)
{
	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
	    nxprov->nxprov_flags, NXPROVF_BITS);
#endif /* SK_LOG */

	ASSERT(nxprov->nxprov_flags & NXPROVF_ATTACHED);
	STAILQ_REMOVE(&nxprov_head, nxprov, kern_nexus_provider, nxprov_link);
	nxprov->nxprov_flags &= ~NXPROVF_ATTACHED;

	/* caller must hold an extra ref */
	ASSERT(nxprov->nxprov_refcnt > 1);
	(void) nxprov_release_locked(nxprov);

	if (!locked) {
		SK_UNLOCK();
	}
}

static struct kern_nexus_provider *
nxprov_alloc(struct kern_nexus_domain_provider *nxdom_prov, zalloc_flags_t how)
{
	struct kern_nexus_provider *nxprov;
	struct nxprov_params *nxp;

	ASSERT(nxdom_prov != NULL);

	nxp = nxprov_params_alloc(how);
	if (nxp == NULL) {
		SK_ERR("Failed to allocate nxprov_params");
		return NULL;
	}

	nxprov = zalloc_flags(nxprov_zone, how | Z_ZERO);
	if (nxprov == NULL) {
		SK_ERR("Failed to allocate nxprov");
		nxprov_params_free(nxp);
		return NULL;
	}

	nxprov->nxprov_dom_prov = nxdom_prov;
	nxprov->nxprov_params = nxp;
	/* hold a reference for nxprov */
	nxdom_prov_retain_locked(nxdom_prov);

	return nxprov;
}

static void
nxprov_free(struct kern_nexus_provider *nxprov)
{
	struct kern_nexus_domain_provider *nxdom_prov =
	    nxprov->nxprov_dom_prov;

	SK_LOCK_ASSERT_HELD();

	ASSERT(nxdom_prov != NULL);
	(void) nxdom_prov_release_locked(nxdom_prov);
	nxprov->nxprov_dom_prov = NULL;
	ASSERT(nxprov->nxprov_params != NULL);
	nxprov_params_free(nxprov->nxprov_params);
	nxprov->nxprov_params = NULL;
	ASSERT(!(nxprov->nxprov_flags & NXPROVF_ATTACHED));
	SK_DF(SK_VERB_MEM, "nxprov 0x%llx FREE", SK_KVA(nxprov));
	zfree(nxprov_zone, nxprov);
}

static void
nxprov_retain_locked(struct kern_nexus_provider *nxprov)
{
	SK_LOCK_ASSERT_HELD();

	nxprov->nxprov_refcnt++;
	ASSERT(nxprov->nxprov_refcnt != 0);
}

void
nxprov_retain(struct kern_nexus_provider *nxprov)
{
	SK_LOCK();
	nxprov_retain_locked(nxprov);
	SK_UNLOCK();
}

static int
nxprov_release_locked(struct kern_nexus_provider *nxprov)
{
	int oldref = nxprov->nxprov_refcnt;

	SK_LOCK_ASSERT_HELD();

	ASSERT(nxprov->nxprov_refcnt != 0);
	if (--nxprov->nxprov_refcnt == 0) {
		nxprov_free(nxprov);
	}

	return oldref == 1;
}

int
nxprov_release(struct kern_nexus_provider *nxprov)
{
	int lastref;

	SK_LOCK();
	lastref = nxprov_release_locked(nxprov);
	SK_UNLOCK();

	return lastref;
}

struct nxprov_params *
nxprov_params_alloc(zalloc_flags_t how)
{
	return zalloc_flags(nxprov_params_zone, how | Z_ZERO);
}

void
nxprov_params_free(struct nxprov_params *nxp)
{
	SK_DF(SK_VERB_MEM, "nxp 0x%llx FREE", SK_KVA(nxp));
	zfree(nxprov_params_zone, nxp);
}

static int
nx_check_pp(struct kern_nexus_provider *nxprov, struct kern_pbufpool *pp)
{
	struct kern_nexus_domain_provider *nxdom_prov = nxprov->nxprov_dom_prov;

	if ((pp->pp_flags & (PPF_EXTERNAL | PPF_CLOSED)) != PPF_EXTERNAL) {
		SK_ERR("Rejecting \"%s\" built-in pp", pp->pp_name);
		return ENOTSUP;
	}

	/*
	 * Require that the nexus domain metadata type and the
	 * metadata type of the caller-provided pbufpool match.
	 */
	if (nxdom_prov->nxdom_prov_dom->nxdom_md_type !=
	    pp->pp_md_type ||
	    nxdom_prov->nxdom_prov_dom->nxdom_md_subtype !=
	    pp->pp_md_subtype) {
		SK_ERR("Mismatch in metadata type/subtype "
		    "(%u/%u != %u/%u)", pp->pp_md_type,
		    nxdom_prov->nxdom_prov_dom->nxdom_md_type,
		    pp->pp_md_subtype,
		    nxdom_prov->nxdom_prov_dom->nxdom_md_subtype);
		return EINVAL;
	}

	/*
	 * Require that the nexus provider memory configuration
	 * has the same impedance as the caller-provided one.
	 * Both need to be lacking or present; if one of them
	 * is set and the other isn't, then we bail.
	 */
	if (!!(pp->pp_buf_region->skr_mode & SKR_MODE_MONOLITHIC) ^
	    !!(nxprov->nxprov_ext.nxpi_flags & NXPIF_MONOLITHIC)) {
		SK_ERR("Memory config mismatch: monolithic mode");
		return EINVAL;
	}

	return 0;
}

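/*
 * Create a nexus instance on a previously registered provider.  On
 * success the nexus is returned with three references held: one for the
 * provider's list, one for the global RB tree and one for the caller.
 */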
1852 struct kern_nexus *
nx_create(struct nxctl * nxctl,const uuid_t nxprov_uuid,const nexus_type_t dom_type,const void * nx_ctx,nexus_ctx_release_fn_t nx_ctx_release,struct kern_pbufpool * tx_pp,struct kern_pbufpool * rx_pp,int * err)1853 nx_create(struct nxctl *nxctl, const uuid_t nxprov_uuid,
1854 const nexus_type_t dom_type, const void *nx_ctx,
1855 nexus_ctx_release_fn_t nx_ctx_release, struct kern_pbufpool *tx_pp,
1856 struct kern_pbufpool *rx_pp, int *err)
1857 {
1858 struct kern_nexus_domain_provider *nxdom_prov;
1859 struct kern_nexus_provider *nxprov = NULL;
1860 struct kern_nexus *nx = NULL;
1861 #if SK_LOG
1862 uuid_string_t uuidstr;
1863 #endif /* SK_LOG */
1864
1865 NXCTL_LOCK_ASSERT_HELD(nxctl);
1866
1867 ASSERT(dom_type < NEXUS_TYPE_MAX);
1868 ASSERT(!uuid_is_null(nxprov_uuid));
1869 *err = 0;
1870
1871 SK_LOCK();
1872
1873 STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
1874 if (nxctl == nxprov->nxprov_ctl &&
1875 uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
1876 break;
1877 }
1878 }
1879
1880 if (nxprov == NULL || (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
1881 SK_ERR("Provider not found or has been closed");
1882 *err = ENOENT;
1883 goto done;
1884 }
1885
1886 nxdom_prov = nxprov->nxprov_dom_prov;
1887 if (dom_type != NEXUS_TYPE_UNDEFINED &&
1888 (nxdom_prov->nxdom_prov_dom->nxdom_type != dom_type)) {
1889 SK_ERR("Mismatch in domain type (0x%u != 0x%u)",
1890 dom_type, nxdom_prov->nxdom_prov_dom->nxdom_type);
1891 nxdom_prov = NULL;
1892 nxprov = NULL;
1893 *err = ENODEV;
1894 goto done;
1895 }
1896
1897 if ((dom_type == NEXUS_TYPE_NET_IF) && NXPROV_LLINK(nxprov) &&
1898 (!tx_pp || !rx_pp)) {
1899 #if SK_LOG
1900 SK_ERR("TX/RX packet pool is required for netif logical link "
1901 "nexus provider UUID: %s",
1902 sk_uuid_unparse(nxprov_uuid, uuidstr));
1903 #endif /* SK_LOG */
1904 nxdom_prov = NULL;
1905 nxprov = NULL;
1906 *err = EINVAL;
1907 goto done;
1908 }
1909
1910 if ((tx_pp != NULL && (*err = nx_check_pp(nxprov, tx_pp)) != 0) ||
1911 (rx_pp != NULL && (*err = nx_check_pp(nxprov, rx_pp)) != 0)) {
1912 goto done;
1913 }
1914
1915 nx = nx_alloc(Z_WAITOK);
1916
1917 STAILQ_INIT(&nx->nx_ch_head);
1918 STAILQ_INIT(&nx->nx_ch_nonxref_head);
1919 lck_rw_init(&nx->nx_ch_if_adv_lock, &nexus_lock_group,
1920 &nexus_lock_attr);
1921 STAILQ_INIT(&nx->nx_ch_if_adv_head);
1922 uuid_generate_random(nx->nx_uuid);
1923 nx->nx_prov = nxprov;
1924 nx->nx_ctx = (void *)(uintptr_t)nx_ctx;
1925 nx->nx_ctx_release = nx_ctx_release;
1926 nx->nx_id = nxdom_prov->nxdom_prov_gencnt++;
1927
1928 if (tx_pp != NULL) {
1929 nx->nx_tx_pp = tx_pp;
1930 pp_retain(tx_pp); /* released by nx_free */
1931 }
1932
1933 if (rx_pp != NULL) {
1934 nx->nx_rx_pp = rx_pp;
1935 pp_retain(rx_pp); /* released by nx_free */
1936 }
1937
1938 /* this nexus is alive; tell the nexus constructor to set it up */
1939 if (nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor != NULL) {
1940 *err = nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor(nx);
1941 if (*err != 0) {
1942 nx->nx_prov = NULL;
1943 goto done;
1944 }
1945 }
1946
1947 nxprov_retain_locked(nxprov); /* hold a ref on the nexus reg */
1948
1949 STAILQ_INSERT_TAIL(&nxprov->nxprov_nx_head, nx, nx_prov_link);
1950 nxprov->nxprov_nx_count++;
1951 RB_INSERT(kern_nexus_tree, &nx_head, nx);
1952 atomic_bitset_32(&nx->nx_flags, NXF_ATTACHED);
1953
1954 nx_retain_locked(nx); /* one for the provider list */
1955 nx_retain_locked(nx); /* one for the global list */
1956 nx_retain_locked(nx); /* one for the caller */
1957
1958 #if SK_LOG
1959 SK_D("nexus 0x%llx (%s:%s) UUID %s", SK_KVA(nx),
1960 nxdom_prov->nxdom_prov_dom->nxdom_name,
1961 nxdom_prov->nxdom_prov_name, sk_uuid_unparse(nx->nx_uuid, uuidstr));
1962 #endif /* SK_LOG */
1963 done:
1964 SK_UNLOCK();
1965
1966 if (*err != 0) {
1967 if (nx != NULL) {
1968 nx_free(nx);
1969 nx = NULL;
1970 }
1971 }
1972 return nx;
1973 }

int
nx_destroy(struct nxctl *nxctl, const uuid_t nx_uuid)
{
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	SK_LOCK();

	uuid_copy(find.nx_uuid, nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && nxctl != NX_PROV(nx)->nxprov_ctl) {
		nx = NULL;
	}

	if (nx != NULL) {
		nx_retain_locked(nx);
	}

	if (nx == NULL) {
		err = ENOENT;
	} else {
		err = nx_close(nx, TRUE);
		(void) nx_release_locked(nx);
	}

	SK_UNLOCK();

	return err;
}

static inline int
nx_cmp(const struct kern_nexus *a, const struct kern_nexus *b)
{
	return uuid_compare(a->nx_uuid, b->nx_uuid);
}

struct kern_nexus *
nx_find(const uuid_t nx_uuid, boolean_t locked)
{
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

	uuid_copy(find.nx_uuid, nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && (nx->nx_flags & NXF_CLOSED)) {
		nx = NULL;
	}

	/* return reference to caller */
	if (nx != NULL) {
		nx_retain_locked(nx);
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return nx;
}
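
/*
 * Typical caller pattern (illustrative sketch only; error handling
 * elided).  nx_find() returns a retained nexus, so every successful
 * lookup must be balanced with nx_release() (or nx_release_locked()
 * if the SK_LOCK is already held):
 *
 *	struct kern_nexus *nx = nx_find(nx_uuid, FALSE);
 *	if (nx != NULL) {
 *		... use nx ...
 *		(void) nx_release(nx);
 *	}
 */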

int
nx_close(struct kern_nexus *nx, boolean_t locked)
{
	int err = 0;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

	if (nx->nx_flags & NXF_CLOSED) {
		err = EALREADY;
	} else {
#if SK_LOG
		uuid_string_t uuidstr;
		SK_D("nexus 0x%llx (%s:%s) UUID %s flags 0x%b", SK_KVA(nx),
		    NX_DOM(nx)->nxdom_name, NX_DOM_PROV(nx)->nxdom_prov_name,
		    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags,
		    NXF_BITS);
#endif /* SK_LOG */

		if (STAILQ_EMPTY(&nx->nx_ch_head)) {
			/* no regular channels open to it, so detach now */
			nx_detach(nx);
		} else {
			/* detach when the last channel closes */
			ASSERT(nx->nx_refcnt > 3);
			atomic_bitset_32(&nx->nx_flags, NXF_CLOSED);
		}
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return err;
}

void
nx_stop(struct kern_nexus *nx)
{
	struct kern_nexus_provider *nxprov = nx->nx_prov;

	SK_LOCK_ASSERT_HELD();

	/* send a stop message */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_stop != NULL) {
		nxprov->nxprov_dom_prov->nxdom_prov_nx_stop(nx);
	}
}

void
nx_detach(struct kern_nexus *nx)
{
	struct kern_nexus_provider *nxprov = nx->nx_prov;

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nexus 0x%llx UUID %s flags 0x%b", SK_KVA(nx),
	    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags, NXF_BITS);
#endif /* SK_LOG */

	/* Caller must hold extra refs, on top of the two in reg/global lists */
	ASSERT(nx->nx_refcnt >= 3);
	ASSERT(nx->nx_flags & NXF_ATTACHED);

	/* this nexus is done; let the nexus destructor do final cleanups */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor != NULL) {
		nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor(nx);
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	STAILQ_REMOVE(&nxprov->nxprov_nx_head, nx, kern_nexus, nx_prov_link);
	nxprov->nxprov_nx_count--;
	RB_REMOVE(kern_nexus_tree, &nx_head, nx);
	atomic_bitclear_32(&nx->nx_flags, NXF_ATTACHED);
	nx->nx_prov = NULL;
	if (nx->nx_ctx_release != NULL) {
		nx->nx_ctx_release(nx->nx_ctx);
	}
	nx->nx_ctx = NULL;

	(void) nx_release_locked(nx);	/* one for the reg list */
	(void) nx_release_locked(nx);	/* one for the global list */

	/*
	 * If this was the last nexus and the provider has been closed,
	 * detach the provider and finish up the postponed job.
	 */
	if (STAILQ_EMPTY(&nxprov->nxprov_nx_head) &&
	    (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		nxprov_detach(nxprov, TRUE);
	}
	(void) nxprov_release_locked(nxprov);
}

int
nx_advisory_alloc(struct kern_nexus *nx, const char *name,
    struct skmem_region_params *srp_nexusadv, nexus_advisory_type_t type)
{
	struct __kern_nexus_adv_metadata *adv_md;

	_CASSERT(sizeof(struct __kern_nexus_adv_metadata) == sizeof(uint64_t));
	_CASSERT((sizeof(struct sk_nexusadv) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	_CASSERT((sizeof(struct netif_nexus_advisory) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(type == NEXUS_ADVISORY_TYPE_FLOWSWITCH ||
	    type == NEXUS_ADVISORY_TYPE_NETIF);

	if ((nx->nx_adv.nxv_reg = skmem_region_create(name, srp_nexusadv,
	    NULL, NULL, NULL)) == NULL) {
		return ENOMEM;
	}

	nx->nx_adv.nxv_adv = skmem_region_alloc(nx->nx_adv.nxv_reg, NULL,
	    NULL, NULL, (SKMEM_NOSLEEP | SKMEM_PANIC));
	adv_md = nx->nx_adv.nxv_adv;
	adv_md->knam_version = NX_ADVISORY_MD_CURRENT_VERSION;
	adv_md->knam_type = type;
	adv_md->__reserved = 0;
	nx->nx_adv.nxv_adv_type = type;
	nx->nx_adv.flowswitch_nxv_adv = (void *)(adv_md + 1);
	if (type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
		nx->nx_adv.flowswitch_nxv_adv->nxadv_ver =
		    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION;
	} else {
		nx->nx_adv.netif_nxv_adv->nna_version =
		    NX_NETIF_ADVISORY_CURRENT_VERSION;
	}
	return 0;
}
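
/*
 * Advisory region layout (illustrative, derived from the code above):
 * the 8-byte __kern_nexus_adv_metadata header is immediately followed
 * by the type-specific advisory structure, hence the (adv_md + 1)
 * pointer arithmetic:
 *
 *	+--------------------------------+ <- nxv_adv
 *	| __kern_nexus_adv_metadata      |    (version, type, __reserved)
 *	+--------------------------------+ <- nxv_adv + 1
 *	| struct sk_nexusadv or          |
 *	| struct netif_nexus_advisory    |
 *	+--------------------------------+
 */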

void
nx_advisory_free(struct kern_nexus *nx)
{
	if (nx->nx_adv.nxv_reg != NULL) {
		ASSERT(nx->nx_adv.nxv_adv != NULL);
		skmem_region_free(nx->nx_adv.nxv_reg,
		    nx->nx_adv.nxv_adv, NULL);
		nx->nx_adv.nxv_adv = NULL;
		nx->nx_adv.nxv_adv_type = NEXUS_ADVISORY_TYPE_INVALID;
		nx->nx_adv.flowswitch_nxv_adv = NULL;
		skmem_region_release(nx->nx_adv.nxv_reg);
		nx->nx_adv.nxv_reg = NULL;
	}

	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(nx->nx_adv.nxv_adv_type == NEXUS_ADVISORY_TYPE_INVALID);
	ASSERT(nx->nx_adv.flowswitch_nxv_adv == NULL);
}

static struct kern_nexus *
nx_alloc(zalloc_flags_t how)
{
	SK_LOCK_ASSERT_HELD();

	return zalloc_flags(nx_zone, how | Z_ZERO);
}

static void
nx_free(struct kern_nexus *nx)
{
	ASSERT(!(nx->nx_flags & NXF_ATTACHED) && nx->nx_prov == NULL);
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	nx_port_free_all(nx);

	if (nx->nx_tx_pp != NULL) {
		pp_release(nx->nx_tx_pp);
		nx->nx_tx_pp = NULL;
	}
	if (nx->nx_rx_pp != NULL) {
		pp_release(nx->nx_rx_pp);
		nx->nx_rx_pp = NULL;
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_if_adv_head));
	lck_rw_destroy(&nx->nx_ch_if_adv_lock, &nexus_lock_group);

	SK_DF(SK_VERB_MEM, "nexus 0x%llx FREE", SK_KVA(nx));
	zfree(nx_zone, nx);
}

void
nx_retain_locked(struct kern_nexus *nx)
{
	SK_LOCK_ASSERT_HELD();

	nx->nx_refcnt++;
	VERIFY(nx->nx_refcnt > 0);
}

void
nx_retain(struct kern_nexus *nx)
{
	SK_LOCK();
	nx_retain_locked(nx);
	SK_UNLOCK();
}

int
nx_release_locked(struct kern_nexus *nx)
{
	int oldref = nx->nx_refcnt;

	SK_LOCK_ASSERT_HELD();

	VERIFY(nx->nx_refcnt > 0);
	if (--nx->nx_refcnt == 0) {
		nx_free(nx);
	}

	return oldref == 1;
}

int
nx_release(struct kern_nexus *nx)
{
	int lastref;

	SK_LOCK_ASSERT_NOTHELD();

	SK_LOCK();
	lastref = nx_release_locked(nx);
	SK_UNLOCK();

	return lastref;
}

static int
nx_init_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	boolean_t undo = FALSE;
	int ksd_retains = 0;
	enum txrx t;
	int err = 0;

	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)) ==
	    CHANF_EXT_PRECONNECT);

	if (nxprov->nxprov_ext.nxpi_ring_init == NULL) {
		return 0;
	}

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			/* skip host rings */
			if (kring->ckr_flags & CKRF_HOST) {
				continue;
			}

			if ((err = nxprov->nxprov_ext.nxpi_ring_init(
			    nxprov, nx, ch, kring, (kring->ckr_tx == NR_TX),
			    &kring->ckr_ctx)) != 0) {
				SK_D("ch 0x%llx flags %b nx 0x%llx kr \"%s\" "
				    "(0x%llx) krflags %b ring_init error %d",
				    SK_KVA(ch), ch->ch_flags, CHANF_BITS,
				    SK_KVA(nx), kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS, err);
				kring->ckr_ctx = NULL;
				undo = TRUE;
				break;
			}
			kring->ckr_flags |= CKRF_EXT_RING_INITED;

			if ((err = nx_init_slots(nx, kring)) != 0) {
				undo = TRUE;
				break;
			}

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_retains;
			}
		}
		if (undo) {
			break;
		}
	}

	/*
	 * Note: retain the KSD region even in case of error, since the
	 * CKRF_EXT_SLOTS_INITED flag has been set on some of the rings;
	 * nx_fini_rings() takes care of the release based on that flag.
	 */
	if (ksd_retains != 0) {
		/*
		 * Mark the kernel slot descriptor region as busy; this
		 * prevents it from being torn-down at channel defunct
		 * time, as we need to invoke the slot_fini() callback
		 * for each slot and we need the descriptors until then.
		 */
		skmem_arena_nexus_sd_set_noidle(skmem_arena_nexus(na->na_arena),
		    ksd_retains);
	}

	if (err != 0) {
		ASSERT(undo);
		nx_fini_rings(nx, ch);
	}

	return err;
}

static void
nx_fini_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	int ksd_releases = 0;
	enum txrx t;

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			if (!(kring->ckr_flags & CKRF_EXT_RING_INITED)) {
				continue;
			}

			ASSERT(!(kring->ckr_flags & CKRF_HOST));
			ASSERT(nxprov->nxprov_ext.nxpi_ring_fini != NULL);
			nxprov->nxprov_ext.nxpi_ring_fini(nxprov, nx, kring);
			kring->ckr_flags &= ~CKRF_EXT_RING_INITED;

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_releases;
			}

			/*
			 * Undo the work done in nx_init_slots() and inform
			 * the external domain provider, if applicable, that
			 * the slots for this ring are no longer valid.
			 */
			nx_fini_slots(nx, kring);
			kring->ckr_ctx = NULL;
		}
	}

	if (ksd_releases != 0) {
		/*
		 * Now that we've finished invoking the slot_fini()
		 * callbacks, release the busy retain counts held
		 * earlier in nx_init_rings(). This will allow the
		 * kernel slot descriptor region to be torn down.
		 */
		skmem_arena_nexus_sd_set_noidle(
			skmem_arena_nexus(na->na_arena), -ksd_releases);
	}
}

static int
nx_init_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	int err = 0;
	uint32_t i;

	/*
	 * If the slot init callback was not provided, or if the
	 * kring was not created to hold any slot contexts, don't
	 * go any further.
	 */
	if (nxprov->nxprov_ext.nxpi_slot_init == NULL ||
	    kring->ckr_slot_ctxs == NULL) {
		return 0;
	}

	ASSERT(kring->ckr_slot_ctxs_set == 0);
	ASSERT(slot != NULL);

	for (i = 0; i < kring->ckr_num_slots; i++) {
		struct kern_slot_prop *slot_ctx_prop = NULL;
		void *slot_ctx_arg = NULL;

		ASSERT(&slot[i] <= kring->ckr_ksds_last);
		if ((err = nxprov->nxprov_ext.nxpi_slot_init(nxprov, nx, kring,
		    &slot[i], i, &slot_ctx_prop, &slot_ctx_arg)) != 0) {
			SK_D("nx 0x%llx kr \"%s\" (0x%llx) krflags %b slot %u "
			    "slot_init error %d", SK_KVA(nx), kring->ckr_name,
			    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, i, err);
			break;
		}
		/* we don't want this to be used by the client, so verify here */
		ASSERT(slot_ctx_prop == NULL);
		kring->ckr_slot_ctxs[i].slot_ctx_arg =
		    (mach_vm_address_t)slot_ctx_arg;
		kring->ckr_slot_ctxs_set++;
	}

	if (err != 0) {
		nx_fini_slots(nx, kring);
	} else {
		kring->ckr_flags |= CKRF_EXT_SLOTS_INITED;
	}

	return err;
}

static void
nx_fini_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	uint32_t i;

	ASSERT(!(kring->ckr_flags & CKRF_EXT_SLOTS_INITED) ||
	    nxprov->nxprov_ext.nxpi_slot_fini != NULL);
	ASSERT(slot != NULL || !(kring->ckr_flags & CKRF_EXT_SLOTS_INITED));

	for (i = 0; i < kring->ckr_slot_ctxs_set; i++) {
		ASSERT(slot != NULL && &slot[i] <= kring->ckr_ksds_last);
		if (nxprov->nxprov_ext.nxpi_slot_fini != NULL) {
			nxprov->nxprov_ext.nxpi_slot_fini(nxprov, nx,
			    kring, &slot[i], i);
		}
		if (kring->ckr_slot_ctxs != NULL) {
			kring->ckr_slot_ctxs[i].slot_ctx_arg = 0;
		}
	}
	kring->ckr_slot_ctxs_set = 0;

	/* We're done with this kring */
	kring->ckr_flags &= ~CKRF_EXT_SLOTS_INITED;
}


/* 64-bit mask with range */
#define BMASK64(_beg, _end)	\
	((NX_PORT_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))
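
/*
 * Worked example (illustrative): with NX_PORT_CHUNK_FREE == ~0ULL
 * (all bits set, as used to mark a fully free chunk below),
 * BMASK64(2, 5) evaluates as
 *
 *	(~0ULL >> (63 - 5))  == 0x3f	bits 0..5 set
 *	& ~((1ULL << 2) - 1) == ~0x3	clears bits 0..1
 *	                     == 0x3c	bits 2..5 set
 *
 * i.e. a 64-bit mask covering the inclusive bit range [_beg, _end].
 */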

int
nx_port_find(struct kern_nexus *nx, nexus_port_t first,
    nexus_port_t last, nexus_port_t *nx_port)
{
	int err = 0;

	ASSERT(first < last);
	*nx_port = NEXUS_PORT_ANY;

	if (nx->nx_num_ports == 0 || (first + 1) >= nx->nx_num_ports) {
		/*
		 * Left edge of the range is beyond the current map;
		 * let nx_port_alloc() handle the growing later.
		 */
		*nx_port = first;
	} else {
		uint32_t fc = (first / NX_PORT_CHUNK);
		uint32_t lc = (MIN(last, nx->nx_num_ports) / NX_PORT_CHUNK);
		uint32_t lim = (nx->nx_num_ports / NX_PORT_CHUNK);
		uint32_t i, j;
		bitmap_t *bmap;

		/*
		 * The right edge of the range is either within or
		 * beyond the current map; scan thru the current
		 * map and find the first available port.
		 */
		for (i = fc; i <= lc; i++) {
			bitmap_t mask;
			uint32_t beg = 0, end = 63;

			if (i == fc) {
				beg = (first % NX_PORT_CHUNK);
			}
			if (i == (last / NX_PORT_CHUNK)) {
				end = (last % NX_PORT_CHUNK);
			}

			if (i < lim) {
				bmap = &nx->nx_ports_bmap[i];
				mask = BMASK64(beg, end);

				j = ffsll((*bmap) & mask);
				if (j == 0) {
					continue;
				}

				--j;
				*nx_port = (i * NX_PORT_CHUNK) + j;
			}
			break;
		}

		/*
		 * If the requested range is within the current map and we
		 * couldn't find a port, return an err. Otherwise, return
		 * the next port index to trigger growing later.
		 */
		if (*nx_port == NEXUS_PORT_ANY) {
			if (lc == (last / NX_PORT_CHUNK)) {
				err = EBUSY;
				SK_ERR("port unavail in [%u, %u)", first, last);
			} else {
				*nx_port = nx->nx_num_ports;
			}
		}
	}

	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d (err %d)", SK_KVA(nx),
	    (int)*nx_port, err);

	return err;
}

static int
nx_port_grow(struct kern_nexus *nx, uint32_t grow)
{
	nexus_port_t dom_port_max = NXDOM_MAX(NX_DOM(nx), ports);
	struct nx_port_info *ports;
	size_t limit;
	uint32_t i, num_ports, old_num_ports;
	bitmap_t *bmap;

	ASSERT(grow > 0 && (grow % NX_PORT_CHUNK) == 0);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	_CASSERT((sizeof(*bmap) * 8) == NX_PORT_CHUNK);
	ASSERT(powerof2(dom_port_max));
	ASSERT(dom_port_max % NX_PORT_CHUNK == 0);

	old_num_ports = nx->nx_num_ports;
	num_ports = nx->nx_num_ports + grow;
	limit = P2ROUNDUP(dom_port_max, NX_PORT_CHUNK);
	if (num_ports > limit) {
		SK_ERR("can't grow, total %u grow %u (new %u > dom_max %u)",
		    nx->nx_num_ports, grow, num_ports, limit);
		return EDOM;
	}

	if ((bmap = sk_realloc_data(nx->nx_ports_bmap,
	    (old_num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    (num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		SK_ERR("bmap alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}
	nx->nx_ports_bmap = bmap;

	if ((ports = sk_realloc_data(nx->nx_ports, old_num_ports * sizeof(*ports),
	    num_ports * sizeof(*ports), Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		/* can't free bmap here, otherwise nexus won't work */
		SK_ERR("nx_ports alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}

	/* initialize the additional new ports */
	bzero(&ports[nx->nx_num_ports], (grow * sizeof(*ports)));
	nx->nx_ports = ports;

	/* initialize new bitmaps (set all bits) */
	for (i = (nx->nx_num_ports / NX_PORT_CHUNK);
	    i < (num_ports / NX_PORT_CHUNK); i++) {
		bmap[i] = NX_PORT_CHUNK_FREE;
	}

	nx->nx_num_ports = num_ports;

	SK_DF(SK_VERB_NXPORT, "!!! nx 0x%llx ports %u/%u, %u ports added",
	    SK_KVA(nx), nx->nx_active_ports, nx->nx_num_ports, grow);

	return 0;
}
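
/*
 * Sizing sketch (illustrative): NX_PORT_CHUNK is the width of one
 * bitmap_t (64 bits, per the _CASSERT above), so a request for
 * nx_port 70 on a nexus that currently has 64 ports grows the map by
 * P2ROUNDUP(71 - 64, 64) == 64 ports: one more bitmap_t chunk and 64
 * more nx_port_info slots.
 */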

int
nx_port_alloc(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb,
    struct nexus_adapter **na, struct proc *p)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb0;
	size_t g;
	uint32_t i, j;
	bitmap_t *bmap;
	bool refonly = false;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);

	/* port is zero-based, so adjust here */
	if ((nx_port + 1) > nx->nx_num_ports) {
		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
		VERIFY(g <= UINT32_MAX);
		if ((err = nx_port_grow(nx, (uint32_t)g)) != 0) {
			goto done;
		}
	}
	ASSERT(err == 0);
	ASSERT(nx_port < nx->nx_num_ports);
	npi = &nx->nx_ports[nx_port];
	nxb0 = npi->npi_nxb;
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if (bit_test(*bmap, j)) {
		/* port is not (yet) bound or allocated */
		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
		if (p != kernproc && !NX_ANONYMOUS_PROV(nx)) {
			/*
			 * If the port allocation is requested by userland
			 * and the nexus is non-anonymous, then fail the
			 * request.
			 */
			err = EACCES;
			SK_ERR("user proc alloc on named nexus needs binding");
		} else if (na != NULL && *na != NULL) {
			/*
			 * Otherwise claim it (clear bit) if the caller
			 * supplied an adapter for this port; else, it
			 * is just an existential check and so there's
			 * no action needed at this point (we'll skip
			 * the init below since vpna is NULL).
			 */
			bit_clear(*bmap, j);
		}
	} else {
		/* if port is bound, check if credentials match */
		if (nxb0 != NULL && p != kernproc && !NX_ANONYMOUS_PROV(nx) &&
		    (nxb == NULL || !nxb_is_equal(nxb0, nxb))) {
			SK_ERR("nexus binding mismatch");
			err = EACCES;
		} else {
			/*
			 * If port is already occupied by an adapter,
			 * see if the client is requesting a reference
			 * to it; if so, return the adapter. Otherwise,
			 * if unoccupied and vpna is non-NULL, associate
			 * it with this nexus port via the below init.
			 */
			if (NPI_NA(npi) != NULL) {
				if (na != NULL && *na == NULL) {
					*na = NPI_NA(npi);
					na_retain_locked(*na);
					/* skip the init below */
					refonly = true;
				} else {
					/*
					 * If the client supplied an adapter
					 * (regardless of its value) for a
					 * nexus port that's already occupied,
					 * then we fail the request.
					 */
					SK_ERR("nexus adapter exists");
					err = EEXIST;
				}
			}
		}
	}

done:
	/* initialize the nexus port and the adapter occupying it */
	if (err == 0 && na != NULL && *na != NULL && !refonly) {
		ASSERT(nx_port < nx->nx_num_ports);
		ASSERT(npi->npi_nah == 0);
		ASSERT(nx->nx_active_ports < nx->nx_num_ports);
		ASSERT(!bit_test(nx->nx_ports_bmap[nx_port / NX_PORT_CHUNK],
		    (nx_port % NX_PORT_CHUNK)));

		nx->nx_active_ports++;
		npi->npi_nah = NPI_NA_ENCODE(*na, NEXUS_PORT_STATE_WORKING);
		(*na)->na_nx_port = nx_port;
	}

	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d, ports %u/%u (err %d)",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports,
	    err);

	return err;
}
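
/*
 * Port state sketch (illustrative summary of the logic above, where a
 * set bitmap bit means "free"):
 *
 *	bit	npi_nxb		NPI_NA(npi)	meaning
 *	 1	NULL		NULL		free
 *	 0	non-NULL	NULL		bound, not yet occupied
 *	 0	any		non-NULL	occupied by an adapter
 */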

void
nx_port_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = &nx->nx_ports[nx_port];

	npi->npi_nah = NPI_NA_ENCODE(npi->npi_nah,
	    NEXUS_PORT_STATE_DEFUNCT);
}

void
nx_port_free(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	bitmap_t *bmap;
	uint32_t i, j;

	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nx_port != NEXUS_PORT_ANY && nx_port < nx->nx_num_ports);
	ASSERT(nx->nx_active_ports != 0);

	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	ASSERT(!bit_test(*bmap, j));

	npi = &nx->nx_ports[nx_port];
	npi->npi_nah = 0;
	if (npi->npi_nxb == NULL) {
		/* it's vacant, release it (set bit) */
		bit_set(*bmap, j);
	}

	nx->nx_active_ports--;

	//XXX [email protected] --- try to shrink bitmap & nx_ports ???

	SK_DF(SK_VERB_NXPORT, "--- nx 0x%llx nx_port %d, ports %u/%u",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports);
}

int
nx_port_bind_info(struct kern_nexus *nx, nexus_port_t nx_port,
    struct nxbind *nxb0, void *info)
{
	struct nx_port_info *npi = NULL;
	size_t g;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT(nx_port < NXDOM_MAX(NX_DOM(nx), ports));
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nxb0 != NULL);

	if ((nx_port + 1) > nx->nx_num_ports) {
		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
		VERIFY(g <= UINT32_MAX);
		if ((err = nx_port_grow(nx, (uint32_t)g)) != 0) {
			goto done;
		}
	}
	ASSERT(err == 0);

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	if (bit_test(*bmap, j)) {
		/* port is not (yet) bound or allocated */
		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);

		struct nxbind *nxb = nxb_alloc(Z_WAITOK);
		nxb_move(nxb0, nxb);
		npi->npi_nxb = nxb;
		npi->npi_info = info;
		/* claim it (clear bit) */
		bit_clear(*bmap, j);
		ASSERT(err == 0);
	} else {
		/* port is already taken */
		ASSERT(NPI_NA(npi) != NULL || npi->npi_nxb != NULL);
		err = EEXIST;
	}
done:
	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "+++ nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}

int
nx_port_bind(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb0)
{
	return nx_port_bind_info(nx, nx_port, nxb0, NULL);
}

static int
nx_port_info_size(void *info, size_t *sz)
{
	struct nx_port_info_header *hdr = info;

	switch (hdr->ih_type) {
	case NX_PORT_INFO_TYPE_NETIF:
		break;
	default:
		return EINVAL;
	}
	*sz = hdr->ih_size;
	return 0;
}

int
nx_port_unbind(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (nx_port >= nx->nx_num_ports) {
		err = EDOM;
		goto done;
	}

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if ((nxb = npi->npi_nxb) == NULL) {
		/* must be either free or allocated */
		ASSERT(NPI_NA(npi) == NULL ||
		    (!bit_test(*bmap, j) && nx->nx_active_ports > 0));
		err = ENOENT;
	} else {
		nxb_free(nxb);
		npi->npi_nxb = NULL;
		if (npi->npi_info != NULL) {
			size_t sz;

			VERIFY(nx_port_info_size(npi->npi_info, &sz) == 0);
			sk_free_data(npi->npi_info, sz);
			npi->npi_info = NULL;
		}
		ASSERT(!bit_test(*bmap, j));
		if (NPI_NA(npi) == NULL) {
			/* it's vacant, release it (set bit) */
			bit_set(*bmap, j);
		}
	}

done:
	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "--- nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}

struct nexus_adapter *
nx_port_get_na(struct kern_nexus *nx, nexus_port_t nx_port)
{
	if (nx->nx_ports != NULL && nx->nx_num_ports > nx_port) {
		return NPI_NA(&nx->nx_ports[nx_port]);
	} else {
		return NULL;
	}
}

int
nx_port_get_info(struct kern_nexus *nx, nexus_port_t port,
    nx_port_info_type_t type, void *info, uint32_t len)
{
	struct nx_port_info *npi;
	struct nx_port_info_header *hdr;

	if (nx->nx_ports == NULL || port >= nx->nx_num_ports) {
		return ENXIO;
	}
	npi = &nx->nx_ports[port];
	hdr = npi->npi_info;
	if (hdr == NULL) {
		return ENOENT;
	}

	if (hdr->ih_type != type) {
		return EINVAL;
	}

	bcopy(npi->npi_info, info, len);
	return 0;
}

bool
nx_port_is_valid(struct kern_nexus *nx, nexus_port_t nx_port)
{
	return nx_port < nx->nx_num_ports;
}

bool
nx_port_is_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	ASSERT(nx_port_is_valid(nx, nx_port));

	return NPI_IS_DEFUNCT(&nx->nx_ports[nx_port]);
}

void
nx_port_free_all(struct kern_nexus *nx)
{
	uint32_t num_ports;

	/* uncrustify doesn't handle C blocks properly */
	/* BEGIN IGNORE CODESTYLE */
	nx_port_foreach(nx, ^(nexus_port_t p) {
		struct nxbind *nxb;
		void *info;
		nxb = nx->nx_ports[p].npi_nxb;
		info = nx->nx_ports[p].npi_info;
		if (nxb != NULL) {
			nxb_free(nxb);
			nx->nx_ports[p].npi_nxb = NULL;
		}
		if (info != NULL) {
			size_t sz;

			VERIFY(nx_port_info_size(info, &sz) == 0);
			skn_free_data(info, info, sz);
			nx->nx_ports[p].npi_info = NULL;
		}
	});
	/* END IGNORE CODESTYLE */

	num_ports = nx->nx_num_ports;
	nx->nx_num_ports = 0;
	nx->nx_active_ports = 0;
	skn_free_data(ports_bmap,
	    nx->nx_ports_bmap, (num_ports / NX_PORT_CHUNK) * sizeof(bitmap_t));
	nx->nx_ports_bmap = NULL;
	skn_free_data(ports,
	    nx->nx_ports, num_ports * sizeof(struct nx_port_info));
	nx->nx_ports = NULL;
}

void
nx_port_foreach(struct kern_nexus *nx,
    void (^port_handle)(nexus_port_t nx_port))
{
	for (uint32_t i = 0; i < (nx->nx_num_ports / NX_PORT_CHUNK); i++) {
		bitmap_t bmap = nx->nx_ports_bmap[i];

		if (bmap == NX_PORT_CHUNK_FREE) {
			continue;
		}

		for (uint32_t j = 0; j < NX_PORT_CHUNK; j++) {
			if (bit_test(bmap, j)) {
				continue;
			}
			port_handle((i * NX_PORT_CHUNK) + j);
		}
	}
}
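
/*
 * Usage sketch (illustrative only): the block is invoked once per
 * claimed port, i.e. every port whose bitmap bit is clear.  For
 * example, counting the claimed ports:
 *
 *	__block uint32_t n = 0;
 *	nx_port_foreach(nx, ^(nexus_port_t p) { n++; });
 */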

/*
 * sysctl interfaces
 */
static int nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_provider_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_provider_list_sysctl, "S,nexus_provider_info_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_channel_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_channel_list_sysctl, "S,nexus_channel_entry_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, llink_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_LLINK_LIST, nexus_mib_get_sysctl, "S,nx_llink_info",
    "A list of logical links");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
    0, NXMIB_FLOW, nexus_mib_get_sysctl, "S,sk_stats_flow",
    "Nexus inet flows with stats collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_owner,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_OWNER, nexus_mib_get_sysctl, "S,sk_stats_flow_owner",
    "Nexus flow owners");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_route,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ROUTE, nexus_mib_get_sysctl, "S,sk_stats_flow_route",
    "Nexus flow routes");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, net_if,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_STATS, nexus_mib_get_sysctl, "S,sk_stats_net_if",
    "Nexus netif statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_switch,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FSW_STATS, nexus_mib_get_sysctl, "S,sk_stats_flow_switch",
    "Nexus flowswitch statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, userstack,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_USERSTACK_STATS, nexus_mib_get_sysctl, "S,sk_stats_userstack",
    "Nexus userstack statistics counter");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_adv,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ADV, nexus_mib_get_sysctl, "S,sk_stats_flow_adv",
    "Nexus flow advisory dump");

/*
 * Provider list sysctl
 */
static void
nexus_provider_info_populate(struct kern_nexus_provider *nxprov,
    nexus_provider_info_t info)
{
	struct kern_nexus *nx;
	uuid_t *uuids;

	SK_LOCK_ASSERT_HELD();

	/* provider UUID + params */
	uuid_copy(info->npi_prov_uuid, nxprov->nxprov_uuid);
	bcopy(nxprov->nxprov_params, &info->npi_prov_params,
	    sizeof(struct nxprov_params));
	info->npi_instance_uuids_count = nxprov->nxprov_nx_count;

	/* instance UUID list */
	uuids = info->npi_instance_uuids;
	STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
		uuid_copy(*uuids, nx->nx_uuid);
		uuids++;
	}
}

static int
nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	int error = 0;
	struct kern_nexus_provider *nxprov;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		size_t info_size;

		info_size = NEXUS_PROVIDER_INFO_SIZE(nxprov->nxprov_nx_count);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			nexus_provider_info_populate(nxprov, (void *)scan);
			scan += info_size;
			buffer_space -= info_size;
		}
		actual_space += info_size;
	}
	SK_UNLOCK();

	out_error = SYSCTL_OUT(req, buffer, actual_space);
	if (out_error != 0) {
		error = out_error;
	}

	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}
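
/*
 * Reader sketch (illustrative; not part of the kernel sources): like
 * most list sysctls, this handler supports a size probe (oldptr ==
 * NULL) followed by the actual fetch, e.g. from userland:
 *
 *	size_t len = 0;
 *	sysctlbyname("kern.skywalk.nexus_provider_list", NULL, &len,
 *	    NULL, 0);				// probe for size
 *	void *buf = malloc(len);
 *	sysctlbyname("kern.skywalk.nexus_provider_list", buf, &len,
 *	    NULL, 0);				// fetch the list
 */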

/*
 * Channel list sysctl
 */
static uint32_t
channel_ring_count(struct kern_channel *ch, enum txrx which)
{
	return ch->ch_last[which] - ch->ch_first[which];
}

static void
populate_ring_entries(struct __kern_channel_ring *kring,
    ring_id_t first, ring_id_t last, nexus_channel_ring_entry_t entries)
{
	ring_id_t i;
	nexus_channel_ring_entry_t scan;
	struct __kern_channel_ring *ring;

	scan = entries;
	for (i = first; i < last; i++, scan++) {
		ring = &kring[i];

		DTRACE_SKYWALK1(populate__ring, struct __kern_channel_ring *,
		    ring);
		if (kr_stat_enable == 0) {
			bzero(&scan->ncre_stats, sizeof(scan->ncre_stats));
			bzero(&scan->ncre_user_stats,
			    sizeof(scan->ncre_user_stats));
		} else {
			scan->ncre_stats = ring->ckr_stats;
			scan->ncre_user_stats = ring->ckr_usr_stats;
		}
		scan->ncre_error_stats = ring->ckr_err_stats;
		scan->ncre_ring_id = i;
	}
}

/* combine/convert ch_mode/ch_flags into nexus_channel_entry flags */
static uint32_t
nexus_channel_get_flags(uint32_t ch_mode, uint32_t ch_flags)
{
	uint32_t flags = 0;

	flags |= (ch_mode & CHMODE_MONITOR_TX) ? SCHF_MONITOR_TX : 0;
	flags |= (ch_mode & CHMODE_MONITOR_RX) ? SCHF_MONITOR_RX : 0;
	flags |= (ch_mode & CHMODE_MONITOR_NO_COPY) ? SCHF_MONITOR_NO_COPY : 0;
	flags |= (ch_mode & CHMODE_USER_PACKET_POOL) ? SCHF_USER_PACKET_POOL : 0;
	flags |= (ch_mode & CHMODE_DEFUNCT_OK) ? SCHF_DEFUNCT_OK : 0;
	flags |= (ch_mode & CHMODE_FILTER) ? SCHF_FILTER : 0;
	flags |= (ch_mode & CHMODE_EVENT_RING) ? SCHF_EVENT_RING : 0;
	flags |= (ch_mode & CHMODE_EXCLUSIVE) ? SCHF_EXCLUSIVE : 0;
	flags |= (ch_flags & CHANF_IF_ADV) ? SCHF_IF_ADV : 0;
	flags |= (ch_flags & CHANF_DEFUNCT_SKIP) ? SCHF_DEFUNCT_SKIP : 0;
	flags |= (ch_flags & CHANF_CLOSING) ? SCHF_CLOSING : 0;
	flags |= (ch_flags & CHANF_DEFUNCT) ? SCHF_DEFUNCT : 0;
	flags |= (ch_mode & CHMODE_LOW_LATENCY) ? SCHF_LOW_LATENCY : 0;

	return flags;
}

SK_NO_INLINE_ATTRIBUTE
static void
nexus_channel_entry_populate(struct kern_channel *ch,
    nexus_channel_entry_t entry)
{
	uint32_t ch_mode = ch->ch_info->cinfo_ch_mode;
	uint32_t ch_flags = ch->ch_flags;
	ring_id_t rx_first = ch->ch_first[NR_RX];
	ring_id_t rx_last = ch->ch_last[NR_RX];
	ring_id_t tx_last = ch->ch_last[NR_TX];
	ring_id_t tx_first = ch->ch_first[NR_TX];

	uuid_copy(entry->nce_uuid, ch->ch_info->cinfo_ch_id);
	entry->nce_flags = nexus_channel_get_flags(ch_mode, ch_flags);
	entry->nce_port = ch->ch_info->cinfo_nx_port;
	entry->nce_pid = ch->ch_pid;
	entry->nce_fd = ch->ch_fd;
	entry->nce_tx_rings = tx_last - tx_first;
	entry->nce_rx_rings = rx_last - rx_first;
	populate_ring_entries(ch->ch_na->na_tx_rings, tx_first, tx_last,
	    entry->nce_ring_entries);
	populate_ring_entries(ch->ch_na->na_rx_rings, rx_first, rx_last,
	    entry->nce_ring_entries + entry->nce_tx_rings);
}

SK_NO_INLINE_ATTRIBUTE
static size_t
nexus_channel_info_populate(struct kern_nexus *nx,
    nexus_channel_info_t info, size_t buffer_size)
{
	struct kern_channel *ch = NULL;
	size_t info_size;
	caddr_t scan = NULL;

	SK_LOCK_ASSERT_HELD();

	info_size = sizeof(*info);

	/* channel list */
	if (info != NULL) {
		if (buffer_size < info_size) {
			return info_size;
		}

		/* instance UUID */
		uuid_copy(info->nci_instance_uuid, nx->nx_uuid);
		info->nci_channel_entries_count = nx->nx_ch_count;
		scan = (caddr_t)info->nci_channel_entries;
	}
	STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
		size_t entry_size;
		uint32_t ring_count;

		ring_count = channel_ring_count(ch, NR_TX) +
		    channel_ring_count(ch, NR_RX);
		entry_size = NEXUS_CHANNEL_ENTRY_SIZE(ring_count);
		info_size += entry_size;
		if (scan != NULL) {
			if (buffer_size < info_size) {
				return info_size;
			}

			nexus_channel_entry_populate(ch, (void *)scan);
			scan += entry_size;
		}
	}
	return info_size;
}

static int
nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	int error = 0;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		size_t info_size;

		info_size = nexus_channel_info_populate(nx, (void *)scan,
		    buffer_space);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += info_size;
			buffer_space -= info_size;
		}
		actual_space += info_size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

static int
nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	struct proc *p = req->p;
	struct nexus_mib_filter filter;
	int error = 0;
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	caddr_t scan;

	/* Restrict protocol stats access to root user only (like netstat). */
	if (oidp->oid_arg2 == NXMIB_USERSTACK_STATS &&
	    !kauth_cred_issuser(kauth_cred_get())) {
		SK_ERR("mib request rejected, EPERM");
		return EPERM;
	}

	if (req->newptr == USER_ADDR_NULL) {
		/* use subcommand for multiple nodes */
		filter.nmf_type = oidp->oid_arg2;
		filter.nmf_bitmap = 0x0;
	} else if (req->newlen != sizeof(struct nexus_mib_filter)) {
		SK_ERR("mismatched newlen");
		return EINVAL;
	} else {
		error = SYSCTL_IN(req, &filter, sizeof(struct nexus_mib_filter));
		if (error != 0) {
			SK_ERR("SYSCTL_IN err %d", error);
			return error;
		}
		if (filter.nmf_type != oidp->oid_arg2) {
			SK_ERR("mismatched nmf_type");
			return EINVAL;
		}
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;

	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		if (NX_DOM_PROV(nx)->nxdom_prov_nx_mib_get == NULL) {
			continue;
		}

		size_t size;
		struct kern_nexus_domain_provider *nx_dp = NX_DOM_PROV(nx);

		size = nx_dp->nxdom_prov_nx_mib_get(nx, &filter, scan,
		    buffer_space, p);

		if (scan != NULL) {
			if (buffer_space < size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += size;
			buffer_space -= size;
		}
		actual_space += size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

void
kern_nexus_walktree(kern_nexus_walktree_f_t *f, void *arg0,
    boolean_t is_sk_locked)
{
	struct kern_nexus *nx = NULL;

	if (!is_sk_locked) {
		SK_LOCK();
	} else {
		SK_LOCK_ASSERT_HELD();
	}

	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		(*f)(nx, arg0);
	}

	if (!is_sk_locked) {
		SK_UNLOCK();
	}
}

errno_t
kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,
    struct kern_pbufpool_memory_info *rx_pool_info,
    struct kern_pbufpool_memory_info *tx_pool_info)
{
	struct kern_pbufpool *tpp, *rpp;
	struct kern_nexus *nx;
	errno_t err = 0;

	nx = nx_find(nx_uuid, FALSE);
	if (nx == NULL) {
		err = ENOENT;
		goto done;
	}

	if (nx->nx_prov->nxprov_params->nxp_type != NEXUS_TYPE_NET_IF) {
		err = ENOTSUP;
		goto done;
	}

	err = nx_netif_prov_nx_mem_info(nx, &tpp, &rpp);
	if (err != 0) {
		goto done;
	}

	if ((tpp == NULL) && (rpp == NULL)) {
		err = ENOENT;
		goto done;
	}

	if (tx_pool_info != NULL) {
		bzero(tx_pool_info, sizeof(*tx_pool_info));
	}
	if (rx_pool_info != NULL) {
		bzero(rx_pool_info, sizeof(*rx_pool_info));
	}

	if ((tx_pool_info != NULL) && (tpp != NULL)) {
		err = kern_pbufpool_get_memory_info(tpp, tx_pool_info);
		if (err != 0) {
			goto done;
		}
	}

	if ((rx_pool_info != NULL) && (rpp != NULL)) {
		err = kern_pbufpool_get_memory_info(rpp, rx_pool_info);
	}

done:
	if (nx != NULL) {
		(void) nx_release(nx);
		nx = NULL;
	}
	return err;
}

void
nx_interface_advisory_notify(struct kern_nexus *nx)
{
	struct kern_channel *ch;
	struct netif_stats *nifs;
	struct fsw_stats *fsw_stats;
	nexus_type_t nxdom_type = NX_DOM(nx)->nxdom_type;

	if (nxdom_type == NEXUS_TYPE_NET_IF) {
		nifs = &NX_NETIF_PRIVATE(nx)->nif_stats;
	} else if (nxdom_type == NEXUS_TYPE_FLOW_SWITCH) {
		fsw_stats = &NX_FSW_PRIVATE(nx)->fsw_stats;
	} else {
		VERIFY(0);
		__builtin_unreachable();
	}
	if (!lck_rw_try_lock_shared(&nx->nx_ch_if_adv_lock)) {
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_DROP);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_DROP);
		}
		return;
	}
	/*
	 * If the channel is on the "nx_ch_if_adv_head" list, we can
	 * safely assume that it has not been closed yet: in
	 * ch_close_common(), the channel is removed from the
	 * "nx_ch_if_adv_head" list with the "nx_ch_if_adv_lock" held
	 * in exclusive mode, prior to closing the channel.
	 */
	STAILQ_FOREACH(ch, &nx->nx_ch_if_adv_head, ch_link_if_adv) {
		struct nexus_adapter *na = ch->ch_na;

		ASSERT(na != NULL);
		na_post_event(&na->na_tx_rings[ch->ch_first[NR_TX]],
		    TRUE, FALSE, FALSE, CHAN_FILT_HINT_IF_ADV_UPD);
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_SENT);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_SENT);
		}
	}
	lck_rw_done(&nx->nx_ch_if_adv_lock);
}