/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <sys/sdt.h>

static uint32_t disable_nxctl_check = 0;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk, OID_AUTO, disable_nxctl_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &disable_nxctl_check, 0, "");
#endif

LCK_GRP_DECLARE(nexus_lock_group, "sk_nx_lock");
LCK_GRP_DECLARE(nexus_mbq_lock_group, "sk_nx_mbq_lock");
LCK_GRP_DECLARE(nexus_pktq_lock_group, "sk_nx_pktq_lock");
LCK_ATTR_DECLARE(nexus_lock_attr, 0, 0);

static STAILQ_HEAD(, nxctl) nxctl_head =
    STAILQ_HEAD_INITIALIZER(nxctl_head);
static STAILQ_HEAD(, kern_nexus_provider) nxprov_head =
    STAILQ_HEAD_INITIALIZER(nxprov_head);

static int nx_cmp(const struct kern_nexus *, const struct kern_nexus *);
RB_HEAD(kern_nexus_tree, kern_nexus);
RB_PROTOTYPE_SC(static, kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
RB_GENERATE(kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
static struct kern_nexus_tree nx_head;

static int nxctl_get_nexus_prov_list(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_prov_entry(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_list(struct nxctl *, struct sockopt *);
static int nxctl_nexus_bind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_unbind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_config(struct nxctl *, struct sockopt *);
static int nxctl_get_channel_list(struct nxctl *, struct sockopt *);
static void nxctl_retain_locked(struct nxctl *);
static int nxctl_release_locked(struct nxctl *);
static void nxctl_init(struct nxctl *, struct proc *, struct fileproc *);
static struct nxctl *nxctl_alloc(struct proc *, struct fileproc *, zalloc_flags_t);
static void nxctl_free(struct nxctl *);

static struct kern_nexus_provider *nxprov_create_common(struct nxctl *,
    struct kern_nexus_domain_provider *, struct nxprov_reg *,
    const struct kern_nexus_provider_init *init, int *);
static void nxprov_detach(struct kern_nexus_provider *, boolean_t);
static void nxprov_retain_locked(struct kern_nexus_provider *);
static int nxprov_release_locked(struct kern_nexus_provider *);
static struct kern_nexus_provider *nxprov_alloc(
    struct kern_nexus_domain_provider *, zalloc_flags_t);
static void nxprov_free(struct kern_nexus_provider *);

static int nx_init_rings(struct kern_nexus *, struct kern_channel *);
static void nx_fini_rings(struct kern_nexus *, struct kern_channel *);
static int nx_init_slots(struct kern_nexus *, struct __kern_channel_ring *);
static void nx_fini_slots(struct kern_nexus *, struct __kern_channel_ring *);
static struct kern_nexus *nx_alloc(zalloc_flags_t);
static void nx_free(struct kern_nexus *);

static SKMEM_TYPE_DEFINE(nxctl_zone, struct nxctl);

static SKMEM_TYPE_DEFINE(nxbind_zone, struct nxbind);

static SKMEM_TYPE_DEFINE(nxprov_zone, struct kern_nexus_provider);

static SKMEM_TYPE_DEFINE(nxprov_params_zone, struct nxprov_params);

static SKMEM_TYPE_DEFINE(nx_zone, struct kern_nexus);

static int __nx_inited = 0;

#define SKMEM_TAG_NX_KEY "com.apple.skywalk.nexus.key"
SKMEM_TAG_DEFINE(skmem_tag_nx_key, SKMEM_TAG_NX_KEY);

#define SKMEM_TAG_NX_MIB "com.apple.skywalk.nexus.mib"
static SKMEM_TAG_DEFINE(skmem_tag_nx_mib, SKMEM_TAG_NX_MIB);

#define SKMEM_TAG_NX_PORT "com.apple.skywalk.nexus.port"
SKMEM_TAG_DEFINE(skmem_tag_nx_port, SKMEM_TAG_NX_PORT);

#define SKMEM_TAG_NX_PORT_INFO "com.apple.skywalk.nexus.port.info"
SKMEM_TAG_DEFINE(skmem_tag_nx_port_info, SKMEM_TAG_NX_PORT_INFO);

/*
 * Special nexus controller handle for Skywalk internal use. Unlike all
 * other nexus controller handles that are created by userland or kernel
 * clients, this one never gets closed or freed. It is also not part of
 * the global nxctl_head list.
 */
static struct nxctl _kernnxctl;
static struct nxctl _usernxctl;
struct nexus_controller kernnxctl = { .ncd_nxctl = &_kernnxctl };
struct nexus_controller usernxctl = { .ncd_nxctl = &_usernxctl };

/*
 * -fbounds-safety: For static functions where additional size variables are
 * added, we need to mark them __unused if this file is being built without
 * -fbounds-safety.
 */
#if !__has_ptrcheck
#define NX_FB_ARG __unused
#else
#define NX_FB_ARG
#endif
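
/*
 * Illustrative sketch only (not a declaration in this file): a static
 * helper whose size parameter exists solely for -fbounds-safety would
 * be declared as
 *
 *	static void
 *	foo_fill(uint8_t *__sized_by(len) buf, NX_FB_ARG size_t len);
 *
 * so that `len' does not trip unused-parameter diagnostics when the
 * file is compiled without -fbounds-safety.
 */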

int
nexus_init(void)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(!__nx_inited);

	RB_INIT(&nx_head);

	na_init();

	/* attach system built-in domains and domain providers */
	nxdom_attach_all();

	/*
	 * Initialize the private kernel and shared user nexus controller
	 * handles.
	 *
	 * The shared kernel controller is used internally for creating
	 * nexus providers and nexus instances from within the Skywalk
	 * code (e.g. netif_compat).
	 *
	 * The shared user controller is used by userspace clients
	 * (e.g. libnetcore) that would like to call into nexus instances
	 * for use cases like configuring a flow entry that they own
	 * indirectly (e.g. via NECP), so that the nexus would perform
	 * the permission check based on other info (e.g. PID, UUID) and
	 * bypass the nxctl check (this nxctl has no credentials).
	 */
	nxctl_init(&_kernnxctl, kernproc, NULL);
	nxctl_retain_locked(&_kernnxctl);       /* one for us */
	nxctl_init(&_usernxctl, kernproc, NULL);
	nxctl_retain_locked(&_usernxctl);       /* one for us */
	nxctl_traffic_rule_init();

	__nx_inited = 1;

	return 0;
}
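
/*
 * Internal Skywalk code reaches the two special handles initialized
 * above without going through nxctl_create(); a minimal sketch:
 *
 *	struct nxctl *nxctl = kernnxctl.ncd_nxctl;
 *
 * Both handles hold a reference from the retains above, so they
 * remain valid until nexus_fini() releases them.
 */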

void
nexus_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__nx_inited) {
		nxctl_traffic_rule_fini();
		nxctl_release_locked(&_kernnxctl);
		nxctl_release_locked(&_usernxctl);

		/* tell all domains they're going away */
		nxdom_detach_all();

		ASSERT(RB_EMPTY(&nx_head));

		na_fini();

		__nx_inited = 0;
	}
}

struct nxctl *
nxctl_create(struct proc *p, struct fileproc *fp, const uuid_t nxctl_uuid,
    int *err)
{
	struct nxctl *nxctl = NULL;

	ASSERT(!uuid_is_null(nxctl_uuid));

	/* privilege checks would be done when performing nxctl operations */

	SK_LOCK();

	nxctl = nxctl_alloc(p, fp, Z_WAITOK);

	STAILQ_INSERT_TAIL(&nxctl_head, nxctl, nxctl_link);
	nxctl->nxctl_flags |= NEXUSCTLF_ATTACHED;
	uuid_copy(nxctl->nxctl_uuid, nxctl_uuid);

	nxctl_retain_locked(nxctl);     /* one for being in the list */
	nxctl_retain_locked(nxctl);     /* one for the caller */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl 0x%llx UUID %s", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr));
#endif /* SK_LOG */

	SK_UNLOCK();

	if (*err != 0) {
		nxctl_free(nxctl);
		nxctl = NULL;
	}
	return nxctl;
}

void
nxctl_close(struct nxctl *nxctl)
{
	struct kern_nexus_provider *nxprov = NULL, *tnxprov;

	lck_mtx_lock(&nxctl->nxctl_lock);
	SK_LOCK();

	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL));

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl 0x%llx UUID %s flags 0x%b", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr),
	    nxctl->nxctl_flags, NEXUSCTLF_BITS);
#endif /* SK_LOG */

	if (!(nxctl->nxctl_flags & NEXUSCTLF_NOFDREF)) {
		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
		nxctl->nxctl_fp = NULL;
	}

	/* may be called as part of failure cleanup, so check */
	if (nxctl->nxctl_flags & NEXUSCTLF_ATTACHED) {
		/* caller must hold an extra ref */
		ASSERT(nxctl->nxctl_refcnt > 1);
		(void) nxctl_release_locked(nxctl);

		STAILQ_REMOVE(&nxctl_head, nxctl, nxctl, nxctl_link);
		nxctl->nxctl_flags &= ~NEXUSCTLF_ATTACHED;
	}

repeat:
	STAILQ_FOREACH_SAFE(nxprov, &nxprov_head, nxprov_link, tnxprov) {
		/*
		 * Close provider only for those which are owned by
		 * this control instance. Note that if we close the
		 * provider, we need to repeat this search as the
		 * list might have been changed by another thread.
		 * That's possible since SK_UNLOCK() may be called
		 * as a result of calling nxprov_close().
		 */
		if (!(nxprov->nxprov_flags & NXPROVF_CLOSED) &&
		    nxprov->nxprov_ctl == nxctl) {
			nxprov_retain_locked(nxprov);
			(void) nxprov_close(nxprov, TRUE);
			(void) nxprov_release_locked(nxprov);
			goto repeat;
		}
	}

	SK_UNLOCK();
	lck_mtx_unlock(&nxctl->nxctl_lock);
	nxctl_traffic_rule_clean(nxctl);
}

int
nxctl_set_opt(struct nxctl *nxctl, struct sockopt *sopt)
{
#pragma unused(nxctl)
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	switch (sopt->sopt_name) {
	case NXOPT_NEXUS_BIND:
		err = nxctl_nexus_bind(nxctl, sopt);
		break;

	case NXOPT_NEXUS_UNBIND:
		err = nxctl_nexus_unbind(nxctl, sopt);
		break;

	case NXOPT_NEXUS_CONFIG:
		err = nxctl_nexus_config(nxctl, sopt);
		break;

	default:
		err = ENOPROTOOPT;
		break;
	}

	return err;
}

int
nxctl_get_opt(struct nxctl *nxctl, struct sockopt *sopt)
{
#pragma unused(nxctl)
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	switch (sopt->sopt_name) {
	case NXOPT_NEXUS_PROV_LIST:
		err = nxctl_get_nexus_prov_list(nxctl, sopt);
		break;

	case NXOPT_NEXUS_PROV_ENTRY:
		err = nxctl_get_nexus_prov_entry(nxctl, sopt);
		break;

	case NXOPT_NEXUS_LIST:
		err = nxctl_get_nexus_list(nxctl, sopt);
		break;

	case NXOPT_CHANNEL_LIST:
		err = nxctl_get_channel_list(nxctl, sopt);
		break;

	default:
		err = ENOPROTOOPT;
		break;
	}

	return err;
}
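
/*
 * A rough sketch (assumed caller-side setup, with the nxctl lock held
 * as asserted above) of driving one of the getters with a sockopt
 * descriptor:
 *
 *	struct sockopt sopt;
 *	bzero(&sopt, sizeof(sopt));
 *	sopt.sopt_dir = SOPT_GET;
 *	sopt.sopt_name = NXOPT_NEXUS_LIST;
 *	sopt.sopt_val = uaddr;
 *	sopt.sopt_valsize = sizeof(struct nx_list_req);
 *	sopt.sopt_p = p;
 *	err = nxctl_get_opt(nxctl, &sopt);
 *
 * Note that both dispatchers force sopt_dir to the expected direction
 * rather than rejecting a mismatched descriptor.
 */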

/* Upper bound on # of nrl_num_regs that we'd return to user space */
#define MAX_NUM_REG_ENTRIES 256

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	struct nxprov_reg_ent *pnre, *nres = NULL;
	struct nxprov_list_req nrlr;
	struct kern_nexus_provider *nxprov = NULL;
	uint32_t nregs = 0, ncregs = 0;
	int err = 0, observeall;
	size_t nres_sz;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nrlr, sizeof(nrlr), sizeof(nrlr));
	if (err != 0) {
		return err;
	}

	if ((size_t)nrlr.nrl_num_regs > MAX_NUM_REG_ENTRIES) {
		nrlr.nrl_num_regs = MAX_NUM_REG_ENTRIES;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus provider
	 * entries to the caller gracefully. We only copy out the number
	 * of entries that the caller has asked for, but we always tell
	 * the caller how big the buffer really needs to be.
	 */
	tmp_ptr = nrlr.nrl_regs;
	if (tmp_ptr != USER_ADDR_NULL && nrlr.nrl_num_regs > 0) {
		nres_sz = (size_t)nrlr.nrl_num_regs * sizeof(*nres);
		nres = sk_alloc_data(nres_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(nres == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	/*
	 * Count number of providers. If buffer space exists and
	 * remains, copy out provider entries.
	 */
	nregs = nrlr.nrl_num_regs;
	pnre = nres;

	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (nres != NULL && nregs > 0) {
			uuid_copy(pnre->npre_prov_uuid, nxprov->nxprov_uuid);
			bcopy(nxprov->nxprov_params, &pnre->npre_prov_params,
			    sizeof(struct nxprov_params));
			--nregs;
			++pnre;
			++ncregs;
		}
	}
	SK_UNLOCK();

	if (ncregs == 0) {
		err = ENOENT;
	}

	if (nres != NULL) {
		if (err == 0 && tmp_ptr != USER_ADDR_NULL) {
			if (sopt->sopt_p != kernproc) {
				err = copyout(nres, tmp_ptr,
				    ncregs * sizeof(*nres));
			} else {
				caddr_t tmp;
				tmp = __unsafe_forge_bidi_indexable(caddr_t,
				    CAST_DOWN(caddr_t, tmp_ptr),
				    ncregs * sizeof(*nres));
				bcopy(nres, tmp, ncregs * sizeof(*nres));
			}
		}
		sk_free_data(nres, nres_sz);
		nres = NULL;
	}

	if (err == 0) {
		nrlr.nrl_num_regs = ncregs;
		err = sooptcopyout(sopt, &nrlr, sizeof(nrlr));
	}

	return err;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_entry(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nxprov_reg_ent nre;
	struct kern_nexus_provider *nxprov = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nre, sizeof(nre));
	err = sooptcopyin(sopt, &nre, sizeof(nre), sizeof(nre));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nre.npre_prov_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (uuid_compare(nxprov->nxprov_uuid,
		    nre.npre_prov_uuid) == 0) {
			/*
			 * Return only entries that are visible to the
			 * caller, unless it has PRIV_SKYWALK_OBSERVE_ALL.
			 */
			if (nxprov->nxprov_ctl != nxctl) {
				if (skywalk_priv_check_cred(sopt->sopt_p,
				    nxctl->nxctl_cred,
				    PRIV_SKYWALK_OBSERVE_ALL) != 0) {
					nxprov = NULL;
					break;
				}
			}

			bcopy(nxprov->nxprov_params, &nre.npre_prov_params,
			    sizeof(struct nxprov_params));
			break;
		}
	}
	SK_UNLOCK();

	if (nxprov != NULL) {
		err = sooptcopyout(sopt, &nre, sizeof(nre));
	} else {
		err = ENOENT;
	}

	return err;
}

/* Upper bound on # of nl_num_nx_uuids that we'd return to user space */
#define MAX_NUM_NX_UUIDS 4096

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct nx_list_req nlr;
	struct kern_nexus_provider *nxprov = NULL;
	struct kern_nexus *nx = NULL;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nlr, sizeof(nlr), sizeof(nlr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nlr.nl_prov_uuid)) {
		return EINVAL;
	} else if ((size_t)nlr.nl_num_nx_uuids > MAX_NUM_NX_UUIDS) {
		nlr.nl_num_nx_uuids = MAX_NUM_NX_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus UUIDs to
	 * the caller gracefully. We only copy out the number of UUIDs
	 * that the caller has asked for, but we always tell the caller
	 * how big the buffer really needs to be.
	 */
	tmp_ptr = nlr.nl_nx_uuids;
	if (tmp_ptr != USER_ADDR_NULL && nlr.nl_num_nx_uuids > 0) {
		uuids_sz = (size_t)nlr.nl_num_nx_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(uuids == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (uuid_compare(nxprov->nxprov_uuid, nlr.nl_prov_uuid) == 0) {
			break;
		}
	}

	if (nxprov != NULL) {
		/*
		 * Count number of Nexus. If buffer space exists
		 * and remains, copy out the Nexus UUIDs.
		 */
		nuuids = nlr.nl_num_nx_uuids;
		puuid = uuids;

		STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, nx->nx_uuid);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nxprov != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			if (cnt_uuid > 0) {
				if (sopt->sopt_p != kernproc) {
					err = copyout(uuids, tmp_ptr,
					    cnt_uuid * sizeof(uuid_t));
				} else {
					caddr_t tmp;
					tmp = __unsafe_forge_bidi_indexable(caddr_t,
					    CAST_DOWN(caddr_t, tmp_ptr),
					    cnt_uuid * sizeof(uuid_t));
					bcopy(uuids, tmp,
					    cnt_uuid * sizeof(uuid_t));
				}
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	if (err == 0) {
		nlr.nl_num_nx_uuids = ncuuids;
		err = sooptcopyout(sopt, &nlr, sizeof(nlr));
	}

	return err;
}
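
/*
 * The routine above supports the usual two-pass sizing idiom: a first
 * call with nl_nx_uuids set to USER_ADDR_NULL still reports the total
 * count back in nl_num_nx_uuids. A hedged caller-side sketch, where
 * get() stands for whatever wrapper issues the NXOPT_NEXUS_LIST
 * sockopt:
 *
 *	struct nx_list_req nlr;
 *	bzero(&nlr, sizeof(nlr));
 *	uuid_copy(nlr.nl_prov_uuid, prov_uuid);
 *	if ((err = get(NXOPT_NEXUS_LIST, &nlr)) == 0) {
 *		nlr.nl_nx_uuids = buf;	(sized for nl_num_nx_uuids)
 *		err = get(NXOPT_NEXUS_LIST, &nlr);
 *	}
 */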

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_bind(struct nxctl *nxctl, struct sockopt *sopt)
{
	boolean_t m_pid, m_exec_uuid, m_key;
	struct nx_bind_req nbr;
	struct proc *p = PROC_NULL;
	struct nxbind *nxb = NULL;
	uint64_t p_uniqueid = -1;
	pid_t p_pid = -1;
	struct kern_nexus *nx = NULL;
#if SK_LOG
	uuid_string_t exec_uuidstr;
#endif /* SK_LOG */
	uuid_t p_uuid;
	void *key = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	uuid_clear(p_uuid);
	bzero(&nbr, sizeof(nbr));
	err = sooptcopyin(sopt, &nbr, sizeof(nbr), sizeof(nbr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nbr.nb_nx_uuid)) {
		err = EINVAL;
		goto done_unlocked;
	}

	nbr.nb_flags &= NBR_MATCH_MASK;
	if (nbr.nb_flags == 0) {
		/* must choose one of the match criteria */
		err = EINVAL;
		goto done_unlocked;
	}
	m_pid = !!(nbr.nb_flags & NBR_MATCH_PID);
	m_exec_uuid = !!(nbr.nb_flags & NBR_MATCH_EXEC_UUID);
	m_key = !!(nbr.nb_flags & NBR_MATCH_KEY);

	if (m_pid || m_exec_uuid) {
		/*
		 * Validate process ID. A valid PID is needed when we're
		 * asked to match by PID, or if asked to match by executable
		 * UUID with a NULL nb_exec_uuid supplied. The latter is
		 * to support the case when a userland Nexus provider isn't
		 * able to acquire its client's executable UUID, but is
		 * able to identify it via PID.
		 */
		if ((m_pid || uuid_is_null(nbr.nb_exec_uuid)) &&
		    (p = proc_find(nbr.nb_pid)) == PROC_NULL) {
			err = ESRCH;
			goto done_unlocked;
		}
		/* exclude kernel from the match criteria */
		if (p == kernproc) {
			err = EACCES;
			goto done_unlocked;
		} else if (p != PROC_NULL) {
			proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
			p_uniqueid = proc_uniqueid(p);
			p_pid = proc_pid(p);
		} else {
			uuid_copy(p_uuid, nbr.nb_exec_uuid);
		}
	}

	if (m_key) {
		if (nbr.nb_key_len == 0 || nbr.nb_key_len > NEXUS_MAX_KEY_LEN ||
		    nbr.nb_key == USER_ADDR_NULL) {
			err = EINVAL;
			goto done_unlocked;
		}

		key = sk_alloc_data(nbr.nb_key_len, Z_WAITOK, skmem_tag_nx_key);
		if (__improbable(key == NULL)) {
			err = ENOMEM;
			goto done_unlocked;
		}

		if (sopt->sopt_p != kernproc) {
			err = copyin(nbr.nb_key, key, nbr.nb_key_len);
			if (err != 0) {
				goto done_unlocked;
			}
		} else {
			/*
			 * -fbounds-safety: nbr.nb_key is user_addr_t. Changing
			 * it to a pointer type is risky, so we just forge it
			 * here instead.
			 */
			void *nb_key = __unsafe_forge_bidi_indexable(void *,
			    nbr.nb_key, nbr.nb_key_len);
			bcopy(nb_key, key, nbr.nb_key_len);
		}
	}

	SK_LOCK();
	nx = nx_find(nbr.nb_nx_uuid, TRUE);
	if (nx == NULL || (disable_nxctl_check == 0 &&
	    nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* bind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	/* port must be within the domain's range */
	if (nbr.nb_port != NEXUS_PORT_ANY &&
	    nbr.nb_port >= NXDOM_MAX(NX_DOM(nx), ports)) {
		err = EDOM;
		goto done;
	} else if (nbr.nb_port == NEXUS_PORT_ANY) {
		/* for now, this is allowed only for kernel clients */
		if (sopt->sopt_p != kernproc) {
			err = EPERM;
			goto done;
		}
	}

	nxb = nxb_alloc(Z_WAITOK);

	if (m_pid) {
		nxb->nxb_flags |= NXBF_MATCH_UNIQUEID;
		nxb->nxb_uniqueid = p_uniqueid;
		nxb->nxb_pid = p_pid;
	}
	if (m_exec_uuid) {
		nxb->nxb_flags |= NXBF_MATCH_EXEC_UUID;
		ASSERT(!uuid_is_null(p_uuid));
		uuid_copy(nxb->nxb_exec_uuid, p_uuid);
	}
	if (m_key) {
		nxb->nxb_flags |= NXBF_MATCH_KEY;
		ASSERT(key != NULL);
		ASSERT(nbr.nb_key_len != 0 &&
		    nbr.nb_key_len <= NEXUS_MAX_KEY_LEN);
		/*
		 * -fbounds-safety: since nxb_key is __sized_by(nxb_key_len),
		 * its assignment needs to be done side by side with
		 * nxb_key_len.
		 */
		nxb->nxb_key = key;
		key = NULL;             /* let nxb_free() free it */
		nxb->nxb_key_len = nbr.nb_key_len;
	}

	/*
	 * Bind the creds to the nexus port. If the client doesn't have a
	 * port, find one, claim it, and associate the creds with it. Upon
	 * success, the nexus may move the nxbind contents (including the
	 * key) to its own nxbind instance; in that case, nxb_free() below
	 * will not be freeing the key within.
	 */
	err = NX_DOM(nx)->nxdom_bind_port(nx, &nbr.nb_port, nxb, NULL);
	if (err != 0) {
		goto done;
	}

	ASSERT(nbr.nb_port != NEXUS_PORT_ANY);
	(void) sooptcopyout(sopt, &nbr, sizeof(nbr));

	SK_D("nexus 0x%llx nxb 0x%llx port %u flags 0x%b pid %d "
	    "(uniqueid %llu) exec_uuid %s key 0x%llx key_len %u",
	    SK_KVA(nx), SK_KVA(nxb), nbr.nb_port, nxb->nxb_flags,
	    NXBF_BITS, nxb->nxb_pid, nxb->nxb_uniqueid,
	    sk_uuid_unparse(nxb->nxb_exec_uuid, exec_uuidstr),
	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0,
	    nxb->nxb_key_len);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

done_unlocked:
	ASSERT(nx == NULL);

	if (nxb != NULL) {
		nxb_free(nxb);
		nxb = NULL;
	}
	if (key != NULL) {
		sk_free_data(key, nbr.nb_key_len);
		key = NULL;
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}

	return err;
}
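
/*
 * A hedged sketch of the request a kernel client would pass down to
 * bind a nexus port to a process by PID (all other fields zeroed;
 * NEXUS_PORT_ANY is permitted only for kernel clients):
 *
 *	struct nx_bind_req nbr;
 *	bzero(&nbr, sizeof(nbr));
 *	uuid_copy(nbr.nb_nx_uuid, nx_uuid);
 *	nbr.nb_port = NEXUS_PORT_ANY;
 *	nbr.nb_flags = NBR_MATCH_PID;
 *	nbr.nb_pid = pid;
 *
 * On success the port that was claimed is copied back out to the
 * caller in nbr.nb_port.
 */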

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_unbind(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nx_unbind_req nur;
	struct kern_nexus *nx = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nur, sizeof(nur));
	err = sooptcopyin(sopt, &nur, sizeof(nur), sizeof(nur));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nur.nu_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	nx = nx_find(nur.nu_nx_uuid, TRUE);
	if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* unbind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	if (nur.nu_port == NEXUS_PORT_ANY) {
		err = EINVAL;
		goto done;
	}

	err = NX_DOM(nx)->nxdom_unbind_port(nx, nur.nu_port);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_config(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct kern_nexus *nx = NULL;
	struct nx_cfg_req ncr;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&ncr, sizeof(ncr));
	err = sooptcopyin(sopt, &ncr, sizeof(ncr), sizeof(ncr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(ncr.nc_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	nx = nx_find(ncr.nc_nx_uuid, TRUE);
	if (nx == NULL || (disable_nxctl_check == 0 &&
	    nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl &&     /* allow kernel/shared user nxctl */
	    nxctl != &_usernxctl)) {
		err = ENOENT;
		goto done;
	}

	if (NX_DOM_PROV(nx)->nxdom_prov_config != NULL) {
		err = NX_DOM_PROV(nx)->nxdom_prov_config(NX_DOM_PROV(nx),
		    nx, &ncr, sopt->sopt_dir, sopt->sopt_p, nxctl->nxctl_cred);
	} else {
		err = EPERM;
	}

	if (err == 0) {
		(void) sooptcopyout(sopt, &ncr, sizeof(ncr));
	}
done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}

struct nxbind *
nxb_alloc(zalloc_flags_t how)
{
	struct nxbind *nxb = zalloc_flags(nxbind_zone, how | Z_ZERO);

	if (nxb) {
		SK_DF(SK_VERB_MEM, "nxb 0x%llx ALLOC", SK_KVA(nxb));
	}
	return nxb;
}

void
nxb_free(struct nxbind *nxb)
{
	SK_DF(SK_VERB_MEM, "nxb 0x%llx key 0x%llx FREE", SK_KVA(nxb),
	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0);

	if (nxb->nxb_key != NULL) {
		sk_free_data_sized_by(nxb->nxb_key, nxb->nxb_key_len);
		nxb->nxb_key = NULL;
		nxb->nxb_key_len = 0;
	}
	zfree(nxbind_zone, nxb);
}

/*
 * nxb0 is assumed to possess the truth; compare nxb1 against it.
 */
boolean_t
nxb_is_equal(struct nxbind *nxb0, struct nxbind *nxb1)
{
	ASSERT(nxb0 != NULL && nxb1 != NULL);
	ASSERT(nxb0 != nxb1);

	/* we always compare using uniqueid and not pid */
	if ((nxb0->nxb_flags & NXBF_MATCH_UNIQUEID) &&
	    nxb1->nxb_uniqueid != nxb0->nxb_uniqueid) {
		return FALSE;
	}

	if ((nxb0->nxb_flags & NXBF_MATCH_EXEC_UUID) &&
	    uuid_compare(nxb1->nxb_exec_uuid, nxb0->nxb_exec_uuid) != 0) {
		return FALSE;
	}

	ASSERT(!(nxb0->nxb_flags & NXBF_MATCH_KEY) ||
	    (nxb0->nxb_key_len != 0 && nxb0->nxb_key != NULL));

	if ((nxb0->nxb_flags & NXBF_MATCH_KEY) &&
	    (nxb0->nxb_key_len != nxb1->nxb_key_len ||
	    nxb1->nxb_key == NULL || timingsafe_bcmp(nxb1->nxb_key, nxb0->nxb_key,
	    nxb1->nxb_key_len) != 0)) {
		return FALSE;
	}

	return TRUE;
}
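
/*
 * For example: if nxb0 was bound with NXBF_MATCH_UNIQUEID and
 * NXBF_MATCH_KEY, nxb1 must carry the same process unique ID and a
 * key of identical length and contents; the key bytes are compared
 * with timingsafe_bcmp() to avoid leaking them through timing.
 * Criteria that nxb0 doesn't assert are not examined at all.
 */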

void
nxb_move(struct nxbind *snxb, struct nxbind *dnxb)
{
	ASSERT(!(snxb->nxb_flags & NXBF_MATCH_KEY) ||
	    (snxb->nxb_key_len != 0 && snxb->nxb_key != NULL));

	/* in case the destination has a key attached, free it first */
	if (dnxb->nxb_key != NULL) {
		sk_free_data_sized_by(dnxb->nxb_key, dnxb->nxb_key_len);
		dnxb->nxb_key = NULL;
		dnxb->nxb_key_len = 0;
	}

	/* move everything from src to dst, and then wipe out src */
	bcopy(snxb, dnxb, sizeof(*dnxb));
	bzero(snxb, sizeof(*snxb));
}

/* Upper bound on # of cl_num_ch_uuids that we'd return to user space */
#define MAX_NUM_CH_UUIDS 4096

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_channel_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct ch_list_req clr;
	struct kern_channel *ch = NULL;
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &clr, sizeof(clr), sizeof(clr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(clr.cl_nx_uuid)) {
		return EINVAL;
	} else if ((size_t)clr.cl_num_ch_uuids > MAX_NUM_CH_UUIDS) {
		clr.cl_num_ch_uuids = MAX_NUM_CH_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Channel UUIDs to
	 * the caller gracefully. We only copy out the number of UUIDs
	 * that the caller has asked for, but we always tell the caller
	 * how big the buffer really needs to be.
	 */
	tmp_ptr = clr.cl_ch_uuids;
	if (tmp_ptr != USER_ADDR_NULL && clr.cl_num_ch_uuids > 0) {
		uuids_sz = (size_t)clr.cl_num_ch_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (uuids == NULL) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	uuid_copy(find.nx_uuid, clr.cl_nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && NX_PROV(nx)->nxprov_ctl != nxctl && !observeall) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		nx = NULL;
	}
	if (nx != NULL) {
		/*
		 * Count number of Channels. If buffer space exists
		 * and remains, copy out the Channel UUIDs.
		 */
		nuuids = clr.cl_num_ch_uuids;
		puuid = uuids;

		STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, ch->ch_info->cinfo_ch_id);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nx != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			ASSERT(cnt_uuid > 0);

			if (sopt->sopt_p != kernproc) {
				err = copyout(uuids, tmp_ptr,
				    cnt_uuid * sizeof(uuid_t));
			} else {
				caddr_t tmp;
				tmp = __unsafe_forge_bidi_indexable(caddr_t,
				    CAST_DOWN(caddr_t, tmp_ptr),
				    cnt_uuid * sizeof(uuid_t));
				bcopy(uuids, tmp, cnt_uuid * sizeof(uuid_t));
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	if (err == 0) {
		clr.cl_num_ch_uuids = ncuuids;
		err = sooptcopyout(sopt, &clr, sizeof(clr));
	}

	return err;
}

static void
nxctl_init(struct nxctl *nxctl, struct proc *p, struct fileproc *fp)
{
	uuid_t p_uuid;

	bzero(nxctl, sizeof(*nxctl));

	proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));

	lck_mtx_init(&nxctl->nxctl_lock, &nexus_lock_group, &nexus_lock_attr);
	uuid_copy(nxctl->nxctl_proc_uuid, p_uuid);
	nxctl->nxctl_proc_uniqueid = proc_uniqueid(p);
	nxctl->nxctl_cred = kauth_cred_proc_ref(p);
	nxctl->nxctl_fp = fp;
	if (nxctl == &_kernnxctl) {
		ASSERT(p == kernproc);
		nxctl->nxctl_flags |= NEXUSCTLF_KERNEL;
	}
	if (nxctl == &_usernxctl) {
		ASSERT(p == kernproc);
		nxctl->nxctl_cred = NULL;
	}
	if (fp == NULL) {
		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
	}
}

static struct nxctl *
nxctl_alloc(struct proc *p, struct fileproc *fp, zalloc_flags_t how)
{
	struct nxctl *nxctl = zalloc_flags(nxctl_zone, how);

	if (nxctl != NULL) {
		nxctl_init(nxctl, p, fp);
	}
	return nxctl;
}

static void
nxctl_free(struct nxctl *nxctl)
{
	ASSERT(nxctl->nxctl_refcnt == 0);
	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_ATTACHED));
	kauth_cred_unref(&nxctl->nxctl_cred);
	lck_mtx_destroy(&nxctl->nxctl_lock, &nexus_lock_group);
	SK_D("nxctl 0x%llx FREE", SK_KVA(nxctl));
	if (!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL)) {
		zfree(nxctl_zone, nxctl);
	}
}

static void
nxctl_retain_locked(struct nxctl *nxctl)
{
	SK_LOCK_ASSERT_HELD();

	nxctl->nxctl_refcnt++;
	ASSERT(nxctl->nxctl_refcnt != 0);
}

void
nxctl_retain(struct nxctl *nxctl)
{
	SK_LOCK();
	nxctl_retain_locked(nxctl);
	SK_UNLOCK();
}

static int
nxctl_release_locked(struct nxctl *nxctl)
{
	int oldref = nxctl->nxctl_refcnt;

	SK_LOCK_ASSERT_HELD();

	ASSERT(nxctl->nxctl_refcnt != 0);
	if (--nxctl->nxctl_refcnt == 0) {
		nxctl_free(nxctl);
	}

	return oldref == 1;
}
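
/*
 * The release routines return nonzero only when the caller dropped
 * the last reference (and the object has therefore been freed); a
 * minimal usage sketch:
 *
 *	nxctl_retain(nxctl);
 *	...
 *	if (nxctl_release(nxctl)) {
 *		nxctl is now dangling and must not be touched
 *	}
 */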

int
nxctl_release(struct nxctl *nxctl)
{
	int lastref;

	SK_LOCK();
	lastref = nxctl_release_locked(nxctl);
	SK_UNLOCK();

	return lastref;
}

/* XXX
 * -fbounds-safety: Why is this taking a void *? All callers are passing nxctl.
 * How come there's no nxctl_ctor?
 */
void
nxctl_dtor(struct nxctl *arg)
{
	struct nxctl *nxctl = arg;

	nxctl_close(nxctl);
	SK_LOCK();
	(void) nxctl_release_locked(nxctl);
	SK_UNLOCK();
}

int
nxprov_advise_connect(struct kern_nexus *nx, struct kern_channel *ch,
    struct proc *p)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	int err = 0;

	ASSERT(!(ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)));
	ASSERT(ch->ch_ctx == NULL);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* monitor channels aren't externally visible/usable, so ignore */
	if ((ch->ch_info->cinfo_ch_mode & CHMODE_MONITOR) ||
	    (ch->ch_flags & CHANF_EXT_SKIP) ||
	    (nxprov->nxprov_ext.nxpi_pre_connect == NULL ||
	    nxprov->nxprov_ext.nxpi_connected == NULL)) {
		return 0;
	}

	ch_retain_locked(ch);
	lck_mtx_unlock(&ch->ch_lock);
	SK_UNLOCK();
	lck_mtx_lock(&ch->ch_lock);

	err = nxprov->nxprov_ext.nxpi_pre_connect(nxprov, p, nx,
	    ch->ch_info->cinfo_nx_port, ch, &ch->ch_ctx);
	if (err != 0) {
		SK_D("ch 0x%llx flags %b nx 0x%llx pre_connect "
		    "error %d", SK_KVA(ch), ch->ch_flags,
		    CHANF_BITS, SK_KVA(nx), err);
		ch->ch_ctx = NULL;
		goto done;
	}
	/*
	 * Upon ring/slot init failure, this is cleared
	 * by nxprov_advise_disconnect() below.
	 */
	os_atomic_or(&ch->ch_flags, CHANF_EXT_PRECONNECT, relaxed);
	if (NXPROV_LLINK(nxprov)) {
		err = nx_netif_llink_ext_init_default_queues(nx);
	} else {
		err = nx_init_rings(nx, ch);
	}
	if (err != 0) {
		goto done;
	}
	ASSERT(err == 0);
	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT |
	    CHANF_EXT_CONNECTED)) == CHANF_EXT_PRECONNECT);

	err = nxprov->nxprov_ext.nxpi_connected(nxprov, nx, ch);
	if (err != 0) {
		SK_D("ch 0x%llx flags %b nx 0x%llx connected error %d",
		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx), err);
		goto done;
	}
	os_atomic_or(&ch->ch_flags, CHANF_EXT_CONNECTED, relaxed);
	SK_D("ch 0x%llx flags %b nx 0x%llx connected",
	    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));

done:
	lck_mtx_unlock(&ch->ch_lock);
	SK_LOCK();
	lck_mtx_lock(&ch->ch_lock);
	if ((err != 0) &&
	    (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT))) {
		nxprov_advise_disconnect(nx, ch);
	}
	/* caller is expected to hold one, in addition to ourselves */
	VERIFY(ch->ch_refcnt >= 2);
	ch_release_locked(ch);

	return err;
}

void
nxprov_advise_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* check as we might be called in the error handling path */
	if (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)) {
		ch_retain_locked(ch);
		lck_mtx_unlock(&ch->ch_lock);
		SK_UNLOCK();
		lck_mtx_lock(&ch->ch_lock);

		ASSERT(!(ch->ch_flags & CHANF_EXT_SKIP));
		if (ch->ch_flags & CHANF_EXT_CONNECTED) {
			nxprov->nxprov_ext.nxpi_pre_disconnect(nxprov, nx, ch);
			os_atomic_andnot(&ch->ch_flags, CHANF_EXT_CONNECTED, relaxed);
		}

		/*
		 * Inform the external domain provider that the rings
		 * and slots for this channel are no longer valid.
		 */
		if (NXPROV_LLINK(nxprov)) {
			nx_netif_llink_ext_fini_default_queues(nx);
		} else {
			nx_fini_rings(nx, ch);
		}

		ASSERT(ch->ch_flags & CHANF_EXT_PRECONNECT);
		nxprov->nxprov_ext.nxpi_disconnected(nxprov, nx, ch);
		os_atomic_andnot(&ch->ch_flags, CHANF_EXT_PRECONNECT, relaxed);

		SK_D("ch 0x%llx flags %b nx 0x%llx disconnected",
		    SK_KVA(ch), ch->ch_flags, CHANF_BITS, SK_KVA(nx));

		/* We're done with this channel */
		ch->ch_ctx = NULL;

		lck_mtx_unlock(&ch->ch_lock);
		SK_LOCK();
		lck_mtx_lock(&ch->ch_lock);
		/* caller is expected to hold one, in addition to ourselves */
		VERIFY(ch->ch_refcnt >= 2);
		ch_release_locked(ch);
	}
	ASSERT(!(ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)));
	ASSERT(ch->ch_ctx == NULL);
}
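
/*
 * Summary of the external-provider channel lifecycle driven by the
 * two routines above (flags live in ch->ch_flags):
 *
 *	nxpi_pre_connect()	sets   CHANF_EXT_PRECONNECT
 *	nx_init_rings() or llink default queue init
 *	nxpi_connected()	sets   CHANF_EXT_CONNECTED
 *	...
 *	nxpi_pre_disconnect()	clears CHANF_EXT_CONNECTED
 *	nx_fini_rings() or llink default queue fini
 *	nxpi_disconnected()	clears CHANF_EXT_PRECONNECT
 *
 * Both routines drop SK_LOCK and the channel lock around the provider
 * callbacks, holding an extra channel reference across that window;
 * on connect failure, the partially-connected state is unwound by
 * calling nxprov_advise_disconnect().
 */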

static struct kern_nexus_provider *
nxprov_create_common(struct nxctl *nxctl,
    struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
    const struct kern_nexus_provider_init *init, int *err)
{
	struct skmem_region_params srp[SKMEM_REGIONS];
	struct kern_nexus_provider *nxprov = NULL;
	struct nxprov_params nxp;
	uint32_t override = 0;
	uint32_t pp_region_config_flags;
	int i;

	_CASSERT(sizeof(*init) == sizeof(nxprov->nxprov_ext));
	_CASSERT(sizeof(*init) >=
	    sizeof(struct kern_nexus_netif_provider_init));

	SK_LOCK_ASSERT_HELD();
	ASSERT(nxctl != NULL && reg != NULL && nxdom_prov != NULL);

	pp_region_config_flags = PP_REGION_CONFIG_MD_MAGAZINE_ENABLE |
	    PP_REGION_CONFIG_BUF_IODIR_BIDIR;
	/*
	 * Special handling for external nexus providers; similar
	 * logic to what's done in kern_pbufpool_create().
	 */
	if (init != NULL) {
		if (init->nxpi_flags & NXPIF_MONOLITHIC) {
			pp_region_config_flags |=
			    PP_REGION_CONFIG_BUF_MONOLITHIC;
		}

		if (init->nxpi_flags & NXPIF_INHIBIT_CACHE) {
			pp_region_config_flags |=
			    PP_REGION_CONFIG_BUF_NOCACHE;
		}
	}

	/*
	 * For network devices, set the packet metadata memory as persistent
	 * so that it is wired at segment creation. This allows us to access
	 * it with preemption disabled, as well as for rdar://problem/46511741.
	 */
	if (nxdom_prov->nxdom_prov_dom->nxdom_type == NEXUS_TYPE_NET_IF) {
		pp_region_config_flags |= PP_REGION_CONFIG_MD_PERSISTENT;
	}

	/* process and validate provider parameters */
	if ((*err = nxdom_prov_validate_params(nxdom_prov, reg,
	    &nxp, srp, override, pp_region_config_flags)) != 0) {
		goto done;
	}

	nxprov = nxprov_alloc(nxdom_prov, Z_WAITOK);
	ASSERT(nxprov->nxprov_dom_prov == nxdom_prov);

	STAILQ_INIT(&nxprov->nxprov_nx_head);
	STAILQ_INSERT_TAIL(&nxprov_head, nxprov, nxprov_link);
	nxprov->nxprov_flags |= NXPROVF_ATTACHED;
	nxprov->nxprov_ctl = nxctl;
	uuid_generate_random(nxprov->nxprov_uuid);
	bcopy(&nxp, nxprov->nxprov_params, sizeof(struct nxprov_params));

	if (init != NULL) {
		if (init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF) {
			ASSERT(NXPROV_LLINK(nxprov));
			bcopy(init, &nxprov->nxprov_netif_ext,
			    sizeof(nxprov->nxprov_netif_ext));
		} else {
			ASSERT(!NXPROV_LLINK(nxprov));
			ASSERT(init->nxpi_version ==
			    KERN_NEXUS_PROVIDER_CURRENT_VERSION);
			bcopy(init, &nxprov->nxprov_ext, sizeof(*init));
		}
		nxprov->nxprov_flags |= NXPROVF_EXTERNAL;
	}

	/* store validated region parameters to the provider */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		nxprov->nxprov_region_params[i] = srp[i];
	}

	if (nxprov->nxprov_flags & NXPROVF_EXTERNAL) {
		uint32_t nxpi_flags = nxprov->nxprov_ext.nxpi_flags;

		if (nxpi_flags & NXPIF_VIRTUAL_DEVICE) {
			nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
		}
	} else if (nxdom_prov->nxdom_prov_dom->nxdom_type !=
	    NEXUS_TYPE_NET_IF) {
		/*
		 * Treat non-netif built-in nexus providers as those
		 * meant for inter-process communications, i.e. there
		 * is no actual networking hardware involved.
		 */
		nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
	}

	nxprov_retain_locked(nxprov);   /* one for being in the list */
	nxprov_retain_locked(nxprov);   /* one for the caller */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr));
#endif /* SK_LOG */

done:
	return nxprov;
}

struct kern_nexus_provider *
nxprov_create(struct proc *p, struct nxctl *nxctl, struct nxprov_reg *reg,
    int *err)
{
	struct nxprov_params *nxp = &reg->nxpreg_params;
	struct kern_nexus_domain_provider *nxdom_prov = NULL;
	struct kern_nexus_provider *nxprov = NULL;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(nxctl->nxctl_cred != proc_ucred_unsafe(kernproc));
	*err = 0;

	switch (nxp->nxp_type) {
	case NEXUS_TYPE_USER_PIPE:      /* only for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_USER_PIPE);
		break;

	case NEXUS_TYPE_FLOW_SWITCH:    /* allowed for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH);
		break;

	case NEXUS_TYPE_NET_IF:         /* allowed for userland */
		*err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
		    PRIV_SKYWALK_REGISTER_NET_IF);
		break;

	case NEXUS_TYPE_KERNEL_PIPE:    /* only for kernel */
	case NEXUS_TYPE_MONITOR:        /* invalid */
	default:
		*err = EINVAL;
		goto done;
	}

	if (*err != 0) {
		goto done;
	}

	ASSERT(nxp->nxp_type < NEXUS_TYPE_MAX);
	if ((nxdom_prov = nxdom_prov_default[nxp->nxp_type]) == NULL) {
		*err = ENXIO;
		goto done;
	}

#if CONFIG_NEXUS_NETIF
	/* make sure netif_compat is the default here */
	ASSERT(nxp->nxp_type != NEXUS_TYPE_NET_IF ||
	    strbufcmp(nxdom_prov->nxdom_prov_name,
	    sizeof(nxdom_prov->nxdom_prov_name),
	    NEXUS_PROVIDER_NET_IF_COMPAT,
	    sizeof(NEXUS_PROVIDER_NET_IF_COMPAT)) == 0);
#endif /* CONFIG_NEXUS_NETIF */

	SK_LOCK();
	/* callee holds a reference for our caller upon success */
	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, NULL, err);
	SK_UNLOCK();
done:
	return nxprov;
}

struct kern_nexus_provider *
nxprov_create_kern(struct nxctl *nxctl,
    struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
    const struct kern_nexus_provider_init *init, int *err)
{
	struct nxprov_params *nxp = &reg->nxpreg_params;
	struct kern_nexus_provider *nxprov = NULL;

	NXCTL_LOCK_ASSERT_HELD(nxctl);
	SK_LOCK_ASSERT_HELD();

	ASSERT(nxctl->nxctl_cred == proc_ucred_unsafe(kernproc));
	ASSERT(nxp->nxp_type == nxdom_prov->nxdom_prov_dom->nxdom_type);
	ASSERT(init == NULL ||
	    init->nxpi_version == KERN_NEXUS_PROVIDER_CURRENT_VERSION ||
	    init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF);

	*err = 0;

	switch (nxp->nxp_type) {
	case NEXUS_TYPE_NET_IF:
		break;
	case NEXUS_TYPE_KERNEL_PIPE:
		if (init == NULL) {
			*err = EINVAL;
			goto done;
		}
		break;
	case NEXUS_TYPE_FLOW_SWITCH:
		if (init != NULL) {
			*err = EINVAL;
			goto done;
		}
		break;

	case NEXUS_TYPE_USER_PIPE:      /* only for userland */
	case NEXUS_TYPE_MONITOR:        /* invalid */
	default:
		*err = EINVAL;
		goto done;
	}

	/* callee holds a reference for our caller upon success */
	nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, init, err);

done:
	return nxprov;
}
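
/*
 * Net effect of the checks above for kernel-created providers:
 *
 *	NEXUS_TYPE_KERNEL_PIPE	init is required
 *	NEXUS_TYPE_FLOW_SWITCH	init must be NULL (built-in callbacks)
 *	NEXUS_TYPE_NET_IF	init is optional, and may be either the
 *				netif or the current provider version
 *
 * User pipes can only be registered from userland via nxprov_create(),
 * and monitor is not a registrable type, hence EINVAL for both.
 */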

int
nxprov_destroy(struct nxctl *nxctl, const uuid_t nxprov_uuid)
{
	struct kern_nexus_provider *nxprov = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	SK_LOCK();

	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (nxctl == nxprov->nxprov_ctl &&
		    uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
			nxprov_retain_locked(nxprov);
			break;
		}
	}

	if (nxprov == NULL) {
		err = ENOENT;
	} else {
		err = nxprov_close(nxprov, TRUE);
	}

	if (nxprov != NULL) {
		(void) nxprov_release_locked(nxprov);
	}

	SK_UNLOCK();

	return err;
}

int
nxprov_close(struct kern_nexus_provider *nxprov, boolean_t locked)
{
	int err = 0;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
	    nxprov->nxprov_flags, NXPROVF_BITS);
#endif /* SK_LOG */

	if (nxprov->nxprov_flags & NXPROVF_CLOSED) {
		err = EALREADY;
	} else {
		struct kern_nexus *nx, *tnx;

		nxprov->nxprov_ctl = NULL;

		STAILQ_FOREACH_SAFE(nx, &nxprov->nxprov_nx_head,
		    nx_prov_link, tnx) {
			nx_retain_locked(nx);
			(void) nx_close(nx, TRUE);
			(void) nx_release_locked(nx);
		}

		if (STAILQ_EMPTY(&nxprov->nxprov_nx_head)) {
			/* no nexus created on this, so detach now */
			nxprov_detach(nxprov, TRUE);
		} else {
			/* detach when last nexus is destroyed */
			ASSERT(nxprov->nxprov_refcnt > 1);
			nxprov->nxprov_flags |= NXPROVF_CLOSED;
		}
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return err;
}

static void
nxprov_detach(struct kern_nexus_provider *nxprov, boolean_t locked)
{
	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov 0x%llx UUID %s flags 0x%b", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
	    nxprov->nxprov_flags, NXPROVF_BITS);
#endif /* SK_LOG */

	ASSERT(nxprov->nxprov_flags & NXPROVF_ATTACHED);
	STAILQ_REMOVE(&nxprov_head, nxprov, kern_nexus_provider, nxprov_link);
	nxprov->nxprov_flags &= ~NXPROVF_ATTACHED;

	/* caller must hold an extra ref */
	ASSERT(nxprov->nxprov_refcnt > 1);
	(void) nxprov_release_locked(nxprov);

	if (!locked) {
		SK_UNLOCK();
	}
}

static struct kern_nexus_provider *
nxprov_alloc(struct kern_nexus_domain_provider *nxdom_prov, zalloc_flags_t how)
{
	struct kern_nexus_provider *nxprov;
	struct nxprov_params *nxp;

	ASSERT(nxdom_prov != NULL);

	nxp = nxprov_params_alloc(how);
	if (nxp == NULL) {
		SK_ERR("Failed to allocate nxprov_params");
		return NULL;
	}

	nxprov = zalloc_flags(nxprov_zone, how | Z_ZERO);
	if (nxprov == NULL) {
		SK_ERR("Failed to allocate nxprov");
		nxprov_params_free(nxp);
		return NULL;
	}

	nxprov->nxprov_dom_prov = nxdom_prov;
	nxprov->nxprov_params = nxp;
	/* hold a reference for nxprov */
	nxdom_prov_retain_locked(nxdom_prov);

	return nxprov;
}

static void
nxprov_free(struct kern_nexus_provider *nxprov)
{
	struct kern_nexus_domain_provider *nxdom_prov =
	    nxprov->nxprov_dom_prov;

	SK_LOCK_ASSERT_HELD();

	ASSERT(nxdom_prov != NULL);
	(void) nxdom_prov_release_locked(nxdom_prov);
	nxprov->nxprov_dom_prov = NULL;
	ASSERT(nxprov->nxprov_params != NULL);
	nxprov_params_free(nxprov->nxprov_params);
	nxprov->nxprov_params = NULL;
	ASSERT(!(nxprov->nxprov_flags & NXPROVF_ATTACHED));
	SK_DF(SK_VERB_MEM, "nxprov 0x%llx FREE", SK_KVA(nxprov));
	zfree(nxprov_zone, nxprov);
}

static void
nxprov_retain_locked(struct kern_nexus_provider *nxprov)
{
	SK_LOCK_ASSERT_HELD();

	nxprov->nxprov_refcnt++;
	ASSERT(nxprov->nxprov_refcnt != 0);
}

void
nxprov_retain(struct kern_nexus_provider *nxprov)
{
	SK_LOCK();
	nxprov_retain_locked(nxprov);
	SK_UNLOCK();
}

static int
nxprov_release_locked(struct kern_nexus_provider *nxprov)
{
	int oldref = nxprov->nxprov_refcnt;

	SK_LOCK_ASSERT_HELD();

	ASSERT(nxprov->nxprov_refcnt != 0);
	if (--nxprov->nxprov_refcnt == 0) {
		nxprov_free(nxprov);
	}

	return oldref == 1;
}

int
nxprov_release(struct kern_nexus_provider *nxprov)
{
	int lastref;

	SK_LOCK();
	lastref = nxprov_release_locked(nxprov);
	SK_UNLOCK();

	return lastref;
}

struct nxprov_params *
nxprov_params_alloc(zalloc_flags_t how)
{
	return zalloc_flags(nxprov_params_zone, how | Z_ZERO);
}

void
nxprov_params_free(struct nxprov_params *nxp)
{
	SK_DF(SK_VERB_MEM, "nxp 0x%llx FREE", SK_KVA(nxp));
	zfree(nxprov_params_zone, nxp);
}

static int
nx_check_pp(struct kern_nexus_provider *nxprov, struct kern_pbufpool *pp)
{
	struct kern_nexus_domain_provider *nxdom_prov = nxprov->nxprov_dom_prov;

	if ((pp->pp_flags & (PPF_EXTERNAL | PPF_CLOSED)) != PPF_EXTERNAL) {
		SK_ERR("Rejecting \"%s\" built-in pp", pp->pp_name);
		return ENOTSUP;
	}

	/*
	 * Require that the nexus domain metadata type and the
	 * metadata type of the caller-provided pbufpool match.
	 */
	if (nxdom_prov->nxdom_prov_dom->nxdom_md_type !=
	    pp->pp_md_type ||
	    nxdom_prov->nxdom_prov_dom->nxdom_md_subtype !=
	    pp->pp_md_subtype) {
		SK_ERR("Mismatch in metadata type/subtype "
		    "(%u/%u != %u/%u)", pp->pp_md_type,
		    nxdom_prov->nxdom_prov_dom->nxdom_md_type,
		    pp->pp_md_subtype,
		    nxdom_prov->nxdom_prov_dom->nxdom_md_subtype);
		return EINVAL;
	}

	/*
	 * Require that the nexus provider memory configuration
	 * has the same impedance as the caller-provided one.
	 * Both need to be lacking or present; if one of them
	 * is set and the other isn't, then we bail.
	 */
	if (!!(PP_BUF_REGION_DEF(pp)->skr_mode & SKR_MODE_MONOLITHIC) ^
	    !!(nxprov->nxprov_ext.nxpi_flags & NXPIF_MONOLITHIC)) {
		SK_ERR("Memory config mismatch: monolithic mode");
		return EINVAL;
	}

	return 0;
}
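
/*
 * The XOR in the last check is an impedance test: it fails exactly
 * when one side is monolithic and the other is not. E.g. a pool
 * created without PP_REGION_CONFIG_BUF_MONOLITHIC cannot back a
 * provider registered with NXPIF_MONOLITHIC, and vice versa.
 */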

struct kern_nexus *
nx_create(struct nxctl *nxctl, const uuid_t nxprov_uuid,
    const nexus_type_t dom_type, const void *nx_ctx,
    nexus_ctx_release_fn_t nx_ctx_release, struct kern_pbufpool *tx_pp,
    struct kern_pbufpool *rx_pp, int *err)
{
	struct kern_nexus_domain_provider *nxdom_prov;
	struct kern_nexus_provider *nxprov = NULL;
	struct kern_nexus *nx = NULL;
#if SK_LOG
	uuid_string_t uuidstr;
#endif /* SK_LOG */

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(dom_type < NEXUS_TYPE_MAX);
	ASSERT(!uuid_is_null(nxprov_uuid));
	*err = 0;

	SK_LOCK();

	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (nxctl == nxprov->nxprov_ctl &&
		    uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
			break;
		}
	}

	if (nxprov == NULL || (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		SK_ERR("Provider not found or has been closed");
		*err = ENOENT;
		goto done;
	}

	nxdom_prov = nxprov->nxprov_dom_prov;
	if (dom_type != NEXUS_TYPE_UNDEFINED &&
	    (nxdom_prov->nxdom_prov_dom->nxdom_type != dom_type)) {
		SK_ERR("Mismatch in domain type (0x%u != 0x%u)",
		    dom_type, nxdom_prov->nxdom_prov_dom->nxdom_type);
		nxdom_prov = NULL;
		nxprov = NULL;
		*err = ENODEV;
		goto done;
	}

	if ((dom_type == NEXUS_TYPE_NET_IF) && NXPROV_LLINK(nxprov) &&
	    (!tx_pp || !rx_pp)) {
#if SK_LOG
		SK_ERR("TX/RX packet pool is required for netif logical link "
		    "nexus provider UUID: %s",
		    sk_uuid_unparse(nxprov_uuid, uuidstr));
#endif /* SK_LOG */
		nxdom_prov = NULL;
		nxprov = NULL;
		*err = EINVAL;
		goto done;
	}

	if ((tx_pp != NULL && (*err = nx_check_pp(nxprov, tx_pp)) != 0) ||
	    (rx_pp != NULL && (*err = nx_check_pp(nxprov, rx_pp)) != 0)) {
		goto done;
	}

	nx = nx_alloc(Z_WAITOK);

	STAILQ_INIT(&nx->nx_ch_head);
	STAILQ_INIT(&nx->nx_ch_nonxref_head);
	lck_rw_init(&nx->nx_ch_if_adv_lock, &nexus_lock_group,
	    &nexus_lock_attr);
	STAILQ_INIT(&nx->nx_ch_if_adv_head);
	uuid_generate_random(nx->nx_uuid);
	nx->nx_prov = nxprov;
	nx->nx_ctx = __DECONST(void *, nx_ctx);
	nx->nx_ctx_release = nx_ctx_release;
	nx->nx_id = nxdom_prov->nxdom_prov_gencnt++;

	if (tx_pp != NULL) {
		nx->nx_tx_pp = tx_pp;
		pp_retain(tx_pp);	/* released by nx_free */
	}

	if (rx_pp != NULL) {
		nx->nx_rx_pp = rx_pp;
		pp_retain(rx_pp);	/* released by nx_free */
	}

	/* this nexus is alive; tell the nexus constructor to set it up */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor != NULL) {
		*err = nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor(nx);
		if (*err != 0) {
			nx->nx_prov = NULL;
			goto done;
		}
	}

	nxprov_retain_locked(nxprov);	/* hold a ref on the nexus reg */

	STAILQ_INSERT_TAIL(&nxprov->nxprov_nx_head, nx, nx_prov_link);
	nxprov->nxprov_nx_count++;
	RB_INSERT(kern_nexus_tree, &nx_head, nx);
	os_atomic_or(&nx->nx_flags, NXF_ATTACHED, relaxed);

	nx_retain_locked(nx);	/* one for the provider list */
	nx_retain_locked(nx);	/* one for the global list */
	nx_retain_locked(nx);	/* one for the caller */

#if SK_LOG
	SK_D("nexus 0x%llx (%s:%s) UUID %s", SK_KVA(nx),
	    nxdom_prov->nxdom_prov_dom->nxdom_name,
	    nxdom_prov->nxdom_prov_name, sk_uuid_unparse(nx->nx_uuid, uuidstr));
#endif /* SK_LOG */
done:
	SK_UNLOCK();

	if (*err != 0) {
		if (nx != NULL) {
			nx_free(nx);
			nx = NULL;
		}
	}
	return nx;
}

int
nx_destroy(struct nxctl *nxctl, const uuid_t nx_uuid)
{
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	SK_LOCK();

	uuid_copy(find.nx_uuid, nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && nxctl != NX_PROV(nx)->nxprov_ctl) {
		nx = NULL;
	}

	if (nx != NULL) {
		nx_retain_locked(nx);
	}

	if (nx == NULL) {
		err = ENOENT;
	} else {
		/* prevent any opens */
		os_atomic_or(&nx->nx_flags, NXF_INVALIDATED, relaxed);
		err = nx_close(nx, TRUE);
		(void) nx_release_locked(nx);
	}

	SK_UNLOCK();

	return err;
}

static inline int
nx_cmp(const struct kern_nexus *a, const struct kern_nexus *b)
{
	return uuid_compare(a->nx_uuid, b->nx_uuid);
}

struct kern_nexus *
nx_find(const uuid_t nx_uuid, boolean_t locked)
{
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

	uuid_copy(find.nx_uuid, nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && (nx->nx_flags & NXF_CLOSED)) {
		nx = NULL;
	}

	/* return reference to caller */
	if (nx != NULL) {
		nx_retain_locked(nx);
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return nx;
}

int
nx_close(struct kern_nexus *nx, boolean_t locked)
{
	int err = 0;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

	if (nx->nx_flags & NXF_CLOSED) {
		err = EALREADY;
	} else {
#if SK_LOG
		uuid_string_t uuidstr;
		SK_D("nexus 0x%llx (%s:%s) UUID %s flags 0x%b", SK_KVA(nx),
		    NX_DOM(nx)->nxdom_name, NX_DOM_PROV(nx)->nxdom_prov_name,
		    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags,
		    NXF_BITS);
#endif /* SK_LOG */

		if (STAILQ_EMPTY(&nx->nx_ch_head)) {
			/* no regular channels open to it, so detach now */
			nx_detach(nx);
		} else {
			/* detach when the last channel closes */
			ASSERT(nx->nx_refcnt > 3);
			os_atomic_or(&nx->nx_flags, NXF_CLOSED, relaxed);
		}
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return err;
}

void
nx_stop(struct kern_nexus *nx)
{
	struct kern_nexus_provider *nxprov = nx->nx_prov;

	SK_LOCK_ASSERT_HELD();

	/* send a stop message */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_stop != NULL) {
		nxprov->nxprov_dom_prov->nxdom_prov_nx_stop(nx);
	}
}

void
nx_detach(struct kern_nexus *nx)
{
	struct kern_nexus_provider *nxprov = nx->nx_prov;

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nexus 0x%llx UUID %s flags 0x%b", SK_KVA(nx),
	    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags, NXF_BITS);
#endif /* SK_LOG */

	/* Caller must hold extra refs, on top of the two in reg/global lists */
	ASSERT(nx->nx_refcnt >= 3);
	ASSERT(nx->nx_flags & NXF_ATTACHED);

	/* this nexus is done; let the nexus destructor do final cleanups */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor != NULL) {
		nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor(nx);
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	STAILQ_REMOVE(&nxprov->nxprov_nx_head, nx, kern_nexus, nx_prov_link);
	nxprov->nxprov_nx_count--;
	RB_REMOVE(kern_nexus_tree, &nx_head, nx);
	os_atomic_andnot(&nx->nx_flags, NXF_ATTACHED, relaxed);
	nx->nx_prov = NULL;
	if (nx->nx_ctx_release != NULL) {
		nx->nx_ctx_release(nx->nx_ctx);
	}
	nx->nx_ctx = NULL;

	(void) nx_release_locked(nx);	/* one for the reg list */
	(void) nx_release_locked(nx);	/* one for the global list */

	/*
	 * If this was the last nexus and the provider has been closed,
	 * detach the provider and finish up the postponed job.
	 */
	if (STAILQ_EMPTY(&nxprov->nxprov_nx_head) &&
	    (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		nxprov_detach(nxprov, TRUE);
	}
	(void) nxprov_release_locked(nxprov);
}

int
nx_advisory_alloc(struct kern_nexus *nx, const char *name,
    struct skmem_region_params *srp_nexusadv, nexus_advisory_type_t type)
{
	struct __kern_nexus_adv_metadata *adv_md;
	uint32_t msize = 0;
	/* -fbounds-safety: why do we need maddr? */
	void *__sized_by(msize) maddr = NULL;

	_CASSERT(sizeof(struct __kern_nexus_adv_metadata) == sizeof(uint64_t));
	_CASSERT((sizeof(struct sk_nexusadv) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	_CASSERT((sizeof(struct netif_nexus_advisory) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(type == NEXUS_ADVISORY_TYPE_FLOWSWITCH ||
	    type == NEXUS_ADVISORY_TYPE_NETIF);

	if ((nx->nx_adv.nxv_reg = skmem_region_create(name, srp_nexusadv,
	    NULL, NULL, NULL)) == NULL) {
		return ENOMEM;
	}

	nx->nx_adv.nxv_adv = skmem_region_alloc(nx->nx_adv.nxv_reg, &maddr,
	    NULL, NULL, (SKMEM_NOSLEEP | SKMEM_PANIC),
	    nx->nx_adv.nxv_reg->skr_c_obj_size, &msize);
	nx->nx_adv.nxv_adv_size = nx->nx_adv.nxv_reg->skr_c_obj_size;
	adv_md = nx->nx_adv.nxv_adv;
	adv_md->knam_version = NX_ADVISORY_MD_CURRENT_VERSION;
	adv_md->knam_type = type;
	adv_md->__reserved = 0;
	nx->nx_adv.nxv_adv_type = type;
	nx->nx_adv.flowswitch_nxv_adv = (void *)(adv_md + 1);
	if (type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
		nx->nx_adv.flowswitch_nxv_adv->nxadv_ver =
		    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION;
	} else {
		nx->nx_adv.netif_nxv_adv->nna_version =
		    NX_NETIF_ADVISORY_CURRENT_VERSION;
	}
	return 0;
}
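
/*
 * Editorial sketch (not from the original source): the advisory object
 * allocated above is laid out as a fixed 8-byte metadata header followed
 * immediately by the type-specific advisory structure, which is why the
 * payload pointer is computed as (adv_md + 1):
 *
 *	offset 0                      8
 *	+-----------------------------+--------------------------------+
 *	| __kern_nexus_adv_metadata   | sk_nexusadv (flowswitch), or   |
 *	| (version, type, __reserved) | netif_nexus_advisory (netif)   |
 *	+-----------------------------+--------------------------------+
 *
 * The _CASSERTs above guarantee that both variants, header included,
 * fit within NX_NEXUSADV_MAX_SZ.
 */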

void
nx_advisory_free(struct kern_nexus *nx)
{
	if (nx->nx_adv.nxv_reg != NULL) {
		ASSERT(nx->nx_adv.nxv_adv != NULL);
		skmem_region_free(nx->nx_adv.nxv_reg,
		    nx->nx_adv.nxv_adv, NULL);
		nx->nx_adv.nxv_adv = NULL;
		nx->nx_adv.nxv_adv_size = 0;
		nx->nx_adv.nxv_adv_type = NEXUS_ADVISORY_TYPE_INVALID;
		nx->nx_adv.flowswitch_nxv_adv = NULL;
		skmem_region_release(nx->nx_adv.nxv_reg);
		nx->nx_adv.nxv_reg = NULL;
	}

	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(nx->nx_adv.nxv_adv_type == NEXUS_ADVISORY_TYPE_INVALID);
	ASSERT(nx->nx_adv.flowswitch_nxv_adv == NULL);
}

static struct kern_nexus *
nx_alloc(zalloc_flags_t how)
{
	SK_LOCK_ASSERT_HELD();

	return zalloc_flags(nx_zone, how | Z_ZERO);
}

static void
nx_free(struct kern_nexus *nx)
{
	ASSERT(!(nx->nx_flags & NXF_ATTACHED) && nx->nx_prov == NULL);
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	nx_port_free_all(nx);

	if (nx->nx_tx_pp != NULL) {
		pp_release(nx->nx_tx_pp);
		nx->nx_tx_pp = NULL;
	}
	if (nx->nx_rx_pp != NULL) {
		pp_release(nx->nx_rx_pp);
		nx->nx_rx_pp = NULL;
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_if_adv_head));
	lck_rw_destroy(&nx->nx_ch_if_adv_lock, &nexus_lock_group);

	SK_DF(SK_VERB_MEM, "nexus 0x%llx FREE", SK_KVA(nx));
	zfree(nx_zone, nx);
}

void
nx_retain_locked(struct kern_nexus *nx)
{
	SK_LOCK_ASSERT_HELD();

	nx->nx_refcnt++;
	VERIFY(nx->nx_refcnt > 0);
}

void
nx_retain(struct kern_nexus *nx)
{
	SK_LOCK();
	nx_retain_locked(nx);
	SK_UNLOCK();
}

int
nx_release_locked(struct kern_nexus *nx)
{
	int oldref = nx->nx_refcnt;

	SK_LOCK_ASSERT_HELD();

	VERIFY(nx->nx_refcnt > 0);
	if (--nx->nx_refcnt == 0) {
		nx_free(nx);
	}

	return oldref == 1;
}

int
nx_release(struct kern_nexus *nx)
{
	int lastref;

	SK_LOCK_ASSERT_NOTHELD();

	SK_LOCK();
	lastref = nx_release_locked(nx);
	SK_UNLOCK();

	return lastref;
}
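
/*
 * Editorial note: a sketch of how the retain/release pairs above balance
 * out over a nexus lifetime, based on the calls visible in this file.
 * nx_create() takes three references: the provider-list and global-tree
 * references are dropped by nx_detach(), and the caller's reference is
 * dropped via nx_release() or nx_release_locked() once the caller is
 * done.  nx_find() returns an extra reference that its caller must also
 * drop, as kern_nexus_get_pbufpool_info() below demonstrates.
 */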

static int
nx_init_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	boolean_t undo = FALSE;
	int ksd_retains = 0;
	enum txrx t;
	int err = 0;

	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)) ==
	    CHANF_EXT_PRECONNECT);

	if (nxprov->nxprov_ext.nxpi_ring_init == NULL) {
		return 0;
	}

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			/* skip host rings */
			if (kring->ckr_flags & CKRF_HOST) {
				continue;
			}

			if ((err = nxprov->nxprov_ext.nxpi_ring_init(
			    nxprov, nx, ch, kring, (kring->ckr_tx == NR_TX),
			    &kring->ckr_ctx)) != 0) {
				SK_D("ch 0x%llx flags %b nx 0x%llx kr \"%s\" "
				    "(0x%llx) krflags %b ring_init error %d",
				    SK_KVA(ch), ch->ch_flags, CHANF_BITS,
				    SK_KVA(nx), kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS, err);
				kring->ckr_ctx = NULL;
				undo = TRUE;
				break;
			}
			kring->ckr_flags |= CKRF_EXT_RING_INITED;

			if ((err = nx_init_slots(nx, kring)) != 0) {
				undo = TRUE;
				break;
			}

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_retains;
			}
		}
		if (undo) {
			break;
		}
	}

	/*
	 * Note: retain the KSD region even in case of error, since we have
	 * set the CKRF_EXT_SLOTS_INITED flag on some of the rings;
	 * nx_fini_rings() will release those based on that flag.
	 */
	if (ksd_retains != 0) {
		/*
		 * Mark the kernel slot descriptor region as busy; this
		 * prevents it from being torn-down at channel defunct
		 * time, as we need to invoke the slot_fini() callback
		 * for each slot and we need the descriptors until then.
		 */
		skmem_arena_nexus_sd_set_noidle(skmem_arena_nexus(na->na_arena),
		    ksd_retains);
	}

	if (err != 0) {
		ASSERT(undo);
		nx_fini_rings(nx, ch);
	}

	return err;
}

static void
nx_fini_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	int ksd_releases = 0;
	enum txrx t;

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			if (!(kring->ckr_flags & CKRF_EXT_RING_INITED)) {
				continue;
			}

			ASSERT(!(kring->ckr_flags & CKRF_HOST));
			ASSERT(nxprov->nxprov_ext.nxpi_ring_fini != NULL);
			nxprov->nxprov_ext.nxpi_ring_fini(nxprov, nx, kring);
			kring->ckr_flags &= ~CKRF_EXT_RING_INITED;

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_releases;
			}

			/*
			 * Undo the work done in nx_init_slots() and inform
			 * the external domain provider, if applicable, that
			 * the slots for this ring are no longer valid.
			 */
			nx_fini_slots(nx, kring);
			kring->ckr_ctx = NULL;
		}
	}

	if (ksd_releases != 0) {
		/*
		 * Now that we've finished invoking the slot_fini()
		 * callbacks, release the busy retain counts held
		 * earlier in nx_init_rings().  This will allow the
		 * kernel slot descriptor region to be torn down.
		 */
		skmem_arena_nexus_sd_set_noidle(
			skmem_arena_nexus(na->na_arena), -ksd_releases);
	}
}

static int
nx_init_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	int err = 0;
	uint32_t i;

	/*
	 * If the slot init callback was not provided, or if the
	 * kring was not created to hold any slot contexts, don't
	 * go any further.
	 */
	if (nxprov->nxprov_ext.nxpi_slot_init == NULL ||
	    kring->ckr_slot_ctxs == NULL) {
		return 0;
	}

	ASSERT(kring->ckr_slot_ctxs_set == 0);
	ASSERT(slot != NULL);

	for (i = 0; i < kring->ckr_num_slots; i++) {
		struct kern_slot_prop *__single slot_ctx_prop = NULL;
		/* -fbounds-safety: slot_ctx is unsafe anyway (mach_vmaddr_t) */
		void *__single slot_ctx_arg = NULL;

		ASSERT(&slot[i] <= kring->ckr_ksds_last);
		if ((err = nxprov->nxprov_ext.nxpi_slot_init(nxprov, nx, kring,
		    &slot[i], i, &slot_ctx_prop, &slot_ctx_arg)) != 0) {
			SK_D("nx 0x%llx kr \"%s\" (0x%llx) krflags %b slot %u "
			    "slot_init error %d", SK_KVA(nx), kring->ckr_name,
			    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, i, err);
			break;
		}
		/* we don't want this to be used by client, so verify here */
		ASSERT(slot_ctx_prop == NULL);
		kring->ckr_slot_ctxs[i].slot_ctx_arg = slot_ctx_arg;
		kring->ckr_slot_ctxs_set++;
	}

	if (err != 0) {
		nx_fini_slots(nx, kring);
	} else {
		kring->ckr_flags |= CKRF_EXT_SLOTS_INITED;
	}

	return err;
}

static void
nx_fini_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	uint32_t i;

	ASSERT(!(kring->ckr_flags & CKRF_EXT_SLOTS_INITED) ||
	    nxprov->nxprov_ext.nxpi_slot_fini != NULL);
	ASSERT(slot != NULL || !(kring->ckr_flags & CKRF_EXT_SLOTS_INITED));

	for (i = 0; i < kring->ckr_slot_ctxs_set; i++) {
		ASSERT(slot != NULL && &slot[i] <= kring->ckr_ksds_last);
		if (nxprov->nxprov_ext.nxpi_slot_fini != NULL) {
			nxprov->nxprov_ext.nxpi_slot_fini(nxprov, nx,
			    kring, &slot[i], i);
		}
		if (kring->ckr_slot_ctxs != NULL) {
			kring->ckr_slot_ctxs[i].slot_ctx_arg = 0;
		}
	}
	kring->ckr_slot_ctxs_set = 0;

	/* We're done with this kring */
	kring->ckr_flags &= ~CKRF_EXT_SLOTS_INITED;
}

/* 64-bit mask with range */
#define BMASK64(_beg, _end)						\
	((NX_PORT_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))
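
/*
 * Editorial worked example (assumes NX_PORT_CHUNK_FREE is the all-ones
 * 64-bit pattern, per its use below as the "all free" bitmap value):
 *
 *	BMASK64(2, 5)
 *	  = (0xffffffffffffffff >> (63 - 5)) & ~((1ULL << 2) - 1)
 *	  = 0x000000000000003f & ~0x3
 *	  = 0x000000000000003c		(bits 2..5 inclusive set)
 *
 * i.e. the macro builds a mask covering the inclusive bit range
 * [_beg, _end] within a 64-bit port chunk.
 */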

int
nx_port_find(struct kern_nexus *nx, nexus_port_t first,
    nexus_port_t last, nexus_port_t *nx_port)
{
	int err = 0;

	ASSERT(first < last);
	*nx_port = NEXUS_PORT_ANY;

	if (nx->nx_num_ports == 0 || (first + 1) >= nx->nx_num_ports) {
		/*
		 * Left edge of the range is beyond the current map;
		 * let nx_port_alloc() handle the growing later.
		 */
		*nx_port = first;
	} else {
		nexus_port_size_t fc = (first / NX_PORT_CHUNK);
		nexus_port_size_t lc = (MIN(last, nx->nx_num_ports) / NX_PORT_CHUNK);
		nexus_port_size_t lim = (nx->nx_num_ports / NX_PORT_CHUNK);
		nexus_port_size_t i, j;
		bitmap_t *bmap;

		/*
		 * The right edge of the range is either within or
		 * beyond the current map; scan thru the current
		 * map and find the first available port.
		 */
		for (i = fc; i <= lc; i++) {
			bitmap_t mask;
			nexus_port_size_t beg = 0, end = 63;

			if (i == fc) {
				beg = (first % NX_PORT_CHUNK);
			}
			if (i == (last / NX_PORT_CHUNK)) {
				end = (last % NX_PORT_CHUNK);
			}

			if (i < lim) {
				bmap = &nx->nx_ports_bmap[i];
				mask = BMASK64(beg, end);

				j = (nexus_port_size_t)ffsll((*bmap) & mask);
				if (j == 0) {
					continue;
				}

				--j;
				*nx_port = (i * NX_PORT_CHUNK) + j;
			}
			break;
		}

		/*
		 * If the requested range is within the current map and we
		 * couldn't find a port, return an err.  Otherwise, return
		 * the next port index to trigger growing later.
		 */
		if (*nx_port == NEXUS_PORT_ANY) {
			if (lc == (last / NX_PORT_CHUNK)) {
				err = EBUSY;
				SK_ERR("port unavail in [%u, %u)", first, last);
			} else {
				*nx_port = nx->nx_num_ports;
			}
		}
	}

	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d (err %d)", SK_KVA(nx),
	    (int)*nx_port, err);

	return err;
}
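
/*
 * Editorial worked example for the chunk arithmetic used throughout the
 * nx_port_* routines (NX_PORT_CHUNK is 64, matching the bitmap_t width
 * asserted in nx_port_grow() below): port 130 lives in chunk
 * 130 / 64 = 2 at bit 130 % 64 = 2, so it is tracked by bit 2 of
 * nx_ports_bmap[2].  A set bit means the port is free; a cleared bit
 * means it is bound or allocated.
 */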

static int
nx_port_grow(struct kern_nexus *nx, nexus_port_size_t grow)
{
	ASSERT(NXDOM_MAX(NX_DOM(nx), ports) <= NEXUS_PORT_MAX);
	nexus_port_t dom_port_max = (nexus_port_size_t)NXDOM_MAX(NX_DOM(nx), ports);
	struct nx_port_info *ports;
	size_t limit;
	nexus_port_size_t i, num_ports, old_num_ports;
	bitmap_t *bmap;

	ASSERT(grow > 0 && (grow % NX_PORT_CHUNK) == 0);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	_CASSERT((sizeof(*bmap) * 8) == NX_PORT_CHUNK);
	ASSERT(powerof2(dom_port_max));
	ASSERT(dom_port_max % NX_PORT_CHUNK == 0);

	old_num_ports = nx->nx_num_ports;
	num_ports = nx->nx_num_ports + grow;
	limit = P2ROUNDUP(dom_port_max, NX_PORT_CHUNK);
	if (num_ports > limit) {
		SK_ERR("can't grow, total %u grow %u (new %u > dom_max %u)",
		    nx->nx_num_ports, grow, num_ports, limit);
		return EDOM;
	}

	if ((bmap = sk_realloc_data(nx->nx_ports_bmap,
	    (old_num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    (num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		SK_ERR("bmap alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}
	nx->nx_ports_bmap = bmap;
	nx->nx_ports_bmap_size = (num_ports / NX_PORT_CHUNK) * sizeof(*bmap);

	if ((ports = sk_realloc_type_array(struct nx_port_info, old_num_ports,
	    num_ports, nx->nx_ports, Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		/* can't free bmap here, otherwise nexus won't work */
		SK_ERR("nx_ports alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}

	/* initialize the additional new ports */
	bzero(&ports[nx->nx_num_ports], (grow * sizeof(*ports)));

	/* initialize new bitmaps (set all bits) */
	for (i = (nx->nx_num_ports / NX_PORT_CHUNK);
	    i < (num_ports / NX_PORT_CHUNK); i++) {
		bmap[i] = NX_PORT_CHUNK_FREE;
	}

	/*
	 * -fbounds-safety: Not sure if moving nx_ports assignment down here
	 * would cause a regression.
	 */
	nx->nx_ports = ports;
	nx->nx_num_ports = num_ports;

	SK_DF(SK_VERB_NXPORT, "!!! nx 0x%llx ports %u/%u, %u ports added",
	    SK_KVA(nx), nx->nx_active_ports, nx->nx_num_ports, grow);

	return 0;
}
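
/*
 * Editorial worked example: with nx_num_ports == 64 and a request for
 * nx_port 130 in nx_port_alloc() below, the growth amount is
 * P2ROUNDUP((130 + 1) - 64, 64) = P2ROUNDUP(67, 64) = 128, taking the
 * map to 192 ports (three chunks), the smallest chunk-aligned size that
 * covers the requested port.
 */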

int
nx_port_alloc(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb,
    struct nexus_adapter **na, struct proc *p)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb0;
	size_t g;
	uint32_t i, j;
	bitmap_t *bmap;
	bool refonly = false;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);

	/* port is zero-based, so adjust here */
	if ((nx_port + 1) > nx->nx_num_ports) {
		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
		VERIFY(g <= NEXUS_PORT_MAX);
		if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
			goto done;
		}
	}
	ASSERT(err == 0);
	ASSERT(nx_port < nx->nx_num_ports);
	npi = &nx->nx_ports[nx_port];
	nxb0 = npi->npi_nxb;
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if (bit_test(*bmap, j)) {
		/* port is not (yet) bound or allocated */
		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
		if (p != kernproc && !NX_ANONYMOUS_PROV(nx)) {
			/*
			 * If the port allocation is requested by userland
			 * and the nexus is non-anonymous, then fail the
			 * request.
			 */
			err = EACCES;
			SK_ERR("user proc alloc on named nexus needs binding");
		} else if (na != NULL && *na != NULL) {
			/*
			 * Otherwise claim it (clear bit) if the caller
			 * supplied an adapter for this port; else, it
			 * is just an existential check and so there's
			 * no action needed at this point (we'll skip
			 * the init below since vpna is NULL).
			 */
			bit_clear(*bmap, j);
		}
	} else {
		/* if port is bound, check if credentials match */
		if (nxb0 != NULL && p != kernproc && !NX_ANONYMOUS_PROV(nx) &&
		    (nxb == NULL || !nxb_is_equal(nxb0, nxb))) {
			SK_ERR("nexus binding mismatch");
			err = EACCES;
		} else {
			/*
			 * If port is already occupied by an adapter,
			 * see if the client is requesting a reference
			 * to it; if so, return the adapter.  Otherwise,
			 * if unoccupied and vpna is non-NULL, associate
			 * it with this nexus port via the below init.
			 */
			if (NPI_NA(npi) != NULL) {
				if (na != NULL && *na == NULL) {
					*na = NPI_NA(npi);
					na_retain_locked(*na);
					/* skip the init below */
					refonly = true;
				} else {
					/*
					 * If the client supplied an adapter
					 * (regardless of its value) for a
					 * nexus port that's already occupied,
					 * then we fail the request.
					 */
					SK_ERR("nexus adapter exists");
					err = EEXIST;
				}
			}
		}
	}

done:
	/* initialize the nexus port and the adapter occupying it */
	if (err == 0 && na != NULL && *na != NULL && !refonly) {
		ASSERT(nx_port < nx->nx_num_ports);
		ASSERT(npi->npi_nah == 0);
		ASSERT(nx->nx_active_ports < nx->nx_num_ports);
		ASSERT(!bit_test(nx->nx_ports_bmap[nx_port / NX_PORT_CHUNK],
		    (nx_port % NX_PORT_CHUNK)));

		nx->nx_active_ports++;
		npi->npi_nah = NPI_NA_ENCODE(*na, NEXUS_PORT_STATE_WORKING);
		(*na)->na_nx_port = nx_port;
	}

	SK_DF(SK_VERB_NXPORT, "nx 0x%llx nx_port %d, ports %u/%u (err %d)",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports,
	    err);

	return err;
}

void
nx_port_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = &nx->nx_ports[nx_port];

	npi->npi_nah = NPI_NA_ENCODE(npi->npi_nah,
	    NEXUS_PORT_STATE_DEFUNCT);
}

void
nx_port_free(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	bitmap_t *bmap;
	uint32_t i, j;

	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nx_port != NEXUS_PORT_ANY && nx_port < nx->nx_num_ports);
	ASSERT(nx->nx_active_ports != 0);

	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	ASSERT(!bit_test(*bmap, j));

	npi = &nx->nx_ports[nx_port];
	npi->npi_nah = 0;
	if (npi->npi_nxb == NULL) {
		/* it's vacant, release it (set bit) */
		bit_set(*bmap, j);
	}

	nx->nx_active_ports--;

	//XXX [email protected] --- try to shrink bitmap & nx_ports ???

	SK_DF(SK_VERB_NXPORT, "--- nx 0x%llx nx_port %d, ports %u/%u",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports);
}

int
nx_port_bind_info(struct kern_nexus *nx, nexus_port_t nx_port,
    struct nxbind *nxb0, void *info)
{
	struct nx_port_info *npi = NULL;
	size_t g;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT(nx_port < NXDOM_MAX(NX_DOM(nx), ports));
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nxb0 != NULL);

	if ((nx_port + 1) > nx->nx_num_ports) {
		g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
		VERIFY(g <= NEXUS_PORT_MAX);
		if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
			goto done;
		}
	}
	ASSERT(err == 0);

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	if (bit_test(*bmap, j)) {
		/* port is not (yet) bound or allocated */
		ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);

		struct nxbind *nxb = nxb_alloc(Z_WAITOK);
		nxb_move(nxb0, nxb);
		npi->npi_nxb = nxb;
		npi->npi_info = info;
		/* claim it (clear bit) */
		bit_clear(*bmap, j);
		ASSERT(err == 0);
	} else {
		/* port is already taken */
		ASSERT(NPI_NA(npi) != NULL || npi->npi_nxb != NULL);
		err = EEXIST;
	}
done:

	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "+++ nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}

int
nx_port_bind(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb0)
{
	return nx_port_bind_info(nx, nx_port, nxb0, NULL);
}

/*
 * -fbounds-safety: all callers pass npi_info. Why don't we just change the
 * input type to nx_port_info_header *?
 */
static int
nx_port_info_size(struct nx_port_info_header *info, size_t *sz)
{
	struct nx_port_info_header *hdr = info;

	switch (hdr->ih_type) {
	case NX_PORT_INFO_TYPE_NETIF:
		break;
	default:
		return EINVAL;
	}
	*sz = hdr->ih_size;
	return 0;
}

int
nx_port_unbind(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (nx_port >= nx->nx_num_ports) {
		err = EDOM;
		goto done;
	}

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if ((nxb = npi->npi_nxb) == NULL) {
		/* must be either free or allocated */
		ASSERT(NPI_NA(npi) == NULL ||
		    (!bit_test(*bmap, j) && nx->nx_active_ports > 0));
		err = ENOENT;
	} else {
		nxb_free(nxb);
		npi->npi_nxb = NULL;
		if (npi->npi_info != NULL) {
			size_t sz;

			VERIFY(nx_port_info_size(npi->npi_info, &sz) == 0);
			sk_free_data(npi->npi_info, sz);
			npi->npi_info = NULL;
		}
		ASSERT(!bit_test(*bmap, j));
		if (NPI_NA(npi) == NULL) {
			/* it's vacant, release it (set bit) */
			bit_set(*bmap, j);
		}
	}

done:
	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "--- nx 0x%llx nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}

struct nexus_adapter *
nx_port_get_na(struct kern_nexus *nx, nexus_port_t nx_port)
{
	if (nx->nx_ports != NULL && nx->nx_num_ports > nx_port) {
		return NPI_NA(&nx->nx_ports[nx_port]);
	} else {
		return NULL;
	}
}

int
nx_port_get_info(struct kern_nexus *nx, nexus_port_t port,
    nx_port_info_type_t type, void *__sized_by(len)info, uint32_t len)
{
	struct nx_port_info *npi;
	struct nx_port_info_header *hdr;

	if (nx->nx_ports == NULL || port >= nx->nx_num_ports) {
		return ENXIO;
	}
	npi = &nx->nx_ports[port];
	/*
	 * -fbounds-safety: Changing npi_info to be __sized_by is a major
	 * surgery. Just forge it here for now.
	 */
	hdr = __unsafe_forge_bidi_indexable(struct nx_port_info_header *,
	    npi->npi_info, len);
	if (hdr == NULL) {
		return ENOENT;
	}

	if (hdr->ih_type != type) {
		return EINVAL;
	}

	bcopy(hdr, info, len);
	return 0;
}

bool
nx_port_is_valid(struct kern_nexus *nx, nexus_port_t nx_port)
{
	return nx_port < nx->nx_num_ports;
}

bool
nx_port_is_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
{
	ASSERT(nx_port_is_valid(nx, nx_port));

	return NPI_IS_DEFUNCT(&nx->nx_ports[nx_port]);
}

void
nx_port_free_all(struct kern_nexus *nx)
{
	/* uncrustify doesn't handle C blocks properly */
	/* BEGIN IGNORE CODESTYLE */
	nx_port_foreach(nx, ^(nexus_port_t p) {
		struct nxbind *nxb;
		/*
		 * XXX -fbounds-safety: Come back to this after fixing npi_info
		 */
		void *__single info;
		nxb = nx->nx_ports[p].npi_nxb;
		info = nx->nx_ports[p].npi_info;
		if (nxb != NULL) {
			nxb_free(nxb);
			nx->nx_ports[p].npi_nxb = NULL;
		}
		if (info != NULL) {
			size_t sz;

			VERIFY(nx_port_info_size(info, &sz) == 0);
			skn_free_data(info, info, sz);
			nx->nx_ports[p].npi_info = NULL;
		}
	});
	/* END IGNORE CODESTYLE */

	nx->nx_active_ports = 0;
	sk_free_data_sized_by(nx->nx_ports_bmap, nx->nx_ports_bmap_size);
	nx->nx_ports_bmap = NULL;
	nx->nx_ports_bmap_size = 0;
	sk_free_type_array_counted_by(struct nx_port_info, nx->nx_num_ports,
	    nx->nx_ports);
	nx->nx_ports = NULL;
	nx->nx_num_ports = 0;
}

void
nx_port_foreach(struct kern_nexus *nx,
    void (^port_handle)(nexus_port_t nx_port))
{
	for (nexus_port_size_t i = 0; i < (nx->nx_num_ports / NX_PORT_CHUNK); i++) {
		bitmap_t bmap = nx->nx_ports_bmap[i];

		if (bmap == NX_PORT_CHUNK_FREE) {
			continue;
		}

		for (nexus_port_size_t j = 0; j < NX_PORT_CHUNK; j++) {
			if (bit_test(bmap, j)) {
				continue;
			}
			port_handle((i * NX_PORT_CHUNK) + j);
		}
	}
}
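
/*
 * Editorial usage sketch: nx_port_foreach() visits only ports whose bitmap
 * bit is clear, i.e. ports that are bound or allocated, skipping all-free
 * chunks wholesale.  A hypothetical caller counting occupied ports (the
 * __block variable and the counting are illustrative, not part of this
 * file) would look like:
 *
 *	__block uint32_t occupied = 0;
 *	nx_port_foreach(nx, ^(nexus_port_t p) {
 *		occupied++;
 *	});
 *
 * nx_port_free_all() above is the in-tree example of the same pattern.
 */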

/*
 * sysctl interfaces
 */
static int nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_provider_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_provider_list_sysctl, "S,nexus_provider_info_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_channel_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_channel_list_sysctl, "S,nexus_channel_entry_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, llink_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_LLINK_LIST, nexus_mib_get_sysctl, "S,nx_llink_info",
    "A list of logical links");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
    0, NXMIB_FLOW, nexus_mib_get_sysctl, "S,sk_stats_flow",
    "Nexus inet flows with stats collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_owner,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_OWNER, nexus_mib_get_sysctl, "S,sk_stats_flow_owner",
    "Nexus flow owners");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_route,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ROUTE, nexus_mib_get_sysctl, "S,sk_stats_flow_route",
    "Nexus flow routes");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, net_if,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_STATS, nexus_mib_get_sysctl, "S,sk_stats_net_if",
    "Nexus netif statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_switch,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FSW_STATS, nexus_mib_get_sysctl, "S,sk_stats_flow_switch",
    "Nexus flowswitch statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, userstack,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_USERSTACK_STATS, nexus_mib_get_sysctl, "S,sk_stats_userstack",
    "Nexus userstack statistics counter");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_adv,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ADV, nexus_mib_get_sysctl, "S,sk_stats_flow_adv",
    "Nexus flow advisory dump");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netif_queue,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_QUEUE_STATS, nexus_mib_get_sysctl, "S,netif_qstats_info",
    "A list of netif queue stats entries");
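
/*
 * Editorial usage sketch: like other CTLTYPE_STRUCT nodes, these follow
 * the standard two-pass sysctl protocol that the handlers below implement
 * via req->oldptr/req->oldlen: probe with a NULL buffer to learn the
 * size, then fetch.  A hypothetical userland caller (illustrative only,
 * not part of this file):
 *
 *	size_t len = 0;
 *	if (sysctlbyname("kern.skywalk.nexus_provider_list",
 *	    NULL, &len, NULL, 0) == 0 && len != 0) {
 *		void *buf = malloc(len);
 *		if (buf != NULL &&
 *		    sysctlbyname("kern.skywalk.nexus_provider_list",
 *		    buf, &len, NULL, 0) == 0) {
 *			// parse nexus_provider_info_t records
 *		}
 *		free(buf);
 *	}
 *
 * Note that the list handlers require root (kauth_cred_issuser), so the
 * probe fails with EPERM for unprivileged callers.
 */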

/*
 * Provider list sysctl
 */
static void
nexus_provider_info_populate(struct kern_nexus_provider *nxprov,
    nexus_provider_info_t info)
{
	struct kern_nexus *nx;
	uuid_t *uuids;

	SK_LOCK_ASSERT_HELD();

	/* provider UUID + params */
	uuid_copy(info->npi_prov_uuid, nxprov->nxprov_uuid);
	bcopy(nxprov->nxprov_params, &info->npi_prov_params,
	    sizeof(struct nxprov_params));
	info->npi_instance_uuids_count = nxprov->nxprov_nx_count;

	/* instance UUID list */
	uuids = __unsafe_forge_bidi_indexable(uuid_t *,
	    info->npi_instance_uuids,
	    sizeof(uuid_t) * info->npi_instance_uuids_count);
	STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
		uuid_copy(*uuids, nx->nx_uuid);
		uuids++;
	}
}

static int
nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	int error = 0;
	struct kern_nexus_provider *nxprov;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		size_t info_size;

		info_size = NEXUS_PROVIDER_INFO_SIZE(nxprov->nxprov_nx_count);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			nexus_provider_info_populate(nxprov, (void *)scan);
			scan += info_size;
			buffer_space -= info_size;
		}
		actual_space += info_size;
	}
	SK_UNLOCK();

	out_error = SYSCTL_OUT(req, buffer, actual_space);
	if (out_error != 0) {
		error = out_error;
	}

	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

/*
 * Channel list sysctl
 */
static uint32_t
channel_ring_count(struct kern_channel *ch, enum txrx which)
{
	return ch->ch_last[which] - ch->ch_first[which];
}

/*
 * -fbounds-safety: kring's range is [first..last]. Marking it
 * __counted_by(last) means range is [0..first..last]. The [0..first) might be
 * problematic. However, the for loop in this function starts indexing from
 * 'first', not 0, so that should be okay.
 * XXX Until BATS starts using uncrustify-7 (rdar://90709826), having a space
 * between __counted_by(entry_count) entries will be considered invalid code
 * style and build will fail. Until rdar://117811249 is resolved, either stick
 * to what makes BATS happy, or wrap IGNORE CODESTYLE around.
 */
static void
populate_ring_entries(struct __kern_channel_ring *__counted_by(last)kring,
    ring_id_t first, ring_id_t last,
    nexus_channel_ring_entry *__counted_by(entry_count)entries,
    uint32_t NX_FB_ARG entry_count)
{
	ring_id_t i;
	nexus_channel_ring_entry_t scan;
	struct __kern_channel_ring *ring;

	scan = entries;
	for (i = first; i < last; i++, scan++) {
		ring = &kring[i];

		DTRACE_SKYWALK1(populate__ring, struct __kern_channel_ring *,
		    ring);
		if (kr_stat_enable == 0) {
			bzero(&scan->ncre_stats, sizeof(scan->ncre_stats));
			bzero(&scan->ncre_user_stats,
			    sizeof(scan->ncre_user_stats));
		} else {
			scan->ncre_stats = ring->ckr_stats;
			scan->ncre_user_stats = ring->ckr_usr_stats;
		}
		scan->ncre_error_stats = ring->ckr_err_stats;
		scan->ncre_ring_id = i;
	}
}

/* combine/convert ch_mode/ch_flags into nexus_channel_entry flags */
static uint32_t
nexus_channel_get_flags(uint32_t ch_mode, uint32_t ch_flags)
{
	uint32_t flags = 0;

	flags |= (ch_mode & CHMODE_MONITOR_TX) ? SCHF_MONITOR_TX : 0;
	flags |= (ch_mode & CHMODE_MONITOR_RX) ? SCHF_MONITOR_RX : 0;
	flags |= (ch_mode & CHMODE_MONITOR_NO_COPY) ? SCHF_MONITOR_NO_COPY : 0;
	flags |= (ch_mode & CHMODE_USER_PACKET_POOL) ? SCHF_USER_PACKET_POOL : 0;
	flags |= (ch_mode & CHMODE_DEFUNCT_OK) ? SCHF_DEFUNCT_OK : 0;
	flags |= (ch_mode & CHMODE_FILTER) ? SCHF_FILTER : 0;
	flags |= (ch_mode & CHMODE_EVENT_RING) ? SCHF_EVENT_RING : 0;
	flags |= (ch_mode & CHMODE_EXCLUSIVE) ? SCHF_EXCLUSIVE : 0;
	flags |= (ch_flags & CHANF_IF_ADV) ? SCHF_IF_ADV : 0;
	flags |= (ch_flags & CHANF_DEFUNCT_SKIP) ? SCHF_DEFUNCT_SKIP : 0;
	flags |= (ch_flags & CHANF_CLOSING) ? SCHF_CLOSING : 0;
	flags |= (ch_flags & CHANF_DEFUNCT) ? SCHF_DEFUNCT : 0;
	flags |= (ch_mode & CHMODE_LOW_LATENCY) ? SCHF_LOW_LATENCY : 0;

	return flags;
}

SK_NO_INLINE_ATTRIBUTE
static void
nexus_channel_entry_populate(struct kern_channel *ch,
    nexus_channel_entry_t entry)
{
	uint32_t ch_mode = ch->ch_info->cinfo_ch_mode;
	uint32_t ch_flags = ch->ch_flags;
	ring_id_t rx_first = ch->ch_first[NR_RX];
	ring_id_t rx_last = ch->ch_last[NR_RX];
	ring_id_t tx_last = ch->ch_last[NR_TX];
	ring_id_t tx_first = ch->ch_first[NR_TX];

	uuid_copy(entry->nce_uuid, ch->ch_info->cinfo_ch_id);
	entry->nce_flags = nexus_channel_get_flags(ch_mode, ch_flags);
	entry->nce_port = ch->ch_info->cinfo_nx_port;
	entry->nce_pid = ch->ch_pid;
	entry->nce_fd = ch->ch_fd;
	entry->nce_tx_rings = tx_last - tx_first;
	entry->nce_rx_rings = rx_last - rx_first;
	populate_ring_entries(ch->ch_na->na_tx_rings, tx_first, tx_last,
	    entry->nce_ring_entries, entry->nce_tx_rings);

	/*
	 * -fbounds-safety: If entry->nce_tx_rings > 0 and
	 * entry->nce_rx_rings == 0 (i.e. entry->nce_ring_count ==
	 * entry->nce_tx_rings), simply passing
	 * entry->nce_ring_entries + entry->nce_tx_rings to
	 * populate_ring_entries will fail the bounds check, because it is
	 * equivalent to assigning nce_ring_entries + nce_tx_rings to a
	 * __single variable, and in this case it goes out of bounds. It's
	 * the same thing as having:
	 *	int a[1];
	 *	some_func(a + 1); <-- bounds check will fail
	 */
	if (rx_first < rx_last) {
		populate_ring_entries(ch->ch_na->na_rx_rings, rx_first, rx_last,
		    entry->nce_ring_entries + entry->nce_tx_rings,
		    entry->nce_rx_rings);
	}
}

SK_NO_INLINE_ATTRIBUTE
static size_t
nexus_channel_info_populate(struct kern_nexus *nx,
    nexus_channel_info *__sized_by(buffer_size) info, size_t buffer_size)
{
	struct kern_channel *ch = NULL;
	size_t info_size;
	caddr_t scan = NULL;
	nexus_channel_entry *entry;

	SK_LOCK_ASSERT_HELD();

	info_size = sizeof(nexus_channel_info);

	/* channel list */
	if (info != NULL) {
		if (buffer_size < info_size) {
			return info_size;
		}

		/* instance UUID */
		uuid_copy(info->nci_instance_uuid, nx->nx_uuid);
		info->nci_channel_entries_count = nx->nx_ch_count;
		scan = (caddr_t __bidi_indexable)info->nci_channel_entries;
	}
	STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
		size_t entry_size;
		uint32_t ring_count;

		ring_count = channel_ring_count(ch, NR_TX) +
		    channel_ring_count(ch, NR_RX);
		entry_size = NEXUS_CHANNEL_ENTRY_SIZE(ring_count);
		info_size += entry_size;
		if (scan != NULL) {
			if (buffer_size < info_size) {
				return info_size;
			}
			entry = (nexus_channel_entry *)(void *)scan;
			entry->nce_ring_count = ring_count;

			nexus_channel_entry_populate(ch, entry);
			scan += entry_size;
		}
	}
	return info_size;
}

static int
nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	int error = 0;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		size_t info_size;

		info_size = nexus_channel_info_populate(nx, (void *)scan,
		    buffer_space);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += info_size;
			buffer_space -= info_size;
		}
		actual_space += info_size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}

static int
nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	struct proc *p = req->p;
	struct nexus_mib_filter filter;
	int error = 0;
	size_t actual_space;
	size_t allocated_space = 0;
	caddr_t __sized_by(allocated_space) buffer = NULL;
	size_t buffer_space;
	int out_error;
	struct kern_nexus *nx;
	caddr_t scan;

	/* Restrict protocol stats access to root user only (like netstat). */
	if (oidp->oid_arg2 == NXMIB_USERSTACK_STATS &&
	    !kauth_cred_issuser(kauth_cred_get())) {
		SK_ERR("mib request rejected, EPERM");
		return EPERM;
	}

	if (req->newptr == USER_ADDR_NULL) {
		/*
		 * For flow stats requests, non-root users need to provide a
		 * 5-tuple. Otherwise, we do not grant access.
		 */
		if (oidp->oid_arg2 == NXMIB_FLOW &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple not provided");
			return EPERM;
		}
		/* use subcommand for multiple nodes */
		filter.nmf_type = oidp->oid_arg2;
		filter.nmf_bitmap = 0x0;
	} else if (req->newlen != sizeof(struct nexus_mib_filter)) {
		SK_ERR("mismatched newlen");
		return EINVAL;
	} else {
		error = SYSCTL_IN(req, &filter, sizeof(struct nexus_mib_filter));
		if (error != 0) {
			SK_ERR("SYSCTL_IN err %d", error);
			return error;
		}
		if (filter.nmf_type != oidp->oid_arg2) {
			SK_ERR("mismatched nmf_type");
			return EINVAL;
		}
		/*
		 * For flow stats requests, non-root users need to set the nexus
		 * mib filter to NXMIB_FILTER_INFO_TUPLE. Otherwise, we do not
		 * grant access. This ensures that fsw_mib_get_flow looks for a
		 * flow entry that matches the given tuple of the non-root user.
		 */
		if (filter.nmf_type == NXMIB_FLOW &&
		    (filter.nmf_bitmap & NXMIB_FILTER_INFO_TUPLE) == 0 &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple filter not set");
			return EPERM;
		}
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		buffer = sk_alloc_data(buffer_space, Z_WAITOK, skmem_tag_sysctl_buf);
		allocated_space = buffer_space;
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;

	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		if (NX_DOM_PROV(nx)->nxdom_prov_nx_mib_get == NULL) {
			continue;
		}

		size_t size = 0;
		struct kern_nexus_domain_provider *nx_dp = NX_DOM_PROV(nx);

		/*
		 * -fbounds-safety: Because scan takes the bounds of buffer
		 * (which is __sized_by(allocated_space)), at some point scan
		 * will reach its bounds (because of scan += size). When it
		 * does, it won't pass the bounds check when scan is passed to
		 * the nxdom_prov_nx_mib_get function. We need to avoid passing
		 * scan to nxdom_prov_nx_mib_get when it reaches its upper
		 * bound, i.e. when buffer_space reaches 0 (see
		 * buffer_space -= size).
		 */
		if (req->oldptr == USER_ADDR_NULL || buffer_space != 0) {
			size = nx_dp->nxdom_prov_nx_mib_get(nx, &filter, scan,
			    buffer_space, p);
		}

		if (scan != NULL) {
			if (buffer_space < size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += size;
			buffer_space -= size;
		}
		actual_space += size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data_sized_by(buffer, allocated_space);
	}

	return error;
}

void
kern_nexus_walktree(kern_nexus_walktree_f_t *f, void *arg0,
    boolean_t is_sk_locked)
{
	struct kern_nexus *nx = NULL;

	if (!is_sk_locked) {
		SK_LOCK();
	} else {
		SK_LOCK_ASSERT_HELD();
	}

	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		(*f)(nx, arg0);
	}

	if (!is_sk_locked) {
		SK_UNLOCK();
	}
}

errno_t
kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,
    struct kern_pbufpool_memory_info *rx_pool_info,
    struct kern_pbufpool_memory_info *tx_pool_info)
{
	struct kern_pbufpool *__single tpp, *__single rpp;
	struct kern_nexus *nx;
	errno_t err = 0;

	nx = nx_find(nx_uuid, FALSE);
	if (nx == NULL) {
		err = ENOENT;
		goto done;
	}

	if (nx->nx_prov->nxprov_params->nxp_type != NEXUS_TYPE_NET_IF) {
		err = ENOTSUP;
		goto done;
	}

	err = nx_netif_prov_nx_mem_info(nx, &tpp, &rpp);
	if (err != 0) {
		goto done;
	}

	if ((tpp == NULL) && (rpp == NULL)) {
		err = ENOENT;
		goto done;
	}

	if (tx_pool_info != NULL) {
		bzero(tx_pool_info, sizeof(*tx_pool_info));
	}
	if (rx_pool_info != NULL) {
		bzero(rx_pool_info, sizeof(*rx_pool_info));
	}

	if ((tx_pool_info != NULL) && (tpp != NULL)) {
		err = kern_pbufpool_get_memory_info(tpp, tx_pool_info);
		if (err != 0) {
			goto done;
		}
	}

	if ((rx_pool_info != NULL) && (rpp != NULL)) {
		err = kern_pbufpool_get_memory_info(rpp, rx_pool_info);
	}

done:
	if (nx != NULL) {
		(void) nx_release(nx);
		nx = NULL;
	}
	return err;
}

void
nx_interface_advisory_notify(struct kern_nexus *nx)
{
	struct kern_channel *ch;
	struct netif_stats *nifs;
	struct fsw_stats *fsw_stats;
	nexus_type_t nxdom_type = NX_DOM(nx)->nxdom_type;

	if (nxdom_type == NEXUS_TYPE_NET_IF) {
		nifs = &NX_NETIF_PRIVATE(nx)->nif_stats;
	} else if (nxdom_type == NEXUS_TYPE_FLOW_SWITCH) {
		fsw_stats = &NX_FSW_PRIVATE(nx)->fsw_stats;
	} else {
		VERIFY(0);
		__builtin_unreachable();
	}
	if (!lck_rw_try_lock_shared(&nx->nx_ch_if_adv_lock)) {
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_DROP);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_DROP);
		}
		return;
	}
	/*
	 * If the channel is on the nx_ch_if_adv_head list, then we can
	 * safely assume that the channel is not yet closed.
	 * In ch_close_common(), the channel is removed from the
	 * nx_ch_if_adv_head list while holding nx_ch_if_adv_lock in
	 * exclusive mode, prior to closing the channel.
	 */
	STAILQ_FOREACH(ch, &nx->nx_ch_if_adv_head, ch_link_if_adv) {
		struct nexus_adapter *na = ch->ch_na;

		ASSERT(na != NULL);
		na_post_event(&na->na_tx_rings[ch->ch_first[NR_TX]],
		    TRUE, FALSE, FALSE, CHAN_FILT_HINT_IF_ADV_UPD);
		if (nxdom_type == NEXUS_TYPE_NET_IF) {
			STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_SENT);
		} else {
			STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_SENT);
		}
	}
	lck_rw_done(&nx->nx_ch_if_adv_lock);
}