1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/nexus/netif/nx_netif.h>
31 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
32 #include <sys/sdt.h>
33
34 #include <kern/uipc_domain.h>
35
/*
 * When non-zero, relaxes the controller-ownership check performed in
 * nxctl_nexus_bind() and nxctl_nexus_config().  Tunable only on
 * DEVELOPMENT/DEBUG kernels via kern.skywalk.disable_nxctl_check.
 */
static uint32_t disable_nxctl_check = 0;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk, OID_AUTO, disable_nxctl_check,
    CTLFLAG_RW | CTLFLAG_LOCKED, &disable_nxctl_check, 0, "");
#endif
41
/* lock groups/attribute shared by the nexus, mbq and pktq locks */
LCK_GRP_DECLARE(nexus_lock_group, "sk_nx_lock");
LCK_GRP_DECLARE(nexus_mbq_lock_group, "sk_nx_mbq_lock");
LCK_GRP_DECLARE(nexus_pktq_lock_group, "sk_nx_pktq_lock");
LCK_ATTR_DECLARE(nexus_lock_attr, 0, 0);

/* all open nexus controllers (except the static kernel/user handles) */
static STAILQ_HEAD(, nxctl) nxctl_head =
    STAILQ_HEAD_INITIALIZER(nxctl_head);
/* all registered nexus providers */
static STAILQ_HEAD(, kern_nexus_provider) nxprov_head =
    STAILQ_HEAD_INITIALIZER(nxprov_head);

/* red-black tree of all nexus instances, ordered by nx_cmp() */
static int nx_cmp(const struct kern_nexus *, const struct kern_nexus *);
RB_HEAD(kern_nexus_tree, kern_nexus);
RB_PROTOTYPE_SC(static, kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
RB_GENERATE(kern_nexus_tree, kern_nexus, nx_link, nx_cmp);
static struct kern_nexus_tree nx_head;
57
/* nxctl option handlers (dispatched from nxctl_set_opt/nxctl_get_opt) */
static int nxctl_get_nexus_prov_list(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_prov_entry(struct nxctl *, struct sockopt *);
static int nxctl_get_nexus_list(struct nxctl *, struct sockopt *);
static int nxctl_nexus_bind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_unbind(struct nxctl *, struct sockopt *);
static int nxctl_nexus_config(struct nxctl *, struct sockopt *);
static int nxctl_get_channel_list(struct nxctl *, struct sockopt *);
/* nxctl lifecycle helpers ("_locked" variants expect SK_LOCK held) */
static void nxctl_retain_locked(struct nxctl *);
static int nxctl_release_locked(struct nxctl *);
static void nxctl_init(struct nxctl *, struct proc *, struct fileproc *);
static struct nxctl *nxctl_alloc(struct proc *, struct fileproc *, zalloc_flags_t);
static void nxctl_free(struct nxctl *);

/* nexus provider lifecycle helpers */
static struct kern_nexus_provider *nxprov_create_common(struct nxctl *,
    struct kern_nexus_domain_provider *, struct nxprov_reg *,
    const struct kern_nexus_provider_init *init, int *);
static void nxprov_detach(struct kern_nexus_provider *, boolean_t);
static void nxprov_retain_locked(struct kern_nexus_provider *);
static int nxprov_release_locked(struct kern_nexus_provider *);
static struct kern_nexus_provider *nxprov_alloc(
    struct kern_nexus_domain_provider *, zalloc_flags_t);
static void nxprov_free(struct kern_nexus_provider *);

/* nexus instance ring/slot setup and lifecycle helpers */
static int nx_init_rings(struct kern_nexus *, struct kern_channel *);
static void nx_fini_rings(struct kern_nexus *, struct kern_channel *);
static int nx_init_slots(struct kern_nexus *, struct __kern_channel_ring *);
static void nx_fini_slots(struct kern_nexus *, struct __kern_channel_ring *);
static struct kern_nexus *nx_alloc(zalloc_flags_t);
static void nx_free(struct kern_nexus *);
87
/* zalloc zones for the fixed-size nexus structures */
static SKMEM_TYPE_DEFINE(nxctl_zone, struct nxctl);

static SKMEM_TYPE_DEFINE(nxbind_zone, struct nxbind);

static SKMEM_TYPE_DEFINE(nxprov_zone, struct kern_nexus_provider);

static SKMEM_TYPE_DEFINE(nxprov_params_zone, struct nxprov_params);

static SKMEM_TYPE_DEFINE(nx_zone, struct kern_nexus);

/* set once nexus_init() has completed; cleared by nexus_fini() */
static int __nx_inited = 0;

/* allocation tags for variable-sized nexus buffers */
#define SKMEM_TAG_NX_KEY "com.apple.skywalk.nexus.key"
SKMEM_TAG_DEFINE(skmem_tag_nx_key, SKMEM_TAG_NX_KEY);

#define SKMEM_TAG_NX_MIB "com.apple.skywalk.nexus.mib"
static SKMEM_TAG_DEFINE(skmem_tag_nx_mib, SKMEM_TAG_NX_MIB);

#define SKMEM_TAG_NX_PORT "com.apple.skywalk.nexus.port"
SKMEM_TAG_DEFINE(skmem_tag_nx_port, SKMEM_TAG_NX_PORT);

#define SKMEM_TAG_NX_PORT_INFO "com.apple.skywalk.nexus.port.info"
SKMEM_TAG_DEFINE(skmem_tag_nx_port_info, SKMEM_TAG_NX_PORT_INFO);
111
/*
 * Special nexus controller handles for Skywalk internal use.  Unlike all
 * other nexus controller handles that are created by userland or kernel
 * clients, these never get closed or freed.  They are also not part of
 * the global nxctl_head list.
 */
static struct nxctl _kernnxctl;
static struct nxctl _usernxctl;
struct nexus_controller kernnxctl = { .ncd_nxctl = &_kernnxctl };
struct nexus_controller usernxctl = { .ncd_nxctl = &_usernxctl };

/*
 * -fbounds-safety: For static functions where additional size variables are
 * added, we need to mark them __unused if this file is being built without
 * -fbounds-safety.
 */
#if !__has_ptrcheck
#define NX_FB_ARG __unused
#else
#define NX_FB_ARG
#endif
133
/*
 * One-time initialization of the nexus layer.  Called with the global
 * Skywalk lock (SK_LOCK) held; must not be called twice without an
 * intervening nexus_fini().  Always returns 0.
 */
int
nexus_init(void)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(!__nx_inited);

	RB_INIT(&nx_head);

	/* bring up the nexus adapter layer first */
	na_init();

	/* attach system built-in domains and domain providers */
	nxdom_attach_all();

	/*
	 * Initialize the private kernel and shared user nexus controller
	 * handles.
	 *
	 * The shared kernel controller is used internally for creating
	 * nexus providers and nexus instances from within the Skywalk
	 * code (e.g. netif_compat).
	 *
	 * The shared user controller is used by userspace clients
	 * (e.g. libnetcore) that would like to call nexus instances for
	 * use cases like configuring flow entries that they own
	 * indirectly (e.g. via NECP), so that the nexus would perform
	 * permission checks based on other info (e.g. PID, UUID) and
	 * bypass the nxctl check (this nxctl has no credentials).
	 */
	nxctl_init(&_kernnxctl, kernproc, NULL);
	nxctl_retain_locked(&_kernnxctl);       /* one for us */
	nxctl_init(&_usernxctl, kernproc, NULL);
	nxctl_retain_locked(&_usernxctl);       /* one for us */
	nxctl_traffic_rule_init();

	__nx_inited = 1;

	return 0;
}
170
/*
 * Tear down the nexus layer.  Called with SK_LOCK held; a no-op when
 * nexus_init() has not run.  All nexus instances must already be gone.
 */
void
nexus_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__nx_inited) {
		nxctl_traffic_rule_fini();
		/* drop the references taken in nexus_init() */
		nxctl_release_locked(&_kernnxctl);
		nxctl_release_locked(&_usernxctl);

		/* tell all domains they're going away */
		nxdom_detach_all();

		/* no nexus instance may outlive the domains */
		ASSERT(RB_EMPTY(&nx_head));

		na_fini();

		__nx_inited = 0;
	}
}
191
/*
 * Create a nexus controller handle for process `p' and insert it into
 * the global nxctl_head list.  On success the returned nxctl carries
 * two references: one for list membership and one for the caller.
 *
 * NOTE(review): *err is only ever read in this function, never
 * written; the cleanup branch at the bottom therefore depends on the
 * caller passing in a pre-initialized (normally zero) error value —
 * confirm all callers do so.
 */
struct nxctl *
nxctl_create(struct proc *p, struct fileproc *fp, const uuid_t nxctl_uuid,
    int *err)
{
	struct nxctl *nxctl = NULL;

	ASSERT(!uuid_is_null(nxctl_uuid));

	/* privilege checks would be done when performing nxctl operations */

	SK_LOCK();

	/*
	 * Result is used without a NULL check below; nxctl_alloc() with
	 * Z_WAITOK is assumed to never fail — TODO confirm.
	 */
	nxctl = nxctl_alloc(p, fp, Z_WAITOK);

	STAILQ_INSERT_TAIL(&nxctl_head, nxctl, nxctl_link);
	nxctl->nxctl_flags |= NEXUSCTLF_ATTACHED;
	uuid_copy(nxctl->nxctl_uuid, nxctl_uuid);

	nxctl_retain_locked(nxctl);     /* one for being in the list */
	nxctl_retain_locked(nxctl);     /* one for the caller */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl %p UUID %s", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr));
#endif /* SK_LOG */

	SK_UNLOCK();

	if (*err != 0) {
		nxctl_free(nxctl);
		nxctl = NULL;
	}
	return nxctl;
}
227
/*
 * Close a nexus controller handle: drop its file reference, detach it
 * from the global list, and close every nexus provider created through
 * it.  The kernel-internal controller must never be closed.  The
 * caller must hold an extra reference on nxctl across this call.
 */
void
nxctl_close(struct nxctl *nxctl)
{
	struct kern_nexus_provider *nxprov = NULL, *tnxprov;

	lck_mtx_lock(&nxctl->nxctl_lock);
	SK_LOCK();

	ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL));

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxctl %p UUID %s flags 0x%x", SK_KVA(nxctl),
	    sk_uuid_unparse(nxctl->nxctl_uuid, uuidstr),
	    nxctl->nxctl_flags);
#endif /* SK_LOG */

	/* sever the file-descriptor back pointer, if still present */
	if (!(nxctl->nxctl_flags & NEXUSCTLF_NOFDREF)) {
		nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
		nxctl->nxctl_fp = NULL;
	}

	/* may be called as part of failure cleanup, so check */
	if (nxctl->nxctl_flags & NEXUSCTLF_ATTACHED) {
		/* caller must hold an extra ref */
		ASSERT(nxctl->nxctl_refcnt > 1);
		(void) nxctl_release_locked(nxctl);

		STAILQ_REMOVE(&nxctl_head, nxctl, nxctl, nxctl_link);
		nxctl->nxctl_flags &= ~NEXUSCTLF_ATTACHED;
	}

repeat:
	STAILQ_FOREACH_SAFE(nxprov, &nxprov_head, nxprov_link, tnxprov) {
		/*
		 * Close provider only for those which are owned by
		 * this control instance.  Note that if we close the
		 * provider, we need to repeat this search as the
		 * list might have been changed by another thread.
		 * That's possible since SK_UNLOCK() may be called
		 * as a result of calling nxprov_close().
		 */
		if (!(nxprov->nxprov_flags & NXPROVF_CLOSED) &&
		    nxprov->nxprov_ctl == nxctl) {
			/* keep the provider alive across the close */
			nxprov_retain_locked(nxprov);
			(void) nxprov_close(nxprov, TRUE);
			(void) nxprov_release_locked(nxprov);
			goto repeat;
		}
	}

	SK_UNLOCK();
	lck_mtx_unlock(&nxctl->nxctl_lock);
	/* purge traffic rules owned by this controller, outside the locks */
	nxctl_traffic_rule_clean(nxctl);
}
283
284 int
nxctl_set_opt(struct nxctl * nxctl,struct sockopt * sopt)285 nxctl_set_opt(struct nxctl *nxctl, struct sockopt *sopt)
286 {
287 #pragma unused(nxctl)
288 int err = 0;
289
290 NXCTL_LOCK_ASSERT_HELD(nxctl);
291
292 if (sopt->sopt_dir != SOPT_SET) {
293 sopt->sopt_dir = SOPT_SET;
294 }
295
296 switch (sopt->sopt_name) {
297 case NXOPT_NEXUS_BIND:
298 err = nxctl_nexus_bind(nxctl, sopt);
299 break;
300
301 case NXOPT_NEXUS_UNBIND:
302 err = nxctl_nexus_unbind(nxctl, sopt);
303 break;
304
305 case NXOPT_NEXUS_CONFIG:
306 err = nxctl_nexus_config(nxctl, sopt);
307 break;
308
309 default:
310 err = ENOPROTOOPT;
311 break;
312 }
313
314 return err;
315 }
316
317 int
nxctl_get_opt(struct nxctl * nxctl,struct sockopt * sopt)318 nxctl_get_opt(struct nxctl *nxctl, struct sockopt *sopt)
319 {
320 #pragma unused(nxctl)
321 int err = 0;
322
323 NXCTL_LOCK_ASSERT_HELD(nxctl);
324
325 if (sopt->sopt_dir != SOPT_GET) {
326 sopt->sopt_dir = SOPT_GET;
327 }
328
329 switch (sopt->sopt_name) {
330 case NXOPT_NEXUS_PROV_LIST:
331 err = nxctl_get_nexus_prov_list(nxctl, sopt);
332 break;
333
334 case NXOPT_NEXUS_PROV_ENTRY:
335 err = nxctl_get_nexus_prov_entry(nxctl, sopt);
336 break;
337
338 case NXOPT_NEXUS_LIST:
339 err = nxctl_get_nexus_list(nxctl, sopt);
340 break;
341
342 case NXOPT_CHANNEL_LIST:
343 err = nxctl_get_channel_list(nxctl, sopt);
344 break;
345
346 default:
347 err = ENOPROTOOPT;
348 break;
349 }
350
351 return err;
352 }
353
/* Upper bound on # of nrl_num_regs that we'd return to user space */
#define MAX_NUM_REG_ENTRIES 256

/*
 * NXOPT_NEXUS_PROV_LIST handler: return the nexus provider
 * registrations visible to this controller (all registrations when
 * the caller holds PRIV_SKYWALK_OBSERVE_ALL).
 *
 * Hoisted out of line to reduce kernel stack footprint.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	struct nxprov_reg_ent *pnre, *nres = NULL;
	struct nxprov_list_req nrlr;
	struct kern_nexus_provider *nxprov = NULL;
	uint32_t nregs = 0, ncregs = 0;
	int err = 0, observeall;
	size_t nres_sz;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nrlr, sizeof(nrlr), sizeof(nrlr));
	if (err != 0) {
		return err;
	}

	/* clamp the requested entry count to a sane upper bound */
	if ((size_t)nrlr.nrl_num_regs > MAX_NUM_REG_ENTRIES) {
		nrlr.nrl_num_regs = MAX_NUM_REG_ENTRIES;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus provider
	 * entries to caller gracefully. We only copy out the number of
	 * entries which caller has asked for, but we always tell caller
	 * how big the buffer really needs to be.
	 *
	 * NOTE(review): ncregs below is incremented only when an entry
	 * is staged into nres, so a caller passing a NULL buffer gets
	 * ENOENT rather than the total provider count; this differs
	 * from nxctl_get_nexus_list() — confirm whether intended.
	 */
	tmp_ptr = nrlr.nrl_regs;
	if (tmp_ptr != USER_ADDR_NULL && nrlr.nrl_num_regs > 0) {
		nres_sz = (size_t)nrlr.nrl_num_regs * sizeof(*nres);
		nres = sk_alloc_data(nres_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(nres == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	/*
	 * Count number of providers. If buffer space exists and
	 * remains, copy out provider entries.
	 */
	nregs = nrlr.nrl_num_regs;
	pnre = nres;

	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (nres != NULL && nregs > 0) {
			uuid_copy(pnre->npre_prov_uuid, nxprov->nxprov_uuid);
			bcopy(nxprov->nxprov_params, &pnre->npre_prov_params,
			    sizeof(struct nxprov_params));
			--nregs;
			++pnre;
			++ncregs;
		}
	}
	SK_UNLOCK();

	if (ncregs == 0) {
		err = ENOENT;
	}

	if (nres != NULL) {
		if (err == 0 && tmp_ptr != USER_ADDR_NULL) {
			if (sopt->sopt_p != kernproc) {
				err = copyout(nres, tmp_ptr,
				    ncregs * sizeof(*nres));
			} else {
				/* kernel caller: plain copy, no copyout */
				caddr_t tmp;
				tmp = __unsafe_forge_bidi_indexable(caddr_t,
				    CAST_DOWN(caddr_t, tmp_ptr),
				    ncregs * sizeof(*nres));
				bcopy(nres, tmp, ncregs * sizeof(*nres));
			}
		}
		sk_free_data(nres, nres_sz);
		nres = NULL;
	}

	if (err == 0) {
		/* report the number of entries actually returned */
		nrlr.nrl_num_regs = ncregs;
		err = sooptcopyout(sopt, &nrlr, sizeof(nrlr));
	}

	return err;
}
460
/*
 * NXOPT_NEXUS_PROV_ENTRY handler: look up a single nexus provider by
 * its UUID and copy its registration parameters back to the caller.
 * Returns ENOENT when the provider does not exist or is not visible
 * to this controller.
 *
 * Hoisted out of line to reduce kernel stack footprint.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_prov_entry(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nxprov_reg_ent nre;
	struct kern_nexus_provider *nxprov = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nre, sizeof(nre));
	err = sooptcopyin(sopt, &nre, sizeof(nre), sizeof(nre));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nre.npre_prov_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (uuid_compare(nxprov->nxprov_uuid,
		    nre.npre_prov_uuid) == 0) {
			/*
			 * Return only entries that are visible to the caller,
			 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
			 */
			if (nxprov->nxprov_ctl != nxctl) {
				if (skywalk_priv_check_cred(sopt->sopt_p,
				    nxctl->nxctl_cred,
				    PRIV_SKYWALK_OBSERVE_ALL) != 0) {
					/* hide it: report ENOENT below */
					nxprov = NULL;
					break;
				}
			}

			bcopy(nxprov->nxprov_params, &nre.npre_prov_params,
			    sizeof(struct nxprov_params));
			break;
		}
	}
	SK_UNLOCK();

	if (nxprov != NULL) {
		err = sooptcopyout(sopt, &nre, sizeof(nre));
	} else {
		err = ENOENT;
	}

	return err;
}
519
/* Upper bound on # of nl_num_nx_uuids that we'd return to user space */
#define MAX_NUM_NX_UUIDS 4096

/*
 * NXOPT_NEXUS_LIST handler: for a given provider UUID, return the
 * UUIDs of its nexus instances.  The provider must be visible to this
 * controller (or the caller must hold PRIV_SKYWALK_OBSERVE_ALL).
 * nl_num_nx_uuids is always updated to the total instance count so
 * the caller can size its buffer.
 *
 * Hoisted out of line to reduce kernel stack footprint.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_nexus_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct nx_list_req nlr;
	struct kern_nexus_provider *nxprov = NULL;
	struct kern_nexus *nx = NULL;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	err = sooptcopyin(sopt, &nlr, sizeof(nlr), sizeof(nlr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nlr.nl_prov_uuid)) {
		return EINVAL;
	} else if ((size_t)nlr.nl_num_nx_uuids > MAX_NUM_NX_UUIDS) {
		/* clamp the requested count to a sane upper bound */
		nlr.nl_num_nx_uuids = MAX_NUM_NX_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Nexus UUIDs to
	 * caller gracefully. We only copy out the number of UUIDs which
	 * caller has asked for, but we always tell caller how big the
	 * buffer really needs to be.
	 */
	tmp_ptr = nlr.nl_nx_uuids;
	if (tmp_ptr != USER_ADDR_NULL && nlr.nl_num_nx_uuids > 0) {
		uuids_sz = (size_t)nlr.nl_num_nx_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(uuids == NULL)) {
			return ENOBUFS;
		}
	}

	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		if (nxprov->nxprov_ctl != nxctl && !observeall) {
			continue;
		}

		if (uuid_compare(nxprov->nxprov_uuid, nlr.nl_prov_uuid) == 0) {
			break;
		}
	}

	if (nxprov != NULL) {
		/*
		 * Count number of Nexus. If buffer space exists
		 * and remains, copy out the Nexus UUIDs.
		 */
		nuuids = nlr.nl_num_nx_uuids;
		puuid = uuids;

		STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, nx->nx_uuid);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nxprov != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			if (cnt_uuid > 0) {
				if (sopt->sopt_p != kernproc) {
					err = copyout(uuids, tmp_ptr,
					    cnt_uuid * sizeof(uuid_t));
				} else {
					/* kernel caller: plain copy */
					caddr_t tmp;
					tmp = __unsafe_forge_bidi_indexable(caddr_t,
					    CAST_DOWN(caddr_t, tmp_ptr),
					    cnt_uuid * sizeof(uuid_t));
					bcopy(uuids, tmp,
					    cnt_uuid * sizeof(uuid_t));
				}
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	if (err == 0) {
		/* always report the total, even if it exceeds the buffer */
		nlr.nl_num_nx_uuids = ncuuids;
		err = sooptcopyout(sopt, &nlr, sizeof(nlr));
	}

	return err;
}
640
/*
 * NXOPT_NEXUS_BIND handler: bind match criteria (process uniqueid,
 * executable UUID, and/or a secret key blob) to a port of the given
 * nexus, so that only a matching client may later attach to that port.
 * The nexus must be owned by this controller (the kernel controller
 * and, on DEVELOPMENT/DEBUG kernels, disable_nxctl_check relax this).
 * On success the chosen port is copied back to the caller via sopt.
 *
 * Hoisted out of line to reduce kernel stack footprint.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_bind(struct nxctl *nxctl, struct sockopt *sopt)
{
	boolean_t m_pid, m_exec_uuid, m_key;
	struct nx_bind_req nbr;
	struct proc *p = PROC_NULL;
	struct nxbind *nxb = NULL;
	uint64_t p_uniqueid = -1;
	pid_t p_pid = -1;
	struct kern_nexus *nx = NULL;
#if SK_LOG
	uuid_string_t exec_uuidstr;
#endif /* SK_LOG */
	uuid_t p_uuid;
	void *key = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	uuid_clear(p_uuid);
	bzero(&nbr, sizeof(nbr));
	err = sooptcopyin(sopt, &nbr, sizeof(nbr), sizeof(nbr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nbr.nb_nx_uuid)) {
		err = EINVAL;
		goto done_unlocked;
	}

	nbr.nb_flags &= NBR_MATCH_MASK;
	if (nbr.nb_flags == 0) {
		/* must choose one of the match criteria */
		err = EINVAL;
		goto done_unlocked;
	}
	m_pid = !!(nbr.nb_flags & NBR_MATCH_PID);
	m_exec_uuid = !!(nbr.nb_flags & NBR_MATCH_EXEC_UUID);
	m_key = !!(nbr.nb_flags & NBR_MATCH_KEY);

	if (m_pid || m_exec_uuid) {
		/*
		 * Validate process ID. A valid PID is needed when we're
		 * asked to match by PID, or if asked to match by executable
		 * UUID with a NULL nb_exec_uuid supplied. The latter is
		 * to support the case when a userland Nexus provider isn't
		 * able to acquire its client's executable UUID, but is
		 * able to identify it via PID.
		 */
		if ((m_pid || uuid_is_null(nbr.nb_exec_uuid)) &&
		    (p = proc_find(nbr.nb_pid)) == PROC_NULL) {
			err = ESRCH;
			goto done_unlocked;
		}
		/* exclude kernel from the match criteria */
		if (p == kernproc) {
			err = EACCES;
			goto done_unlocked;
		} else if (p != PROC_NULL) {
			/* take identity from the found process */
			proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
			p_uniqueid = proc_uniqueid(p);
			p_pid = proc_pid(p);
		} else {
			/* no process lookup; trust the supplied UUID */
			uuid_copy(p_uuid, nbr.nb_exec_uuid);
		}
	}

	if (m_key) {
		if (nbr.nb_key_len == 0 || nbr.nb_key_len > NEXUS_MAX_KEY_LEN ||
		    nbr.nb_key == USER_ADDR_NULL) {
			err = EINVAL;
			goto done_unlocked;
		}

		key = sk_alloc_data(nbr.nb_key_len, Z_WAITOK, skmem_tag_nx_key);
		if (__improbable(key == NULL)) {
			err = ENOMEM;
			goto done_unlocked;
		}

		if (sopt->sopt_p != kernproc) {
			err = copyin(nbr.nb_key, key, nbr.nb_key_len);
			if (err != 0) {
				goto done_unlocked;
			}
		} else {
			/*
			 * -fbounds-safety: nbr.nb_key is user_addr_t. Changing
			 * it to a pointer type is risky, so we just forge it
			 * here instead.
			 */
			void *nb_key = __unsafe_forge_bidi_indexable(void *,
			    nbr.nb_key, nbr.nb_key_len);
			bcopy(nb_key, key, nbr.nb_key_len);
		}
	}

	SK_LOCK();
	nx = nx_find(nbr.nb_nx_uuid, TRUE);
	if (nx == NULL || (disable_nxctl_check == 0 &&
	    nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* bind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	/* port must be within the domain's range */
	if (nbr.nb_port != NEXUS_PORT_ANY &&
	    nbr.nb_port >= NXDOM_MAX(NX_DOM(nx), ports)) {
		err = EDOM;
		goto done;
	} else if (nbr.nb_port == NEXUS_PORT_ANY) {
		/* for now, this is allowed only for kernel clients */
		if (sopt->sopt_p != kernproc) {
			err = EPERM;
			goto done;
		}
	}

	nxb = nxb_alloc(Z_WAITOK);

	if (m_pid) {
		nxb->nxb_flags |= NXBF_MATCH_UNIQUEID;
		nxb->nxb_uniqueid = p_uniqueid;
		nxb->nxb_pid = p_pid;
	}
	if (m_exec_uuid) {
		nxb->nxb_flags |= NXBF_MATCH_EXEC_UUID;
		ASSERT(!uuid_is_null(p_uuid));
		uuid_copy(nxb->nxb_exec_uuid, p_uuid);
	}
	if (m_key) {
		nxb->nxb_flags |= NXBF_MATCH_KEY;
		ASSERT(key != NULL);
		ASSERT(nbr.nb_key_len != 0 &&
		    nbr.nb_key_len <= NEXUS_MAX_KEY_LEN);
		/*
		 * -fbounds-safety: since nxb_key is __sized_by(nxb_key_len),
		 * its assignment needs to be done side-by-side to nxb_key_len.
		 */
		nxb->nxb_key = key;
		key = NULL;             /* let nxb_free() free it */
		nxb->nxb_key_len = nbr.nb_key_len;
	}

	/*
	 * Bind the creds to the nexus port. If client doesn't have a port,
	 * find one, claim it, and associate the creds to it. Upon success,
	 * the nexus may move the nxbind contents (including the key) to
	 * its own nxbind instance; in that case, nxb_free() below will not
	 * be freeing the key within.
	 */
	err = NX_DOM(nx)->nxdom_bind_port(nx, &nbr.nb_port, nxb, NULL);
	if (err != 0) {
		goto done;
	}

	ASSERT(nbr.nb_port != NEXUS_PORT_ANY);
	/* best-effort: report the bound port back to the caller */
	(void) sooptcopyout(sopt, &nbr, sizeof(nbr));

	SK_D("nexus %p nxb %p port %u flags 0x%x pid %d "
	    "(uniqueid %llu) exec_uuid %s key %p key_len %u",
	    SK_KVA(nx), SK_KVA(nxb), nbr.nb_port, nxb->nxb_flags,
	    nxb->nxb_pid, nxb->nxb_uniqueid,
	    sk_uuid_unparse(nxb->nxb_exec_uuid, exec_uuidstr),
	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0,
	    nxb->nxb_key_len);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

done_unlocked:
	ASSERT(nx == NULL);

	if (nxb != NULL) {
		nxb_free(nxb);
		nxb = NULL;
	}
	if (key != NULL) {
		sk_free_data(key, nbr.nb_key_len);
		key = NULL;
	}
	if (p != PROC_NULL) {
		proc_rele(p);
	}

	return err;
}
846
/*
 * NXOPT_NEXUS_UNBIND handler: remove the credential binding from a
 * specific port of the given nexus.  The nexus must be owned by this
 * controller (the kernel controller is exempt), and the port must be
 * explicit (NEXUS_PORT_ANY is rejected).
 *
 * Hoisted out of line to reduce kernel stack footprint.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_unbind(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct nx_unbind_req nur;
	struct kern_nexus *nx = NULL;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&nur, sizeof(nur));
	err = sooptcopyin(sopt, &nur, sizeof(nur), sizeof(nur));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(nur.nu_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	nx = nx_find(nur.nu_nx_uuid, TRUE);
	if (nx == NULL || (nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl)) {    /* make exception for kernnxctl */
		err = ENOENT;
		goto done;
	}

	/* unbind isn't applicable on anonymous nexus provider */
	if (NX_ANONYMOUS_PROV(nx)) {
		err = ENXIO;
		goto done;
	}

	if (nur.nu_port == NEXUS_PORT_ANY) {
		err = EINVAL;
		goto done;
	}

	err = NX_DOM(nx)->nxdom_unbind_port(nx, nur.nu_port);

done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}
902
/*
 * NXOPT_NEXUS_CONFIG handler: forward a configuration request to the
 * nexus's domain provider.  Unlike bind/unbind, the shared user
 * controller is also accepted here, since the domain provider performs
 * its own permission checks (e.g. by PID/UUID).  On success the
 * possibly-updated request is copied back to the caller.
 *
 * Hoisted out of line to reduce kernel stack footprint.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_nexus_config(struct nxctl *nxctl, struct sockopt *sopt)
{
	struct kern_nexus *nx = NULL;
	struct nx_cfg_req ncr;
	int err = 0;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	bzero(&ncr, sizeof(ncr));
	err = sooptcopyin(sopt, &ncr, sizeof(ncr), sizeof(ncr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(ncr.nc_nx_uuid)) {
		return EINVAL;
	}

	SK_LOCK();
	nx = nx_find(ncr.nc_nx_uuid, TRUE);
	if (nx == NULL || (disable_nxctl_check == 0 &&
	    nx->nx_prov->nxprov_ctl != nxctl &&
	    nxctl != &_kernnxctl &&     /* allow kernel/shared user nxctl */
	    nxctl != &_usernxctl)) {
		err = ENOENT;
		goto done;
	}

	if (NX_DOM_PROV(nx)->nxdom_prov_config != NULL) {
		err = NX_DOM_PROV(nx)->nxdom_prov_config(NX_DOM_PROV(nx),
		    nx, &ncr, sopt->sopt_dir, sopt->sopt_p, nxctl->nxctl_cred);
	} else {
		/* domain provider doesn't support configuration */
		err = EPERM;
	}

	if (err == 0) {
		/* best-effort: return the updated request */
		(void) sooptcopyout(sopt, &ncr, sizeof(ncr));
	}
done:
	if (nx != NULL) {
		(void) nx_release_locked(nx);
		nx = NULL;
	}
	SK_UNLOCK();

	return err;
}
957
958 struct nxbind *
nxb_alloc(zalloc_flags_t how)959 nxb_alloc(zalloc_flags_t how)
960 {
961 struct nxbind *nxb = zalloc_flags(nxbind_zone, how | Z_ZERO);
962
963 if (nxb) {
964 SK_DF(SK_VERB_MEM, "nxb %p ALLOC", SK_KVA(nxb));
965 }
966 return nxb;
967 }
968
/*
 * Free an nxbind and any key blob it owns.
 */
void
nxb_free(struct nxbind *nxb)
{
	SK_DF(SK_VERB_MEM, "nxb %p key %p FREE", SK_KVA(nxb),
	    (nxb->nxb_key != NULL) ? SK_KVA(nxb->nxb_key) : 0);

	if (nxb->nxb_key != NULL) {
		/*
		 * -fbounds-safety: nxb_key is __sized_by(nxb_key_len),
		 * so the pointer and its length are cleared together.
		 */
		sk_free_data_sized_by(nxb->nxb_key, nxb->nxb_key_len);
		nxb->nxb_key = NULL;
		nxb->nxb_key_len = 0;
	}
	zfree(nxbind_zone, nxb);
}
982
983 /*
984 * nxb0 is assumed to possess the truth, compare nxb1 against it.
985 */
986 boolean_t
nxb_is_equal(struct nxbind * nxb0,struct nxbind * nxb1)987 nxb_is_equal(struct nxbind *nxb0, struct nxbind *nxb1)
988 {
989 ASSERT(nxb0 != NULL && nxb1 != NULL);
990 ASSERT(nxb0 != nxb1);
991
992 /* we always compare using uniqueid and not pid */
993 if ((nxb0->nxb_flags & NXBF_MATCH_UNIQUEID) &&
994 nxb1->nxb_uniqueid != nxb0->nxb_uniqueid) {
995 return FALSE;
996 }
997
998 if ((nxb0->nxb_flags & NXBF_MATCH_EXEC_UUID) &&
999 uuid_compare(nxb1->nxb_exec_uuid, nxb0->nxb_exec_uuid) != 0) {
1000 return FALSE;
1001 }
1002
1003 ASSERT(!(nxb0->nxb_flags & NXBF_MATCH_KEY) ||
1004 (nxb0->nxb_key_len != 0 && nxb0->nxb_key != NULL));
1005
1006 if ((nxb0->nxb_flags & NXBF_MATCH_KEY) &&
1007 (nxb0->nxb_key_len != nxb1->nxb_key_len ||
1008 nxb1->nxb_key == NULL || timingsafe_bcmp(nxb1->nxb_key, nxb0->nxb_key,
1009 nxb1->nxb_key_len) != 0)) {
1010 return FALSE;
1011 }
1012
1013 return TRUE;
1014 }
1015
/*
 * Transfer the contents of snxb (including key ownership) into dnxb,
 * then wipe snxb so a later nxb_free() on it won't free the moved key.
 */
void
nxb_move(struct nxbind *snxb, struct nxbind *dnxb)
{
	ASSERT(!(snxb->nxb_flags & NXBF_MATCH_KEY) ||
	    (snxb->nxb_key_len != 0 && snxb->nxb_key != NULL));

	/* in case the destination has a key attached, free it first */
	if (dnxb->nxb_key != NULL) {
		sk_free_data_sized_by(dnxb->nxb_key, dnxb->nxb_key_len);
		dnxb->nxb_key = NULL;
		dnxb->nxb_key_len = 0;
	}

	/* move everything from src to dst, and then wipe out src */
	bcopy(snxb, dnxb, sizeof(*dnxb));
	bzero(snxb, sizeof(*snxb));
}
1033
1034 /* Upper bound on # of cl_num_ch_uuids that we'd return to user space */
1035 #define MAX_NUM_CH_UUIDS 4096
1036
/* Hoisted out of line to reduce kernel stack footprint */
/*
 * Sockopt handler: given a nexus UUID in the request, report the total
 * number of channels open on that nexus, and optionally copy as many
 * channel UUIDs as the caller's buffer can hold out to userspace.  The
 * reported count always reflects the full channel list, so the caller
 * can size a retry buffer.  Returns 0 or an errno.
 */
SK_NO_INLINE_ATTRIBUTE
static int
nxctl_get_channel_list(struct nxctl *nxctl, struct sockopt *sopt)
{
	user_addr_t tmp_ptr = USER_ADDR_NULL;
	uint32_t nuuids = 0, ncuuids = 0;	/* slots left / total channels */
	uuid_t *puuid, *uuids = NULL;
	size_t uuids_sz;
	struct ch_list_req clr;
	struct kern_channel *ch = NULL;
	struct kern_nexus *nx = NULL;
	struct kern_nexus find;
	int err = 0, observeall;

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(sopt->sopt_p != NULL);
	if (sopt->sopt_val == USER_ADDR_NULL) {
		return EINVAL;
	}

	/* fetch the request: target nexus UUID plus the caller's buffer */
	err = sooptcopyin(sopt, &clr, sizeof(clr), sizeof(clr));
	if (err != 0) {
		return err;
	}

	if (uuid_is_null(clr.cl_nx_uuid)) {
		return EINVAL;
	} else if ((size_t)clr.cl_num_ch_uuids > MAX_NUM_CH_UUIDS) {
		/* silently clamp oversized requests to the supported max */
		clr.cl_num_ch_uuids = MAX_NUM_CH_UUIDS;
	}

	/*
	 * If the caller specified a buffer, copy out the Channel UUIDs to
	 * caller gracefully. We only copy out the number of UUIDs which
	 * caller has asked for, but we always tell caller how big the
	 * buffer really needs to be.
	 */
	tmp_ptr = clr.cl_ch_uuids;
	if (tmp_ptr != USER_ADDR_NULL && clr.cl_num_ch_uuids > 0) {
		/* size is bounded by MAX_NUM_CH_UUIDS (clamped above) */
		uuids_sz = (size_t)clr.cl_num_ch_uuids * sizeof(uuid_t);
		uuids = sk_alloc_data(uuids_sz, Z_WAITOK, skmem_tag_sysctl_buf);
		if (uuids == NULL) {
			return ENOBUFS;
		}
	}

	/* may this caller observe nexuses owned by other controllers? */
	observeall = (skywalk_priv_check_cred(sopt->sopt_p, nxctl->nxctl_cred,
	    PRIV_SKYWALK_OBSERVE_ALL) == 0);

	SK_LOCK();
	uuid_copy(find.nx_uuid, clr.cl_nx_uuid);
	nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
	if (nx != NULL && NX_PROV(nx)->nxprov_ctl != nxctl && !observeall) {
		/*
		 * Return only entries that are visible to the caller,
		 * unless it has PRIV_SKYWALK_OBSERVE_ALL.
		 */
		nx = NULL;
	}
	if (nx != NULL) {
		/*
		 * Count number of Channels. If buffer space exists
		 * and remains, copy out the Channel UUIDs.
		 */
		nuuids = clr.cl_num_ch_uuids;
		puuid = uuids;

		STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
			++ncuuids;
			if (uuids != NULL && nuuids > 0) {
				uuid_copy(*puuid, ch->ch_info->cinfo_ch_id);
				--nuuids;
				++puuid;
			}
		}
	} else {
		err = ENOENT;
	}
	SK_UNLOCK();

	if (uuids != NULL) {
		if (err == 0 && nx != NULL && tmp_ptr != USER_ADDR_NULL) {
			uintptr_t cnt_uuid;

			/* Note: Pointer arithmetic */
			cnt_uuid = (uintptr_t)(puuid - uuids);
			/*
			 * NOTE(review): this assumes the nexus had at least
			 * one channel whenever the caller supplied a buffer;
			 * a channel-less nexus would leave cnt_uuid at 0 and
			 * trip this assert on DEVELOPMENT builds — confirm.
			 */
			ASSERT(cnt_uuid > 0);

			if (sopt->sopt_p != kernproc) {
				/* user thread: copy out to user memory */
				err = copyout(uuids, tmp_ptr,
				    cnt_uuid * sizeof(uuid_t));
			} else {
				/* in-kernel caller: plain memory copy */
				caddr_t tmp;
				tmp = __unsafe_forge_bidi_indexable(caddr_t,
				    CAST_DOWN(caddr_t, tmp_ptr),
				    cnt_uuid * sizeof(uuid_t));
				bcopy(uuids, tmp, cnt_uuid * sizeof(uuid_t));
			}
		}
		sk_free_data(uuids, uuids_sz);
		uuids = NULL;
	}

	/* always report the true channel count back to the caller */
	if (err == 0) {
		clr.cl_num_ch_uuids = ncuuids;
		err = sooptcopyout(sopt, &clr, sizeof(clr));
	}

	return err;
}
1149
1150 static void
nxctl_init(struct nxctl * nxctl,struct proc * p,struct fileproc * fp)1151 nxctl_init(struct nxctl *nxctl, struct proc *p, struct fileproc *fp)
1152 {
1153 uuid_t p_uuid;
1154
1155 bzero(nxctl, sizeof(*nxctl));
1156
1157 proc_getexecutableuuid(p, p_uuid, sizeof(p_uuid));
1158
1159 lck_mtx_init(&nxctl->nxctl_lock, &nexus_lock_group, &nexus_lock_attr);
1160 uuid_copy(nxctl->nxctl_proc_uuid, p_uuid);
1161 nxctl->nxctl_proc_uniqueid = proc_uniqueid(p);
1162 nxctl->nxctl_cred = kauth_cred_proc_ref(p);
1163 nxctl->nxctl_fp = fp;
1164 if (nxctl == &_kernnxctl) {
1165 ASSERT(p == kernproc);
1166 nxctl->nxctl_flags |= NEXUSCTLF_KERNEL;
1167 }
1168 if (nxctl == &_usernxctl) {
1169 ASSERT(p == kernproc);
1170 nxctl->nxctl_cred = NULL;
1171 }
1172 if (fp == NULL) {
1173 nxctl->nxctl_flags |= NEXUSCTLF_NOFDREF;
1174 }
1175 }
1176
1177 static struct nxctl *
nxctl_alloc(struct proc * p,struct fileproc * fp,zalloc_flags_t how)1178 nxctl_alloc(struct proc *p, struct fileproc *fp, zalloc_flags_t how)
1179 {
1180 struct nxctl *nxctl = zalloc_flags(nxctl_zone, how);
1181
1182 if (nxctl != NULL) {
1183 nxctl_init(nxctl, p, fp);
1184 }
1185 return nxctl;
1186 }
1187
1188 static void
nxctl_free(struct nxctl * nxctl)1189 nxctl_free(struct nxctl *nxctl)
1190 {
1191 ASSERT(nxctl->nxctl_refcnt == 0);
1192 ASSERT(!(nxctl->nxctl_flags & NEXUSCTLF_ATTACHED));
1193 kauth_cred_unref(&nxctl->nxctl_cred);
1194 lck_mtx_destroy(&nxctl->nxctl_lock, &nexus_lock_group);
1195 SK_D("nxctl %p FREE", SK_KVA(nxctl));
1196 if (!(nxctl->nxctl_flags & NEXUSCTLF_KERNEL)) {
1197 zfree(nxctl_zone, nxctl);
1198 }
1199 }
1200
1201 static void
nxctl_retain_locked(struct nxctl * nxctl)1202 nxctl_retain_locked(struct nxctl *nxctl)
1203 {
1204 SK_LOCK_ASSERT_HELD();
1205
1206 nxctl->nxctl_refcnt++;
1207 ASSERT(nxctl->nxctl_refcnt != 0);
1208 }
1209
/* Locked wrapper around nxctl_retain_locked(). */
void
nxctl_retain(struct nxctl *nxctl)
{
	SK_LOCK();
	nxctl_retain_locked(nxctl);
	SK_UNLOCK();
}
1217
1218 static int
nxctl_release_locked(struct nxctl * nxctl)1219 nxctl_release_locked(struct nxctl *nxctl)
1220 {
1221 int oldref = nxctl->nxctl_refcnt;
1222
1223 SK_LOCK_ASSERT_HELD();
1224
1225 ASSERT(nxctl->nxctl_refcnt != 0);
1226 if (--nxctl->nxctl_refcnt == 0) {
1227 nxctl_free(nxctl);
1228 }
1229
1230 return oldref == 1;
1231 }
1232
/*
 * Locked wrapper around nxctl_release_locked(); returns non-zero when
 * the final reference was dropped.
 */
int
nxctl_release(struct nxctl *nxctl)
{
	int last;

	SK_LOCK();
	last = nxctl_release_locked(nxctl);
	SK_UNLOCK();

	return last;
}
1244
/*
 * Destructor: close the controller and drop the final reference.
 * XXX -fbounds-safety: this historically took a void *; every caller
 * passes a struct nxctl *, hence the explicit type.  There is no
 * matching nxctl_ctor.
 */
void
nxctl_dtor(struct nxctl *arg)
{
	nxctl_close(arg);

	SK_LOCK();
	(void) nxctl_release_locked(arg);
	SK_UNLOCK();
}
1259
/*
 * Notify an external nexus provider that a channel is connecting.
 *
 * Runs the provider's pre-connect callback, sets up the rings (or the
 * netif logical-link default queues), then runs the connected callback.
 * Entered with SK_LOCK and the channel lock held; both are dropped
 * around the provider callbacks (which may block) and reacquired before
 * returning.  On any failure, partially-established state is unwound
 * via nxprov_advise_disconnect().
 */
int
nxprov_advise_connect(struct kern_nexus *nx, struct kern_channel *ch,
    struct proc *p)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	int err = 0;

	ASSERT(!(ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)));
	ASSERT(ch->ch_ctx == NULL);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* nothing to do unless the provider supplied both callbacks */
	if ((ch->ch_flags & CHANF_EXT_SKIP) ||
	    (nxprov->nxprov_ext.nxpi_pre_connect == NULL ||
	    nxprov->nxprov_ext.nxpi_connected == NULL)) {
		return 0;
	}

	/* keep the channel alive while the locks are dropped below */
	ch_retain_locked(ch);
	lck_mtx_unlock(&ch->ch_lock);
	SK_UNLOCK();
	lck_mtx_lock(&ch->ch_lock);

	/* ask the provider to prepare; it may hand back a channel context */
	err = nxprov->nxprov_ext.nxpi_pre_connect(nxprov, p, nx,
	    ch->ch_info->cinfo_nx_port, ch, &ch->ch_ctx);
	if (err != 0) {
		SK_D("ch %p flags %x nx %p pre_connect "
		    "error %d", SK_KVA(ch), ch->ch_flags, SK_KVA(nx), err);
		ch->ch_ctx = NULL;
		goto done;
	}
	/*
	 * Upon ring/slot init failure, this is cleared
	 * by nxprov_advise_disconnect() below.
	 */
	os_atomic_or(&ch->ch_flags, CHANF_EXT_PRECONNECT, relaxed);
	if (NXPROV_LLINK(nxprov)) {
		err = nx_netif_llink_ext_init_default_queues(nx);
	} else {
		err = nx_init_rings(nx, ch);
	}
	if (err != 0) {
		goto done;
	}
	ASSERT(err == 0);
	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT |
	    CHANF_EXT_CONNECTED)) == CHANF_EXT_PRECONNECT);

	/* rings are ready; tell the provider the channel is live */
	err = nxprov->nxprov_ext.nxpi_connected(nxprov, nx, ch);
	if (err != 0) {
		SK_D("ch %p flags %x nx %p connected error %d",
		    SK_KVA(ch), ch->ch_flags, SK_KVA(nx), err);
		goto done;
	}
	os_atomic_or(&ch->ch_flags, CHANF_EXT_CONNECTED, relaxed);
	SK_D("ch %p flags %x nx %p connected",
	    SK_KVA(ch), ch->ch_flags, SK_KVA(nx));


done:
	/* reacquire SK_LOCK before the channel lock (lock ordering) */
	lck_mtx_unlock(&ch->ch_lock);
	SK_LOCK();
	lck_mtx_lock(&ch->ch_lock);
	if ((err != 0) &&
	    (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT))) {
		/* unwind whatever stage was reached before the failure */
		nxprov_advise_disconnect(nx, ch);
	}
	/* caller is expected to hold one, in addition to ourselves */
	VERIFY(ch->ch_refcnt >= 2);
	ch_release_locked(ch);

	return err;
}
1334
/*
 * Notify an external nexus provider that a channel is disconnecting,
 * tearing down whatever nxprov_advise_connect() established.  Safe to
 * call from the connect error path: each stage is undone only if its
 * flag (CHANF_EXT_CONNECTED / CHANF_EXT_PRECONNECT) is set.  Entered
 * with SK_LOCK and the channel lock held; both are dropped around the
 * provider callbacks and reacquired before returning.
 */
void
nxprov_advise_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);

	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* check as we might be called in the error handling path */
	if (ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)) {
		/* keep the channel alive while the locks are dropped */
		ch_retain_locked(ch);
		lck_mtx_unlock(&ch->ch_lock);
		SK_UNLOCK();
		lck_mtx_lock(&ch->ch_lock);

		ASSERT(!(ch->ch_flags & CHANF_EXT_SKIP));
		if (ch->ch_flags & CHANF_EXT_CONNECTED) {
			nxprov->nxprov_ext.nxpi_pre_disconnect(nxprov, nx, ch);
			os_atomic_andnot(&ch->ch_flags, CHANF_EXT_CONNECTED, relaxed);
		}

		/*
		 * Inform the external domain provider that the rings
		 * and slots for this channel are no longer valid.
		 */
		if (NXPROV_LLINK(nxprov)) {
			nx_netif_llink_ext_fini_default_queues(nx);
		} else {
			nx_fini_rings(nx, ch);
		}

		ASSERT(ch->ch_flags & CHANF_EXT_PRECONNECT);
		nxprov->nxprov_ext.nxpi_disconnected(nxprov, nx, ch);
		os_atomic_andnot(&ch->ch_flags, CHANF_EXT_PRECONNECT, relaxed);

		SK_D("ch %p flags %x nx %p disconnected",
		    SK_KVA(ch), ch->ch_flags, SK_KVA(nx));

		/* We're done with this channel */
		ch->ch_ctx = NULL;

		/* reacquire SK_LOCK before the channel lock (lock ordering) */
		lck_mtx_unlock(&ch->ch_lock);
		SK_LOCK();
		lck_mtx_lock(&ch->ch_lock);
		/* caller is expected to hold one, in addition to ourselves */
		VERIFY(ch->ch_refcnt >= 2);
		ch_release_locked(ch);
	}
	ASSERT(!(ch->ch_flags & (CHANF_EXT_CONNECTED | CHANF_EXT_PRECONNECT)));
	ASSERT(ch->ch_ctx == NULL);
}
1386
/*
 * Common back-end for nxprov_create() and nxprov_create_kern():
 * validate the registration against the domain provider, allocate the
 * provider, attach it to the global provider list, and return it with
 * a reference held for the caller.  Returns NULL with *err set on
 * failure.  Called with SK_LOCK held.
 */
static struct kern_nexus_provider *
nxprov_create_common(struct nxctl *nxctl,
    struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
    const struct kern_nexus_provider_init *init, int *err)
{
	struct skmem_region_params srp[SKMEM_REGIONS];
	struct kern_nexus_provider *nxprov = NULL;
	struct nxprov_params nxp;
	uint32_t override = 0;
	uint32_t pp_region_config_flags;
	int i;

	/* the generic callback vector must be able to hold either layout */
	static_assert(sizeof(*init) == sizeof(nxprov->nxprov_ext));
	static_assert(sizeof(*init) >= sizeof(struct kern_nexus_netif_provider_init));

	SK_LOCK_ASSERT_HELD();
	ASSERT(nxctl != NULL && reg != NULL && nxdom_prov != NULL);

	pp_region_config_flags = PP_REGION_CONFIG_MD_MAGAZINE_ENABLE |
	    PP_REGION_CONFIG_BUF_IODIR_BIDIR;
	/*
	 * Special handling for external nexus providers; similar
	 * logic to what's done in kern_pbufpool_create().
	 */
	if (init != NULL) {
		if (init->nxpi_flags & NXPIF_MONOLITHIC) {
			pp_region_config_flags |=
			    PP_REGION_CONFIG_BUF_MONOLITHIC;
		}

		if (init->nxpi_flags & NXPIF_INHIBIT_CACHE) {
			pp_region_config_flags |=
			    PP_REGION_CONFIG_BUF_NOCACHE;
		}
	}

	/*
	 * For network devices, set the packet metadata memory as persistent
	 * so that it is wired at segment creation. This allows us to access
	 * it with preemption disabled, as well as for rdar://problem/46511741.
	 */
	if (nxdom_prov->nxdom_prov_dom->nxdom_type == NEXUS_TYPE_NET_IF) {
		pp_region_config_flags |= PP_REGION_CONFIG_MD_PERSISTENT;
	}

	/* process and validate provider parameters */
	if ((*err = nxdom_prov_validate_params(nxdom_prov, reg,
	    &nxp, srp, override, pp_region_config_flags)) != 0) {
		goto done;
	}

	/* Z_WAITOK allocation; no NULL check needed before the deref below */
	nxprov = nxprov_alloc(nxdom_prov, Z_WAITOK);
	ASSERT(nxprov->nxprov_dom_prov == nxdom_prov);

	/* attach to the global provider list and give it an identity */
	STAILQ_INIT(&nxprov->nxprov_nx_head);
	STAILQ_INSERT_TAIL(&nxprov_head, nxprov, nxprov_link);
	nxprov->nxprov_flags |= NXPROVF_ATTACHED;
	nxprov->nxprov_ctl = nxctl;
	uuid_generate_random(nxprov->nxprov_uuid);
	bcopy(&nxp, nxprov->nxprov_params, sizeof(struct nxprov_params));

	if (init != NULL) {
		/* record the external callbacks (netif or generic layout) */
		if (init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF) {
			ASSERT(NXPROV_LLINK(nxprov));
			bcopy(init, &nxprov->nxprov_netif_ext,
			    sizeof(nxprov->nxprov_netif_ext));
		} else {
			ASSERT(!NXPROV_LLINK(nxprov));
			ASSERT(init->nxpi_version ==
			    KERN_NEXUS_PROVIDER_CURRENT_VERSION);
			bcopy(init, &nxprov->nxprov_ext, sizeof(*init));
		}
		nxprov->nxprov_flags |= NXPROVF_EXTERNAL;
	}

	/* store validated region parameters to the provider */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		nxprov->nxprov_region_params[i] = srp[i];
	}

	if (nxprov->nxprov_flags & NXPROVF_EXTERNAL) {
		uint32_t nxpi_flags = nxprov->nxprov_ext.nxpi_flags;

		if (nxpi_flags & NXPIF_VIRTUAL_DEVICE) {
			nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
		}
	} else if (nxdom_prov->nxdom_prov_dom->nxdom_type !=
	    NEXUS_TYPE_NET_IF) {
		/*
		 * Treat non-netif built-in nexus providers as those
		 * meant for inter-process communications, i.e. there
		 * is no actual networking hardware involved.
		 */
		nxprov->nxprov_flags |= NXPROVF_VIRTUAL_DEVICE;
	}

	nxprov_retain_locked(nxprov);   /* one for being in the list */
	nxprov_retain_locked(nxprov);   /* one for the caller */

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov %p UUID %s", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr));
#endif /* SK_LOG */

done:
	return nxprov;
}
1495
1496 struct kern_nexus_provider *
nxprov_create(struct proc * p,struct nxctl * nxctl,struct nxprov_reg * reg,int * err)1497 nxprov_create(struct proc *p, struct nxctl *nxctl, struct nxprov_reg *reg,
1498 int *err)
1499 {
1500 struct nxprov_params *nxp = ®->nxpreg_params;
1501 struct kern_nexus_domain_provider *nxdom_prov = NULL;
1502 struct kern_nexus_provider *nxprov = NULL;
1503
1504 NXCTL_LOCK_ASSERT_HELD(nxctl);
1505
1506 ASSERT(nxctl->nxctl_cred != proc_ucred_unsafe(kernproc));
1507 *err = 0;
1508
1509 switch (nxp->nxp_type) {
1510 case NEXUS_TYPE_USER_PIPE: /* only for userland */
1511 *err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
1512 PRIV_SKYWALK_REGISTER_USER_PIPE);
1513 break;
1514
1515 case NEXUS_TYPE_FLOW_SWITCH: /* allowed for userland */
1516 *err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
1517 PRIV_SKYWALK_REGISTER_FLOW_SWITCH);
1518 break;
1519
1520 case NEXUS_TYPE_NET_IF: /* allowed for userland */
1521 *err = skywalk_priv_check_cred(p, nxctl->nxctl_cred,
1522 PRIV_SKYWALK_REGISTER_NET_IF);
1523 break;
1524
1525 case NEXUS_TYPE_KERNEL_PIPE: /* only for kernel */
1526 default:
1527 *err = EINVAL;
1528 goto done;
1529 }
1530
1531 if (*err != 0) {
1532 goto done;
1533 }
1534
1535 ASSERT(nxp->nxp_type < NEXUS_TYPE_MAX);
1536 if ((nxdom_prov = nxdom_prov_default[nxp->nxp_type]) == NULL) {
1537 *err = ENXIO;
1538 goto done;
1539 }
1540
1541 #if CONFIG_NEXUS_NETIF
1542 /* make sure netif_compat is the default here */
1543 ASSERT(nxp->nxp_type != NEXUS_TYPE_NET_IF ||
1544 strbufcmp(nxdom_prov->nxdom_prov_name, sizeof(nxdom_prov->nxdom_prov_name),
1545 NEXUS_PROVIDER_NET_IF_COMPAT, sizeof(NEXUS_PROVIDER_NET_IF_COMPAT)) == 0);
1546 #endif /* CONFIG_NEXUS_NETIF */
1547
1548 SK_LOCK();
1549 /* callee holds a reference for our caller upon success */
1550 nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, NULL, err);
1551 SK_UNLOCK();
1552 done:
1553 return nxprov;
1554 }
1555
1556 struct kern_nexus_provider *
nxprov_create_kern(struct nxctl * nxctl,struct kern_nexus_domain_provider * nxdom_prov,struct nxprov_reg * reg,const struct kern_nexus_provider_init * init,int * err)1557 nxprov_create_kern(struct nxctl *nxctl,
1558 struct kern_nexus_domain_provider *nxdom_prov, struct nxprov_reg *reg,
1559 const struct kern_nexus_provider_init *init, int *err)
1560 {
1561 struct nxprov_params *nxp = ®->nxpreg_params;
1562 struct kern_nexus_provider *nxprov = NULL;
1563
1564 NXCTL_LOCK_ASSERT_HELD(nxctl);
1565 SK_LOCK_ASSERT_HELD();
1566
1567 ASSERT(nxctl->nxctl_cred == proc_ucred_unsafe(kernproc));
1568 ASSERT(nxp->nxp_type == nxdom_prov->nxdom_prov_dom->nxdom_type);
1569 ASSERT(init == NULL ||
1570 init->nxpi_version == KERN_NEXUS_PROVIDER_CURRENT_VERSION ||
1571 init->nxpi_version == KERN_NEXUS_PROVIDER_VERSION_NETIF);
1572
1573 *err = 0;
1574
1575 switch (nxp->nxp_type) {
1576 case NEXUS_TYPE_NET_IF:
1577 break;
1578 case NEXUS_TYPE_KERNEL_PIPE:
1579 if (init == NULL) {
1580 *err = EINVAL;
1581 goto done;
1582 }
1583 break;
1584 case NEXUS_TYPE_FLOW_SWITCH:
1585 if (init != NULL) {
1586 *err = EINVAL;
1587 goto done;
1588 }
1589 break;
1590
1591 case NEXUS_TYPE_USER_PIPE: /* only for userland */
1592 default:
1593 *err = EINVAL;
1594 goto done;
1595 }
1596
1597 /* callee holds a reference for our caller upon success */
1598 nxprov = nxprov_create_common(nxctl, nxdom_prov, reg, init, err);
1599
1600 done:
1601 return nxprov;
1602 }
1603
1604 int
nxprov_destroy(struct nxctl * nxctl,const uuid_t nxprov_uuid)1605 nxprov_destroy(struct nxctl *nxctl, const uuid_t nxprov_uuid)
1606 {
1607 struct kern_nexus_provider *nxprov = NULL;
1608 int err = 0;
1609
1610 NXCTL_LOCK_ASSERT_HELD(nxctl);
1611
1612 SK_LOCK();
1613
1614 STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
1615 if (nxctl == nxprov->nxprov_ctl &&
1616 uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
1617 nxprov_retain_locked(nxprov);
1618 break;
1619 }
1620 }
1621
1622 if (nxprov == NULL) {
1623 err = ENOENT;
1624 } else {
1625 err = nxprov_close(nxprov, TRUE);
1626 }
1627
1628 if (nxprov != NULL) {
1629 (void) nxprov_release_locked(nxprov);
1630 }
1631
1632 SK_UNLOCK();
1633
1634 return err;
1635 }
1636
/*
 * Close a nexus provider: close every nexus created on it, then either
 * detach it immediately (when no nexuses remain) or mark it
 * NXPROVF_CLOSED so the detach happens when its last nexus is
 * destroyed.  Returns EALREADY if already closed.
 */
int
nxprov_close(struct kern_nexus_provider *nxprov, boolean_t locked)
{
	int err = 0;

	if (!locked) {
		SK_LOCK();
	}

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nxprov %p UUID %s flags 0x%x", SK_KVA(nxprov),
	    sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
	    nxprov->nxprov_flags);
#endif /* SK_LOG */

	if (nxprov->nxprov_flags & NXPROVF_CLOSED) {
		err = EALREADY;
	} else {
		struct kern_nexus *nx, *tnx;

		/* detach from the controller: no further creations */
		nxprov->nxprov_ctl = NULL;

		/* hold a ref on each nexus across its close */
		STAILQ_FOREACH_SAFE(nx, &nxprov->nxprov_nx_head,
		    nx_prov_link, tnx) {
			nx_retain_locked(nx);
			(void) nx_close(nx, TRUE);
			(void) nx_release_locked(nx);
		}

		if (STAILQ_EMPTY(&nxprov->nxprov_nx_head)) {
			/* no nexus created on this, so detach now */
			nxprov_detach(nxprov, TRUE);
		} else {
			/* detach when last nexus is destroyed */
			ASSERT(nxprov->nxprov_refcnt > 1);
			nxprov->nxprov_flags |= NXPROVF_CLOSED;
		}
	}

	if (!locked) {
		SK_UNLOCK();
	}

	return err;
}
1685
1686 static void
nxprov_detach(struct kern_nexus_provider * nxprov,boolean_t locked)1687 nxprov_detach(struct kern_nexus_provider *nxprov, boolean_t locked)
1688 {
1689 if (!locked) {
1690 SK_LOCK();
1691 }
1692
1693 SK_LOCK_ASSERT_HELD();
1694
1695 #if SK_LOG
1696 uuid_string_t uuidstr;
1697 SK_D("nxprov %p UUID %s flags 0x%x", SK_KVA(nxprov),
1698 sk_uuid_unparse(nxprov->nxprov_uuid, uuidstr),
1699 nxprov->nxprov_flags);
1700 #endif /* SK_LOG */
1701
1702 ASSERT(nxprov->nxprov_flags & NXPROVF_ATTACHED);
1703 STAILQ_REMOVE(&nxprov_head, nxprov, kern_nexus_provider, nxprov_link);
1704 nxprov->nxprov_flags &= ~NXPROVF_ATTACHED;
1705
1706 /* caller must hold an extra ref */
1707 ASSERT(nxprov->nxprov_refcnt > 1);
1708 (void) nxprov_release_locked(nxprov);
1709
1710 if (!locked) {
1711 SK_UNLOCK();
1712 }
1713 }
1714
1715 static struct kern_nexus_provider *
nxprov_alloc(struct kern_nexus_domain_provider * nxdom_prov,zalloc_flags_t how)1716 nxprov_alloc(struct kern_nexus_domain_provider *nxdom_prov, zalloc_flags_t how)
1717 {
1718 struct kern_nexus_provider *nxprov;
1719 struct nxprov_params *nxp;
1720
1721 ASSERT(nxdom_prov != NULL);
1722
1723 nxp = nxprov_params_alloc(how);
1724 if (nxp == NULL) {
1725 SK_ERR("Failed to allocate nxprov_params");
1726 return NULL;
1727 }
1728
1729 nxprov = zalloc_flags(nxprov_zone, how | Z_ZERO);
1730 if (nxprov == NULL) {
1731 SK_ERR("Failed to allocate nxprov");
1732 nxprov_params_free(nxp);
1733 return NULL;
1734 }
1735
1736 nxprov->nxprov_dom_prov = nxdom_prov;
1737 nxprov->nxprov_params = nxp;
1738 /* hold a reference for nxprov */
1739 nxdom_prov_retain_locked(nxdom_prov);
1740
1741 return nxprov;
1742 }
1743
1744 static void
nxprov_free(struct kern_nexus_provider * nxprov)1745 nxprov_free(struct kern_nexus_provider *nxprov)
1746 {
1747 struct kern_nexus_domain_provider *nxdom_prov =
1748 nxprov->nxprov_dom_prov;
1749
1750 SK_LOCK_ASSERT_HELD();
1751
1752 ASSERT(nxdom_prov != NULL);
1753 (void) nxdom_prov_release_locked(nxdom_prov);
1754 nxprov->nxprov_dom_prov = NULL;
1755 ASSERT(nxprov->nxprov_params != NULL);
1756 nxprov_params_free(nxprov->nxprov_params);
1757 nxprov->nxprov_params = NULL;
1758 ASSERT(!(nxprov->nxprov_flags & NXPROVF_ATTACHED));
1759 SK_DF(SK_VERB_MEM, "nxprov %p FREE", SK_KVA(nxprov));
1760 zfree(nxprov_zone, nxprov);
1761 }
1762
1763 static void
nxprov_retain_locked(struct kern_nexus_provider * nxprov)1764 nxprov_retain_locked(struct kern_nexus_provider *nxprov)
1765 {
1766 SK_LOCK_ASSERT_HELD();
1767
1768 nxprov->nxprov_refcnt++;
1769 ASSERT(nxprov->nxprov_refcnt != 0);
1770 }
1771
/* Locked wrapper around nxprov_retain_locked(). */
void
nxprov_retain(struct kern_nexus_provider *nxprov)
{
	SK_LOCK();
	nxprov_retain_locked(nxprov);
	SK_UNLOCK();
}
1779
1780 static int
nxprov_release_locked(struct kern_nexus_provider * nxprov)1781 nxprov_release_locked(struct kern_nexus_provider *nxprov)
1782 {
1783 int oldref = nxprov->nxprov_refcnt;
1784
1785 SK_LOCK_ASSERT_HELD();
1786
1787 ASSERT(nxprov->nxprov_refcnt != 0);
1788 if (--nxprov->nxprov_refcnt == 0) {
1789 nxprov_free(nxprov);
1790 }
1791
1792 return oldref == 1;
1793 }
1794
/*
 * Locked wrapper around nxprov_release_locked(); returns non-zero when
 * the final reference was dropped.
 */
int
nxprov_release(struct kern_nexus_provider *nxprov)
{
	int last;

	SK_LOCK();
	last = nxprov_release_locked(nxprov);
	SK_UNLOCK();

	return last;
}
1806
1807 struct nxprov_params *
nxprov_params_alloc(zalloc_flags_t how)1808 nxprov_params_alloc(zalloc_flags_t how)
1809 {
1810 return zalloc_flags(nxprov_params_zone, how | Z_ZERO);
1811 }
1812
1813 void
nxprov_params_free(struct nxprov_params * nxp)1814 nxprov_params_free(struct nxprov_params *nxp)
1815 {
1816 SK_DF(SK_VERB_MEM, "nxp %p FREE", SK_KVA(nxp));
1817 zfree(nxprov_params_zone, nxp);
1818 }
1819
1820 static int
nx_check_pp(struct kern_nexus_provider * nxprov,struct kern_pbufpool * pp)1821 nx_check_pp(struct kern_nexus_provider *nxprov, struct kern_pbufpool *pp)
1822 {
1823 struct kern_nexus_domain_provider *nxdom_prov = nxprov->nxprov_dom_prov;
1824
1825 if ((pp->pp_flags & (PPF_EXTERNAL | PPF_CLOSED)) != PPF_EXTERNAL) {
1826 SK_ERR("Rejecting \"%s\" built-in pp", pp->pp_name);
1827 return ENOTSUP;
1828 }
1829
1830 /*
1831 * Require that the nexus domain metadata type and the
1832 * metadata type of the caller-provided pbufpool match.
1833 */
1834 if (nxdom_prov->nxdom_prov_dom->nxdom_md_type !=
1835 pp->pp_md_type ||
1836 nxdom_prov->nxdom_prov_dom->nxdom_md_subtype !=
1837 pp->pp_md_subtype) {
1838 SK_ERR("Mismatch in metadata type/subtype "
1839 "(%u/%u != %u/%u)", pp->pp_md_type,
1840 nxdom_prov->nxdom_prov_dom->nxdom_md_type,
1841 pp->pp_md_subtype,
1842 nxdom_prov->nxdom_prov_dom->nxdom_md_subtype);
1843 return EINVAL;
1844 }
1845
1846 /*
1847 * Require that the nexus provider memory configuration
1848 * has the same impedance as the caller-provided one.
1849 * Both need to be lacking or present; if one of them
1850 * is set and the other isn't, then we bail.
1851 */
1852 if (!!(PP_BUF_REGION_DEF(pp)->skr_mode & SKR_MODE_MONOLITHIC) ^
1853 !!(nxprov->nxprov_ext.nxpi_flags & NXPIF_MONOLITHIC)) {
1854 SK_ERR("Memory config mismatch: monolithic mode");
1855 return EINVAL;
1856 }
1857
1858 return 0;
1859 }
1860
/*
 * Instantiate a nexus on an existing provider owned by this controller.
 * Validates the requested domain type and any caller-supplied TX/RX
 * packet pools, allocates and initializes the nexus, runs the domain
 * constructor, and links the nexus onto the provider list and the
 * global RB tree.  On success the caller receives a referenced nexus;
 * on failure NULL is returned with *err set.
 */
struct kern_nexus *
nx_create(struct nxctl *nxctl, const uuid_t nxprov_uuid,
    const nexus_type_t dom_type, const void *nx_ctx,
    nexus_ctx_release_fn_t nx_ctx_release, struct kern_pbufpool *tx_pp,
    struct kern_pbufpool *rx_pp, int *err)
{
	struct kern_nexus_domain_provider *nxdom_prov;
	struct kern_nexus_provider *nxprov = NULL;
	struct kern_nexus *nx = NULL;
#if SK_LOG
	uuid_string_t uuidstr;
#endif /* SK_LOG */

	NXCTL_LOCK_ASSERT_HELD(nxctl);

	ASSERT(dom_type < NEXUS_TYPE_MAX);
	ASSERT(!uuid_is_null(nxprov_uuid));
	*err = 0;

	SK_LOCK();

	/* locate the provider; it must belong to this controller */
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		if (nxctl == nxprov->nxprov_ctl &&
		    uuid_compare(nxprov_uuid, nxprov->nxprov_uuid) == 0) {
			break;
		}
	}

	if (nxprov == NULL || (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		SK_ERR("Provider not found or has been closed");
		*err = ENOENT;
		goto done;
	}

	/* the provider's domain must match the requested type, if given */
	nxdom_prov = nxprov->nxprov_dom_prov;
	if (dom_type != NEXUS_TYPE_UNDEFINED &&
	    (nxdom_prov->nxdom_prov_dom->nxdom_type != dom_type)) {
		SK_ERR("Mismatch in domain type (0x%u != 0x%u)",
		    dom_type, nxdom_prov->nxdom_prov_dom->nxdom_type);
		nxdom_prov = NULL;
		nxprov = NULL;
		*err = ENODEV;
		goto done;
	}

	/* netif logical-link providers must supply both packet pools */
	if ((dom_type == NEXUS_TYPE_NET_IF) && NXPROV_LLINK(nxprov) &&
	    (!tx_pp || !rx_pp)) {
#if SK_LOG
		SK_ERR("TX/RX packet pool is required for netif logical link "
		    "nexus provider UUID: %s",
		    sk_uuid_unparse(nxprov_uuid, uuidstr));
#endif /* SK_LOG */
		nxdom_prov = NULL;
		nxprov = NULL;
		*err = EINVAL;
		goto done;
	}

	/* any supplied pool must be compatible with this provider */
	if ((tx_pp != NULL && (*err = nx_check_pp(nxprov, tx_pp)) != 0) ||
	    (rx_pp != NULL && (*err = nx_check_pp(nxprov, rx_pp)) != 0)) {
		goto done;
	}

	nx = nx_alloc(Z_WAITOK);

	/* identity, channel lists, and interface-advisory state */
	STAILQ_INIT(&nx->nx_ch_head);
	STAILQ_INIT(&nx->nx_ch_nonxref_head);
	lck_rw_init(&nx->nx_ch_if_adv_lock, &nexus_lock_group,
	    &nexus_lock_attr);
	STAILQ_INIT(&nx->nx_ch_if_adv_head);
	uuid_generate_random(nx->nx_uuid);
	nx->nx_prov = nxprov;
	nx->nx_ctx = __DECONST(void *, nx_ctx);
	nx->nx_ctx_release = nx_ctx_release;
	nx->nx_id = nxdom_prov->nxdom_prov_gencnt++;

	if (tx_pp != NULL) {
		nx->nx_tx_pp = tx_pp;
		pp_retain(tx_pp);       /* released by nx_free */
	}

	if (rx_pp != NULL) {
		nx->nx_rx_pp = rx_pp;
		pp_retain(rx_pp);       /* released by nx_free */
	}

	/* this nexus is alive; tell the nexus constructor to set it up */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor != NULL) {
		*err = nxprov->nxprov_dom_prov->nxdom_prov_nx_ctor(nx);
		if (*err != 0) {
			/* detach from provider; nx_free below reclaims it */
			nx->nx_prov = NULL;
			goto done;
		}
	}

	nxprov_retain_locked(nxprov);   /* hold a ref on the nexus reg */

	STAILQ_INSERT_TAIL(&nxprov->nxprov_nx_head, nx, nx_prov_link);
	nxprov->nxprov_nx_count++;
	RB_INSERT(kern_nexus_tree, &nx_head, nx);
	os_atomic_or(&nx->nx_flags, NXF_ATTACHED, relaxed);

	nx_retain_locked(nx);   /* one for the provider list */
	nx_retain_locked(nx);   /* one for the global list */
	nx_retain_locked(nx);   /* one for the caller */

#if SK_LOG
	SK_D("nexus %p (%s:%s) UUID %s", SK_KVA(nx),
	    nxdom_prov->nxdom_prov_dom->nxdom_name,
	    nxdom_prov->nxdom_prov_name, sk_uuid_unparse(nx->nx_uuid, uuidstr));
#endif /* SK_LOG */
done:
	SK_UNLOCK();

	/* unwind the partially-built nexus on any failure */
	if (*err != 0) {
		if (nx != NULL) {
			nx_free(nx);
			nx = NULL;
		}
	}
	return nx;
}
1983
1984 int
nx_destroy(struct nxctl * nxctl,const uuid_t nx_uuid)1985 nx_destroy(struct nxctl *nxctl, const uuid_t nx_uuid)
1986 {
1987 struct kern_nexus *nx = NULL;
1988 struct kern_nexus find;
1989 int err = 0;
1990
1991 NXCTL_LOCK_ASSERT_HELD(nxctl);
1992
1993 SK_LOCK();
1994
1995 uuid_copy(find.nx_uuid, nx_uuid);
1996 nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
1997 if (nx != NULL && nxctl != NX_PROV(nx)->nxprov_ctl) {
1998 nx = NULL;
1999 }
2000
2001 if (nx != NULL) {
2002 nx_retain_locked(nx);
2003 }
2004
2005 if (nx == NULL) {
2006 err = ENOENT;
2007 } else {
2008 /* prevent any opens */
2009 os_atomic_or(&nx->nx_flags, NXF_INVALIDATED, relaxed);
2010 err = nx_close(nx, TRUE);
2011 (void) nx_release_locked(nx);
2012 }
2013
2014 SK_UNLOCK();
2015
2016 return err;
2017 }
2018
2019 static inline int
nx_cmp(const struct kern_nexus * a,const struct kern_nexus * b)2020 nx_cmp(const struct kern_nexus *a, const struct kern_nexus *b)
2021 {
2022 return uuid_compare(a->nx_uuid, b->nx_uuid);
2023 }
2024
2025 struct kern_nexus *
nx_find(const uuid_t nx_uuid,boolean_t locked)2026 nx_find(const uuid_t nx_uuid, boolean_t locked)
2027 {
2028 struct kern_nexus *nx = NULL;
2029 struct kern_nexus find;
2030
2031 if (!locked) {
2032 SK_LOCK();
2033 }
2034
2035 SK_LOCK_ASSERT_HELD();
2036
2037 uuid_copy(find.nx_uuid, nx_uuid);
2038 nx = RB_FIND(kern_nexus_tree, &nx_head, &find);
2039 if (nx != NULL && (nx->nx_flags & NXF_CLOSED)) {
2040 nx = NULL;
2041 }
2042
2043 /* return reference to caller */
2044 if (nx != NULL) {
2045 nx_retain_locked(nx);
2046 }
2047
2048 if (!locked) {
2049 SK_UNLOCK();
2050 }
2051
2052 return nx;
2053 }
2054
2055 int
nx_close(struct kern_nexus * nx,boolean_t locked)2056 nx_close(struct kern_nexus *nx, boolean_t locked)
2057 {
2058 int err = 0;
2059
2060 if (!locked) {
2061 SK_LOCK();
2062 }
2063
2064 SK_LOCK_ASSERT_HELD();
2065
2066
2067 if (nx->nx_flags & NXF_CLOSED) {
2068 err = EALREADY;
2069 } else {
2070 #if SK_LOG
2071 uuid_string_t uuidstr;
2072 SK_D("nexus %p (%s:%s) UUID %s flags 0x%x", SK_KVA(nx),
2073 NX_DOM(nx)->nxdom_name, NX_DOM_PROV(nx)->nxdom_prov_name,
2074 sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags);
2075 #endif /* SK_LOG */
2076
2077 if (STAILQ_EMPTY(&nx->nx_ch_head)) {
2078 /* no regular channels open to it, so detach now */
2079 nx_detach(nx);
2080 } else {
2081 /* detach when the last channel closes */
2082 ASSERT(nx->nx_refcnt > 3);
2083 os_atomic_or(&nx->nx_flags, NXF_CLOSED, relaxed);
2084 }
2085 }
2086
2087 if (!locked) {
2088 SK_UNLOCK();
2089 }
2090
2091 return err;
2092 }
2093
2094 void
nx_stop(struct kern_nexus * nx)2095 nx_stop(struct kern_nexus *nx)
2096 {
2097 struct kern_nexus_provider *nxprov = nx->nx_prov;
2098
2099 SK_LOCK_ASSERT_HELD();
2100
2101 /* send a stop message */
2102 if (nxprov->nxprov_dom_prov->nxdom_prov_nx_stop != NULL) {
2103 nxprov->nxprov_dom_prov->nxdom_prov_nx_stop(nx);
2104 }
2105 }
2106
/*
 * Final detach of a nexus: run the domain destructor, unlink the nexus
 * from its provider and the global RB tree, release the caller context,
 * and drop the two list references.  If this was the provider's last
 * nexus and the provider is already closed, complete the provider's
 * postponed detach as well.  SK_LOCK must be held.
 */
void
nx_detach(struct kern_nexus *nx)
{
	struct kern_nexus_provider *nxprov = nx->nx_prov;

	SK_LOCK_ASSERT_HELD();

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("nexus %p UUID %s flags 0x%x", SK_KVA(nx),
	    sk_uuid_unparse(nx->nx_uuid, uuidstr), nx->nx_flags);
#endif /* SK_LOG */

	/* Caller must hold extra refs, on top of the two in reg/global lists */
	ASSERT(nx->nx_refcnt >= 3);
	ASSERT(nx->nx_flags & NXF_ATTACHED);

	/* this nexus is done; let the nexus destructor do final cleanups */
	if (nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor != NULL) {
		nxprov->nxprov_dom_prov->nxdom_prov_nx_dtor(nx);
	}

	/* every channel must be gone by the time we get here */
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	STAILQ_REMOVE(&nxprov->nxprov_nx_head, nx, kern_nexus, nx_prov_link);
	nxprov->nxprov_nx_count--;
	RB_REMOVE(kern_nexus_tree, &nx_head, nx);
	os_atomic_andnot(&nx->nx_flags, NXF_ATTACHED, relaxed);
	nx->nx_prov = NULL;
	/* hand the opaque context back to its owner, if requested */
	if (nx->nx_ctx_release != NULL) {
		nx->nx_ctx_release(nx->nx_ctx);
	}
	nx->nx_ctx = NULL;

	(void) nx_release_locked(nx);   /* one for the reg list */
	(void) nx_release_locked(nx);   /* one for the global list */

	/*
	 * If this was the last nexus and the provider has been closed,
	 * detach the provider and finish up the postponed job.
	 */
	if (STAILQ_EMPTY(&nxprov->nxprov_nx_head) &&
	    (nxprov->nxprov_flags & NXPROVF_CLOSED)) {
		nxprov_detach(nxprov, TRUE);
	}
	(void) nxprov_release_locked(nxprov);
}
2155
/*
 * Allocate and initialize the nexus advisory region for a flowswitch
 * or netif nexus.  The region object holds a small metadata header
 * immediately followed by the type-specific advisory structure.
 * Returns 0, or ENOMEM if the backing region cannot be created.
 */
int
nx_advisory_alloc(struct kern_nexus *nx, const char *name,
    struct skmem_region_params *srp_nexusadv, nexus_advisory_type_t type)
{
	struct __kern_nexus_adv_metadata *adv_md;
	uint32_t msize = 0;
	/* -fbounds-safety: why do we need maddr? */
	void *__sized_by(msize) maddr = NULL;

	/* header and either payload must fit the advisory object size */
	static_assert(sizeof(struct __kern_nexus_adv_metadata) == sizeof(uint64_t));
	static_assert((sizeof(struct sk_nexusadv) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	static_assert((sizeof(struct netif_nexus_advisory) +
	    sizeof(struct __kern_nexus_adv_metadata)) <= NX_NEXUSADV_MAX_SZ);
	ASSERT(nx->nx_adv.nxv_reg == NULL);
	ASSERT(nx->nx_adv.nxv_adv == NULL);
	ASSERT(type == NEXUS_ADVISORY_TYPE_FLOWSWITCH ||
	    type == NEXUS_ADVISORY_TYPE_NETIF);

	if ((nx->nx_adv.nxv_reg = skmem_region_create(name, srp_nexusadv,
	    NULL, NULL, NULL)) == NULL) {
		return ENOMEM;
	}

	/* SKMEM_PANIC: allocation failure panics, hence no NULL check */
	nx->nx_adv.nxv_adv = skmem_region_alloc(nx->nx_adv.nxv_reg, &maddr,
	    NULL, NULL, (SKMEM_NOSLEEP | SKMEM_PANIC),
	    nx->nx_adv.nxv_reg->skr_c_obj_size, &msize);
	nx->nx_adv.nxv_adv_size = nx->nx_adv.nxv_reg->skr_c_obj_size;
	adv_md = nx->nx_adv.nxv_adv;
	adv_md->knam_version = NX_ADVISORY_MD_CURRENT_VERSION;
	adv_md->knam_type = type;
	adv_md->__reserved = 0;
	nx->nx_adv.nxv_adv_type = type;
	/* the type-specific advisory starts right after the metadata */
	nx->nx_adv.flowswitch_nxv_adv = (void *)(adv_md + 1);
	if (type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
		nx->nx_adv.flowswitch_nxv_adv->nxadv_ver =
		    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION;
	} else {
		/*
		 * NOTE(review): netif_nxv_adv presumably aliases the same
		 * storage as flowswitch_nxv_adv (union in nx_adv) — confirm
		 * against the structure definition.
		 */
		nx->nx_adv.netif_nxv_adv->nna_version =
		    NX_NETIF_ADVISORY_CURRENT_VERSION;
	}
	return 0;
}
2199
2200 void
nx_advisory_free(struct kern_nexus * nx)2201 nx_advisory_free(struct kern_nexus *nx)
2202 {
2203 if (nx->nx_adv.nxv_reg != NULL) {
2204 ASSERT(nx->nx_adv.nxv_adv != NULL);
2205 skmem_region_free(nx->nx_adv.nxv_reg,
2206 nx->nx_adv.nxv_adv, NULL);
2207 nx->nx_adv.nxv_adv = NULL;
2208 nx->nx_adv.nxv_adv_size = 0;
2209 nx->nx_adv.nxv_adv_type = NEXUS_ADVISORY_TYPE_INVALID;
2210 nx->nx_adv.flowswitch_nxv_adv = NULL;
2211 skmem_region_release(nx->nx_adv.nxv_reg);
2212 nx->nx_adv.nxv_reg = NULL;
2213 }
2214
2215 ASSERT(nx->nx_adv.nxv_reg == NULL);
2216 ASSERT(nx->nx_adv.nxv_adv == NULL);
2217 ASSERT(nx->nx_adv.nxv_adv_type == NEXUS_ADVISORY_TYPE_INVALID);
2218 ASSERT(nx->nx_adv.flowswitch_nxv_adv == NULL);
2219 }
2220
2221 static struct kern_nexus *
nx_alloc(zalloc_flags_t how)2222 nx_alloc(zalloc_flags_t how)
2223 {
2224 SK_LOCK_ASSERT_HELD();
2225
2226 return zalloc_flags(nx_zone, how | Z_ZERO);
2227 }
2228
/*
 * Final destructor for a nexus; called when the last reference is
 * dropped (see nx_release_locked()).  The nexus must already be
 * detached from its provider and have no channels on any of its lists.
 */
static void
nx_free(struct kern_nexus *nx)
{
	ASSERT(!(nx->nx_flags & NXF_ATTACHED) && nx->nx_prov == NULL);
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_head));
	ASSERT(STAILQ_EMPTY(&nx->nx_ch_nonxref_head));

	/* release all port bindings, info blobs, bitmap and port array */
	nx_port_free_all(nx);

	/* drop packet-pool references, if the nexus held any */
	if (nx->nx_tx_pp != NULL) {
		pp_release(nx->nx_tx_pp);
		nx->nx_tx_pp = NULL;
	}
	if (nx->nx_rx_pp != NULL) {
		pp_release(nx->nx_rx_pp);
		nx->nx_rx_pp = NULL;
	}

	ASSERT(STAILQ_EMPTY(&nx->nx_ch_if_adv_head));
	lck_rw_destroy(&nx->nx_ch_if_adv_lock, &nexus_lock_group);

	SK_DF(SK_VERB_MEM, "nexus %p FREE", SK_KVA(nx));
	zfree(nx_zone, nx);
}
2253
2254 void
nx_retain_locked(struct kern_nexus * nx)2255 nx_retain_locked(struct kern_nexus *nx)
2256 {
2257 SK_LOCK_ASSERT_HELD();
2258
2259 nx->nx_refcnt++;
2260 VERIFY(nx->nx_refcnt > 0);
2261 }
2262
/*
 * Unlocked wrapper around nx_retain_locked(); acquires SK_LOCK
 * for the duration of the retain.
 */
void
nx_retain(struct kern_nexus *nx)
{
	SK_LOCK();
	nx_retain_locked(nx);
	SK_UNLOCK();
}
2270
2271 int
nx_release_locked(struct kern_nexus * nx)2272 nx_release_locked(struct kern_nexus *nx)
2273 {
2274 int oldref = nx->nx_refcnt;
2275
2276 SK_LOCK_ASSERT_HELD();
2277
2278 VERIFY(nx->nx_refcnt > 0);
2279 if (--nx->nx_refcnt == 0) {
2280 nx_free(nx);
2281 }
2282
2283 return oldref == 1;
2284 }
2285
/*
 * Unlocked wrapper around nx_release_locked().  Returns nonzero if
 * this call dropped the final reference.
 */
int
nx_release(struct kern_nexus *nx)
{
	int dropped_last;

	/* we acquire SK_LOCK ourselves; caller must not hold it */
	SK_LOCK_ASSERT_NOTHELD();

	SK_LOCK();
	dropped_last = nx_release_locked(nx);
	SK_UNLOCK();

	return dropped_last;
}
2299
/*
 * Invoke the external provider's per-ring init callback (and per-slot
 * init via nx_init_slots()) on every non-host ring of the channel's
 * adapter.  Must be called between CHANF_EXT_PRECONNECT and
 * CHANF_EXT_CONNECTED.  On any callback failure, rings initialized so
 * far are unwound with nx_fini_rings() and the error is returned.
 */
static int
nx_init_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	boolean_t undo = FALSE;
	int ksd_retains = 0;	/* rings whose slots got CKRF_EXT_SLOTS_INITED */
	enum txrx t;
	int err = 0;

	ASSERT((ch->ch_flags & (CHANF_EXT_PRECONNECT | CHANF_EXT_CONNECTED)) ==
	    CHANF_EXT_PRECONNECT);

	/* nothing to do if the provider has no ring init callback */
	if (nxprov->nxprov_ext.nxpi_ring_init == NULL) {
		return 0;
	}

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			/* skip host rings */
			if (kring->ckr_flags & CKRF_HOST) {
				continue;
			}

			/* provider fills in kring->ckr_ctx on success */
			if ((err = nxprov->nxprov_ext.nxpi_ring_init(
			    nxprov, nx, ch, kring, (kring->ckr_tx == NR_TX),
			    &kring->ckr_ctx)) != 0) {
				SK_D("ch %p flags %x nx %p kr \"%s\" "
				    "(%p) krflags %x ring_init error %d",
				    SK_KVA(ch), ch->ch_flags, SK_KVA(nx),
				    kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, err);
				kring->ckr_ctx = NULL;
				undo = TRUE;
				break;
			}
			kring->ckr_flags |= CKRF_EXT_RING_INITED;

			if ((err = nx_init_slots(nx, kring)) != 0) {
				undo = TRUE;
				break;
			}

			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_retains;
			}
		}
		if (undo) {
			break;
		}
	}

	/*
	 * Note: retain KSD even in case of error, as we have set
	 * CKRF_EXT_SLOTS_INITED flag for some of the rings
	 * nx_fini_rings would take care of release based on it.
	 */
	if (ksd_retains != 0) {
		/*
		 * Mark the kernel slot descriptor region as busy; this
		 * prevents it from being torn-down at channel defunct
		 * time, as we need to invoke the slot_fini() callback
		 * for each slot and we need the descriptors until then.
		 */
		skmem_arena_nexus_sd_set_noidle(skmem_arena_nexus(na->na_arena),
		    ksd_retains);
	}

	if (err != 0) {
		ASSERT(undo);
		/* unwind every ring marked CKRF_EXT_RING_INITED above */
		nx_fini_rings(nx, ch);
	}

	return err;
}
2379
/*
 * Undo nx_init_rings(): invoke the provider's ring_fini callback and
 * tear down slot contexts for every ring that was successfully
 * initialized (CKRF_EXT_RING_INITED).  Also drops the KSD "noidle"
 * retain counts taken in nx_init_rings() once all slot_fini callbacks
 * have run.
 */
static void
nx_fini_rings(struct kern_nexus *nx, struct kern_channel *ch)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct nexus_adapter *na = ch->ch_na;
	int ksd_releases = 0;
	enum txrx t;

	for_rx_tx(t) {
		uint32_t i;

		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];

			/* only rings that completed nxpi_ring_init() */
			if (!(kring->ckr_flags & CKRF_EXT_RING_INITED)) {
				continue;
			}

			ASSERT(!(kring->ckr_flags & CKRF_HOST));
			ASSERT(nxprov->nxprov_ext.nxpi_ring_fini != NULL);
			nxprov->nxprov_ext.nxpi_ring_fini(nxprov, nx, kring);
			kring->ckr_flags &= ~CKRF_EXT_RING_INITED;

			/* count each ring that holds a KSD retain */
			if (kring->ckr_flags & CKRF_EXT_SLOTS_INITED) {
				++ksd_releases;
			}

			/*
			 * Undo the work done in nx_init_slots() and inform
			 * the external domain provider, if applicable, that
			 * the slots for this ring are no longer valid.
			 */
			nx_fini_slots(nx, kring);
			kring->ckr_ctx = NULL;
		}
	}

	if (ksd_releases != 0) {
		/*
		 * Now that we've finished invoking the slot_fini()
		 * callbacks, release the busy retain counts held
		 * earlier in nx_init_rings(). This will allow the
		 * kernel slot descriptor region to be torn down.
		 */
		skmem_arena_nexus_sd_set_noidle(
			skmem_arena_nexus(na->na_arena), -ksd_releases);
	}
}
2428
/*
 * Invoke the provider's slot_init callback for each kernel slot
 * descriptor of the ring, recording the per-slot context argument in
 * ckr_slot_ctxs.  On success the ring is marked
 * CKRF_EXT_SLOTS_INITED; on failure the slots initialized so far are
 * unwound with nx_fini_slots() and the callback's error is returned.
 */
static int
nx_init_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	int err = 0;
	uint32_t i;

	/*
	 * If the slot init callback was not provided, or if the
	 * kring was not created to hold any slot contexts, don't
	 * go any further.
	 */
	if (nxprov->nxprov_ext.nxpi_slot_init == NULL ||
	    kring->ckr_slot_ctxs == NULL) {
		return 0;
	}

	ASSERT(kring->ckr_slot_ctxs_set == 0);
	ASSERT(slot != NULL);

	for (i = 0; i < kring->ckr_num_slots; i++) {
		struct kern_slot_prop *__single slot_ctx_prop = NULL;
		/* -fbounds-safety: slot_ctx is unsafe anyway (mach_vmaddr_t) */
		void *__single slot_ctx_arg = NULL;

		ASSERT(&slot[i] <= kring->ckr_ksds_last);
		if ((err = nxprov->nxprov_ext.nxpi_slot_init(nxprov, nx, kring,
		    &slot[i], i, &slot_ctx_prop, &slot_ctx_arg)) != 0) {
			SK_D("nx %p kr \"%s\" (%p) krflags %x slot %u "
			    "slot_init error %d", SK_KVA(nx), kring->ckr_name,
			    SK_KVA(kring), kring->ckr_flags, i, err);
			break;
		}
		/* we don't want this to be used by client, so verify here */
		ASSERT(slot_ctx_prop == NULL);
		kring->ckr_slot_ctxs[i].slot_ctx_arg = slot_ctx_arg;
		/* ckr_slot_ctxs_set tracks how many slots to unwind */
		kring->ckr_slot_ctxs_set++;
	}

	if (err != 0) {
		/* undo the slots initialized before the failure */
		nx_fini_slots(nx, kring);
	} else {
		kring->ckr_flags |= CKRF_EXT_SLOTS_INITED;
	}

	return err;
}
2477
/*
 * Undo nx_init_slots(): invoke the provider's slot_fini callback for
 * each slot whose context was set (ckr_slot_ctxs_set of them), clear
 * the stored context arguments, and drop CKRF_EXT_SLOTS_INITED.
 * Also used to unwind a partially-initialized ring, in which case
 * only the first ckr_slot_ctxs_set slots are touched.
 */
static void
nx_fini_slots(struct kern_nexus *nx, struct __kern_channel_ring *kring)
{
	struct kern_nexus_provider *nxprov = NX_PROV(nx);
	struct __slot_desc *slot = kring->ckr_ksds;
	uint32_t i;

	/* a fully-inited ring implies the provider has a fini callback */
	ASSERT(!(kring->ckr_flags & CKRF_EXT_SLOTS_INITED) ||
	    nxprov->nxprov_ext.nxpi_slot_fini != NULL);
	ASSERT(slot != NULL || !(kring->ckr_flags & CKRF_EXT_SLOTS_INITED));

	for (i = 0; i < kring->ckr_slot_ctxs_set; i++) {
		ASSERT(slot != NULL && &slot[i] <= kring->ckr_ksds_last);
		if (nxprov->nxprov_ext.nxpi_slot_fini != NULL) {
			nxprov->nxprov_ext.nxpi_slot_fini(nxprov, nx,
			    kring, &slot[i], i);
		}
		if (kring->ckr_slot_ctxs != NULL) {
			kring->ckr_slot_ctxs[i].slot_ctx_arg = 0;
		}
	}
	kring->ckr_slot_ctxs_set = 0;

	/* We're done with this kring */
	kring->ckr_flags &= ~CKRF_EXT_SLOTS_INITED;
}
2504
2505
/*
 * 64-bit mask with range: produces a mask with bits [_beg, _end]
 * (inclusive) set and all others clear, derived from the all-ones
 * NX_PORT_CHUNK_FREE pattern.  Requires 0 <= _beg <= _end <= 63.
 */
#define BMASK64(_beg, _end) \
	((NX_PORT_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))
2509
/*
 * Find the first available nexus port in the half-open range
 * [first, last).  On success *nx_port holds either a currently-free
 * port within the existing map, or an index beyond the current map
 * (nx_port_alloc() grows the map lazily in that case).  Returns EBUSY
 * only when the entire requested range lies within the current map
 * and every port in it is taken.
 */
int
nx_port_find(struct kern_nexus *nx, nexus_port_t first,
    nexus_port_t last, nexus_port_t *nx_port)
{
	int err = 0;

	ASSERT(first < last);
	*nx_port = NEXUS_PORT_ANY;

	if (nx->nx_num_ports == 0 || (first + 1) >= nx->nx_num_ports) {
		/*
		 * Left edge of the range is beyond the current map;
		 * let nx_port_alloc() handle the growing later.
		 */
		*nx_port = first;
	} else {
		nexus_port_size_t fc = (first / NX_PORT_CHUNK);
		nexus_port_size_t lc = (MIN(last, nx->nx_num_ports) / NX_PORT_CHUNK);
		nexus_port_size_t lim = (nx->nx_num_ports / NX_PORT_CHUNK);
		nexus_port_size_t i, j;
		bitmap_t *bmap;

		/*
		 * The right edge of the range is either within or
		 * beyond the current map; scan thru the current
		 * map and find the first available port.
		 */
		for (i = fc; i <= lc; i++) {
			bitmap_t mask;
			nexus_port_size_t beg = 0, end = 63;

			/* clamp the mask to the range edges in edge chunks */
			if (i == fc) {
				beg = (first % NX_PORT_CHUNK);
			}
			if (i == (last / NX_PORT_CHUNK)) {
				end = (last % NX_PORT_CHUNK);
			}

			if (i < lim) {
				bmap = &nx->nx_ports_bmap[i];
				mask = BMASK64(beg, end);

				/* set bit == free; ffsll is 1-based, 0 if none */
				j = (nexus_port_size_t)ffsll((*bmap) & mask);
				if (j == 0) {
					continue;
				}

				--j;
				*nx_port = (i * NX_PORT_CHUNK) + j;
			}
			/* found a port, or walked off the map (i >= lim) */
			break;
		}

		/*
		 * If the requested range is within the current map and we
		 * couldn't find a port, return an err. Otherwise, return
		 * the next port index to trigger growing later.
		 */
		if (*nx_port == NEXUS_PORT_ANY) {
			if (lc == (last / NX_PORT_CHUNK)) {
				err = EBUSY;
				SK_ERR("port unavail in [%u, %u)", first, last);
			} else {
				*nx_port = nx->nx_num_ports;
			}
		}
	}

	SK_DF(SK_VERB_NXPORT, "nx %p nx_port %d (err %d)", SK_KVA(nx),
	    (int)*nx_port, err);

	return err;
}
2583
/*
 * Grow the nexus port table and its free-port bitmap by 'grow' ports
 * (a multiple of NX_PORT_CHUNK), up to the domain's port maximum.
 * New bitmap chunks are marked all-free and new port entries zeroed.
 * Returns EDOM when the request exceeds the domain limit, ENOMEM on
 * allocation failure (the bitmap may already have been grown in that
 * case, which is harmless since nx_num_ports is unchanged).
 */
static int
nx_port_grow(struct kern_nexus *nx, nexus_port_size_t grow)
{
	ASSERT(NXDOM_MAX(NX_DOM(nx), ports) <= NEXUS_PORT_MAX);
	nexus_port_t dom_port_max = (nexus_port_size_t)NXDOM_MAX(NX_DOM(nx), ports);
	struct nx_port_info *ports;
	nexus_port_size_t limit, i, num_ports, old_num_ports;
	bitmap_t *bmap;

	ASSERT(grow > 0 && (grow % NX_PORT_CHUNK) == 0);
	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	/* one bitmap_t word covers exactly one chunk of ports */
	static_assert((sizeof(*bmap) * 8) == NX_PORT_CHUNK);
	ASSERT(powerof2(dom_port_max));
	ASSERT(dom_port_max % NX_PORT_CHUNK == 0);

	old_num_ports = nx->nx_num_ports;
	num_ports = nx->nx_num_ports + grow;
	limit = (nexus_port_size_t)P2ROUNDUP(dom_port_max, NX_PORT_CHUNK);
	if (num_ports > limit) {
		SK_ERR("can't grow, total %u grow %u (new %u > dom_max %u)",
		    nx->nx_num_ports, grow, num_ports, limit);
		return EDOM;
	}

	if ((bmap = sk_realloc_data(nx->nx_ports_bmap,
	    (old_num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    (num_ports / NX_PORT_CHUNK) * sizeof(*bmap),
	    Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		SK_ERR("bmap alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}
	nx->nx_ports_bmap = bmap;
	nx->nx_ports_bmap_size = (num_ports / NX_PORT_CHUNK) * sizeof(*bmap);

	if ((ports = sk_realloc_type_array(struct nx_port_info, old_num_ports,
	    num_ports, nx->nx_ports, Z_WAITOK, skmem_tag_nx_port)) == NULL) {
		/* can't free bmap here, otherwise nexus won't work */
		SK_ERR("nx_ports alloc failed, num_port %u", num_ports);
		return ENOMEM;
	}

	/* initialize the additional new ports */
	bzero(&ports[nx->nx_num_ports], (grow * sizeof(*ports)));

	/* initialize new bitmaps (set all bits) */
	for (i = (nx->nx_num_ports / NX_PORT_CHUNK);
	    i < (num_ports / NX_PORT_CHUNK); i++) {
		bmap[i] = NX_PORT_CHUNK_FREE;
	}

	/*
	 * -fbounds-safety: Not sure if moving nx_ports assignment down here
	 * would cause a regression.
	 */
	nx->nx_ports = ports;
	nx->nx_num_ports = num_ports;

	SK_DF(SK_VERB_NXPORT, "!!! nx %p ports %u/%u, %u ports added",
	    SK_KVA(nx), nx->nx_active_ports, nx->nx_num_ports, grow);

	return 0;
}
2646
2647 int
nx_port_alloc(struct kern_nexus * nx,nexus_port_t nx_port,struct nxbind * nxb,struct nexus_adapter ** na,struct proc * p)2648 nx_port_alloc(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb,
2649 struct nexus_adapter **na, struct proc *p)
2650 {
2651 struct nx_port_info *npi = NULL;
2652 struct nxbind *nxb0;
2653 size_t g;
2654 uint32_t i, j;
2655 bitmap_t *bmap;
2656 bool refonly = false;
2657 int err = 0;
2658
2659 ASSERT(nx_port != NEXUS_PORT_ANY);
2660 ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2661
2662 /* port is zero-based, so adjust here */
2663 if ((nx_port + 1) > nx->nx_num_ports) {
2664 g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
2665 VERIFY(g <= NEXUS_PORT_MAX);
2666 if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
2667 goto done;
2668 }
2669 }
2670 ASSERT(err == 0);
2671 ASSERT(nx_port < nx->nx_num_ports);
2672 npi = &nx->nx_ports[nx_port];
2673 nxb0 = npi->npi_nxb;
2674 i = nx_port / NX_PORT_CHUNK;
2675 j = nx_port % NX_PORT_CHUNK;
2676 bmap = &nx->nx_ports_bmap[i];
2677
2678 if (bit_test(*bmap, j)) {
2679 /* port is not (yet) bound or allocated */
2680 ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
2681 if (p != kernproc && !NX_ANONYMOUS_PROV(nx)) {
2682 /*
2683 * If the port allocation is requested by userland
2684 * and the nexus is non-anonymous, then fail the
2685 * request.
2686 */
2687 err = EACCES;
2688 SK_ERR("user proc alloc on named nexus needs binding");
2689 } else if (na != NULL && *na != NULL) {
2690 /*
2691 * Otherwise claim it (clear bit) if the caller
2692 * supplied an adapter for this port; else, it
2693 * is just an existential check and so there's
2694 * no action needed at this point (we'll skip
2695 * the init below since vpna is NULL).
2696 */
2697 bit_clear(*bmap, j);
2698 }
2699 } else {
2700 /* if port is bound, check if credentials match */
2701 if (nxb0 != NULL && p != kernproc && !NX_ANONYMOUS_PROV(nx) &&
2702 (nxb == NULL || !nxb_is_equal(nxb0, nxb))) {
2703 SK_ERR("nexus binding mismatch");
2704 err = EACCES;
2705 } else {
2706 /*
2707 * If port is already occupied by an adapter,
2708 * see if the client is requesting a reference
2709 * to it; if so, return the adapter. Otherwise,
2710 * if unoccupied and vpna is non-NULL, associate
2711 * it with this nexus port via the below init.
2712 */
2713 if (NPI_NA(npi) != NULL) {
2714 if (na != NULL && *na == NULL) {
2715 *na = NPI_NA(npi);
2716 na_retain_locked(*na);
2717 /* skip the init below */
2718 refonly = true;
2719 } else {
2720 /*
2721 * If the client supplied an adapter
2722 * (regardless of its value) for a
2723 * nexus port that's already occupied,
2724 * then we fail the request.
2725 */
2726 SK_ERR("nexus adapted exits");
2727 err = EEXIST;
2728 }
2729 }
2730 }
2731 }
2732
2733 done:
2734 /* initialize the nexus port and the adapter occupying it */
2735 if (err == 0 && na != NULL && *na != NULL && !refonly) {
2736 ASSERT(nx_port < nx->nx_num_ports);
2737 ASSERT(npi->npi_nah == 0);
2738 ASSERT(nx->nx_active_ports < nx->nx_num_ports);
2739 ASSERT(!bit_test(nx->nx_ports_bmap[nx_port / NX_PORT_CHUNK],
2740 (nx_port % NX_PORT_CHUNK)));
2741
2742 nx->nx_active_ports++;
2743 npi->npi_nah = NPI_NA_ENCODE(*na, NEXUS_PORT_STATE_WORKING);
2744 (*na)->na_nx_port = nx_port;
2745 }
2746
2747 SK_DF(SK_VERB_NXPORT, "nx %p nx_port %d, ports %u/%u (err %d)",
2748 SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports,
2749 err);
2750
2751 return err;
2752 }
2753
2754 void
nx_port_defunct(struct kern_nexus * nx,nexus_port_t nx_port)2755 nx_port_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
2756 {
2757 struct nx_port_info *npi = &nx->nx_ports[nx_port];
2758
2759 npi->npi_nah = NPI_NA_ENCODE(npi->npi_nah,
2760 NEXUS_PORT_STATE_DEFUNCT);
2761 }
2762
/*
 * Release a nexus port previously claimed by nx_port_alloc().  The
 * adapter encoding is cleared; the port only returns to the free
 * bitmap if no binding (npi_nxb) remains on it, so a bound port stays
 * reserved for its owner.
 */
void
nx_port_free(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	bitmap_t *bmap;
	uint32_t i, j;

	ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
	ASSERT(nx_port != NEXUS_PORT_ANY && nx_port < nx->nx_num_ports);
	ASSERT(nx->nx_active_ports != 0);

	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];
	/* a claimed port must have its bit cleared */
	ASSERT(!bit_test(*bmap, j));

	npi = &nx->nx_ports[nx_port];
	npi->npi_nah = 0;
	if (npi->npi_nxb == NULL) {
		/* it's vacant, release it (set bit) */
		bit_set(*bmap, j);
	}

	nx->nx_active_ports--;

	//XXX [email protected] --- try to shrink bitmap & nx_ports ???

	SK_DF(SK_VERB_NXPORT, "--- nx %p nx_port %d, ports %u/%u",
	    SK_KVA(nx), (int)nx_port, nx->nx_active_ports, nx->nx_num_ports);
}
2793
2794 int
nx_port_bind_info(struct kern_nexus * nx,nexus_port_t nx_port,struct nxbind * nxb0,void * info)2795 nx_port_bind_info(struct kern_nexus *nx, nexus_port_t nx_port,
2796 struct nxbind *nxb0, void *info)
2797 {
2798 struct nx_port_info *npi = NULL;
2799 size_t g;
2800 uint32_t i, j;
2801 bitmap_t *bmap;
2802 int err = 0;
2803
2804 ASSERT(nx_port != NEXUS_PORT_ANY);
2805 ASSERT(nx_port < NXDOM_MAX(NX_DOM(nx), ports));
2806 ASSERT((nx->nx_num_ports % NX_PORT_CHUNK) == 0);
2807 ASSERT(nxb0 != NULL);
2808
2809 if ((nx_port) + 1 > nx->nx_num_ports) {
2810 g = P2ROUNDUP((nx_port + 1) - nx->nx_num_ports, NX_PORT_CHUNK);
2811 VERIFY(g <= NEXUS_PORT_MAX);
2812 if ((err = nx_port_grow(nx, (nexus_port_size_t)g)) != 0) {
2813 goto done;
2814 }
2815 }
2816 ASSERT(err == 0);
2817
2818 npi = &nx->nx_ports[nx_port];
2819 i = nx_port / NX_PORT_CHUNK;
2820 j = nx_port % NX_PORT_CHUNK;
2821 bmap = &nx->nx_ports_bmap[i];
2822 if (bit_test(*bmap, j)) {
2823 /* port is not (yet) bound or allocated */
2824 ASSERT(npi->npi_nah == 0 && npi->npi_nxb == NULL);
2825
2826 bit_clear(*bmap, j);
2827 struct nxbind *nxb = nxb_alloc(Z_WAITOK);
2828 nxb_move(nxb0, nxb);
2829 npi->npi_nxb = nxb;
2830 npi->npi_info = info;
2831 /* claim it (clear bit) */
2832 bit_clear(*bmap, j);
2833 ASSERT(err == 0);
2834 } else {
2835 /* port is already taken */
2836 ASSERT(NPI_NA(npi) != NULL || npi->npi_nxb != NULL);
2837 err = EEXIST;
2838 }
2839 done:
2840
2841 SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
2842 "+++ nx %p nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
2843 (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);
2844
2845 return err;
2846 }
2847
/*
 * Bind credentials to a nexus port with no associated per-port info;
 * thin wrapper around nx_port_bind_info().
 */
int
nx_port_bind(struct kern_nexus *nx, nexus_port_t nx_port, struct nxbind *nxb0)
{
	return nx_port_bind_info(nx, nx_port, nxb0, NULL);
}
2853
2854 /*
2855 * -fbounds-safety: all callers pass npi_info. Why don't we just change the
2856 * input type to nx_port_info_header *?
2857 */
2858 static int
nx_port_info_size(struct nx_port_info_header * info,size_t * sz)2859 nx_port_info_size(struct nx_port_info_header *info, size_t *sz)
2860 {
2861 struct nx_port_info_header *hdr = info;
2862
2863 switch (hdr->ih_type) {
2864 case NX_PORT_INFO_TYPE_NETIF:
2865 break;
2866 default:
2867 return EINVAL;
2868 }
2869 *sz = hdr->ih_size;
2870 return 0;
2871 }
2872
/*
 * Remove the credential binding (and any per-port info blob) from a
 * nexus port.  If no adapter currently occupies the port, it returns
 * to the free bitmap.  Returns EDOM for out-of-range ports and ENOENT
 * when the port has no binding.
 */
int
nx_port_unbind(struct kern_nexus *nx, nexus_port_t nx_port)
{
	struct nx_port_info *npi = NULL;
	struct nxbind *nxb;
	uint32_t i, j;
	bitmap_t *bmap;
	int err = 0;

	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (nx_port >= nx->nx_num_ports) {
		err = EDOM;
		goto done;
	}

	npi = &nx->nx_ports[nx_port];
	i = nx_port / NX_PORT_CHUNK;
	j = nx_port % NX_PORT_CHUNK;
	bmap = &nx->nx_ports_bmap[i];

	if ((nxb = npi->npi_nxb) == NULL) {
		/* must be either free or allocated */
		ASSERT(NPI_NA(npi) == NULL ||
		    (!bit_test(*bmap, j) && nx->nx_active_ports > 0));
		err = ENOENT;
	} else {
		nxb_free(nxb);
		npi->npi_nxb = NULL;
		if (npi->npi_info != NULL) {
			size_t sz;

			/* size comes from the blob's own header */
			VERIFY(nx_port_info_size(npi->npi_info, &sz) == 0);
			sk_free_data(npi->npi_info, sz);
			npi->npi_info = NULL;
		}
		/* a bound port always has its bit cleared */
		ASSERT(!bit_test(*bmap, j));
		if (NPI_NA(npi) == NULL) {
			/* it's vacant, release it (set bit) */
			bit_set(*bmap, j);
		}
	}

done:
	SK_DF(err ? SK_VERB_ERROR : SK_VERB_NXPORT,
	    "--- nx %p nx_port %d, ports %u/%u (err %d)", SK_KVA(nx),
	    (int)nx_port, nx->nx_active_ports, nx->nx_num_ports, err);

	return err;
}
2923
2924 struct nexus_adapter *
nx_port_get_na(struct kern_nexus * nx,nexus_port_t nx_port)2925 nx_port_get_na(struct kern_nexus *nx, nexus_port_t nx_port)
2926 {
2927 if (nx->nx_ports != NULL && nx->nx_num_ports > nx_port) {
2928 return NPI_NA(&nx->nx_ports[nx_port]);
2929 } else {
2930 return NULL;
2931 }
2932 }
2933
/*
 * Copy a port's info blob of the given type into the caller's buffer.
 * Returns ENXIO if the port is out of range, ENOENT if the port has
 * no info blob, EINVAL on a type mismatch.
 *
 * NOTE(review): 'len' is trusted as both the forged bound and the
 * bcopy length; callers are expected to pass the correct size for
 * 'type' — confirm at call sites.
 */
int
nx_port_get_info(struct kern_nexus *nx, nexus_port_t port,
    nx_port_info_type_t type, void *__sized_by(len)info, uint32_t len)
{
	struct nx_port_info *npi;
	struct nx_port_info_header *hdr;

	if (nx->nx_ports == NULL || port >= nx->nx_num_ports) {
		return ENXIO;
	}
	npi = &nx->nx_ports[port];
	/*
	 * -fbounds-safety: Changing npi_info to be __sized_by is a major
	 * surgery. Just forge it here for now.
	 */
	hdr = __unsafe_forge_bidi_indexable(struct nx_port_info_header *,
	    npi->npi_info, len);
	if (hdr == NULL) {
		return ENOENT;
	}

	if (hdr->ih_type != type) {
		return EINVAL;
	}

	bcopy(hdr, info, len);
	return 0;
}
2962
2963 bool
nx_port_is_valid(struct kern_nexus * nx,nexus_port_t nx_port)2964 nx_port_is_valid(struct kern_nexus *nx, nexus_port_t nx_port)
2965 {
2966 return nx_port < nx->nx_num_ports;
2967 }
2968
2969 bool
nx_port_is_defunct(struct kern_nexus * nx,nexus_port_t nx_port)2970 nx_port_is_defunct(struct kern_nexus *nx, nexus_port_t nx_port)
2971 {
2972 ASSERT(nx_port_is_valid(nx, nx_port));
2973
2974 return NPI_IS_DEFUNCT(&nx->nx_ports[nx_port]);
2975 }
2976
/*
 * Tear down all nexus port state: free each in-use port's binding and
 * info blob, then release the free-port bitmap and port array and
 * reset all counters to zero.  Called from nx_free().
 */
void
nx_port_free_all(struct kern_nexus *nx)
{
	/* uncrustify doesn't handle C blocks properly */
	/* BEGIN IGNORE CODESTYLE */
	nx_port_foreach(nx, ^(nexus_port_t p) {
		struct nxbind *nxb;
		/*
		 * XXX -fbounds-safety: Come back to this after fixing npi_info
		 */
		void *__single info;
		nxb = nx->nx_ports[p].npi_nxb;
		info = nx->nx_ports[p].npi_info;
		if (nxb != NULL) {
			nxb_free(nxb);
			nx->nx_ports[p].npi_nxb = NULL;
		}
		if (info != NULL) {
			size_t sz;

			/* blob size is recorded in its own header */
			VERIFY(nx_port_info_size(info, &sz) == 0);
			skn_free_data(info, info, sz);
			nx->nx_ports[p].npi_info = NULL;
		}
	});
	/* END IGNORE CODESTYLE */

	nx->nx_active_ports = 0;
	sk_free_data_sized_by(nx->nx_ports_bmap, nx->nx_ports_bmap_size);
	nx->nx_ports_bmap = NULL;
	nx->nx_ports_bmap_size = 0;
	sk_free_type_array_counted_by(struct nx_port_info, nx->nx_num_ports, nx->nx_ports);
	nx->nx_ports = NULL;
	nx->nx_num_ports = 0;
}
3012
3013 void
3014 nx_port_foreach(struct kern_nexus *nx,
3015 void (^port_handle)(nexus_port_t nx_port))
3016 {
3017 for (nexus_port_size_t i = 0; i < (nx->nx_num_ports / NX_PORT_CHUNK); i++) {
3018 bitmap_t bmap = nx->nx_ports_bmap[i];
3019
3020 if (bmap == NX_PORT_CHUNK_FREE) {
3021 continue;
3022 }
3023
3024 for (nexus_port_size_t j = 0; j < NX_PORT_CHUNK; j++) {
3025 if (bit_test(bmap, j)) {
3026 continue;
3027 }
3028 port_handle((i * NX_PORT_CHUNK) + j);
3029 }
3030 }
3031 }
3032
3033 /*
3034 * sysctl interfaces
3035 */
/* handler prototypes for the sysctl nodes registered below */
static int nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS;
static int nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS;

/* kern.skywalk.* enumeration nodes */
SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_provider_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_provider_list_sysctl, "S,nexus_provider_info_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, nexus_channel_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, nexus_channel_list_sysctl, "S,nexus_channel_entry_t", "");

SYSCTL_PROC(_kern_skywalk, OID_AUTO, llink_list,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_LLINK_LIST, nexus_mib_get_sysctl, "S,nx_llink_info",
    "A list of logical links");

/*
 * kern.skywalk.stats.* nodes; arg2 selects the MIB type handled by
 * nexus_mib_get_sysctl.
 */
SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
    0, NXMIB_FLOW, nexus_mib_get_sysctl, "S,sk_stats_flow",
    "Nexus inet flows with stats collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_owner,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_OWNER, nexus_mib_get_sysctl, "S,sk_stats_flow_owner",
    "Nexus flow owners");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_route,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ROUTE, nexus_mib_get_sysctl, "S,sk_stats_flow_route",
    "Nexus flow routes");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, net_if,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_STATS, nexus_mib_get_sysctl, "S,sk_stats_net_if",
    "Nexus netif statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_switch,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FSW_STATS, nexus_mib_get_sysctl, "S,sk_stats_flow_switch",
    "Nexus flowswitch statistics collected in kernel");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, userstack,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_USERSTACK_STATS, nexus_mib_get_sysctl, "S,sk_stats_userstack",
    "Nexus userstack statistics counter");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flow_adv,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_FLOW_ADV, nexus_mib_get_sysctl, "S,sk_stats_flow_adv",
    "Nexus flow advisory dump");

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netif_queue,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, NXMIB_NETIF_QUEUE_STATS, nexus_mib_get_sysctl, "S,netif_qstats_info",
    "A list of netif queue stats entries");
3092
3093 /*
3094 * Provider list sysctl
3095 */
/*
 * Fill one nexus_provider_info record: provider UUID, a copy of the
 * provider params, and the UUID of every nexus instance attached to
 * the provider.  Caller guarantees 'info' is large enough for
 * nxprov_nx_count instance UUIDs (see NEXUS_PROVIDER_INFO_SIZE).
 */
static void
nexus_provider_info_populate(struct kern_nexus_provider *nxprov,
    nexus_provider_info_t info)
{
	struct kern_nexus *nx;
	uuid_t *uuids;

	SK_LOCK_ASSERT_HELD();

	/* provider UUID + params */
	uuid_copy(info->npi_prov_uuid, nxprov->nxprov_uuid);
	bcopy(nxprov->nxprov_params, &info->npi_prov_params,
	    sizeof(struct nxprov_params));
	info->npi_instance_uuids_count = nxprov->nxprov_nx_count;

	/* instance UUID list */
	uuids = __unsafe_forge_bidi_indexable(uuid_t *,
	    info->npi_instance_uuids, sizeof(uuid_t) * info->npi_instance_uuids_count);
	STAILQ_FOREACH(nx, &nxprov->nxprov_nx_head, nx_prov_link) {
		uuid_copy(*uuids, nx->nx_uuid);
		uuids++;
	}
}
3119
/*
 * sysctl handler for kern.skywalk.nexus_provider_list.  Implements
 * the usual two-pass protocol: a probe with oldptr == NULL returns
 * only the required size (actual_space accumulates with no copying);
 * otherwise records are packed into a bounded kernel buffer and
 * copied out.  Root only.
 */
static int
nexus_provider_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	int error = 0;
	struct kern_nexus_provider *nxprov;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		/* cap the kernel-side staging buffer */
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;
	SK_LOCK();
	STAILQ_FOREACH(nxprov, &nxprov_head, nxprov_link) {
		size_t info_size;

		/* record size varies with the provider's instance count */
		info_size
		        = NEXUS_PROVIDER_INFO_SIZE(nxprov->nxprov_nx_count);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			nexus_provider_info_populate(nxprov, (void *)scan);
			scan += info_size;
			buffer_space -= info_size;
		}
		/* always tally, so size-probe requests get the total */
		actual_space += info_size;
	}
	SK_UNLOCK();

	out_error = SYSCTL_OUT(req, buffer, actual_space);
	if (out_error != 0) {
		error = out_error;
	}

	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}
3184
3185 /*
3186 * Channel list sysctl
3187 */
3188 static uint32_t
channel_ring_count(struct kern_channel * ch,enum txrx which)3189 channel_ring_count(struct kern_channel *ch, enum txrx which)
3190 {
3191 return ch->ch_last[which] - ch->ch_first[which];
3192 }
3193
3194 /*
3195 * -fbounds-safety: kring's range is [first..last]. Marking it
3196 * __counted_by(last) means range is [0..first..last]. The [0..first) might be
3197 * problematic. However, the for loop in this function starts indexing from
3198 * 'first', not 0, so that should be okay.
3199 * XXX Until BATS starts using uncrustify-7 (rdar://90709826), having a space
3200 * between __counted_by(entry_count) entries will be considered invalid code
3201 * style and build will fail. Until rdar://117811249 is resolved, either stick
3202 * to what makes BATS happy, or wrap IGNORE CODESTYLE around.
3203 */
static void
populate_ring_entries(struct __kern_channel_ring *__counted_by(last)kring,
    ring_id_t first, ring_id_t last,
    nexus_channel_ring_entry *__counted_by(entry_count)entries,
    uint32_t NX_FB_ARG entry_count)
{
	/*
	 * Copy one nexus_channel_ring_entry per kernel ring in the
	 * half-open range [first..last) of 'kring' into 'entries'.
	 * The caller sizes 'entries' so that entry_count covers
	 * (last - first) entries.
	 */
	uint64_t now = net_uptime();	/* snapshot, used to age stats below */
	ring_id_t i;
	nexus_channel_ring_entry_t scan;
	struct __kern_channel_ring *ring;

	scan = entries;
	for (i = first; i < last; i++, scan++) {
		ring = &kring[i];

		DTRACE_SKYWALK1(populate__ring, struct __kern_channel_ring *,
		    ring);
		if (kr_stat_enable == 0) {
			/* ring stats collection disabled; report zeros */
			bzero(&scan->ncre_stats, sizeof(scan->ncre_stats));
			bzero(&scan->ncre_user_stats,
			    sizeof(scan->ncre_user_stats));
		} else {
			scan->ncre_stats = ring->ckr_stats;
			/* convert the last-update timestamp into an age */
			scan->ncre_stats.crs_seconds_since_last_update = now -
			    scan->ncre_stats.crs_last_update_net_uptime;
			scan->ncre_user_stats = ring->ckr_usr_stats;
		}
		/* error stats are reported regardless of kr_stat_enable */
		scan->ncre_error_stats = ring->ckr_err_stats;
		scan->ncre_ring_id = i;
	}
}
3235
3236 /* combine/convert ch_mode/ch_flags into nexus_channel_entry flags */
3237 static uint32_t
nexus_channel_get_flags(uint32_t ch_mode,uint32_t ch_flags)3238 nexus_channel_get_flags(uint32_t ch_mode, uint32_t ch_flags)
3239 {
3240 uint32_t flags = 0;
3241
3242 flags |= (ch_mode & CHMODE_USER_PACKET_POOL) ? SCHF_USER_PACKET_POOL : 0;
3243 flags |= (ch_mode & CHMODE_DEFUNCT_OK) ? SCHF_DEFUNCT_OK : 0;
3244 flags |= (ch_mode & CHMODE_FILTER) ? SCHF_FILTER : 0;
3245 flags |= (ch_mode & CHMODE_EVENT_RING) ? SCHF_EVENT_RING : 0;
3246 flags |= (ch_mode & CHMODE_EXCLUSIVE) ? SCHF_EXCLUSIVE : 0;
3247 flags |= (ch_flags & CHANF_IF_ADV) ? SCHF_IF_ADV : 0;
3248 flags |= (ch_flags & CHANF_DEFUNCT_SKIP) ? SCHF_DEFUNCT_SKIP : 0;
3249 flags |= (ch_flags & CHANF_CLOSING) ? SCHF_CLOSING : 0;
3250 flags |= (ch_flags & CHANF_DEFUNCT) ? SCHF_DEFUNCT : 0;
3251 flags |= (ch_mode & CHMODE_LOW_LATENCY) ? SCHF_LOW_LATENCY : 0;
3252
3253 return flags;
3254 }
3255
SK_NO_INLINE_ATTRIBUTE
static void
nexus_channel_entry_populate(struct kern_channel *ch,
    nexus_channel_entry_t entry)
{
	/*
	 * Fill one nexus_channel_entry for sysctl reporting: channel
	 * identity (UUID, nexus port, pid, fd), flags, ring counts and
	 * the per-ring entries (TX ring entries first, then RX).
	 * Caller has sized 'entry' for nce_ring_count entries.
	 */
	uint32_t ch_mode = ch->ch_info->cinfo_ch_mode;
	uint32_t ch_flags = ch->ch_flags;
	ring_id_t rx_first = ch->ch_first[NR_RX];
	ring_id_t rx_last = ch->ch_last[NR_RX];
	ring_id_t tx_last = ch->ch_last[NR_TX];
	ring_id_t tx_first = ch->ch_first[NR_TX];

	uuid_copy(entry->nce_uuid, ch->ch_info->cinfo_ch_id);
	entry->nce_flags = nexus_channel_get_flags(ch_mode, ch_flags);
	entry->nce_port = ch->ch_info->cinfo_nx_port;
	entry->nce_pid = ch->ch_pid;
	entry->nce_fd = ch->ch_fd;
	entry->nce_tx_rings = tx_last - tx_first;
	entry->nce_rx_rings = rx_last - rx_first;
	/* TX ring entries occupy the front of nce_ring_entries[] */
	populate_ring_entries(ch->ch_na->na_tx_rings, tx_first, tx_last,
	    entry->nce_ring_entries, entry->nce_tx_rings);

	/*
	 * -fbounds-safety: If entry->nce_tx_rings > 0 and
	 * entry->nce_rx_rings == 0 (i.e. entry->nce_ring_count ==
	 * entry->nce_tx_rings), simply passing
	 * entry->nce_ring_entries + entry->nce_tx_rings to populate_ring_entries
	 * will fail bounds check, because it is equivalent to assigning
	 * nce_ring_entries + nce_tx_rings to a __single variable, and in this
	 * case it goes out of bounds. It's same thing as having:
	 *     int a[1];
	 *     some_func(a + 1); <-- bounds check will fail
	 */
	if (rx_first < rx_last) {
		/* RX ring entries follow the TX entries */
		populate_ring_entries(ch->ch_na->na_rx_rings, rx_first, rx_last,
		    entry->nce_ring_entries + entry->nce_tx_rings,
		    entry->nce_rx_rings);
	}
}
3295
SK_NO_INLINE_ATTRIBUTE
static size_t
nexus_channel_info_populate(struct kern_nexus *nx,
    nexus_channel_info *__sized_by(buffer_size) info, size_t buffer_size)
{
	/*
	 * Compute the space required to describe all channels of 'nx'
	 * and, when 'info' is non-NULL, also populate it.  Always
	 * returns the total size needed; a return value larger than
	 * buffer_size tells the caller the supplied buffer was too
	 * small (population stops at that point).
	 */
	struct kern_channel *ch = NULL;
	size_t info_size;
	caddr_t scan = NULL;
	nexus_channel_entry *entry;

	SK_LOCK_ASSERT_HELD();

	/* fixed-size header comes first */
	info_size = sizeof(nexus_channel_info);

	/* channel list */
	if (info != NULL) {
		if (buffer_size < info_size) {
			return info_size;
		}

		/* instance UUID */
		uuid_copy(info->nci_instance_uuid, nx->nx_uuid);
		info->nci_channel_entries_count = nx->nx_ch_count;
		scan = (caddr_t __bidi_indexable)info->nci_channel_entries;
	}
	STAILQ_FOREACH(ch, &nx->nx_ch_head, ch_link) {
		size_t entry_size;
		uint32_t ring_count;

		/* each entry is variable-sized: one slot per TX+RX ring */
		ring_count = channel_ring_count(ch, NR_TX) +
		    channel_ring_count(ch, NR_RX);
		entry_size = NEXUS_CHANNEL_ENTRY_SIZE(ring_count);
		info_size += entry_size;
		if (scan != NULL) {
			if (buffer_size < info_size) {
				/* buffer exhausted; report size needed so far */
				return info_size;
			}
			entry = (nexus_channel_entry *)(void *)scan;
			entry->nce_ring_count = ring_count;

			nexus_channel_entry_populate(ch, entry);
			scan += entry_size;
		}
	}
	return info_size;
}
3342
/*
 * Sysctl handler returning, for each nexus, a nexus_channel_info
 * describing its channels.  Follows the standard two-pass sysctl
 * protocol: a request with oldptr == NULL is a size probe (only the
 * required length is reported); otherwise data is staged in a kernel
 * buffer and copied out.  Root only.
 */
static int
nexus_channel_list_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	size_t actual_space;
	caddr_t buffer = NULL;
	size_t buffer_space;
	size_t allocated_space;
	int out_error;
	struct kern_nexus *nx;
	int error = 0;
	caddr_t scan;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		/* cap the staging buffer at a sane maximum */
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		allocated_space = buffer_space;
		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_sysctl_buf);
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		/* size probe: no buffer, just accumulate actual_space */
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;		/* NULL during a size probe */
	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		size_t info_size;

		/* populates into scan when non-NULL; always returns size needed */
		info_size = nexus_channel_info_populate(nx, (void *)scan,
		    buffer_space);
		if (scan != NULL) {
			if (buffer_space < info_size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += info_size;
			buffer_space -= info_size;
		}
		actual_space += info_size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		/* copy out staged data, or report required size on a probe */
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data(buffer, allocated_space);
	}

	return error;
}
3407
/*
 * Generic nexus MIB sysctl handler.  The subcommand (oidp->oid_arg2)
 * selects the MIB type; an optional nexus_mib_filter may be passed in
 * via newptr to narrow the result.  Each nexus domain provider that
 * implements nxdom_prov_nx_mib_get contributes its share of the
 * output.  Standard two-pass sysctl protocol: oldptr == NULL is a
 * size probe.  Access checks restrict certain MIB types to root.
 */
static int
nexus_mib_get_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	struct proc *p = req->p;
	struct nexus_mib_filter filter;
	int error = 0;
	size_t actual_space;
	size_t allocated_space = 0;
	caddr_t __sized_by(allocated_space) buffer = NULL;
	size_t buffer_space;
	int out_error;
	struct kern_nexus *nx;
	caddr_t scan;

	/* Restrict protocol stats access to root user only (like netstat). */
	if (oidp->oid_arg2 == NXMIB_USERSTACK_STATS &&
	    !kauth_cred_issuser(kauth_cred_get())) {
		SK_ERR("mib request rejected, EPERM");
		return EPERM;
	}

	if (req->newptr == USER_ADDR_NULL) {
		/*
		 * For flow stats requests, non-root users need to provide a
		 * 5-tuple. Otherwise, we do not grant access.
		 */
		if (oidp->oid_arg2 == NXMIB_FLOW &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple not provided");
			return EPERM;
		}
		/* use subcommand for multiple nodes */
		filter.nmf_type = oidp->oid_arg2;
		filter.nmf_bitmap = 0x0;
	} else if (req->newlen != sizeof(struct nexus_mib_filter)) {
		SK_ERR("mis-matching newlen");
		return EINVAL;
	} else {
		/* caller supplied an explicit filter; validate it */
		error = SYSCTL_IN(req, &filter, sizeof(struct nexus_mib_filter));
		if (error != 0) {
			SK_ERR("SYSCTL_IN err %d", error);
			return error;
		}
		if (filter.nmf_type != oidp->oid_arg2) {
			SK_ERR("mis-matching nmf_type");
			return EINVAL;
		}
		/*
		 * For flow stats requests, non-root users need to set the nexus
		 * mib filter to NXMIB_FILTER_INFO_TUPLE. Otherwise, we do not
		 * grant access. This ensures that fsw_mib_get_flow looks for a
		 * flow entry that matches the given tuple of the non-root user.
		 */
		if (filter.nmf_type == NXMIB_FLOW &&
		    (filter.nmf_bitmap & NXMIB_FILTER_INFO_TUPLE) == 0 &&
		    !kauth_cred_issuser(kauth_cred_get())) {
			SK_ERR("mib request rejected: tuple filter not set");
			return EPERM;
		}
	}

	net_update_uptime();
	buffer_space = req->oldlen;
	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
		/* cap the staging buffer at a sane maximum */
		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
			buffer_space = SK_SYSCTL_ALLOC_MAX;
		}
		buffer = sk_alloc_data(buffer_space, Z_WAITOK, skmem_tag_sysctl_buf);
		allocated_space = buffer_space;
		if (__improbable(buffer == NULL)) {
			return ENOBUFS;
		}
	} else if (req->oldptr == USER_ADDR_NULL) {
		/* size probe: no buffer, just accumulate actual_space */
		buffer_space = 0;
	}
	actual_space = 0;
	scan = buffer;		/* NULL during a size probe */

	SK_LOCK();
	RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
		/* skip domains that don't implement the MIB getter */
		if (NX_DOM_PROV(nx)->nxdom_prov_nx_mib_get == NULL) {
			continue;
		}

		size_t size = 0;
		struct kern_nexus_domain_provider *nx_dp = NX_DOM_PROV(nx);

		/*
		 * -fbounds-safety: Because scan takes the bounds of buffer
		 * (which is __sized_by(allocated_space)), at some point scan
		 * will reach its bounds (because of scan += size). When it
		 * does, it won't pass the bounds check when scan is passed to
		 * nxdom_prov_nx_mib_get function. We need to avoid passing scan
		 * to nxdom_prov_nx_mib_get when it reaches its upper bound,
		 * i.e. when buffer_space reaches 0 (see buffer_space -= size).
		 */
		if (req->oldptr == USER_ADDR_NULL || buffer_space) {
			size = nx_dp->nxdom_prov_nx_mib_get(nx, &filter, scan,
			    buffer_space, p);
		}

		if (scan != NULL) {
			if (buffer_space < size) {
				/* supplied buffer too small, stop copying */
				error = ENOMEM;
				break;
			}
			scan += size;
			buffer_space -= size;
		}
		actual_space += size;
	}
	SK_UNLOCK();

	if (actual_space != 0) {
		/* copy out staged data, or report required size on a probe */
		out_error = SYSCTL_OUT(req, buffer, actual_space);
		if (out_error != 0) {
			error = out_error;
		}
	}
	if (buffer != NULL) {
		sk_free_data_sized_by(buffer, allocated_space);
	}

	return error;
}
3535
3536 void
kern_nexus_walktree(kern_nexus_walktree_f_t * f,void * arg0,boolean_t is_sk_locked)3537 kern_nexus_walktree(kern_nexus_walktree_f_t *f, void *arg0,
3538 boolean_t is_sk_locked)
3539 {
3540 struct kern_nexus *nx = NULL;
3541
3542 if (!is_sk_locked) {
3543 SK_LOCK();
3544 } else {
3545 SK_LOCK_ASSERT_HELD();
3546 }
3547
3548 RB_FOREACH(nx, kern_nexus_tree, &nx_head) {
3549 (*f)(nx, arg0);
3550 }
3551
3552 if (!is_sk_locked) {
3553 SK_UNLOCK();
3554 }
3555 }
3556
3557 errno_t
kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,struct kern_pbufpool_memory_info * rx_pool_info,struct kern_pbufpool_memory_info * tx_pool_info)3558 kern_nexus_get_pbufpool_info(const uuid_t nx_uuid,
3559 struct kern_pbufpool_memory_info *rx_pool_info,
3560 struct kern_pbufpool_memory_info *tx_pool_info)
3561 {
3562 struct kern_pbufpool *__single tpp, *__single rpp;
3563 struct kern_nexus *nx;
3564 errno_t err = 0;
3565
3566 nx = nx_find(nx_uuid, FALSE);
3567 if (nx == NULL) {
3568 err = ENOENT;
3569 goto done;
3570 }
3571
3572 if (nx->nx_prov->nxprov_params->nxp_type != NEXUS_TYPE_NET_IF) {
3573 err = ENOTSUP;
3574 goto done;
3575 }
3576
3577 err = nx_netif_prov_nx_mem_info(nx, &tpp, &rpp);
3578 if (err != 0) {
3579 goto done;
3580 }
3581
3582 if ((tpp == NULL) && (rpp == NULL)) {
3583 err = ENOENT;
3584 goto done;
3585 }
3586
3587 if (tx_pool_info != NULL) {
3588 bzero(tx_pool_info, sizeof(*tx_pool_info));
3589 }
3590 if (rx_pool_info != NULL) {
3591 bzero(rx_pool_info, sizeof(*rx_pool_info));
3592 }
3593
3594 if ((tx_pool_info != NULL) && (tpp != NULL)) {
3595 err = kern_pbufpool_get_memory_info(tpp, tx_pool_info);
3596 if (err != 0) {
3597 goto done;
3598 }
3599 }
3600
3601 if ((rx_pool_info != NULL) && (rpp != NULL)) {
3602 err = kern_pbufpool_get_memory_info(rpp, rx_pool_info);
3603 }
3604
3605 done:
3606 if (nx != NULL) {
3607 (void) nx_release(nx);
3608 nx = NULL;
3609 }
3610 return err;
3611 }
3612
3613 void
nx_interface_advisory_notify(struct kern_nexus * nx)3614 nx_interface_advisory_notify(struct kern_nexus *nx)
3615 {
3616 struct kern_channel *ch;
3617 struct netif_stats *nifs;
3618 struct fsw_stats *fsw_stats;
3619 nexus_type_t nxdom_type = NX_DOM(nx)->nxdom_type;
3620
3621 if (nxdom_type == NEXUS_TYPE_NET_IF) {
3622 nifs = &NX_NETIF_PRIVATE(nx)->nif_stats;
3623 } else if (nxdom_type == NEXUS_TYPE_FLOW_SWITCH) {
3624 fsw_stats = &NX_FSW_PRIVATE(nx)->fsw_stats;
3625 } else {
3626 VERIFY(0);
3627 __builtin_unreachable();
3628 }
3629 if (!lck_rw_try_lock_shared(&nx->nx_ch_if_adv_lock)) {
3630 if (nxdom_type == NEXUS_TYPE_NET_IF) {
3631 STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_DROP);
3632 } else {
3633 STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_DROP);
3634 }
3635 return;
3636 }
3637 /*
3638 * if the channel is in "nx_ch_if_adv_head" list, then we can
3639 * safely assume that the channel is not closed yet.
3640 * In ch_close_common(), the channel is removed from the
3641 * "nx_ch_if_adv_head" list holding the "nx_ch_if_adv_lock" in
3642 * exclusive mode, prior to closing the channel.
3643 */
3644 STAILQ_FOREACH(ch, &nx->nx_ch_if_adv_head, ch_link_if_adv) {
3645 struct nexus_adapter *na = ch->ch_na;
3646
3647 ASSERT(na != NULL);
3648 na_post_event(&na->na_tx_rings[ch->ch_first[NR_TX]],
3649 TRUE, FALSE, FALSE, CHAN_FILT_HINT_IF_ADV_UPD);
3650 if (nxdom_type == NEXUS_TYPE_NET_IF) {
3651 STATS_INC(nifs, NETIF_STATS_IF_ADV_UPD_SENT);
3652 } else {
3653 STATS_INC(fsw_stats, FSW_STATS_IF_ADV_UPD_SENT);
3654 }
3655 }
3656 lck_rw_done(&nx->nx_ch_if_adv_lock);
3657 }
3658