1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 */
53 #include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
54 #include <skywalk/os_skywalk_private.h>
55 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
56 #include <skywalk/nexus/flowswitch/fsw_var.h>
57 #include <skywalk/nexus/netif/nx_netif.h>
58 #include <skywalk/nexus/netif/nx_netif_compat.h>
59
60 #include <net/bpf.h>
61 #include <net/if.h>
62 #include <net/pktsched/pktsched_netem.h>
63 #include <sys/eventhandler.h>
64
#if (DEVELOPMENT || DEBUG)
/* Development/debug-only knob: expose fsw_chain_enqueue via sysctl. */
SYSCTL_UINT(_kern_skywalk_flowswitch, OID_AUTO, chain_enqueue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &fsw_chain_enqueue, 0, "");
#endif /* !DEVELOPMENT && !DEBUG */

/*
 * Configures the flowswitch to utilize user packet pool with
 * dual sized buffers.
 * A non-zero value enables the support.
 */
#if defined(XNU_TARGET_OS_IOS) || defined(XNU_TARGET_OS_OSX)
uint32_t fsw_use_dual_sized_pool = 1;
#else
uint32_t fsw_use_dual_sized_pool = 0;
#endif

/* Non-zero enables chain enqueue (see sysctl above on DEV/DEBUG kernels). */
uint32_t fsw_chain_enqueue = 0;

/* One-time init flag and tags for the registered event handlers below. */
static int __nx_fsw_inited = 0;
static eventhandler_tag __nx_fsw_ifnet_eventhandler_tag = NULL;
static eventhandler_tag __nx_fsw_protoctl_eventhandler_tag = NULL;

/* Zone for struct nx_flowswitch allocations. */
static SKMEM_TYPE_DEFINE(nx_fsw_zone, struct nx_flowswitch);

/* Zone for per-flowswitch statistics blocks. */
static SKMEM_TYPE_DEFINE(nx_fsw_stats_zone, struct __nx_stats_fsw);

/* Allocation tags for the flowswitch port array and hash tables. */
#define SKMEM_TAG_FSW_PORTS "com.apple.skywalk.fsw.ports"
SKMEM_TAG_DEFINE(skmem_tag_fsw_ports, SKMEM_TAG_FSW_PORTS);

#define SKMEM_TAG_FSW_FOB_HASH "com.apple.skywalk.fsw.fsw.fob.hash"
SKMEM_TAG_DEFINE(skmem_tag_fsw_fob_hash, SKMEM_TAG_FSW_FOB_HASH);

#define SKMEM_TAG_FSW_FRB_HASH "com.apple.skywalk.fsw.fsw.frb.hash"
SKMEM_TAG_DEFINE(skmem_tag_fsw_frb_hash, SKMEM_TAG_FSW_FRB_HASH);

#define SKMEM_TAG_FSW_FRIB_HASH "com.apple.skywalk.fsw.fsw.frib.hash"
SKMEM_TAG_DEFINE(skmem_tag_fsw_frib_hash, SKMEM_TAG_FSW_FRIB_HASH);

#define SKMEM_TAG_FSW_FRAG_MGR "com.apple.skywalk.fsw.fsw.frag.mgr"
SKMEM_TAG_DEFINE(skmem_tag_fsw_frag_mgr, SKMEM_TAG_FSW_FRAG_MGR);

/*
 * 64-bit mask with range [_beg, _end] (bits inclusive).
 * NOTE(review): assumes NX_FSW_CHUNK_FREE is an all-ones 64-bit value
 * so the right shift produces the upper bound — confirm in fsw_var.h.
 */
#define BMASK64(_beg, _end) \
	((NX_FSW_CHUNK_FREE >> (63 - (_end))) & ~((1ULL << (_beg)) - 1))

static int fsw_detach(struct nx_flowswitch *fsw, struct nexus_adapter *hwna,
    boolean_t purge);
111
/*
 * Attach a virtual-port adapter (vpna) to the flowswitch nexus for a
 * channel being opened by process p.  If the requested nexus port already
 * has an adapter, that one is reused; otherwise a new vpna is created,
 * attached to the flowswitch, and bound to the port it was assigned.
 *
 * On success, *vpna holds a retained adapter reference that the caller
 * owns; on failure, *vpna is NULL and an errno-style value is returned.
 * Called with the global SK lock held.
 */
int
fsw_attach_vp(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct nxbind *nxb, struct proc *p,
    struct nexus_vp_adapter **vpna)
{
#pragma unused(ch)
	struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
	char *cr_name = chr->cr_name;
	int err = 0;

	SK_LOCK_ASSERT_HELD();
	/* config-mode channels never come through this path */
	ASSERT(!(chr->cr_mode & CHMODE_CONFIG));
	*vpna = NULL;

	/* if there's an existing adapter on the nexus port then use it */
	FSW_WLOCK(fsw);
	err = fsw_port_alloc(fsw, nxb, vpna, chr->cr_port, p, FALSE, FALSE);
	FSW_WUNLOCK(fsw);

	if (err != 0) {
		ASSERT(*vpna == NULL);
		goto out;
	} else if (*vpna != NULL) {
		/*
		 * Use the existing adapter on that port; fsw_port_alloc()
		 * callback has retained a reference count on the adapter.
		 */
		goto out;
	}
	ASSERT(*vpna == NULL);

	/* create a virtual port; callee holds vpna ref */
	err = fsw_vp_na_create(nx, chr, vpna);
	if (err != 0) {
		SK_ERR("vpna create failed (err %d)", err);
		goto out;
	}

	/* attach vp to fsw */
	err = fsw_vp_na_attach(nx, cr_name, &(*vpna)->vpna_up);
	if (err != 0) {
		SK_ERR("vpna \"%s\" fsw attach failed (err %d)",
		    (*vpna)->vpna_up.na_name, err);
		goto out;
	}

	/* bind the freshly created adapter to its assigned nexus port */
	FSW_WLOCK(fsw);
	err = fsw_port_alloc(fsw, nxb, vpna, (*vpna)->vpna_nx_port, p, FALSE, FALSE);
	FSW_WUNLOCK(fsw);

out:
	if ((*vpna) != NULL) {
		SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
		    "vpna \"%s\" (0x%llx) refs %u to fsw \"%s\" "
		    "nx_port %d (err %d)", (*vpna)->vpna_up.na_name,
		    SK_KVA(&(*vpna)->vpna_up), (*vpna)->vpna_up.na_refcount,
		    cr_name, (int)(*vpna)->vpna_nx_port, err);

		/* on error, drop the reference held on our behalf */
		if (err != 0) {
			na_release_locked(&(*vpna)->vpna_up);
			*vpna = NULL;
		}
	}

	return err;
}
178
179 static int
fsw_nx_check(struct nx_flowswitch * fsw,struct kern_nexus * hw_nx)180 fsw_nx_check(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx)
181 {
182 #pragma unused(fsw)
183 nexus_type_t hw_nxdom_type = NX_DOM(hw_nx)->nxdom_type;
184
185 if (hw_nxdom_type != NEXUS_TYPE_NET_IF) {
186 return EINVAL;
187 }
188
189 /* it's a netif below */
190 return 0;
191 }
192
/*
 * Handle NXCFG_CMD_FLOW_ADD: register a new flow with the flowswitch.
 *
 * User-process requests (p != kernproc) must target a user port and are
 * forced to carry flow tracking and flow advisory; kernel (BSD) flows
 * must not request tracking/advisory/low-latency.  Returns 0 on success
 * or an errno-style value.
 */
static int
fsw_ctl_flow_add(struct nx_flowswitch *fsw, struct proc *p,
    struct nx_flow_req *req)
{
	struct flow_owner *fo;
	int error = 0;

	ASSERT(p != PROC_NULL);

	if (p != kernproc) {
		/* special port shouldn't be bound via this method */
		if (req->nfr_nx_port < FSW_VP_USER_MIN) {
			return EINVAL;
		}
		/* user flows always get tracking and flow advisory */
		req->nfr_flags |= (NXFLOWREQF_TRACK | NXFLOWREQF_FLOWADV);
	} else {
		/* no flow track or advisory support for bsd flow */
		ASSERT((req->nfr_flags & NXFLOWREQF_TRACK) == 0);
		ASSERT((req->nfr_flags & NXFLOWREQF_FLOWADV) == 0);
		ASSERT((req->nfr_flags & NXFLOWREQF_LOW_LATENCY) == 0);
	}

	/* init kernel only fields */
	if (p != kernproc) {
		nx_flow_req_internalize(req);
	}
	req->nfr_pid = proc_pid(p);
	/* -1 means "effective pid not supplied"; default to the caller */
	if (req->nfr_epid == -1) {
		req->nfr_epid = proc_pid(p);
	}

	if (req->nfr_flow_demux_count > MAX_FLOW_DEMUX_PATTERN) {
		SK_ERR("invalid flow demux count %u", req->nfr_flow_demux_count);
		return EINVAL;
	}

	fo = fsw_flow_add(fsw, req, &error);
	ASSERT(fo != NULL || error != 0);

	if (error == 0) {
		// user space don't need this flow stats
		flow_stats_release(req->nfr_flow_stats);
	}
	/* convert the request back to its externalized form for user space */
	if (p != kernproc) {
		nx_flow_req_externalize(req);
	}

	return error;
}
242
243 static int
fsw_ctl_flow_del(struct nx_flowswitch * fsw,struct proc * p,struct nx_flow_req * req)244 fsw_ctl_flow_del(struct nx_flowswitch *fsw, struct proc *p,
245 struct nx_flow_req *req)
246 {
247 int err;
248
249 nx_flow_req_internalize(req);
250 req->nfr_pid = proc_pid(p);
251 err = fsw_flow_del(fsw, req, TRUE, NULL);
252
253 nx_flow_req_externalize(req);
254 return err;
255 }
256
#if (DEVELOPMENT || DEBUG)
/*
 * Sysctl handler for the per-flowswitch "rps_nthreads" node: reads or
 * updates the number of receive processing (RPS) threads for this fsw.
 * arg1 carries the flowswitch instance the skoid node was created for.
 */
static int
fsw_rps_threads_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	struct nx_flowswitch *fsw = arg1;
	uint32_t nthreads;
	int changed;
	int error;

	error = sysctl_io_number(req, fsw->fsw_rps_nthreads,
	    sizeof(fsw->fsw_rps_nthreads), &nthreads, &changed);
	/* only apply when the caller actually wrote a new value */
	if (error == 0 && changed != 0) {
		error = fsw_rps_set_nthreads(fsw, nthreads);
	}
	return error;
}
#endif /* !DEVELOPMENT && !DEBUG */
275
/*
 * Bind the flowswitch to the ifnet behind the given host netif adapter:
 * create the IP fragment manager, install the family-specific framing/
 * demux callbacks, select single- vs multi-buflet packet copy routines,
 * cache the ifp/netif adapter pointers, set up the classq and register
 * the netagent.  Called with the SK lock held; takes the fsw write lock
 * and the detach-barrier lock internally.  Returns 0 or errno.
 */
static int
fsw_setup_ifp(struct nx_flowswitch *fsw, struct nexus_adapter *hwna)
{
	int error = 0;
	struct ifnet *ifp = hwna->na_ifp;
	struct kern_pbufpool *pp = skmem_arena_nexus(hwna->na_arena)->arn_rx_pp;
	/* cap the fragment manager at half the kernel metadata objects */
	size_t f_limit = pp->pp_kmd_region->skr_c_obj_cnt / 2;

	ASSERT((hwna->na_type == NA_NETIF_HOST) ||
	    (hwna->na_type == NA_NETIF_COMPAT_HOST));

	SK_LOCK_ASSERT_HELD();

	/*
	 * XXX: we don't support non TXSTART interface.
	 * There are assumptions in fsw_port_flush_enqueue_dst() about
	 * single threaded write to destination rings.
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		SK_ERR("non TXSTART interface not supported ifp(0x%llx)",
		    SK_KVA(ifp));
		return ENOTSUP;
	}

	FSW_WLOCK(fsw);

	/* a flowswitch is bound to at most one interface at a time */
	ASSERT(fsw->fsw_ifp == NULL);
	ASSERT(fsw->fsw_nifna == NULL);
	ASSERT(fsw->fsw_resolve == NULL);
	ASSERT(fsw->fsw_frame == NULL);
	ASSERT(fsw->fsw_demux == NULL);
	ASSERT(fsw->fsw_pkt_copy_from_pkt == NULL);
	ASSERT(fsw->fsw_pkt_copy_from_mbuf == NULL);
	ASSERT(fsw->fsw_pkt_copy_to_mbuf == NULL);

	fsw->fsw_ipfm = fsw_ip_frag_mgr_create(fsw, ifp, f_limit);
	if (fsw->fsw_ipfm == NULL) {
		FSW_WUNLOCK(fsw);
		return ENOMEM;
	}

	/* pick framing/resolve callbacks and BPF DLT by interface family */
	switch (ifp->if_family) {
	case IFNET_FAMILY_ETHERNET:
		error = fsw_ethernet_setup(fsw, ifp);
		fsw->fsw_ifp_dlt = DLT_EN10MB;
		break;

	case IFNET_FAMILY_CELLULAR:
		error = fsw_cellular_setup(fsw, ifp);
		fsw->fsw_ifp_dlt = DLT_RAW;
		break;

	default:
		/* IPsec and utun tunnels carry raw IP */
		if (ifp->if_family == IFNET_FAMILY_IPSEC ||
		    ifp->if_family == IFNET_FAMILY_UTUN) {
			error = fsw_ip_setup(fsw, ifp);
			fsw->fsw_ifp_dlt = DLT_RAW;
			break;
		}
		error = ENOTSUP;
		break;
	}

	if (error != 0) {
		FSW_WUNLOCK(fsw);
		return error;
	}

	ASSERT(fsw->fsw_resolve != NULL);

	/* multi-buflet copy routines when either side supports >1 frag */
	if (NX_PROV(fsw->fsw_nx)->nxprov_region_params[SKMEM_REGION_KMD].
	    srp_max_frags > 1 || pp->pp_max_frags > 1) {
		fsw->fsw_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
		fsw->fsw_pkt_copy_from_mbuf = pkt_copy_multi_buflet_from_mbuf;
		fsw->fsw_pkt_copy_to_mbuf = pkt_copy_multi_buflet_to_mbuf;
	} else {
		fsw->fsw_pkt_copy_from_pkt = pkt_copy_from_pkt;
		fsw->fsw_pkt_copy_from_mbuf = pkt_copy_from_mbuf;
		fsw->fsw_pkt_copy_to_mbuf = pkt_copy_to_mbuf;
	}

	/*
	 * Since it is possible for fsw to refer to the ifp after all
	 * underlying hwnas are freed (see fsw_teardown_ifp()), we need
	 * an extra reference to the ifp here.
	 *
	 * We also cache the netif adapter of the interface, as it's
	 * needed for each packet enqueued to the classq. There is no
	 * need to retain a refcnt for the same reason as above.
	 *
	 * We hold the busy lock across these, just in case an interface
	 * detach and reattach happens, as fsw_flow_bind() relies on the
	 * same lock as well before making its checks.
	 */
	lck_mtx_lock(&fsw->fsw_detach_barrier_lock);

	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
	fsw->fsw_ifp = ifp;
	fsw->fsw_nifna = &ifp->if_na->nifna_up;
	ifp->if_na->nifna_netif->nif_fsw = fsw;
	ifp->if_na->nifna_netif->nif_fsw_nxadv =
	    fsw->fsw_nx->nx_adv.flowswitch_nxv_adv;
	(void) strlcpy(fsw->fsw_flow_mgr->fm_name,
	    if_name(ifp), IFNAMSIZ);

	fsw_classq_setup(fsw, hwna);
	fsw->fsw_classq_enabled = TRUE;
	fsw->fsw_src_lla_gencnt = 0;

	/* rename the reaper thread after the interface it now serves */
	ASSERT(fsw->fsw_reap_thread != THREAD_NULL);
	(void) snprintf(fsw->fsw_reap_name, sizeof(fsw->fsw_reap_name),
	    FSW_REAP_THREADNAME, ifp->if_xname, "");
	thread_set_thread_name(fsw->fsw_reap_thread, fsw->fsw_reap_name);

	error = fsw_netagent_register(fsw, ifp);
	SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW,
	    "fsw_netagent_register %s (family %u) (err %d)",
	    if_name(ifp), ifp->if_family, error);

	/*
	 * Clear NXF_REJECT to allow new channels to be opened
	 * to this nexus, in case this is an interface reattach.
	 * Otherwise this flag should already be cleared.
	 */
	if (error == 0) {
		atomic_bitclear_32(&fsw->fsw_nx->nx_flags, NXF_REJECT);
	}

	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);

	/*
	 * Wake up the reaper thread.
	 */
	if (error == 0) {
		fsw_reap_sched(fsw);
	}

	/* init skoid */
	skoid_create(&fsw->fsw_skoid,
	    SKOID_SNODE(_kern_skywalk_flowswitch), if_name(ifp),
	    CTLFLAG_RW);

#if (DEVELOPMENT || DEBUG)
	/* rps_nthreads knob only applies to native skywalk interfaces */
	if (SKYWALK_NATIVE(fsw->fsw_ifp)) {
		skoid_add_handler(&fsw->fsw_skoid, "rps_nthreads", CTLFLAG_RW,
		    fsw_rps_threads_sysctl, fsw, 0);
	}
#endif /* !DEVELOPMENT && !DEBUG */

	FSW_WUNLOCK(fsw);

	return error;
}
429
/*
 * Undo fsw_setup_ifp(): unregister the netagent, destroy the fragment
 * manager and skoid, tear down the classq, mark the nexus rejecting,
 * and sever all cached ifp/netif linkage.  Called with the SK lock and
 * the fsw write lock held.
 */
static void
fsw_teardown_ifp(struct nx_flowswitch *fsw, struct nexus_adapter *hwna)
{
	struct ifnet *ifp;

	SK_LOCK_ASSERT_HELD();

	FSW_WLOCK_ASSERT_HELD(fsw);
	ifp = fsw->fsw_ifp;
	ASSERT(ifp != NULL);
	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);

	fsw_netagent_unregister(fsw, ifp);

	if (fsw->fsw_ipfm != NULL) {
		fsw_ip_frag_mgr_destroy(fsw->fsw_ipfm);
	}

	skoid_destroy(&fsw->fsw_skoid);

	SK_DF(SK_VERB_FSW, "%sdetached from %s (family %u)",
	    ((fsw->fsw_agent_session != NULL) ? "netagent" : ""),
	    if_name(ifp), ifp->if_family);

	/* hwna may already be gone if the adapters were freed first */
	if (hwna != NULL) {
		fsw_classq_teardown(fsw, hwna);
	}

	/*
	 * Set NXF_REJECT on the nexus, which would cause existing adapters
	 * to be marked similarly; channels associated with them would then
	 * cease to function.
	 */
	atomic_bitset_32(&fsw->fsw_nx->nx_flags, NXF_REJECT);

	/* see notes on fsw_na_attach() about I/O refcnt */
	if (ifp->if_na != NULL) {
		ifp->if_na->nifna_netif->nif_fsw = NULL;
		ifp->if_na->nifna_netif->nif_fsw_nxadv = NULL;
		/* ensure the cleared pointers are globally visible */
		membar_sync();
	}

	/* drop every cached callback/pointer installed at setup time */
	fsw->fsw_ifp = NULL;
	fsw->fsw_nifna = NULL;
	fsw->fsw_resolve = NULL;
	fsw->fsw_frame = NULL;
	fsw->fsw_frame_headroom = 0;
	fsw->fsw_demux = NULL;
	fsw->fsw_classq_enabled = FALSE;
	fsw->fsw_pkt_copy_from_pkt = NULL;
	fsw->fsw_pkt_copy_from_mbuf = NULL;
	fsw->fsw_pkt_copy_to_mbuf = NULL;

	/* tear down any netem (network emulation) instance on input */
	if (ifp->if_input_netem != NULL) {
		netem_destroy(ifp->if_input_netem);
		ifp->if_input_netem = NULL;
	}

	/* rename the reaper thread to reflect the detached state */
	ASSERT(fsw->fsw_reap_thread != THREAD_NULL);
	(void) snprintf(fsw->fsw_reap_name, sizeof(fsw->fsw_reap_name),
	    FSW_REAP_THREADNAME, if_name(ifp), "_detached");
	thread_set_thread_name(fsw->fsw_reap_thread, fsw->fsw_reap_name);
}
493
/*
 * Finish attaching the flowswitch to the host side of the netif below:
 * validate the host adapter's ifnet, reset the detach-barrier state, run
 * fsw_setup_ifp(), and record the interface index in the nexus provider
 * parameters.  Called with the SK lock held.  Returns 0 or errno.
 */
static int
fsw_host_setup(struct nx_flowswitch *fsw)
{
	struct nexus_adapter *hwna;
	struct ifnet *ifp;

	SK_LOCK_ASSERT_HELD();

	hwna = fsw->fsw_host_ch->ch_na;
	ASSERT(hwna != NULL);

	/* the netif below must have an ifnet attached (dev/host port) */
	if ((ifp = hwna->na_ifp) == NULL) {
		return ENXIO;
	}

	/*
	 * XXX: we don't support multiple rx rings yet.
	 * There are assumptions in fsw_port_flush_enqueue_dst() about
	 * single threaded write to destination rings.
	 */
	if (SKYWALK_NATIVE(ifp) && (hwna->na_num_rx_rings > 1)) {
		SK_ERR("ifp(0x%llx): multiple rx rings(%d) not supported",
		    SK_KVA(ifp), hwna->na_num_rx_rings);
		return ENOTSUP;
	}

	/* refuse to set up while a detach is already in progress */
	lck_mtx_lock(&fsw->fsw_detach_barrier_lock);
	if ((fsw->fsw_detach_flags & FSW_DETACHF_DETACHING) != 0) {
		lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
		return EBUSY;
	}
	fsw->fsw_detach_flags = 0;
	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);

	int error = fsw_setup_ifp(fsw, hwna);
	ASSERT(error != 0 || fsw->fsw_ifp != NULL);
	if (error != 0) {
		return error;
	}

	/* update the interface index */
	ASSERT(NX_PROV(fsw->fsw_nx)->nxprov_params->nxp_ifindex == 0);
	NX_PROV(fsw->fsw_nx)->nxprov_params->nxp_ifindex = ifp->if_index;
	return 0;
}
541
542 static int
fsw_host_teardown(struct nx_flowswitch * fsw)543 fsw_host_teardown(struct nx_flowswitch *fsw)
544 {
545 struct nexus_adapter *hwna = fsw->fsw_host_ch->ch_na;
546
547 SK_LOCK_ASSERT_HELD();
548 return fsw_detach(fsw, hwna, FALSE);
549 }
550
#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
fsw_ctl_attach_log(const struct nx_spec_req *nsr,
    const struct kern_nexus *nx, int err)
{
	uuid_string_t uuidstr, ifuuidstr;
	const char *nustr;

	/* pick the most descriptive identifier the request carries */
	if (nsr->nsr_flags & NXSPECREQ_UUID) {
		nustr = sk_uuid_unparse(nsr->nsr_uuid, uuidstr);
	} else if (nsr->nsr_flags & NXSPECREQ_IFP) {
		/* no UUID; log the ifnet pointer instead (reusing uuidstr) */
		(void) snprintf((char *)uuidstr, sizeof(uuidstr), "0x%llx",
		    SK_KVA(nsr->nsr_ifp));
		nustr = uuidstr;
	} else {
		nustr = nsr->nsr_name;
	}

	SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
	    "nexus 0x%llx (%s) name/uuid \"%s\" if_uuid %s flags 0x%x err %d",
	    SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, nustr,
	    sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr), nsr->nsr_flags, err);
}
#endif /* SK_LOG */
577
578 SK_NO_INLINE_ATTRIBUTE
579 static void
fsw_netif_set_callbacks_common(struct nx_flowswitch * fsw,boolean_t set)580 fsw_netif_set_callbacks_common(struct nx_flowswitch *fsw, boolean_t set)
581 {
582 struct nexus_adapter *hwna = fsw->fsw_dev_ch->ch_na;
583
584 ASSERT(hwna->na_type == NA_NETIF_DEV ||
585 hwna->na_type == NA_NETIF_COMPAT_DEV);
586
587 if (set) {
588 netif_hwna_set_mode(hwna, NETIF_MODE_FSW, fsw_devna_rx);
589 } else {
590 netif_hwna_clear_mode(hwna);
591 }
592 }
593
594 SK_NO_INLINE_ATTRIBUTE
595 static void
fsw_netif_set_callbacks(struct nx_flowswitch * fsw)596 fsw_netif_set_callbacks(struct nx_flowswitch *fsw)
597 {
598 fsw_netif_set_callbacks_common(fsw, TRUE);
599 }
600
601 SK_NO_INLINE_ATTRIBUTE
602 static void
fsw_netif_clear_callbacks(struct nx_flowswitch * fsw)603 fsw_netif_clear_callbacks(struct nx_flowswitch *fsw)
604 {
605 fsw_netif_set_callbacks_common(fsw, FALSE);
606 }
607
608 SK_NO_INLINE_ATTRIBUTE
609 static void
fsw_dp_start(struct nx_flowswitch * fsw)610 fsw_dp_start(struct nx_flowswitch *fsw)
611 {
612 ASSERT(fsw->fsw_dev_ch != NULL);
613 ASSERT(fsw->fsw_host_ch != NULL);
614
615 fsw_netif_set_callbacks(fsw);
616 na_start_spec(fsw->fsw_dev_ch->ch_nexus, fsw->fsw_dev_ch);
617 na_start_spec(fsw->fsw_host_ch->ch_nexus, fsw->fsw_host_ch);
618 }
619
/*
 * Quiesce and stop the flowswitch datapath.  Marks the fsw quiesced
 * (idempotent: returns EALREADY on a second call), drains in-flight
 * datamov activity if needed — temporarily dropping the SK lock to do
 * so — then stops the host/dev channels and clears the netif callbacks.
 * On a drain, *ifpp returns the ifnet whose datamov suspension the
 * caller must later resume.
 */
SK_NO_INLINE_ATTRIBUTE
static int
fsw_dp_stop(struct nx_flowswitch *fsw, struct ifnet **ifpp)
{
	struct ifnet *ifp;

	FSW_WLOCK(fsw);
	if ((fsw->fsw_state_flags & FSW_STATEF_QUIESCED) != 0) {
		FSW_WUNLOCK(fsw);
		return EALREADY;
	}
	fsw->fsw_state_flags |= FSW_STATEF_QUIESCED;
	FSW_WUNLOCK(fsw);

	/*
	 * For regular kernel-attached interfaces, quiescing is handled by
	 * the ifnet detach thread, which calls dlil_quiesce_and_detach_nexuses().
	 * For interfaces created by skywalk test cases, flowswitch/netif nexuses
	 * are constructed on the fly and can also be torn down on the fly.
	 * dlil_quiesce_and_detach_nexuses() won't help here because any nexus
	 * can be detached while the interface is still attached.
	 */
	if ((ifp = fsw->fsw_ifp) != NULL &&
	    ifnet_datamov_suspend_if_needed(ifp)) {
		/* drop SK lock while draining; drain may block */
		SK_UNLOCK();
		ifnet_datamov_drain(ifp);
		/* Reference will be released by caller */
		*ifpp = ifp;
		SK_LOCK();
	}
	ASSERT(fsw->fsw_dev_ch != NULL);
	ASSERT(fsw->fsw_host_ch != NULL);
	/* stop in reverse order of fsw_dp_start(): host first, then dev */
	na_stop_spec(fsw->fsw_host_ch->ch_nexus, fsw->fsw_host_ch);
	na_stop_spec(fsw->fsw_dev_ch->ch_nexus, fsw->fsw_dev_ch);
	fsw_netif_clear_callbacks(fsw);
	return 0;
}
657
658 SK_NO_INLINE_ATTRIBUTE
659 static int
fsw_netif_port_setup(struct nx_flowswitch * fsw,struct kern_nexus * hw_nx,boolean_t host)660 fsw_netif_port_setup(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx,
661 boolean_t host)
662 {
663 struct chreq chr;
664 struct kern_channel *ch;
665 int err;
666
667 bzero(&chr, sizeof(chr));
668 uuid_copy(chr.cr_spec_uuid, hw_nx->nx_uuid);
669 chr.cr_ring_id = CHANNEL_RING_ID_ANY;
670 chr.cr_port = host ? NEXUS_PORT_NET_IF_HOST : NEXUS_PORT_NET_IF_DEV;
671 chr.cr_mode |= CHMODE_CONFIG | (host ? CHMODE_HOST : 0);
672
673 err = 0;
674 ch = ch_open_special(hw_nx, &chr, FALSE, &err);
675 if (ch == NULL) {
676 SK_ERR("ch_open_special(%s) failed: %d",
677 host ? "host" : "dev", err);
678 return err;
679 }
680 if (host) {
681 fsw->fsw_host_ch = ch;
682 } else {
683 fsw->fsw_dev_ch = ch;
684 }
685 return 0;
686 }
687
688 SK_NO_INLINE_ATTRIBUTE
689 static int
fsw_netif_port_teardown(struct nx_flowswitch * fsw,boolean_t host)690 fsw_netif_port_teardown(struct nx_flowswitch *fsw, boolean_t host)
691 {
692 struct kern_channel *ch;
693
694 ch = host ? fsw->fsw_host_ch : fsw->fsw_dev_ch;
695 if (ch == NULL) {
696 return EINVAL;
697 }
698 if (host) {
699 fsw->fsw_host_ch = NULL;
700 } else {
701 fsw->fsw_dev_ch = NULL;
702 }
703 ch_close_special(ch);
704 (void) ch_release_locked(ch);
705 return 0;
706 }
707
708 SK_NO_INLINE_ATTRIBUTE
709 static int
fsw_devna_setup(struct nx_flowswitch * fsw,struct kern_nexus * hw_nx)710 fsw_devna_setup(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx)
711 {
712 return fsw_netif_port_setup(fsw, hw_nx, FALSE);
713 }
714
715 SK_NO_INLINE_ATTRIBUTE
716 static int
fsw_hostna_setup(struct nx_flowswitch * fsw,struct kern_nexus * hw_nx)717 fsw_hostna_setup(struct nx_flowswitch *fsw, struct kern_nexus *hw_nx)
718 {
719 return fsw_netif_port_setup(fsw, hw_nx, TRUE);
720 }
721
722 SK_NO_INLINE_ATTRIBUTE
723 static int
fsw_devna_teardown(struct nx_flowswitch * fsw)724 fsw_devna_teardown(struct nx_flowswitch *fsw)
725 {
726 return fsw_netif_port_teardown(fsw, FALSE);
727 }
728
729 SK_NO_INLINE_ATTRIBUTE
730 static int
fsw_hostna_teardown(struct nx_flowswitch * fsw)731 fsw_hostna_teardown(struct nx_flowswitch *fsw)
732 {
733 return fsw_netif_port_teardown(fsw, TRUE);
734 }
735
/*
 * Process NXCFG_CMD_ATTACH: attach a netif nexus (identified by UUID in
 * nsr->nsr_uuid) below this flowswitch.  Opens the dev and host channels,
 * binds the host ifnet, and starts the datapath; on any partial failure
 * the already-completed steps are unwound in reverse order.  On success
 * the devna UUID is returned in nsr->nsr_if_uuid.  SK lock held.
 */
SK_NO_INLINE_ATTRIBUTE
static int
fsw_ctl_attach(struct kern_nexus *nx, struct proc *p, struct nx_spec_req *nsr)
{
#pragma unused(p)
	struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
	struct kern_nexus *hw_nx = NULL;
	int err = 0;

	SK_LOCK_ASSERT_HELD();

	/*
	 * The flowswitch only accepts UUID as an identifier, since it
	 * represents the UUID of the kernel object we are trying to
	 * attach to this flowswitch.
	 */
	if ((nsr->nsr_flags & (NXSPECREQ_UUID | NXSPECREQ_IFP)) !=
	    NXSPECREQ_UUID || uuid_is_null(nsr->nsr_uuid)) {
		err = EINVAL;
		goto done;
	}

	/* at most one netif may be attached below a flowswitch */
	if (fsw->fsw_dev_ch != NULL) {
		ASSERT(fsw->fsw_host_ch != NULL);
		err = EEXIST;
		goto done;
	}

	/* look up the target nexus; nx_find() retains it on success */
	hw_nx = nx_find(nsr->nsr_uuid, TRUE);
	if (hw_nx == NULL) {
		err = ENOENT;
		goto done;
	} else if (hw_nx == nx) {
		/* refuse to attach a flowswitch to itself */
		err = EINVAL;
		goto done;
	}

	/* preflight check to see if the nexus is attachable to us */
	err = fsw_nx_check(fsw, hw_nx);
	if (err != 0) {
		goto done;
	}

	err = fsw_devna_setup(fsw, hw_nx);
	if (err != 0) {
		goto done;
	}

	err = fsw_hostna_setup(fsw, hw_nx);
	if (err != 0) {
		(void) fsw_devna_teardown(fsw);
		goto done;
	}

	err = fsw_host_setup(fsw);
	if (err != 0) {
		(void) fsw_hostna_teardown(fsw);
		(void) fsw_devna_teardown(fsw);
		goto done;
	}

	fsw_dp_start(fsw);

	/* return the devna UUID */
	uuid_copy(nsr->nsr_if_uuid, fsw->fsw_dev_ch->ch_na->na_uuid);
	ASSERT(!uuid_is_null(nsr->nsr_if_uuid));
done:
#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		fsw_ctl_attach_log(nsr, nx, err);
	}
#endif /* SK_LOG */

	/* balance the retain taken by nx_find() */
	if (hw_nx != NULL) {
		nx_release_locked(hw_nx);
	}

	return err;
}
816
/*
 * Detach everything below the flowswitch: stop the datapath, then tear
 * down the host binding, host channel, and dev channel, in that order.
 * No-op when nothing is attached; silently returns if the datapath was
 * already quiesced by another caller.
 */
SK_NO_INLINE_ATTRIBUTE
static void
fsw_cleanup(struct nx_flowswitch *fsw)
{
	int err;
	struct ifnet *ifp = NULL;

	if (fsw->fsw_dev_ch == NULL) {
		/* dev and host channels are always set up/torn down together */
		ASSERT(fsw->fsw_host_ch == NULL);
		return;
	}
	err = fsw_dp_stop(fsw, &ifp);
	if (err != 0) {
		/* EALREADY: another caller is (or was) tearing us down */
		return;
	}
	err = fsw_host_teardown(fsw);
	VERIFY(err == 0);

	err = fsw_hostna_teardown(fsw);
	VERIFY(err == 0);

	err = fsw_devna_teardown(fsw);
	VERIFY(err == 0);

	/* resume datamov if fsw_dp_stop() suspended it for the drain */
	if (ifp != NULL) {
		ifnet_datamov_resume(ifp);
	}
}
845
/*
 * Process NXCFG_CMD_DETACH: detach the netif below this flowswitch.
 * A NULL nsr (destructor path) detaches unconditionally; otherwise the
 * request must carry the devna UUID previously returned by attach.
 * Returns 0, EINVAL (null if_uuid), ENXIO (nothing attached), or ESRCH
 * (UUID mismatch).  SK lock held.
 */
int
fsw_ctl_detach(struct kern_nexus *nx, struct proc *p,
    struct nx_spec_req *nsr)
{
#pragma unused(p)
	struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
	int err = 0;

	SK_LOCK_ASSERT_HELD();

	/*
	 * nsr is NULL when we're called from the destructor, and it
	 * implies that we'll detach everything that is attached.
	 */
	if (nsr == NULL) {
		fsw_cleanup(fsw);
		ASSERT(fsw->fsw_dev_ch == NULL);
		ASSERT(fsw->fsw_host_ch == NULL);
		goto done;
	}

	if (uuid_is_null(nsr->nsr_if_uuid)) {
		err = EINVAL;
		goto done;
	} else if (fsw->fsw_dev_ch == NULL || fsw->fsw_host_ch == NULL) {
		err = ENXIO;
		goto done;
	}

	/* check if the devna uuid is correct */
	if (uuid_compare(nsr->nsr_if_uuid,
	    fsw->fsw_dev_ch->ch_na->na_uuid) != 0) {
		err = ESRCH;
		goto done;
	}
	fsw_cleanup(fsw);

done:
#if SK_LOG
	if (nsr != NULL) {
		uuid_string_t ifuuidstr;
		SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
		    "nexus 0x%llx (%s) if_uuid %s flags 0x%x err %d",
		    SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
		    sk_uuid_unparse(nsr->nsr_if_uuid, ifuuidstr),
		    nsr->nsr_flags, err);
	} else {
		SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
		    "nexus 0x%llx (%s) ANY err %d", SK_KVA(nx),
		    NX_DOM_PROV(nx)->nxdom_prov_name, err);
	}
#endif /* SK_LOG */

	return err;
}
901
/*
 * Process NXCFG_CMD_NETEM: configure network emulation (netem) on the
 * input path of the attached interface, wiring the flowswitch dev input
 * dequeue as the netem output callback.  Returns ENODEV if no interface
 * is attached.  SK lock held.
 */
static int
fsw_netem_config(struct nx_flowswitch *fsw, void *data)
{
	struct ifnet *ifp = fsw->fsw_ifp;
	struct if_netem_params *params = data;
	int ret;

	if (ifp == NULL) {
		return ENODEV;
	}

	SK_LOCK_ASSERT_HELD();
/* thread-name template for the per-interface netem input thread */
#define fsw_INPUT_NETEM_THREADNAME "if_input_netem_%s@fsw"
#define fsw_INPUT_NETEM_THREADNAME_LEN 32
	char netem_name[fsw_INPUT_NETEM_THREADNAME_LEN];
	(void) snprintf(netem_name, sizeof(netem_name),
	    fsw_INPUT_NETEM_THREADNAME, if_name(ifp));
	ret = netem_config(&ifp->if_input_netem, netem_name, ifp, params, fsw,
	    fsw_dev_input_netem_dequeue, FSW_VP_DEV_BATCH_MAX);

	return ret;
}
924
/*
 * Flowswitch nexus control entry point: dispatch an NXCFG_CMD_* request.
 * `data' is interpreted per command (nx_spec_req for ATTACH/DETACH,
 * nx_flow_req for FLOW_ADD/FLOW_DEL, if_netem_params for NETEM).
 * For FLOW_ADD, callers acting on behalf of another pid or executable
 * UUID must hold PRIV_NET_PRIVILEGED_SOCKET_DELEGATE.
 */
int
fsw_ctl(struct kern_nexus *nx, nxcfg_cmd_t nc_cmd, struct proc *p,
    void *data)
{
	struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
	struct nx_spec_req *nsr = data;
	struct nx_flow_req *req = data;
	boolean_t need_check;
	int error = 0;

	/* first pass: validate and privilege-check flow requests */
	switch (nc_cmd) {
	case NXCFG_CMD_FLOW_ADD:
	case NXCFG_CMD_FLOW_DEL:
		if (uuid_is_null(req->nfr_flow_uuid)) {
			error = EINVAL;
			goto done;
		}
		/* user space may only set the publicly-defined flag bits */
		if (p != kernproc) {
			req->nfr_flags &= NXFLOWREQF_MASK;
		}
		req->nfr_flowadv_idx = FLOWADV_IDX_NONE;

		/* deletion needs no delegation check */
		if (nc_cmd == NXCFG_CMD_FLOW_DEL) {
			break;
		}

		need_check = FALSE;
		if (req->nfr_epid != -1 && proc_pid(p) != req->nfr_epid) {
			/* acting on behalf of a different effective pid */
			need_check = TRUE;
		} else if (!uuid_is_null(req->nfr_euuid)) {
			uuid_t uuid;

			/* get the UUID of the issuing process */
			proc_getexecutableuuid(p, uuid, sizeof(uuid));

			/*
			 * If this is not issued by a process for its own
			 * executable UUID and if the process does not have
			 * the necessary privilege, reject the request.
			 * The logic is similar to so_set_effective_uuid().
			 */
			if (uuid_compare(req->nfr_euuid, uuid) != 0) {
				need_check = TRUE;
			}
		}
		if (need_check) {
			kauth_cred_t cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				goto done;
			}
		}
		break;

	default:
		break;
	}

	/* second pass: dispatch to the per-command handler */
	switch (nc_cmd) {
	case NXCFG_CMD_ATTACH:
		error = fsw_ctl_attach(nx, p, nsr);
		break;

	case NXCFG_CMD_DETACH:
		error = fsw_ctl_detach(nx, p, nsr);
		break;

	case NXCFG_CMD_FLOW_ADD: /* struct nx_flow_req */
		error = fsw_ctl_flow_add(fsw, p, data);
		break;

	case NXCFG_CMD_FLOW_DEL: /* struct nx_flow_req */
		error = fsw_ctl_flow_del(fsw, p, data);
		break;
	case NXCFG_CMD_NETEM: /* struct if_netem_params */
		error = fsw_netem_config(fsw, data);
		break;

	default:
		SK_ERR("invalid cmd %u", nc_cmd);
		error = EINVAL;
		break;
	}

done:
	return error;
}
1014
1015 struct nx_flowswitch *
fsw_ifp_to_fsw(struct ifnet * ifp)1016 fsw_ifp_to_fsw(struct ifnet *ifp)
1017 {
1018 struct nx_flowswitch *fsw = NULL;
1019
1020 if (ifp->if_na != NULL) {
1021 fsw = ifp->if_na->nifna_netif->nif_fsw;
1022 }
1023 return fsw;
1024 }
1025
/*
 * Eventhandler callback for interface events.  Reacts to link-layer
 * address changes (refresh the cached Ethernet source address and bump
 * its generation count) and to low-power-mode transitions (kick the
 * reaper thread).  Other event codes are ignored.
 */
static void
fsw_ifnet_event_callback(struct eventhandler_entry_arg ee_arg __unused,
    struct ifnet *ifp, struct sockaddr *ip_addr __unused,
    intf_event_code_t intf_ev_code)
{
	struct nx_flowswitch *fsw = NULL;

	/* interfaces without a netif adapter cannot have a flowswitch */
	if (ifp->if_na == NULL) {
		return;
	}

	SK_LOCK();
	fsw = fsw_ifp_to_fsw(ifp);
	if (fsw != NULL) {
		switch (intf_ev_code) {
		case INTF_EVENT_CODE_LLADDR_UPDATE:
			/* only Ethernet-framed interfaces cache a MAC */
			if ((fsw->fsw_ifp == NULL) ||
			    (fsw->fsw_ifp_dlt != DLT_EN10MB)) {
				break;
			}

			VERIFY(fsw->fsw_ifp == ifp);
			SK_DF(SK_VERB_FSW, "MAC address change detected for %s",
			    if_name(fsw->fsw_ifp));
			(void) ifnet_lladdr_copy_bytes(ifp, fsw->fsw_ether_shost,
			    ETHER_ADDR_LEN);
			/* invalidate consumers of the cached source lladdr */
			atomic_add_32(&fsw->fsw_src_lla_gencnt, 1);
			break;

		case INTF_EVENT_CODE_LOW_POWER_UPDATE:
			if (fsw->fsw_ifp == NULL) {
				break;
			}

			VERIFY(fsw->fsw_ifp == ifp);

			if (ifp->if_xflags & IFXF_LOW_POWER) {
				SK_DF(SK_VERB_FSW,
				    "Low power mode updated for %s",
				    if_name(fsw->fsw_ifp));

				/* reclaim resources sooner in low power */
				fsw_reap_sched(fsw);
			}
			break;

		default:
			break;
		}
	}
	SK_UNLOCK();
}
1077
/*
 * Eventhandler callback for protocol control events.  Matches the
 * event's full 5-tuple against the flowswitch's flow table and, on a
 * hit, relays the event to the netagent session so the flow owner can
 * be notified.
 */
static void
fsw_protoctl_event_callback(struct eventhandler_entry_arg ee_arg,
    struct ifnet *ifp, struct sockaddr *p_laddr, struct sockaddr *p_raddr,
    uint16_t lport, uint16_t rport, uint8_t proto, uint32_t protoctl_event_code,
    struct protoctl_ev_val *p_val)
{
#pragma unused(ee_arg)
	struct nx_flowswitch *fsw = NULL;
	struct flow_entry *fe = NULL;
	boolean_t netagent_update_flow = FALSE;
	uuid_t fe_uuid;

	/* the flowswitch only tracks TCP and UDP flows */
	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
		return;
	}

	/*
	 * XXX Right now only handle the event if we have enough
	 * information to match the entire flow.
	 */
	if (lport == 0 || rport == 0 || p_laddr == NULL || p_raddr == NULL) {
		return;
	}

	SK_LOCK();
	fsw = fsw_ifp_to_fsw(ifp);
	if (fsw == NULL) {
		goto out;
	}

	/* hold the detach barrier so the fsw can't go away under us */
	if (!fsw_detach_barrier_add(fsw)) {
		fsw = NULL;	/* no barrier taken, so don't drop one below */
		SK_ERR("netagent detached");
		goto out;
	}

	/* build a 5-tuple flow key from the event's endpoints */
	struct flow_key fk __sk_aligned(16);
	FLOW_KEY_CLEAR(&fk);
	fk.fk_proto = proto;
	if (p_laddr->sa_family == AF_INET) {
		fk.fk_ipver = IPVERSION;
		fk.fk_src4 = SIN(p_laddr)->sin_addr;
		fk.fk_dst4 = SIN(p_raddr)->sin_addr;
	} else {
		fk.fk_ipver = IPV6_VERSION;
		fk.fk_src6 = SIN6(p_laddr)->sin6_addr;
		fk.fk_dst6 = SIN6(p_raddr)->sin6_addr;
	}
	fk.fk_sport = lport;
	fk.fk_dport = rport;
	fk.fk_mask = FKMASK_5TUPLE;

	fe = flow_mgr_find_fe_by_key(fsw->fsw_flow_mgr, &fk);
	if (__improbable(fe == NULL)) {
		goto out;
	}

	uuid_copy(fe_uuid, fe->fe_uuid);
	/*
	 * If the protocol notification is for TCP, make sure
	 * protocol event received is for bytes in the flight.
	 * XXX Redirect events are not delivered as protocol events
	 * but as better route events.
	 * Also redirect events do not indicate loss of the packet.
	 */
	if (proto != IPPROTO_TCP) {
		p_val->tcp_seq_number = 0;
	}

	netagent_update_flow = TRUE;

out:
	SK_UNLOCK();

	/* netagent_update_flow TRUE implies fsw != NULL and fe != NULL */
	if (netagent_update_flow) {
		int error = 0;
#if SK_LOG
		char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
		SK_DF(SK_VERB_FLOW, "Update flow entry \"%s\" for protocol "
		    "event %d with value %d and tcp sequence number %d",
		    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
		    protoctl_event_code, p_val->val, p_val->tcp_seq_number);
#endif /* SK_LOG */
		if ((error = netagent_update_flow_protoctl_event(
			    fsw->fsw_agent_session, fe_uuid, protoctl_event_code,
			    p_val->val, p_val->tcp_seq_number)) != 0) {
#if SK_LOG
			SK_DF(SK_VERB_FLOW, "Error: %d. Could not update "
			    "flow entry \"%s\" for protocol event %d with "
			    "value %d and tcp sequence number %d", error,
			    dbgbuf, protoctl_event_code, p_val->val,
			    p_val->tcp_seq_number);
#endif /* SK_LOG */
		}
	}

	if (fe != NULL) {
		flow_entry_release(&fe);
	}

	/* drop the detach barrier taken above, if any */
	if (fsw != NULL) {
		fsw_detach_barrier_remove(fsw);
	}
}
1182
1183 int
fsw_netagent_add_remove(struct kern_nexus * nx,boolean_t add)1184 fsw_netagent_add_remove(struct kern_nexus *nx, boolean_t add)
1185 {
1186 struct nx_flowswitch *fsw = NULL;
1187 int error = 0;
1188
1189 SK_LOCK_ASSERT_HELD();
1190 VERIFY(nx != NULL);
1191 VERIFY(NX_PROV(nx) != NULL);
1192 VERIFY(NX_DOM_PROV(nx) != NULL);
1193
1194 if (NX_DOM(nx)->nxdom_type != NEXUS_TYPE_FLOW_SWITCH) {
1195 error = EINVAL;
1196 goto out;
1197 }
1198
1199 fsw = NX_FSW_PRIVATE(nx);
1200 VERIFY(fsw != NULL);
1201 FSW_WLOCK(fsw);
1202
1203 if (fsw->fsw_agent_session == NULL) {
1204 error = ENXIO;
1205 goto out;
1206 }
1207
1208 ASSERT(!uuid_is_null(fsw->fsw_agent_uuid));
1209
1210 if (add) {
1211 if (FSW_NETAGENT_ADDED(fsw)) {
1212 /* agent already added */
1213 error = EEXIST;
1214 } else {
1215 fsw->fsw_state_flags |= FSW_STATEF_NETAGENT_ADDED;
1216 if (if_is_fsw_netagent_enabled()) {
1217 fsw->fsw_state_flags
1218 |= FSW_STATEF_NETAGENT_ENABLED;
1219 }
1220 if_add_netagent(fsw->fsw_ifp, fsw->fsw_agent_uuid);
1221 SK_D("flowswitch netagent added for interface %s",
1222 if_name(fsw->fsw_ifp));
1223 }
1224 } else {
1225 if (!FSW_NETAGENT_ADDED(fsw)) {
1226 /* agent has not been added */
1227 error = ENOENT;
1228 } else {
1229 fsw->fsw_state_flags &= ~(FSW_STATEF_NETAGENT_ADDED |
1230 FSW_STATEF_NETAGENT_ENABLED);
1231 if_delete_netagent(fsw->fsw_ifp, fsw->fsw_agent_uuid);
1232 SK_D("flowswitch netagent removed for interface %s",
1233 if_name(fsw->fsw_ifp));
1234 }
1235 }
1236 out:
1237 if (fsw != NULL) {
1238 FSW_UNLOCK(fsw);
1239 }
1240 return error;
1241 }
1242
1243 void
fsw_netagent_update(struct kern_nexus * nx)1244 fsw_netagent_update(struct kern_nexus *nx)
1245 {
1246 struct nx_flowswitch *fsw = NULL;
1247
1248 SK_LOCK_ASSERT_HELD();
1249 VERIFY(nx != NULL);
1250 VERIFY(NX_PROV(nx) != NULL);
1251 VERIFY(NX_DOM_PROV(nx) != NULL);
1252
1253 if (NX_DOM(nx)->nxdom_type != NEXUS_TYPE_FLOW_SWITCH) {
1254 goto out;
1255 }
1256 fsw = NX_FSW_PRIVATE(nx);
1257 VERIFY(fsw != NULL);
1258 FSW_WLOCK(fsw);
1259 if (fsw->fsw_agent_session == NULL) {
1260 goto out;
1261 }
1262 ASSERT(!uuid_is_null(fsw->fsw_agent_uuid));
1263 uint32_t flags = netagent_get_flags(fsw->fsw_agent_uuid);
1264 const bool ip_agent = ifnet_needs_fsw_ip_netagent(fsw->fsw_ifp);
1265 const bool transport_agent = ifnet_needs_fsw_transport_netagent(fsw->fsw_ifp);
1266 if (ip_agent || transport_agent) {
1267 flags |= NETAGENT_FLAG_NEXUS_LISTENER;
1268 } else {
1269 flags &= ~NETAGENT_FLAG_NEXUS_LISTENER;
1270 }
1271 if (transport_agent) {
1272 flags |= NETAGENT_FLAG_NEXUS_PROVIDER;
1273 } else {
1274 flags &= ~NETAGENT_FLAG_NEXUS_PROVIDER;
1275 }
1276 if (ip_agent) {
1277 flags |= NETAGENT_FLAG_CUSTOM_IP_NEXUS;
1278 } else {
1279 flags &= ~NETAGENT_FLAG_CUSTOM_IP_NEXUS;
1280 }
1281 if (netagent_set_flags(fsw->fsw_agent_uuid, flags) == 0) {
1282 SK_D("flowswitch netagent updated for interface %s",
1283 if_name(fsw->fsw_ifp));
1284 }
1285 out:
1286 if (fsw != NULL) {
1287 FSW_UNLOCK(fsw);
1288 }
1289 }
1290
1291 static int
fsw_port_ctor(struct nx_flowswitch * fsw,struct nexus_vp_adapter * vpna,const struct nxbind * nxb)1292 fsw_port_ctor(struct nx_flowswitch *fsw, struct nexus_vp_adapter *vpna,
1293 const struct nxbind *nxb)
1294 {
1295 #pragma unused(nxb)
1296 int err = 0;
1297
1298 SK_LOCK_ASSERT_HELD();
1299 ASSERT(nxb == NULL || !(nxb->nxb_flags & NXBF_MATCH_UNIQUEID) ||
1300 vpna->vpna_pid == nxb->nxb_pid);
1301
1302 /*
1303 * Reject regular channel open requests unless there is
1304 * something attached to the host port of the flowswitch.
1305 */
1306 if (vpna->vpna_nx_port >= FSW_VP_USER_MIN) {
1307 struct nexus_adapter *na = &vpna->vpna_up;
1308 struct ifnet *ifp = fsw->fsw_ifp;
1309
1310 if (ifp == NULL) {
1311 err = ENXIO;
1312 goto done;
1313 }
1314
1315 /* if adapter supports mitigation, set default value */
1316 if (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)) {
1317 if (IFNET_IS_WIFI(ifp)) {
1318 na->na_ch_mit_ival = CH_MIT_IVAL_WIFI;
1319 } else if (IFNET_IS_CELLULAR(ifp)) {
1320 na->na_ch_mit_ival = CH_MIT_IVAL_CELLULAR;
1321 } else if (IFNET_IS_ETHERNET(ifp)) {
1322 na->na_ch_mit_ival = CH_MIT_IVAL_ETHERNET;
1323 } else {
1324 na->na_ch_mit_ival = CH_MIT_IVAL_DEFAULT;
1325 }
1326 }
1327 }
1328
1329 done:
1330 SK_DF(err ? SK_VERB_ERROR : SK_VERB_FSW,
1331 "fsw 0x%llx nx_port %d vpna_pid %d vpna_pid_bound %u mit_ival %llu "
1332 "(err %d)", SK_KVA(fsw), (int)vpna->vpna_nx_port, vpna->vpna_pid,
1333 vpna->vpna_pid_bound, vpna->vpna_up.na_ch_mit_ival, err);
1334
1335 return err;
1336 }
1337
1338 static bool
fsw_port_dtor(struct nx_flowswitch * fsw,const struct nexus_vp_adapter * vpna)1339 fsw_port_dtor(struct nx_flowswitch *fsw, const struct nexus_vp_adapter *vpna)
1340 {
1341 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1342 nexus_port_t nx_port = vpna->vpna_nx_port;
1343 uint32_t purge_cnt;
1344
1345 ASSERT(fsw == vpna->vpna_fsw);
1346 ASSERT(nx_port != NEXUS_PORT_ANY);
1347
1348 /*
1349 * If this nexus port was bound to a PID, we just need to look at a
1350 * single bucket and iterate from there. Note that in any case, we
1351 * can't just search for a single flow_owner based on the PID itself,
1352 * since a given process may be opening multiple channels to the
1353 * flowswitch; hence we search for the ones matching this nexus port.
1354 *
1355 * Close any open flows on the port and remove the flow owner and
1356 * nexus port binding.
1357 */
1358 purge_cnt = flow_owner_detach_nexus_port(fm, vpna->vpna_pid_bound,
1359 vpna->vpna_pid, nx_port, FALSE);
1360
1361 SK_DF(SK_VERB_FSW,
1362 "fsw 0x%llx nx_port %d pid %d pid_bound %u defunct %u "
1363 "purged %u", SK_KVA(fsw), (int)nx_port,
1364 vpna->vpna_pid, vpna->vpna_pid_bound, vpna->vpna_defunct,
1365 purge_cnt);
1366
1367 return purge_cnt != 0;
1368 }
1369
1370 /*
1371 * Flowswitch nexus port allocator.
1372 *
1373 * A nexus port is represented by a bit in the port bitmap; its state is
1374 * either free or allocated. A free state implies that the port has no
1375 * nxbind AND no nexus adapter association. An allocated state means that
1376 * either it has a nxbind OR a nexus adapter assocation. This routine
1377 * manages the nexus adapter association with a nexus port; nxbind is
1378 * handled separately via nx_fsw_port_bind().
1379 *
1380 * The caller of this routine may optionally pass in a NULL nexus adapter.
1381 * In such a case (*vpna is NULL), this routine checks to see if the port
1382 * has already been associated with an adapter, and returns a reference to
1383 * that adapter. No action is taken on a port that doesn't have an adapter
1384 * associated. Otherwise (*vpna is non-NULL), this routine associates that
1385 * adapter with a port that's not already associated with one; the reference
1386 * to the adapter is untouched here, as the caller is expected to handle it.
1387 *
1388 * The flowswitch code invokes this routine each time it is requested to
1389 * find an adapter via nx_fsw_na_find(). The counterpart of this routine,
1390 * nx_fsw_port_free(), is only executed ONCE by the adapter's destructor.
1391 * This allows for multiple channels to be opened to a nexus port, each
1392 * time holding a reference to that same nexus adapter. The releasing of
1393 * the nexus port only happens when the last channel closes.
1394 */
/*
 * Worker for fsw_port_alloc(): allocate (or look up) the nexus port and
 * bind the resulting adapter to the flowswitch.  See the block comment
 * above for the allocation/association model.
 */
static int
fsw_port_alloc__(struct nx_flowswitch *fsw, struct nxbind *nxb,
    struct nexus_vp_adapter **vpna, nexus_port_t nx_port, struct proc *p)
{
	struct kern_nexus *nx = fsw->fsw_nx;
	/* NOTE(review): refonly is never set; appears vestigial and only
	 * feeds the log statements below — confirm before removing. */
	boolean_t refonly = FALSE;
	int error = 0;

	FSW_WLOCK_ASSERT_HELD(fsw);

	/* may return an adapter already occupying the port */
	error = nx_port_alloc(nx, nx_port, nxb, (struct nexus_adapter **)vpna, p);
	if (error == 0 && *vpna != NULL && !refonly) {
		/* initialize the nexus port and the adapter occupying it */
		(*vpna)->vpna_fsw = fsw;
		(*vpna)->vpna_nx_port = nx_port;
		(*vpna)->vpna_pid = proc_pid(p);
		if (nxb != NULL && (nxb->nxb_flags & NXBF_MATCH_UNIQUEID)) {
			ASSERT((*vpna)->vpna_pid == nxb->nxb_pid);
			(*vpna)->vpna_pid_bound = TRUE;
		} else {
			(*vpna)->vpna_pid_bound = FALSE;
		}

		/* ctor failure undoes the port allocation made above */
		error = fsw_port_ctor(fsw, *vpna, nxb);
		if (error != 0) {
			fsw_port_free(fsw, (*vpna),
			    (*vpna)->vpna_nx_port, FALSE);
		}
	}

#if SK_LOG
	if (*vpna != NULL) {
		SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW,
		    "+++ vpna \"%s\" (0x%llx) <-> fsw 0x%llx "
		    "%sport %d refonly %u (err %d)",
		    (*vpna)->vpna_up.na_name, SK_KVA(*vpna), SK_KVA(fsw),
		    nx_fsw_dom_port_is_reserved(nx, nx_port) ?
		    "[reserved] " : "", (int)nx_port, refonly, error);
	} else {
		SK_DF(error ? SK_VERB_ERROR : SK_VERB_FSW,
		    "+++ fsw 0x%llx nx_port %d refonly %u "
		    "(err %d)", SK_KVA(fsw), (int)nx_port, refonly, error);
	}
#endif /* SK_LOG */

	return error;
}
1442
1443 int
fsw_port_alloc(struct nx_flowswitch * fsw,struct nxbind * nxb,struct nexus_vp_adapter ** vpna,nexus_port_t nx_port,struct proc * p,boolean_t ifattach,boolean_t host)1444 fsw_port_alloc(struct nx_flowswitch *fsw, struct nxbind *nxb,
1445 struct nexus_vp_adapter **vpna, nexus_port_t nx_port, struct proc *p,
1446 boolean_t ifattach, boolean_t host)
1447 {
1448 int err = 0;
1449
1450 FSW_WLOCK_ASSERT_HELD(fsw);
1451
1452 if (ifattach) {
1453 /* override port to either NX_FSW_{HOST,DEV} */
1454 nx_port = (host ? FSW_VP_HOST : FSW_VP_DEV);
1455 /* allocate reserved port for ifattach */
1456 err = fsw_port_alloc__(fsw, nxb, vpna, nx_port, p);
1457 } else if (host) {
1458 /* host is valid only for ifattach */
1459 err = EINVAL;
1460 } else {
1461 /* nexus port otherwise (reserve dev and host for ifattach) */
1462 err = fsw_port_alloc__(fsw, nxb, vpna, nx_port, p);
1463 }
1464
1465 return err;
1466 }
1467
1468 /*
1469 * Remove nexus port association from a nexus adapter. This call is
1470 * the opposite of fsw_port_alloc(), except that it is called only
1471 * at nx_fsw_vp_na_dtor() destructor time. See above notes
1472 * on fsw_port_alloc().
1473 */
void
fsw_port_free(struct nx_flowswitch *fsw, struct nexus_vp_adapter *vpna,
    nexus_port_t nx_port, boolean_t defunct)
{
	struct kern_nexus *nx = fsw->fsw_nx;

	FSW_WLOCK_ASSERT_HELD(fsw);
	ASSERT(vpna->vpna_fsw == fsw);

	if (defunct) {
		/* mark adapter defunct before tearing down its flows */
		vpna->vpna_defunct = TRUE;
		nx_port_defunct(nx, nx_port);
	}

	/* purge flows and owner state; true if any flow was torn down */
	bool destroyed = fsw_port_dtor(fsw, vpna);
	if (destroyed) {
		/*
		 * If the extension's destructor no longer needs to be
		 * bound to any channel client, release the binding.
		 */
		nx_port_unbind(nx, nx_port);
	}

	/*
	 * If this is a defunct, then stop here as the port is still
	 * occupied by the channel. We'll come here again later when
	 * the actual close happens.
	 */
	if (defunct) {
		return;
	}

	SK_DF(SK_VERB_FSW, "--- vpna \"%s\" (0x%llx) -!- fsw 0x%llx "
	    "nx_port %d defunct %u", vpna->vpna_up.na_name, SK_KVA(vpna),
	    SK_KVA(fsw), (int)nx_port, vpna->vpna_defunct);

	/* final close: release the port and sever the association */
	nx_port_free(nx, nx_port);
	vpna->vpna_fsw = NULL;
	vpna->vpna_nx_port = NEXUS_PORT_ANY;
	vpna->vpna_pid_bound = FALSE;
	vpna->vpna_pid = -1;
	vpna->vpna_defunct = FALSE;
}
1517
1518 int
fsw_port_na_activate(struct nx_flowswitch * fsw,struct nexus_vp_adapter * vpna,na_activate_mode_t mode)1519 fsw_port_na_activate(struct nx_flowswitch *fsw,
1520 struct nexus_vp_adapter *vpna, na_activate_mode_t mode)
1521 {
1522 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1523 uint32_t fo_cnt = 0;
1524
1525 SK_LOCK_ASSERT_HELD();
1526
1527 /* The following code relies on the static value asserted below */
1528 _CASSERT(FSW_VP_DEV == 0);
1529 _CASSERT(FSW_VP_HOST == 1);
1530
1531 ASSERT(NA_IS_ACTIVE(&vpna->vpna_up));
1532 ASSERT(vpna->vpna_nx_port != NEXUS_PORT_ANY);
1533
1534 switch (mode) {
1535 case NA_ACTIVATE_MODE_ON:
1536 break;
1537
1538 case NA_ACTIVATE_MODE_DEFUNCT:
1539 break;
1540
1541 case NA_ACTIVATE_MODE_OFF:
1542 break;
1543
1544 default:
1545 VERIFY(0);
1546 /* NOTREACHED */
1547 __builtin_unreachable();
1548 }
1549
1550 /* nothing further to do for special ports */
1551 if (vpna->vpna_nx_port < FSW_VP_USER_MIN) {
1552 goto done;
1553 }
1554
1555 /* activate any flow owner related resources (e.g. flowadv), if any */
1556 fo_cnt = flow_owner_activate_nexus_port(fm, vpna->vpna_pid_bound,
1557 vpna->vpna_pid, vpna->vpna_nx_port, &vpna->vpna_up, mode);
1558
1559 done:
1560 SK_DF(SK_VERB_FSW,
1561 "fsw 0x%llx %s nx_port %d vpna_pid %d vpna_pid_bound %u fo_cnt %u",
1562 SK_KVA(fsw), na_activate_mode2str(mode), (int)vpna->vpna_nx_port,
1563 vpna->vpna_pid, vpna->vpna_pid_bound, fo_cnt);
1564
1565 return 0;
1566 }
1567
1568 int
fsw_port_na_defunct(struct nx_flowswitch * fsw,struct nexus_vp_adapter * vpna)1569 fsw_port_na_defunct(struct nx_flowswitch *fsw, struct nexus_vp_adapter *vpna)
1570 {
1571 int err = 0;
1572
1573 SK_LOCK_ASSERT_HELD();
1574 ASSERT(vpna->vpna_nx_port >= FSW_VP_USER_MIN);
1575
1576 /*
1577 * During defunct, we want to purge all flows associated to this
1578 * port and the flow owner as well. This is accomplished as part
1579 * of calling the port's destructor. However, we still want to
1580 * occupy the nexus port since there's a channel open to it.
1581 */
1582 FSW_WLOCK(fsw);
1583 if (!vpna->vpna_defunct) {
1584 fsw_port_free(fsw, vpna, vpna->vpna_nx_port, TRUE);
1585 } else {
1586 err = EALREADY;
1587 }
1588 FSW_WUNLOCK(fsw);
1589
1590 return err;
1591 }
1592
/*
 * MIB export of per-flow statistics (struct sk_stats_flow).
 *
 * Three lookup modes, selected by the filter:
 *   - by flow UUID: at most one record;
 *   - by info tuple (5-tuple): at most one record;
 *   - unfiltered: every flow, including lingering (deferred-free) ones.
 *
 * Returns the total space required.  Records are written to "out" only
 * while they fit within "len"; passing out == NULL sizes the buffer.
 */
static size_t
fsw_mib_get_flow(struct nx_flowswitch *fsw,
    struct nexus_mib_filter *filter, void *out, size_t len)
{
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	size_t sf_size = sizeof(struct sk_stats_flow);
	__block size_t actual_space = 0;
	__block struct sk_stats_flow *sf = out;
	struct flow_entry *fe;

	FSW_LOCK_ASSERT_HELD(fsw);

	if (filter->nmf_bitmap & NXMIB_FILTER_FLOW_ID) {
		/* single-flow lookup by UUID */
		fe = flow_mgr_get_fe_by_uuid_rlock(fm, filter->nmf_flow_id);
		if (fe != NULL) {
			if (out != NULL && len >= sf_size) {
				flow_entry_stats_get(fe, sf);
			}

			flow_entry_release(&fe);
			return sf_size;
		}
		return 0;
	} else if (filter->nmf_bitmap & NXMIB_FILTER_INFO_TUPLE) {
		/* single-flow lookup by 5-tuple; families must match */
		struct info_tuple *itpl = &filter->nmf_info_tuple;
		struct flow_key fk;
		bzero(&fk, sizeof(fk));
		if (itpl->itpl_local_sa.sa_family == AF_INET &&
		    itpl->itpl_remote_sa.sa_family == AF_INET) {
			fk.fk_mask = FKMASK_5TUPLE;
			fk.fk_ipver = IPVERSION;
			fk.fk_proto = itpl->itpl_proto;
			fk.fk_src4 = itpl->itpl_local_sin.sin_addr;
			fk.fk_dst4 = itpl->itpl_remote_sin.sin_addr;
			fk.fk_sport = itpl->itpl_local_sin.sin_port;
			fk.fk_dport = itpl->itpl_remote_sin.sin_port;
		} else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
		    itpl->itpl_remote_sa.sa_family == AF_INET6) {
			fk.fk_mask = FKMASK_5TUPLE;
			fk.fk_ipver = IPV6_VERSION;
			fk.fk_proto = itpl->itpl_proto;
			fk.fk_src6 = itpl->itpl_local_sin6.sin6_addr;
			fk.fk_dst6 = itpl->itpl_remote_sin6.sin6_addr;
			fk.fk_sport = itpl->itpl_local_sin6.sin6_port;
			fk.fk_dport = itpl->itpl_remote_sin6.sin6_port;
		} else {
			SK_ERR("invalid info tuple: local af %d remote af %d",
			    itpl->itpl_local_sa.sa_family,
			    itpl->itpl_remote_sa.sa_family);
			return 0;
		}

		fe = flow_mgr_find_fe_by_key(fsw->fsw_flow_mgr, &fk);
		if (fe != NULL) {
			if (out != NULL && len >= sf_size) {
				flow_entry_stats_get(fe, sf);
			}
			flow_entry_release(&fe);
			return sf_size;
		}
		return 0;
	}

	/* unfiltered: walk all active flows, always counting space */
	flow_mgr_foreach_flow(fsw->fsw_flow_mgr, ^(struct flow_entry *_fe) {
		actual_space += sf_size;

		if (out == NULL || actual_space > len) {
			return;
		}

		flow_entry_stats_get(_fe, sf);
		sf++;
	});

	/*
	 * Also return the ones in deferred free list.
	 */
	lck_mtx_lock(&fsw->fsw_linger_lock);
	TAILQ_FOREACH(fe, &fsw->fsw_linger_head, fe_linger_link) {
		actual_space += sf_size;
		if (out == NULL || actual_space > len) {
			continue;
		}

		flow_entry_stats_get(fe, sf);
		sf++;
	}
	lck_mtx_unlock(&fsw->fsw_linger_lock);

	return actual_space;
}
1684
/*
 * MIB export of flow advisory tables (struct sk_stats_flow_adv): one
 * variable-length record per open channel, each holding its allocated
 * advisory entries inline.
 *
 * Returns the total space required.  Data is written to "out" only
 * while it fits within "len"; passing out == NULL sizes the buffer.
 */
static size_t
fsw_mib_get_flow_adv(struct nx_flowswitch *fsw,
    struct nexus_mib_filter *filter, void *out, size_t len)
{
#pragma unused(filter)
	uint32_t fae_idx;
	size_t actual_space = 0;
	struct kern_channel *ch = NULL;
	struct sk_stats_flow_adv *sfa = NULL;
	struct sk_stats_flow_adv_ent *sfae = NULL;
	struct __flowadv_entry *fae = NULL;
	size_t sfa_size = sizeof(struct sk_stats_flow_adv);
	size_t sfae_size = sizeof(struct sk_stats_flow_adv_ent);
	uint32_t max_flowadv =
	    fsw->fsw_nx->nx_prov->nxprov_params->nxp_flowadv_max;

	SK_LOCK_ASSERT_HELD();

	sfa = out;
	/* copyout flow advisory table (allocated entries only) */
	STAILQ_FOREACH(ch, &fsw->fsw_nx->nx_ch_head, ch_link) {
		struct skmem_arena *ar;
		struct skmem_arena_nexus *arn;
		struct nexus_adapter *na;

		/* ch_lock isn't needed here since sk_lock is held */
		if ((ch->ch_flags & CHANF_CLOSING) ||
		    (na = ch->ch_na) == NULL) {
			/* channel is closing */
			continue;
		}

		ar = na->na_arena;
		arn = skmem_arena_nexus(ar);

		AR_LOCK(ar);
		if (arn->arn_flowadv_obj == NULL) {
			/* no advisory table; arena must be defunct */
			ASSERT(ar->ar_flags & ARF_DEFUNCT);
			AR_UNLOCK(ar);
			continue;
		}
		actual_space += sfa_size;
		/* fill out flowadv_table info */
		if (out != NULL && actual_space <= len) {
			uuid_copy(sfa->sfa_nx_uuid, fsw->fsw_nx->nx_uuid);
			(void) strlcpy(sfa->sfa_if_name,
			    fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
			sfa->sfa_owner_pid = ch->ch_pid;
			sfa->sfa_entries_count = 0;
		}

		/* fill out flowadv_entries */
		sfae = &sfa->sfa_entries[0];
		for (fae_idx = 0; fae_idx < max_flowadv; fae_idx++) {
			fae = &arn->arn_flowadv_obj[fae_idx];
			/* only allocated slots (non-null flow ID) count */
			if (!uuid_is_null(fae->fae_id)) {
				actual_space += sfae_size;
				if (out == NULL || actual_space > len) {
					continue;
				}

				/* fill out entry */
				uuid_copy(sfae->sfae_flow_id, fae->fae_id);
				sfae->sfae_flags = fae->fae_flags;
				sfae++;
				sfa->sfa_entries_count++;
			}
		}
		/* next record begins after this table and its entries */
		sfa = (struct sk_stats_flow_adv *)
		    ((uintptr_t)out + actual_space);
		AR_UNLOCK(ar);
	}

	return actual_space;
}
1760
1761 static inline void
fsw_fo2sfo(struct nx_flowswitch * fsw,struct flow_owner * fo,struct sk_stats_flow_owner * sfo)1762 fsw_fo2sfo(struct nx_flowswitch *fsw, struct flow_owner *fo,
1763 struct sk_stats_flow_owner *sfo)
1764 {
1765 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1766
1767 uuid_copy(sfo->sfo_nx_uuid, fsw->fsw_nx->nx_uuid);
1768 (void) strlcpy(sfo->sfo_if_name, fsw->fsw_flow_mgr->fm_name,
1769 IFNAMSIZ);
1770 sfo->sfo_bucket_idx = flow_mgr_get_fob_idx(fm, FO_BUCKET(fo));
1771
1772 (void) snprintf(sfo->sfo_name, sizeof(sfo->sfo_name), "%s",
1773 fo->fo_name);
1774 sfo->sfo_pid = fo->fo_pid;
1775 sfo->sfo_nx_port = fo->fo_nx_port;
1776 sfo->sfo_nx_port_pid_bound = fo->fo_nx_port_pid_bound;
1777 sfo->sfo_nx_port_destroyed = fo->fo_nx_port_destroyed;
1778 }
1779
1780 static size_t
fsw_mib_get_flow_owner(struct nx_flowswitch * fsw,struct nexus_mib_filter * filter,void * out,size_t len)1781 fsw_mib_get_flow_owner(struct nx_flowswitch *fsw,
1782 struct nexus_mib_filter *filter, void *out, size_t len)
1783 {
1784 #pragma unused(filter)
1785 uint32_t i;
1786 size_t actual_space = 0;
1787 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1788 struct sk_stats_flow_owner *sfo = out;
1789 size_t sfo_size = sizeof(struct sk_stats_flow_owner);
1790 struct flow_owner *fo;
1791
1792 FSW_LOCK_ASSERT_HELD(fsw);
1793
1794 /*
1795 * Ideally we'd like to hide the bucket level details from flow library
1796 * user, but there is no simple way to iterate flow_owner with
1797 * buckets/RB_TREE nested. So keep it as is.
1798 */
1799 for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
1800 struct flow_owner_bucket *fob = flow_mgr_get_fob_at_idx(fm, i);
1801 FOB_LOCK(fob);
1802 RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
1803 actual_space += sfo_size;
1804 if (out == NULL || actual_space > len) {
1805 continue;
1806 }
1807
1808 fsw_fo2sfo(fsw, fo, sfo);
1809 sfo++;
1810 }
1811 FOB_UNLOCK(fob);
1812 }
1813
1814 return actual_space;
1815 }
1816
1817 static inline void
fsw_fr2sfr(struct nx_flowswitch * fsw,struct flow_route * fr,struct sk_stats_flow_route * sfr,boolean_t ll_scrub)1818 fsw_fr2sfr(struct nx_flowswitch *fsw, struct flow_route *fr,
1819 struct sk_stats_flow_route *sfr, boolean_t ll_scrub)
1820 {
1821 uuid_copy(sfr->sfr_nx_uuid, fsw->fsw_nx->nx_uuid);
1822 uuid_copy(sfr->sfr_uuid, fr->fr_uuid);
1823 (void) strlcpy(sfr->sfr_if_name, fsw->fsw_flow_mgr->fm_name,
1824 IFNAMSIZ);
1825
1826 sfr->sfr_bucket_idx = fr->fr_frb->frb_idx;
1827 sfr->sfr_id_bucket_idx = fr->fr_frib->frib_idx;
1828
1829 if (fr->fr_flags & FLOWRTF_ATTACHED) {
1830 sfr->sfr_flags |= SFLOWRTF_ATTACHED;
1831 }
1832 if (fr->fr_flags & FLOWRTF_ONLINK) {
1833 sfr->sfr_flags |= SFLOWRTF_ONLINK;
1834 }
1835 if (fr->fr_flags & FLOWRTF_GATEWAY) {
1836 sfr->sfr_flags |= SFLOWRTF_GATEWAY;
1837 }
1838 if (fr->fr_flags & FLOWRTF_RESOLVED) {
1839 sfr->sfr_flags |= SFLOWRTF_RESOLVED;
1840 }
1841 if (fr->fr_flags & FLOWRTF_HAS_LLINFO) {
1842 sfr->sfr_flags |= SFLOWRTF_HAS_LLINFO;
1843 }
1844 if (fr->fr_flags & FLOWRTF_DELETED) {
1845 sfr->sfr_flags |= SFLOWRTF_DELETED;
1846 }
1847 if (fr->fr_flags & FLOWRTF_DST_LL_MCAST) {
1848 sfr->sfr_flags |= SFLOWRTF_DST_LL_MCAST;
1849 }
1850 if (fr->fr_flags & FLOWRTF_DST_LL_BCAST) {
1851 sfr->sfr_flags |= SFLOWRTF_DST_LL_BCAST;
1852 }
1853
1854 lck_spin_lock(&fr->fr_reflock);
1855 ASSERT(fr->fr_usecnt >= FLOW_ROUTE_MINREF);
1856 sfr->sfr_usecnt = fr->fr_usecnt - FLOW_ROUTE_MINREF;
1857 if (fr->fr_expire != 0) {
1858 sfr->sfr_expire = (int64_t)(fr->fr_expire - net_uptime());
1859 } else {
1860 sfr->sfr_expire = 0;
1861 }
1862 lck_spin_unlock(&fr->fr_reflock);
1863
1864 sfr->sfr_laddr = fr->fr_laddr;
1865 sfr->sfr_faddr = fr->fr_faddr;
1866 sfr->sfr_gaddr = fr->fr_gaddr;
1867
1868 if (ll_scrub) {
1869 static const uint8_t unspec[ETHER_ADDR_LEN] = {[0] = 2 };
1870 bcopy(&unspec, &sfr->sfr_ether_dhost, ETHER_ADDR_LEN);
1871 } else {
1872 bcopy(&fr->fr_eth.ether_dhost, &sfr->sfr_ether_dhost,
1873 ETHER_ADDR_LEN);
1874 }
1875 }
1876
1877 #if CONFIG_MACF
1878 extern int dlil_lladdr_ckreq;
1879 #endif /* CONFIG_MACF */
1880
1881 static size_t
fsw_mib_get_flow_route(struct nx_flowswitch * fsw,struct nexus_mib_filter * filter,void * out,size_t len,struct proc * p)1882 fsw_mib_get_flow_route(struct nx_flowswitch *fsw,
1883 struct nexus_mib_filter *filter, void *out, size_t len, struct proc *p)
1884 {
1885 #pragma unused(filter)
1886 uint32_t i;
1887 size_t actual_space = 0;
1888 struct flow_mgr *fm = fsw->fsw_flow_mgr;
1889 struct sk_stats_flow_route *sfr = out;
1890 size_t sfo_size = sizeof(struct sk_stats_flow_route);
1891 struct flow_route *fr;
1892 boolean_t ll_scrub;
1893
1894 FSW_LOCK_ASSERT_HELD(fsw);
1895
1896 /*
1897 * To get the link-layer info, the caller must have the following
1898 * in their sandbox profile (or not be sandboxed at all), else we
1899 * scrub it clean just like dlil_ifaddr_bytes() does:
1900 *
1901 * (allow system-info (info-type "net.link.addr"))
1902 *
1903 * If scrubbed, we return 02:00:00:00:00:00.
1904 */
1905 #if CONFIG_MACF
1906 ll_scrub = (dlil_lladdr_ckreq &&
1907 skywalk_mac_system_check_proc_cred(p, "net.link.addr") != 0);
1908 #else /* !CONFIG_MACF */
1909 ll_scrub = FALSE;
1910 #endif /* !CONFIG_MACF */
1911
1912 for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
1913 struct flow_route_bucket *frb = flow_mgr_get_frb_at_idx(fm, i);
1914 FRB_RLOCK(frb);
1915 RB_FOREACH(fr, flow_route_tree, &frb->frb_head) {
1916 actual_space += sfo_size;
1917 if (out == NULL || actual_space > len) {
1918 continue;
1919 }
1920
1921 fsw_fr2sfr(fsw, fr, sfr, ll_scrub);
1922 sfr++;
1923 }
1924 FRB_UNLOCK(frb);
1925 }
1926
1927 return actual_space;
1928 }
1929
1930 static inline void
fsw_nxs2nus(struct nx_flowswitch * fsw,struct nexus_mib_filter * filter,pid_t pid,struct __nx_stats_fsw * nxs,struct sk_stats_userstack * sus)1931 fsw_nxs2nus(struct nx_flowswitch *fsw, struct nexus_mib_filter *filter,
1932 pid_t pid, struct __nx_stats_fsw *nxs, struct sk_stats_userstack *sus)
1933 {
1934 uuid_copy(sus->sus_nx_uuid, fsw->fsw_nx->nx_uuid);
1935 (void) strlcpy(sus->sus_if_name, fsw->fsw_flow_mgr->fm_name,
1936 IFNAMSIZ);
1937 sus->sus_owner_pid = pid;
1938
1939 if (filter->nmf_type & NXMIB_IP_STATS) {
1940 sus->sus_ip = nxs->nxs_ipstat;
1941 }
1942
1943 if (filter->nmf_type & NXMIB_IP6_STATS) {
1944 sus->sus_ip6 = nxs->nxs_ip6stat;
1945 }
1946
1947 if (filter->nmf_type & NXMIB_TCP_STATS) {
1948 sus->sus_tcp = nxs->nxs_tcpstat;
1949 }
1950
1951 if (filter->nmf_type & NXMIB_UDP_STATS) {
1952 sus->sus_udp = nxs->nxs_udpstat;
1953 }
1954
1955 if (filter->nmf_type & NXMIB_QUIC_STATS) {
1956 sus->sus_quic = nxs->nxs_quicstat;
1957 }
1958 }
1959
/*
 * MIB export of per-process user networking stack statistics
 * (struct sk_stats_userstack).
 *
 * Emits one aggregate record (pid 0) holding stats saved from closed
 * ports, then one record per open channel, honoring the optional PID
 * filter.  Returns the total space required; records are written to
 * "out" only while they fit within "len" (out == NULL sizes the buffer).
 */
static size_t
fsw_mib_get_userstack_stats(struct nx_flowswitch *fsw,
    struct nexus_mib_filter *filter, void *out, size_t len)
{
	size_t actual_space = 0;
	struct kern_channel *ch;
	struct __nx_stats_fsw *nxs;
	struct sk_stats_userstack *sus = out;
	size_t sus_size = sizeof(struct sk_stats_userstack);

	SK_LOCK_ASSERT_HELD();

	/* copyout saved stats from closed ports */
	/* included when unfiltered, or when the filter asks for pid 0 */
	if (((filter->nmf_bitmap & NXMIB_FILTER_PID) &&
	    (filter->nmf_pid == 0)) ||
	    !(filter->nmf_bitmap & NXMIB_FILTER_PID)) {
		actual_space += sus_size;
		if (out != NULL && actual_space <= len) {
			nxs = fsw->fsw_closed_na_stats;
			fsw_nxs2nus(fsw, filter, 0, nxs, sus);
			sus++;
		}
	}

	/*
	 * XXX Currently a proc only opens one channel to nexus so we don't do
	 * per proc aggregation of inet stats now as this needs lots of code
	 */
	/* copyout per process stats */
	STAILQ_FOREACH(ch, &fsw->fsw_nx->nx_ch_head, ch_link) {
		struct skmem_arena *ar;
		struct nexus_adapter *na;

		/* ch_lock isn't needed here since sk_lock is held */
		if ((ch->ch_flags & CHANF_CLOSING) ||
		    (na = ch->ch_na) == NULL) {
			/* channel is closing */
			continue;
		}

		/* honor the PID filter, if present */
		if ((filter->nmf_bitmap & NXMIB_FILTER_PID) &&
		    filter->nmf_pid != ch->ch_pid) {
			continue;
		}

		ar = na->na_arena;

		AR_LOCK(ar);
		nxs = skmem_arena_nexus(ar)->arn_stats_obj;
		if (nxs == NULL) {
			/* stats object gone; arena must be defunct */
			ASSERT(ar->ar_flags & ARF_DEFUNCT);
			AR_UNLOCK(ar);
			continue;
		}

		actual_space += sus_size;
		if (out == NULL || actual_space > len) {
			AR_UNLOCK(ar);
			continue;
		}

		fsw_nxs2nus(fsw, filter, ch->ch_pid, nxs, sus);
		sus++;
		AR_UNLOCK(ar);
	}

	return actual_space;
}
2028
2029 static size_t
fsw_mib_get_stats(struct nx_flowswitch * fsw,void * out,size_t len)2030 fsw_mib_get_stats(struct nx_flowswitch *fsw, void *out, size_t len)
2031 {
2032 struct sk_stats_flow_switch *sfs = out;
2033 size_t actual_space = sizeof(struct sk_stats_flow_switch);
2034
2035 if (out != NULL && actual_space <= len) {
2036 uuid_copy(sfs->sfs_nx_uuid, fsw->fsw_nx->nx_uuid);
2037 (void) strlcpy(sfs->sfs_if_name,
2038 fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
2039 sfs->sfs_fsws = fsw->fsw_stats;
2040 }
2041
2042 return actual_space;
2043 }
2044
2045 size_t
fsw_mib_get(struct nx_flowswitch * fsw,struct nexus_mib_filter * filter,void * out,size_t len,struct proc * p)2046 fsw_mib_get(struct nx_flowswitch *fsw, struct nexus_mib_filter *filter,
2047 void *out, size_t len, struct proc *p)
2048 {
2049 size_t ret;
2050
2051 switch (filter->nmf_type) {
2052 case NXMIB_FSW_STATS:
2053 ret = fsw_mib_get_stats(fsw, out, len);
2054 break;
2055 case NXMIB_FLOW:
2056 ret = fsw_mib_get_flow(fsw, filter, out, len);
2057 break;
2058 case NXMIB_FLOW_OWNER:
2059 ret = fsw_mib_get_flow_owner(fsw, filter, out, len);
2060 break;
2061 case NXMIB_FLOW_ROUTE:
2062 ret = fsw_mib_get_flow_route(fsw, filter, out, len, p);
2063 break;
2064 case NXMIB_TCP_STATS:
2065 case NXMIB_UDP_STATS:
2066 case NXMIB_IP_STATS:
2067 case NXMIB_IP6_STATS:
2068 case NXMIB_USERSTACK_STATS:
2069 ret = fsw_mib_get_userstack_stats(fsw, filter, out, len);
2070 break;
2071 case NXMIB_FLOW_ADV:
2072 ret = fsw_mib_get_flow_adv(fsw, filter, out, len);
2073 break;
2074 default:
2075 ret = 0;
2076 break;
2077 }
2078
2079 return ret;
2080 }
2081
2082 void
fsw_fold_stats(struct nx_flowswitch * fsw,void * data,nexus_stats_type_t type)2083 fsw_fold_stats(struct nx_flowswitch *fsw,
2084 void *data, nexus_stats_type_t type)
2085 {
2086 ASSERT(data != NULL);
2087 FSW_LOCK_ASSERT_HELD(fsw);
2088
2089 switch (type) {
2090 case NEXUS_STATS_TYPE_FSW:
2091 {
2092 struct __nx_stats_fsw *d, *s;
2093 d = fsw->fsw_closed_na_stats;
2094 s = data;
2095 ip_stats_fold(&d->nxs_ipstat, &s->nxs_ipstat);
2096 ip6_stats_fold(&d->nxs_ip6stat, &s->nxs_ip6stat);
2097 tcp_stats_fold(&d->nxs_tcpstat, &s->nxs_tcpstat);
2098 udp_stats_fold(&d->nxs_udpstat, &s->nxs_udpstat);
2099 quic_stats_fold(&d->nxs_quicstat, &s->nxs_quicstat);
2100 break;
2101 }
2102 case NEXUS_STATS_TYPE_CHAN_ERRORS:
2103 {
2104 struct __nx_stats_channel_errors *s = data;
2105 fsw_vp_channel_error_stats_fold(&fsw->fsw_stats, s);
2106 break;
2107 }
2108 default:
2109 VERIFY(0);
2110 /* NOTREACHED */
2111 __builtin_unreachable();
2112 }
2113 }
2114
2115 boolean_t
fsw_detach_barrier_add(struct nx_flowswitch * fsw)2116 fsw_detach_barrier_add(struct nx_flowswitch *fsw)
2117 {
2118 lck_mtx_lock_spin(&fsw->fsw_detach_barrier_lock);
2119 if (__improbable(fsw->fsw_detach_flags != 0 ||
2120 fsw->fsw_ifp == NULL || fsw->fsw_agent_session == NULL)) {
2121 lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
2122 return FALSE;
2123 }
2124 fsw->fsw_detach_barriers++;
2125 lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
2126
2127 return TRUE;
2128 }
2129
2130 void
fsw_detach_barrier_remove(struct nx_flowswitch * fsw)2131 fsw_detach_barrier_remove(struct nx_flowswitch *fsw)
2132 {
2133 lck_mtx_lock_spin(&fsw->fsw_detach_barrier_lock);
2134 ASSERT((fsw->fsw_detach_flags & FSW_DETACHF_DETACHED) == 0);
2135 ASSERT(fsw->fsw_detach_barriers != 0);
2136 fsw->fsw_detach_barriers--;
2137 /* if there's a thread waiting to detach the interface, let it know */
2138 if (__improbable((fsw->fsw_detach_waiters > 0) &&
2139 (fsw->fsw_detach_barriers == 0))) {
2140 fsw->fsw_detach_waiters = 0;
2141 wakeup(&fsw->fsw_detach_waiters);
2142 }
2143 lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
2144 }
2145
/*
 * Generic resolver for non-Ethernet interfaces.
 *
 * No link-layer address resolution actually takes place here; the routine
 * validates (and, if needed, re-establishes) the flow route, then marks it
 * FLOWRTF_RESOLVED for consistency with resolvers that do real work.
 * Returns 0 on success or an errno value (e.g. EHOSTUNREACH); on failure
 * the route is cleaned up and left unresolved.
 */
int
fsw_generic_resolve(struct nx_flowswitch *fsw, struct flow_route *fr,
    struct __kern_packet *pkt)
{
#pragma unused(pkt)
#if SK_LOG
	char dst_s[MAX_IPv6_STR_LEN];	/* scratch buffer for logging */
#endif /* SK_LOG */
	struct ifnet *ifp = fsw->fsw_ifp;
	struct rtentry *tgt_rt = NULL;
	int err = 0;

	ASSERT(fr != NULL);
	ASSERT(ifp != NULL);

	FR_LOCK(fr);
	/*
	 * If the destination is on-link, we use the final destination
	 * address as target.  If it's off-link, we use the gateway
	 * address instead.  Point tgt_rt to the destination or
	 * gateway route accordingly.
	 */
	if (fr->fr_flags & FLOWRTF_ONLINK) {
		tgt_rt = fr->fr_rt_dst;
	} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
		tgt_rt = fr->fr_rt_gw;
	}

	/*
	 * Perform another routing table lookup if necessary: the cached
	 * route is missing or no longer up, or a reconfigure was requested.
	 */
	if (tgt_rt == NULL || !(tgt_rt->rt_flags & RTF_UP) ||
	    fr->fr_want_configure) {
		if (fr->fr_want_configure == 0) {
			atomic_add_32(&fr->fr_want_configure, 1);
		}
		err = flow_route_configure(fr, ifp, NULL);
		if (err != 0) {
			SK_ERR("failed to configure route to %s on %s (err %d)",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname, err);
			goto done;
		}

		/* refresh pointers; flow_route_configure may have replaced them */
		if (fr->fr_flags & FLOWRTF_ONLINK) {
			tgt_rt = fr->fr_rt_dst;
		} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
			tgt_rt = fr->fr_rt_gw;
		}
	}

	/* neither on-link nor gateway after (re)configuration: unreachable */
	if (__improbable(!(fr->fr_flags & (FLOWRTF_ONLINK | FLOWRTF_GATEWAY)))) {
		err = EHOSTUNREACH;
		SK_ERR("invalid route for %s on %s (err %d)",
		    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
		    sizeof(dst_s)), ifp->if_xname, err);
		goto done;
	}

	ASSERT(tgt_rt != NULL);

done:
	if (__probable(err == 0)) {
		/*
		 * There's no actual resolution taking place here, so just
		 * mark it with FLOWRTF_RESOLVED for consistency.
		 */
		atomic_bitset_32(&fr->fr_flags, FLOWRTF_RESOLVED);
		atomic_set_32(&fr->fr_want_probe, 0);
	} else {
		atomic_bitclear_32(&fr->fr_flags, FLOWRTF_RESOLVED);
		flow_route_cleanup(fr);
	}
	FR_UNLOCK(fr);

	return err;
}
2227
/*
 * Consume flowswitch-related tunables from the kernel boot-args.
 * Leaves fsw_use_dual_sized_pool at its compiled-in default when the
 * boot-arg is absent.
 */
static void
fsw_read_boot_args(void)
{
	(void) PE_parse_boot_argn("fsw_use_dual_sized_pool",
	    &fsw_use_dual_sized_pool, sizeof(fsw_use_dual_sized_pool));
}
2234
2235 void
fsw_init(void)2236 fsw_init(void)
2237 {
2238 _CASSERT(NX_FSW_CHUNK_FREE == (uint64_t)-1);
2239 _CASSERT(PKT_MAX_PROTO_HEADER_SIZE <= NX_FSW_MINBUFSIZE);
2240
2241 if (!__nx_fsw_inited) {
2242 fsw_read_boot_args();
2243 /*
2244 * Register callbacks for interface & protocol events
2245 * Use dummy arg for callback cookie.
2246 */
2247 __nx_fsw_ifnet_eventhandler_tag =
2248 EVENTHANDLER_REGISTER(&ifnet_evhdlr_ctxt,
2249 ifnet_event, fsw_ifnet_event_callback,
2250 eventhandler_entry_dummy_arg, EVENTHANDLER_PRI_ANY);
2251 VERIFY(__nx_fsw_ifnet_eventhandler_tag != NULL);
2252
2253 __nx_fsw_protoctl_eventhandler_tag =
2254 EVENTHANDLER_REGISTER(&protoctl_evhdlr_ctxt,
2255 protoctl_event, fsw_protoctl_event_callback,
2256 eventhandler_entry_dummy_arg, EVENTHANDLER_PRI_ANY);
2257 VERIFY(__nx_fsw_protoctl_eventhandler_tag != NULL);
2258 __nx_fsw_inited = 1;
2259 }
2260 }
2261
2262 void
fsw_uninit(void)2263 fsw_uninit(void)
2264 {
2265 if (__nx_fsw_inited) {
2266 EVENTHANDLER_DEREGISTER(&ifnet_evhdlr_ctxt, ifnet_event,
2267 __nx_fsw_ifnet_eventhandler_tag);
2268 EVENTHANDLER_DEREGISTER(&protoctl_evhdlr_ctxt, protoctl_event,
2269 __nx_fsw_protoctl_eventhandler_tag);
2270
2271 __nx_fsw_inited = 0;
2272 }
2273 }
2274
2275 struct nx_flowswitch *
fsw_alloc(zalloc_flags_t how)2276 fsw_alloc(zalloc_flags_t how)
2277 {
2278 struct nx_flowswitch *fsw;
2279 struct __nx_stats_fsw *nsfw;
2280
2281 SK_LOCK_ASSERT_HELD();
2282
2283 nsfw = zalloc_flags(nx_fsw_stats_zone, how | Z_ZERO);
2284 if (nsfw == NULL) {
2285 return NULL;
2286 }
2287
2288 fsw = zalloc_flags(nx_fsw_zone, how | Z_ZERO);
2289 if (fsw == NULL) {
2290 zfree(nx_fsw_stats_zone, nsfw);
2291 return NULL;
2292 }
2293
2294 FSW_RWINIT(fsw);
2295 fsw->fsw_dev_ch = NULL;
2296 fsw->fsw_host_ch = NULL;
2297 fsw->fsw_closed_na_stats = nsfw;
2298
2299 SK_DF(SK_VERB_MEM, "fsw 0x%llx ALLOC", SK_KVA(fsw));
2300
2301 return fsw;
2302 }
2303
/*
 * Detach the flowswitch from its host port / interface.
 *
 * When purge is FALSE (NA detach path) the call fails with EBUSY if a
 * detach is already in flight.  When purge is TRUE (flowswitch free path)
 * the call blocks until all outstanding barriers drain and always
 * completes the teardown.  Returns 0 on success.
 *
 * Locking: caller holds sk_lock; it is dropped and re-acquired around the
 * barrier wait to preserve lock ordering with fsw_detach_barrier_lock.
 */
static int
fsw_detach(struct nx_flowswitch *fsw, struct nexus_adapter *hwna,
    boolean_t purge)
{
	struct kern_nexus_provider *nx_prov = fsw->fsw_nx->nx_prov;
	boolean_t do_dtor = FALSE;

	SK_LOCK_ASSERT_HELD();

	/*
	 * return error if the host port detach is in progress
	 * or already detached.
	 * For the case of flowswitch free (i.e. purge is TRUE) we have to
	 * cleanup everything, so we will block if needed.
	 */
	lck_mtx_lock(&fsw->fsw_detach_barrier_lock);
	if (!purge && fsw->fsw_detach_flags != 0) {
		SK_ERR("fsw detaching");
		lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
		return EBUSY;
	}
	VERIFY(purge || fsw->fsw_detach_flags == 0);
	/*
	 * mark the flowswitch as detaching and release sk_lock while
	 * waiting for other threads to exit. Maintain lock/unlock
	 * ordering between the two locks.
	 */
	fsw->fsw_detach_flags |= FSW_DETACHF_DETACHING;
	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
	SK_UNLOCK();

	/*
	 * wait until all threads needing accesses to the flowswitch
	 * netagent get out, and mark this as detached to prevent
	 * further access requests from being admitted.
	 */
	lck_mtx_lock(&fsw->fsw_detach_barrier_lock);
	while (fsw->fsw_detach_barriers != 0) {
		fsw->fsw_detach_waiters++;
		/* woken by fsw_detach_barrier_remove() on last barrier */
		(void) msleep(&fsw->fsw_detach_waiters,
		    &fsw->fsw_detach_barrier_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
	}
	VERIFY(fsw->fsw_detach_barriers == 0);
	VERIFY(fsw->fsw_detach_flags != 0);
	fsw->fsw_detach_flags &= ~FSW_DETACHF_DETACHING;
	/*
	 * if the NA detach thread as well as the flowswitch free thread were
	 * both waiting, then the thread which wins the race is responsible
	 * for doing the dtor work.
	 */
	if (fsw->fsw_detach_flags == 0) {
		fsw->fsw_detach_flags |= FSW_DETACHF_DETACHED;
		do_dtor = TRUE;
	}
	VERIFY(fsw->fsw_detach_flags == FSW_DETACHF_DETACHED);
	lck_mtx_unlock(&fsw->fsw_detach_barrier_lock);
	SK_LOCK();

	FSW_WLOCK(fsw);
	if (do_dtor) {
		/* winner of the race tears down the interface binding */
		if (fsw->fsw_ifp != NULL) {
			fsw_teardown_ifp(fsw, hwna);
			ASSERT(fsw->fsw_ifp == NULL);
			ASSERT(fsw->fsw_nifna == NULL);
		}
		bzero(fsw->fsw_slla, sizeof(fsw->fsw_slla));
		nx_prov->nxprov_params->nxp_ifindex = 0;
		/* free any flow entries in the deferred list */
		fsw_linger_purge(fsw);
	}
	/*
	 * If we are destroying the instance, release lock to let all
	 * outstanding agent threads to enter, followed by waiting until
	 * all of them exit the critical section before continuing.
	 */
	if (purge) {
		FSW_UNLOCK(fsw);
		flow_mgr_terminate(fsw->fsw_flow_mgr);
		FSW_WLOCK(fsw);
	}
	FSW_WUNLOCK(fsw);
	return 0;
}
2388
2389 void
fsw_free(struct nx_flowswitch * fsw)2390 fsw_free(struct nx_flowswitch *fsw)
2391 {
2392 int err;
2393
2394 SK_LOCK_ASSERT_HELD();
2395 ASSERT(fsw != NULL);
2396
2397 err = fsw_detach(fsw, NULL, TRUE);
2398 VERIFY(err == 0);
2399
2400 fsw_dp_dtor(fsw);
2401
2402 ASSERT(fsw->fsw_dev_ch == NULL);
2403 ASSERT(fsw->fsw_host_ch == NULL);
2404 ASSERT(fsw->fsw_closed_na_stats != NULL);
2405 zfree(nx_fsw_stats_zone, fsw->fsw_closed_na_stats);
2406 fsw->fsw_closed_na_stats = NULL;
2407 FSW_RWDESTROY(fsw);
2408
2409 SK_DF(SK_VERB_MEM, "fsw 0x%llx FREE", SK_KVA(fsw));
2410 zfree(nx_fsw_zone, fsw);
2411 }
2412